Default Skyvern 2.0 to google.com if generated website fails to load (#2291)
This commit is contained in:
@@ -238,6 +238,8 @@ class Settings(BaseSettings):
|
|||||||
ENABLE_LOG_ARTIFACTS: bool = False
|
ENABLE_LOG_ARTIFACTS: bool = False
|
||||||
ENABLE_CODE_BLOCK: bool = False
|
ENABLE_CODE_BLOCK: bool = False
|
||||||
|
|
||||||
|
TASK_BLOCKED_SITE_FALLBACK_URL: str = "https://www.google.com"
|
||||||
|
|
||||||
# SkyvernClient Settings
|
# SkyvernClient Settings
|
||||||
SKYVERN_BASE_URL: str = "https://api.skyvern.com"
|
SKYVERN_BASE_URL: str = "https://api.skyvern.com"
|
||||||
SKYVERN_API_KEY: str = "PLACEHOLDER"
|
SKYVERN_API_KEY: str = "PLACEHOLDER"
|
||||||
|
|||||||
@@ -10,7 +10,13 @@ from playwright.async_api import Page
|
|||||||
from sqlalchemy.exc import OperationalError
|
from sqlalchemy.exc import OperationalError
|
||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.exceptions import FailedToSendWebhook, TaskTerminationError, TaskV2NotFound, UrlGenerationFailure
|
from skyvern.exceptions import (
|
||||||
|
FailedToSendWebhook,
|
||||||
|
MissingBrowserState,
|
||||||
|
TaskTerminationError,
|
||||||
|
TaskV2NotFound,
|
||||||
|
UrlGenerationFailure,
|
||||||
|
)
|
||||||
from skyvern.forge import app
|
from skyvern.forge import app
|
||||||
from skyvern.forge.prompts import prompt_engine
|
from skyvern.forge.prompts import prompt_engine
|
||||||
from skyvern.forge.sdk.artifact.models import ArtifactType
|
from skyvern.forge.sdk.artifact.models import ArtifactType
|
||||||
@@ -481,31 +487,86 @@ async def run_task_v2_helper(
|
|||||||
task_history_record: dict[str, Any] = {}
|
task_history_record: dict[str, Any] = {}
|
||||||
context = skyvern_context.ensure_context()
|
context = skyvern_context.ensure_context()
|
||||||
|
|
||||||
|
# Always ensure browser_state is available at the start of the loop
|
||||||
current_url: str | None = None
|
current_url: str | None = None
|
||||||
page: Page | None = None
|
page: Page | None = None
|
||||||
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id, workflow_run.parent_workflow_run_id)
|
fallback_url = settings.TASK_BLOCKED_SITE_FALLBACK_URL
|
||||||
if browser_state:
|
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(
|
||||||
|
workflow_run_id=workflow_run_id, parent_workflow_run_id=workflow_run.parent_workflow_run_id
|
||||||
|
)
|
||||||
|
if browser_state is None:
|
||||||
|
fallback_occurred = False
|
||||||
|
try:
|
||||||
|
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
||||||
|
workflow_run=workflow_run,
|
||||||
|
url=url,
|
||||||
|
browser_session_id=browser_session_id,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
LOG.warning("Failed to get or create browser state, fallback to Google", exc_info=True, url=url)
|
||||||
|
|
||||||
|
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
||||||
|
workflow_run=workflow_run,
|
||||||
|
url=fallback_url,
|
||||||
|
browser_session_id=browser_session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
fallback_occurred = True
|
||||||
|
|
||||||
|
if browser_state is None:
|
||||||
|
LOG.error("Failed to create browser state even after fallback", workflow_run_id=workflow_run_id)
|
||||||
|
raise MissingBrowserState(workflow_run_id=workflow_run_id)
|
||||||
|
|
||||||
|
page = await browser_state.get_working_page()
|
||||||
|
|
||||||
|
page_loaded = False
|
||||||
|
if page:
|
||||||
|
try:
|
||||||
|
# Verify the page loaded by reading its content and checking it for known error markers
|
||||||
|
# page will always be None if browser state failed to load
|
||||||
|
page_loaded = await browser_state.validate_browser_context(page)
|
||||||
|
except Exception:
|
||||||
|
page_loaded = False
|
||||||
|
LOG.warning(
|
||||||
|
"Page failed to load properly, fallback to Google",
|
||||||
|
exc_info=True,
|
||||||
|
url=url,
|
||||||
|
current_url=current_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not page_loaded:
|
||||||
|
# Page failed to load properly, fallback to Google
|
||||||
|
if page:
|
||||||
|
try:
|
||||||
|
await page.goto(fallback_url, timeout=15000)
|
||||||
|
fallback_occurred = True
|
||||||
|
except Exception:
|
||||||
|
LOG.exception("Failed to load Google fallback", exc_info=True, url=url, current_url=current_url)
|
||||||
|
else:
|
||||||
page = await browser_state.get_working_page()
|
page = await browser_state.get_working_page()
|
||||||
if page:
|
if page:
|
||||||
current_url = await SkyvernFrame.get_url(page)
|
current_url = await SkyvernFrame.get_url(page)
|
||||||
|
|
||||||
if i == 0 and current_url != url:
|
if i == 0 and current_url != url:
|
||||||
# The first iteration is always a GOTO_URL task
|
if fallback_occurred:
|
||||||
task_type = "goto_url"
|
plan = f"Go to Google because the intended website ({url}) failed to load properly."
|
||||||
plan = f"Go to this website: {url}"
|
task_type = "goto_url"
|
||||||
task_history_record = {"type": task_type, "task": plan}
|
task_history_record = {"type": task_type, "task": plan}
|
||||||
block, block_yaml_list, parameter_yaml_list = await _generate_goto_url_task(
|
block, block_yaml_list, parameter_yaml_list = await _generate_goto_url_task(
|
||||||
workflow_id=workflow_id,
|
workflow_id=workflow_id,
|
||||||
url=url,
|
url=fallback_url,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
# Page loaded successfully, proceed with original URL
|
||||||
|
plan = f"Go to this website: {url}"
|
||||||
|
task_type = "goto_url"
|
||||||
|
task_history_record = {"type": task_type, "task": plan}
|
||||||
|
block, block_yaml_list, parameter_yaml_list = await _generate_goto_url_task(
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
url=url,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
if browser_state is None:
|
|
||||||
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
|
||||||
workflow_run=workflow_run,
|
|
||||||
url=url,
|
|
||||||
browser_session_id=browser_session_id,
|
|
||||||
)
|
|
||||||
scraped_page = await scrape_website(
|
scraped_page = await scrape_website(
|
||||||
browser_state,
|
browser_state,
|
||||||
url,
|
url,
|
||||||
|
|||||||
@@ -274,16 +274,6 @@ class BrowserContextFactory:
|
|||||||
|
|
||||||
raise UnknownErrorWhileCreatingBrowserContext(browser_type, e) from e
|
raise UnknownErrorWhileCreatingBrowserContext(browser_type, e) from e
|
||||||
|
|
||||||
@classmethod
def set_validate_browser_context(
    cls,
    validator: Callable[[Page], Awaitable[bool]],
) -> None:
    """Register ``validator`` as the class-level page validation hook.

    Args:
        validator: Async callable that receives a page and resolves to a
            bool. It is stored on ``cls._validator``.
    """
    cls._validator = validator
|
|
||||||
|
|
||||||
@classmethod
async def validate_browser_context(cls, page: Page) -> bool:
    """Run the registered validation hook against ``page``.

    Args:
        page: The page handed to the validator.

    Returns:
        ``True`` when no validator is registered (``cls._validator`` is
        ``None``); otherwise whatever the validator resolves to.
    """
    hook = cls._validator
    if hook is not None:
        return await hook(page)
    # No hook registered: treat the page as valid.
    return True
|
|
||||||
|
|
||||||
|
|
||||||
class VideoArtifact(BaseModel):
|
class VideoArtifact(BaseModel):
|
||||||
video_path: str | None = None
|
video_path: str | None = None
|
||||||
@@ -603,6 +593,7 @@ class BrowserState:
|
|||||||
loading_time=end_time - start_time,
|
loading_time=end_time - start_time,
|
||||||
url=url,
|
url=url,
|
||||||
)
|
)
|
||||||
|
# Do we need this?
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
LOG.info(f"Successfully went to {url}", url=url, retry_time=retry_time)
|
LOG.info(f"Successfully went to {url}", url=url, retry_time=retry_time)
|
||||||
return
|
return
|
||||||
@@ -639,6 +630,30 @@ class BrowserState:
|
|||||||
await self.set_working_page(last_page, len(self.browser_context.pages) - 1)
|
await self.set_working_page(last_page, len(self.browser_context.pages) - 1)
|
||||||
return last_page
|
return last_page
|
||||||
|
|
||||||
|
async def validate_browser_context(self, page: Page) -> bool:
    """Report whether ``page`` holds usable content.

    The page content is read through a ``SkyvernFrame``. The page is rejected
    when that read raises, or when the content contains one of the error
    marker strings checked below.

    Args:
        page: The page whose content is inspected.

    Returns:
        ``False`` if the content could not be read or contains an error
        marker, ``True`` otherwise.
    """
    try:
        frame = await SkyvernFrame.create_instance(frame=page)
        content = await frame.get_content()
    except Exception:
        # Any failure while reading the content counts as an invalid page.
        LOG.error(
            "Error happened while getting the first page content",
            exc_info=True,
        )
        return False

    # (marker searched for in the content, warning logged when it is found),
    # checked in this order.
    rejections = (
        (
            "Bad gateway error",
            "Bad gateway error on the page, recreate a new browser context with another proxy node",
        ),
        (
            "client_connect_forbidden_host",
            "capture the client_connect_forbidden_host error on the page, recreate a new browser context with another proxy node",
        ),
    )
    for marker, warning in rejections:
        if marker in content:
            LOG.warning(warning)
            return False

    return True
|
||||||
|
|
||||||
async def must_get_working_page(self) -> Page:
|
async def must_get_working_page(self) -> Page:
|
||||||
page = await self.get_working_page()
|
page = await self.get_working_page()
|
||||||
assert page is not None
|
assert page is not None
|
||||||
@@ -710,7 +725,7 @@ class BrowserState:
|
|||||||
)
|
)
|
||||||
page = await self.__assert_page()
|
page = await self.__assert_page()
|
||||||
|
|
||||||
if not await BrowserContextFactory.validate_browser_context(await self.get_working_page()):
|
if not await self.validate_browser_context(await self.get_working_page()):
|
||||||
if not await self.close_current_open_page():
|
if not await self.close_current_open_page():
|
||||||
LOG.warning("Failed to close the current open page, going to skip the browser context validation")
|
LOG.warning("Failed to close the current open page, going to skip the browser context validation")
|
||||||
return page
|
return page
|
||||||
|
|||||||
Reference in New Issue
Block a user