diff --git a/skyvern/constants.py b/skyvern/constants.py index 15705093..5bbe7455 100644 --- a/skyvern/constants.py +++ b/skyvern/constants.py @@ -14,6 +14,7 @@ BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minute DOWNLOAD_FILE_PREFIX = "downloads" SAVE_DOWNLOADED_FILES_TIMEOUT = 180 GET_DOWNLOADED_FILES_TIMEOUT = 30 +NAVIGATION_MAX_RETRY_TIME = 3 # reserved fields for navigation payload SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index 58edfd11..21ba8cbd 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -19,14 +19,12 @@ import filetype import structlog from email_validator import EmailNotValidError, validate_email from jinja2 import Template -from playwright.async_api import Error from pydantic import BaseModel, Field from skyvern.config import settings from skyvern.exceptions import ( ContextParameterValueNotFound, DisabledBlockExecutionError, - FailedToNavigateToUrl, MissingBrowserState, MissingBrowserStatePage, SkyvernException, @@ -370,70 +368,60 @@ class BaseTaskBlock(Block): raise Exception(f"Organization is missing organization_id={workflow.organization_id}") browser_state: BrowserState | None = None - try: - if is_first_task: + if is_first_task: + # the first task block will create the browser state and do the navigation + try: browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( workflow_run=workflow_run, url=self.url ) - else: - browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id) - if browser_state is None: - raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id) - except FailedToNavigateToUrl as e: - # Make sure the task is marked as failed in the database before raising the exception - await app.DATABASE.update_task( - task.task_id, - status=TaskStatus.failed, - organization_id=workflow.organization_id, - failure_reason=str(e), - ) - raise e - except Exception as e: - await app.DATABASE.update_task( - task.task_id, - status=TaskStatus.failed, - organization_id=workflow.organization_id, - failure_reason=str(e), - ) - LOG.exception( - "Failed to get browser state for task", - task_id=task.task_id, - workflow_run_id=workflow_run_id, - ) - raise e - - working_page = await browser_state.get_working_page() - if not working_page: - LOG.error( - "BrowserState has no page", - workflow_run_id=workflow_run.workflow_run_id, - ) - raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id) - - LOG.info( - "Navigating to page", - url=self.url, - workflow_run_id=workflow_run_id, - task_id=task.task_id, - workflow_id=workflow.workflow_id, - organization_id=workflow.organization_id, - step_id=step.step_id, - ) - - if self.url: - try: - await working_page.goto(self.url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS) - except Error as playright_error: - LOG.warning(f"Error while navigating to url: {str(playright_error)}") + except Exception as e: + LOG.exception( + "Failed to get browser state for first task", + task_id=task.task_id, + workflow_run_id=workflow_run_id, + ) # Make sure the task is marked as failed in the database before raising the exception - exc = FailedToNavigateToUrl(url=self.url, error_message=str(playright_error)) await app.DATABASE.update_task( task.task_id, status=TaskStatus.failed, organization_id=workflow.organization_id, - failure_reason=str(exc), + failure_reason=str(e), ) - raise exc + raise e + else: + # if not the first task block, need to navigate manually + browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id) + if browser_state is None: + raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id) + + working_page = await browser_state.get_working_page() + if not working_page: + LOG.error( + "BrowserState has no page", + workflow_run_id=workflow_run.workflow_run_id, + ) + raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id) + + if self.url: + LOG.info( + "Navigating to page", + url=self.url, + workflow_run_id=workflow_run_id, + task_id=task.task_id, + workflow_id=workflow.workflow_id, + organization_id=workflow.organization_id, + step_id=step.step_id, + ) + try: + await browser_state.navigate_to_url(page=working_page, url=self.url) + except Exception as e: + await app.DATABASE.update_task( + task.task_id, + status=TaskStatus.failed, + organization_id=workflow.organization_id, + failure_reason=str(e), + ) + raise e try: await app.agent.execute_step( diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 5957a2da..fde71c66 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -14,7 +14,7 @@ from playwright.async_api import BrowserContext, ConsoleMessage, Download, Error from pydantic import BaseModel, PrivateAttr from skyvern.config import settings -from skyvern.constants import BROWSER_CLOSE_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT +from skyvern.constants import BROWSER_CLOSE_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT, NAVIGATION_MAX_RETRY_TIME from skyvern.exceptions import ( FailedToNavigateToUrl, FailedToReloadPage, @@ -346,49 +346,48 @@ class BrowserState: LOG.info("browser context is created") if await self.get_working_page() is None: - success = False - retries = 0 + page = await self.browser_context.new_page() + await self.set_working_page(page, 0) + await self._close_all_other_pages() - while not success and retries < 3: - try: - LOG.info("Creating a new page") - page = await self.browser_context.new_page() - await self.set_working_page(page, 0) - await self._close_all_other_pages() - LOG.info("A new page is created") - if url: - LOG.info(f"Navigating page to {url} and waiting for 5 seconds") - try: - start_time = time.time() - await page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS) - end_time = time.time() - LOG.info( - "Page loading time", - loading_time=end_time - start_time, - url=url, - ) - await asyncio.sleep(5) - except Error as playright_error: - LOG.warning( - f"Error while navigating to url: {str(playright_error)}", - exc_info=True, - ) - raise FailedToNavigateToUrl(url=url, error_message=str(playright_error)) - success = True - LOG.info(f"Successfully went to {url}") - else: - success = True - except Exception as e: - LOG.exception( - f"Error while creating or navigating to a new page. Waiting for 5 seconds. Error: {str(e)}", - ) - retries += 1 - # Wait for 5 seconds before retrying - await asyncio.sleep(5) - if retries >= 3: - LOG.exception(f"Failed to create a new page after 3 retries: {str(e)}") - raise e - LOG.info(f"Retrying to create a new page. Retry count: {retries}") + if url: + await self.navigate_to_url(page=page, url=url) + + async def navigate_to_url(self, page: Page, url: str, retry_times: int = NAVIGATION_MAX_RETRY_TIME) -> None: + navigation_error: Exception = FailedToNavigateToUrl(url=url, error_message="") + for retry_time in range(retry_times): + LOG.info(f"Trying to navigate to {url} and waiting for 5 seconds.", url=url, retry_time=retry_time) + try: + start_time = time.time() + await page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS) + end_time = time.time() + LOG.info( + "Page loading time", + loading_time=end_time - start_time, + url=url, + ) + await asyncio.sleep(5) + LOG.info(f"Successfully went to {url}", url=url, retry_time=retry_time) + return + + except Exception as e: + navigation_error = e + LOG.warning( + f"Error while navigating to url: {str(navigation_error)}", + exc_info=True, + url=url, + retry_time=retry_time, + ) + # Wait for 5 seconds before retrying + await asyncio.sleep(5) + else: + LOG.exception( + f"Failed to navigate to {url} after {retry_times} retries: {str(navigation_error)}", + url=url, + ) + if isinstance(navigation_error, Error): + raise FailedToNavigateToUrl(url=url, error_message=str(navigation_error)) + raise navigation_error async def get_working_page(self) -> Page | None: # HACK: currently, assuming the last page is always the working page.