refactor navigation logic (#1318)

This commit is contained in:
LawyZheng
2024-12-04 22:46:07 +08:00
committed by GitHub
parent 521e355591
commit bee4d7b415
3 changed files with 88 additions and 100 deletions

View File

@@ -14,6 +14,7 @@ BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minute
DOWNLOAD_FILE_PREFIX = "downloads" DOWNLOAD_FILE_PREFIX = "downloads"
SAVE_DOWNLOADED_FILES_TIMEOUT = 180 SAVE_DOWNLOADED_FILES_TIMEOUT = 180
GET_DOWNLOADED_FILES_TIMEOUT = 30 GET_DOWNLOADED_FILES_TIMEOUT = 30
NAVIGATION_MAX_RETRY_TIME = 3
# reserved fields for navigation payload # reserved fields for navigation payload
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"

View File

@@ -19,14 +19,12 @@ import filetype
import structlog import structlog
from email_validator import EmailNotValidError, validate_email from email_validator import EmailNotValidError, validate_email
from jinja2 import Template from jinja2 import Template
from playwright.async_api import Error
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from skyvern.config import settings from skyvern.config import settings
from skyvern.exceptions import ( from skyvern.exceptions import (
ContextParameterValueNotFound, ContextParameterValueNotFound,
DisabledBlockExecutionError, DisabledBlockExecutionError,
FailedToNavigateToUrl,
MissingBrowserState, MissingBrowserState,
MissingBrowserStatePage, MissingBrowserStatePage,
SkyvernException, SkyvernException,
@@ -370,70 +368,60 @@ class BaseTaskBlock(Block):
raise Exception(f"Organization is missing organization_id={workflow.organization_id}") raise Exception(f"Organization is missing organization_id={workflow.organization_id}")
browser_state: BrowserState | None = None browser_state: BrowserState | None = None
try: if is_first_task:
if is_first_task: # the first task block will create the browser state and do the navigation
try:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=self.url workflow_run=workflow_run, url=self.url
) )
else: except Exception as e:
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id) LOG.exception(
if browser_state is None: "Failed to get browser state for first task",
raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id) task_id=task.task_id,
except FailedToNavigateToUrl as e: workflow_run_id=workflow_run_id,
# Make sure the task is marked as failed in the database before raising the exception )
await app.DATABASE.update_task(
task.task_id,
status=TaskStatus.failed,
organization_id=workflow.organization_id,
failure_reason=str(e),
)
raise e
except Exception as e:
await app.DATABASE.update_task(
task.task_id,
status=TaskStatus.failed,
organization_id=workflow.organization_id,
failure_reason=str(e),
)
LOG.exception(
"Failed to get browser state for task",
task_id=task.task_id,
workflow_run_id=workflow_run_id,
)
raise e
working_page = await browser_state.get_working_page()
if not working_page:
LOG.error(
"BrowserState has no page",
workflow_run_id=workflow_run.workflow_run_id,
)
raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
LOG.info(
"Navigating to page",
url=self.url,
workflow_run_id=workflow_run_id,
task_id=task.task_id,
workflow_id=workflow.workflow_id,
organization_id=workflow.organization_id,
step_id=step.step_id,
)
if self.url:
try:
await working_page.goto(self.url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
except Error as playright_error:
LOG.warning(f"Error while navigating to url: {str(playright_error)}")
# Make sure the task is marked as failed in the database before raising the exception # Make sure the task is marked as failed in the database before raising the exception
exc = FailedToNavigateToUrl(url=self.url, error_message=str(playright_error))
await app.DATABASE.update_task( await app.DATABASE.update_task(
task.task_id, task.task_id,
status=TaskStatus.failed, status=TaskStatus.failed,
organization_id=workflow.organization_id, organization_id=workflow.organization_id,
failure_reason=str(exc), failure_reason=str(e),
) )
raise exc raise e
else:
# if not the first task block, need to navigate manually
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id)
if browser_state is None:
raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id)
working_page = await browser_state.get_working_page()
if not working_page:
LOG.error(
"BrowserState has no page",
workflow_run_id=workflow_run.workflow_run_id,
)
raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
if self.url:
LOG.info(
"Navigating to page",
url=self.url,
workflow_run_id=workflow_run_id,
task_id=task.task_id,
workflow_id=workflow.workflow_id,
organization_id=workflow.organization_id,
step_id=step.step_id,
)
try:
await browser_state.navigate_to_url(page=working_page, url=self.url)
except Exception as e:
await app.DATABASE.update_task(
task.task_id,
status=TaskStatus.failed,
organization_id=workflow.organization_id,
failure_reason=str(e),
)
raise e
try: try:
await app.agent.execute_step( await app.agent.execute_step(

View File

@@ -14,7 +14,7 @@ from playwright.async_api import BrowserContext, ConsoleMessage, Download, Error
from pydantic import BaseModel, PrivateAttr from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import BROWSER_CLOSE_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT from skyvern.constants import BROWSER_CLOSE_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT, NAVIGATION_MAX_RETRY_TIME
from skyvern.exceptions import ( from skyvern.exceptions import (
FailedToNavigateToUrl, FailedToNavigateToUrl,
FailedToReloadPage, FailedToReloadPage,
@@ -346,49 +346,48 @@ class BrowserState:
LOG.info("browser context is created") LOG.info("browser context is created")
if await self.get_working_page() is None: if await self.get_working_page() is None:
success = False page = await self.browser_context.new_page()
retries = 0 await self.set_working_page(page, 0)
await self._close_all_other_pages()
while not success and retries < 3: if url:
try: await self.navigate_to_url(page=page, url=url)
LOG.info("Creating a new page")
page = await self.browser_context.new_page() async def navigate_to_url(self, page: Page, url: str, retry_times: int = NAVIGATION_MAX_RETRY_TIME) -> None:
await self.set_working_page(page, 0) navigation_error: Exception = FailedToNavigateToUrl(url=url, error_message="")
await self._close_all_other_pages() for retry_time in range(retry_times):
LOG.info("A new page is created") LOG.info(f"Trying to navigate to {url} and waiting for 5 seconds.", url=url, retry_time=retry_time)
if url: try:
LOG.info(f"Navigating page to {url} and waiting for 5 seconds") start_time = time.time()
try: await page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
start_time = time.time() end_time = time.time()
await page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS) LOG.info(
end_time = time.time() "Page loading time",
LOG.info( loading_time=end_time - start_time,
"Page loading time", url=url,
loading_time=end_time - start_time, )
url=url, await asyncio.sleep(5)
) LOG.info(f"Successfully went to {url}", url=url, retry_time=retry_time)
await asyncio.sleep(5) return
except Error as playright_error:
LOG.warning( except Exception as e:
f"Error while navigating to url: {str(playright_error)}", navigation_error = e
exc_info=True, LOG.warning(
) f"Error while navigating to url: {str(navigation_error)}",
raise FailedToNavigateToUrl(url=url, error_message=str(playright_error)) exc_info=True,
success = True url=url,
LOG.info(f"Successfully went to {url}") retry_time=retry_time,
else: )
success = True # Wait for 5 seconds before retrying
except Exception as e: await asyncio.sleep(5)
LOG.exception( else:
f"Error while creating or navigating to a new page. Waiting for 5 seconds. Error: {str(e)}", LOG.exception(
) f"Failed to navigate to {url} after {retry_times} retries: {str(navigation_error)}",
retries += 1 url=url,
# Wait for 5 seconds before retrying )
await asyncio.sleep(5) if isinstance(navigation_error, Error):
if retries >= 3: raise FailedToNavigateToUrl(url=url, error_message=str(navigation_error))
LOG.exception(f"Failed to create a new page after 3 retries: {str(e)}") raise navigation_error
raise e
LOG.info(f"Retrying to create a new page. Retry count: {retries}")
async def get_working_page(self) -> Page | None: async def get_working_page(self) -> Page | None:
# HACK: currently, assuming the last page is always the working page. # HACK: currently, assuming the last page is always the working page.