refactor navigation logic (#1318)
This commit is contained in:
@@ -14,6 +14,7 @@ BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minute
|
|||||||
DOWNLOAD_FILE_PREFIX = "downloads"
|
DOWNLOAD_FILE_PREFIX = "downloads"
|
||||||
SAVE_DOWNLOADED_FILES_TIMEOUT = 180
|
SAVE_DOWNLOADED_FILES_TIMEOUT = 180
|
||||||
GET_DOWNLOADED_FILES_TIMEOUT = 30
|
GET_DOWNLOADED_FILES_TIMEOUT = 30
|
||||||
|
NAVIGATION_MAX_RETRY_TIME = 3
|
||||||
|
|
||||||
# reserved fields for navigation payload
|
# reserved fields for navigation payload
|
||||||
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"
|
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"
|
||||||
|
|||||||
@@ -19,14 +19,12 @@ import filetype
|
|||||||
import structlog
|
import structlog
|
||||||
from email_validator import EmailNotValidError, validate_email
|
from email_validator import EmailNotValidError, validate_email
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from playwright.async_api import Error
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.exceptions import (
|
from skyvern.exceptions import (
|
||||||
ContextParameterValueNotFound,
|
ContextParameterValueNotFound,
|
||||||
DisabledBlockExecutionError,
|
DisabledBlockExecutionError,
|
||||||
FailedToNavigateToUrl,
|
|
||||||
MissingBrowserState,
|
MissingBrowserState,
|
||||||
MissingBrowserStatePage,
|
MissingBrowserStatePage,
|
||||||
SkyvernException,
|
SkyvernException,
|
||||||
@@ -370,70 +368,60 @@ class BaseTaskBlock(Block):
|
|||||||
raise Exception(f"Organization is missing organization_id={workflow.organization_id}")
|
raise Exception(f"Organization is missing organization_id={workflow.organization_id}")
|
||||||
|
|
||||||
browser_state: BrowserState | None = None
|
browser_state: BrowserState | None = None
|
||||||
try:
|
if is_first_task:
|
||||||
if is_first_task:
|
# the first task block will create the browser state and do the navigation
|
||||||
|
try:
|
||||||
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
||||||
workflow_run=workflow_run, url=self.url
|
workflow_run=workflow_run, url=self.url
|
||||||
)
|
)
|
||||||
else:
|
except Exception as e:
|
||||||
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id)
|
LOG.exception(
|
||||||
if browser_state is None:
|
"Failed to get browser state for first task",
|
||||||
raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id)
|
task_id=task.task_id,
|
||||||
except FailedToNavigateToUrl as e:
|
workflow_run_id=workflow_run_id,
|
||||||
# Make sure the task is marked as failed in the database before raising the exception
|
)
|
||||||
await app.DATABASE.update_task(
|
|
||||||
task.task_id,
|
|
||||||
status=TaskStatus.failed,
|
|
||||||
organization_id=workflow.organization_id,
|
|
||||||
failure_reason=str(e),
|
|
||||||
)
|
|
||||||
raise e
|
|
||||||
except Exception as e:
|
|
||||||
await app.DATABASE.update_task(
|
|
||||||
task.task_id,
|
|
||||||
status=TaskStatus.failed,
|
|
||||||
organization_id=workflow.organization_id,
|
|
||||||
failure_reason=str(e),
|
|
||||||
)
|
|
||||||
LOG.exception(
|
|
||||||
"Failed to get browser state for task",
|
|
||||||
task_id=task.task_id,
|
|
||||||
workflow_run_id=workflow_run_id,
|
|
||||||
)
|
|
||||||
raise e
|
|
||||||
|
|
||||||
working_page = await browser_state.get_working_page()
|
|
||||||
if not working_page:
|
|
||||||
LOG.error(
|
|
||||||
"BrowserState has no page",
|
|
||||||
workflow_run_id=workflow_run.workflow_run_id,
|
|
||||||
)
|
|
||||||
raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
|
|
||||||
|
|
||||||
LOG.info(
|
|
||||||
"Navigating to page",
|
|
||||||
url=self.url,
|
|
||||||
workflow_run_id=workflow_run_id,
|
|
||||||
task_id=task.task_id,
|
|
||||||
workflow_id=workflow.workflow_id,
|
|
||||||
organization_id=workflow.organization_id,
|
|
||||||
step_id=step.step_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.url:
|
|
||||||
try:
|
|
||||||
await working_page.goto(self.url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
|
|
||||||
except Error as playright_error:
|
|
||||||
LOG.warning(f"Error while navigating to url: {str(playright_error)}")
|
|
||||||
# Make sure the task is marked as failed in the database before raising the exception
|
# Make sure the task is marked as failed in the database before raising the exception
|
||||||
exc = FailedToNavigateToUrl(url=self.url, error_message=str(playright_error))
|
|
||||||
await app.DATABASE.update_task(
|
await app.DATABASE.update_task(
|
||||||
task.task_id,
|
task.task_id,
|
||||||
status=TaskStatus.failed,
|
status=TaskStatus.failed,
|
||||||
organization_id=workflow.organization_id,
|
organization_id=workflow.organization_id,
|
||||||
failure_reason=str(exc),
|
failure_reason=str(e),
|
||||||
)
|
)
|
||||||
raise exc
|
raise e
|
||||||
|
else:
|
||||||
|
# if not the first task block, need to navigate manually
|
||||||
|
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id)
|
||||||
|
if browser_state is None:
|
||||||
|
raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id)
|
||||||
|
|
||||||
|
working_page = await browser_state.get_working_page()
|
||||||
|
if not working_page:
|
||||||
|
LOG.error(
|
||||||
|
"BrowserState has no page",
|
||||||
|
workflow_run_id=workflow_run.workflow_run_id,
|
||||||
|
)
|
||||||
|
raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
|
||||||
|
|
||||||
|
if self.url:
|
||||||
|
LOG.info(
|
||||||
|
"Navigating to page",
|
||||||
|
url=self.url,
|
||||||
|
workflow_run_id=workflow_run_id,
|
||||||
|
task_id=task.task_id,
|
||||||
|
workflow_id=workflow.workflow_id,
|
||||||
|
organization_id=workflow.organization_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
await browser_state.navigate_to_url(page=working_page, url=self.url)
|
||||||
|
except Exception as e:
|
||||||
|
await app.DATABASE.update_task(
|
||||||
|
task.task_id,
|
||||||
|
status=TaskStatus.failed,
|
||||||
|
organization_id=workflow.organization_id,
|
||||||
|
failure_reason=str(e),
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await app.agent.execute_step(
|
await app.agent.execute_step(
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from playwright.async_api import BrowserContext, ConsoleMessage, Download, Error
|
|||||||
from pydantic import BaseModel, PrivateAttr
|
from pydantic import BaseModel, PrivateAttr
|
||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.constants import BROWSER_CLOSE_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT
|
from skyvern.constants import BROWSER_CLOSE_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT, NAVIGATION_MAX_RETRY_TIME
|
||||||
from skyvern.exceptions import (
|
from skyvern.exceptions import (
|
||||||
FailedToNavigateToUrl,
|
FailedToNavigateToUrl,
|
||||||
FailedToReloadPage,
|
FailedToReloadPage,
|
||||||
@@ -346,49 +346,48 @@ class BrowserState:
|
|||||||
LOG.info("browser context is created")
|
LOG.info("browser context is created")
|
||||||
|
|
||||||
if await self.get_working_page() is None:
|
if await self.get_working_page() is None:
|
||||||
success = False
|
page = await self.browser_context.new_page()
|
||||||
retries = 0
|
await self.set_working_page(page, 0)
|
||||||
|
await self._close_all_other_pages()
|
||||||
|
|
||||||
while not success and retries < 3:
|
if url:
|
||||||
try:
|
await self.navigate_to_url(page=page, url=url)
|
||||||
LOG.info("Creating a new page")
|
|
||||||
page = await self.browser_context.new_page()
|
async def navigate_to_url(self, page: Page, url: str, retry_times: int = NAVIGATION_MAX_RETRY_TIME) -> None:
|
||||||
await self.set_working_page(page, 0)
|
navigation_error: Exception = FailedToNavigateToUrl(url=url, error_message="")
|
||||||
await self._close_all_other_pages()
|
for retry_time in range(retry_times):
|
||||||
LOG.info("A new page is created")
|
LOG.info(f"Trying to navigate to {url} and waiting for 5 seconds.", url=url, retry_time=retry_time)
|
||||||
if url:
|
try:
|
||||||
LOG.info(f"Navigating page to {url} and waiting for 5 seconds")
|
start_time = time.time()
|
||||||
try:
|
await page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
|
||||||
start_time = time.time()
|
end_time = time.time()
|
||||||
await page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
|
LOG.info(
|
||||||
end_time = time.time()
|
"Page loading time",
|
||||||
LOG.info(
|
loading_time=end_time - start_time,
|
||||||
"Page loading time",
|
url=url,
|
||||||
loading_time=end_time - start_time,
|
)
|
||||||
url=url,
|
await asyncio.sleep(5)
|
||||||
)
|
LOG.info(f"Successfully went to {url}", url=url, retry_time=retry_time)
|
||||||
await asyncio.sleep(5)
|
return
|
||||||
except Error as playright_error:
|
|
||||||
LOG.warning(
|
except Exception as e:
|
||||||
f"Error while navigating to url: {str(playright_error)}",
|
navigation_error = e
|
||||||
exc_info=True,
|
LOG.warning(
|
||||||
)
|
f"Error while navigating to url: {str(navigation_error)}",
|
||||||
raise FailedToNavigateToUrl(url=url, error_message=str(playright_error))
|
exc_info=True,
|
||||||
success = True
|
url=url,
|
||||||
LOG.info(f"Successfully went to {url}")
|
retry_time=retry_time,
|
||||||
else:
|
)
|
||||||
success = True
|
# Wait for 5 seconds before retrying
|
||||||
except Exception as e:
|
await asyncio.sleep(5)
|
||||||
LOG.exception(
|
else:
|
||||||
f"Error while creating or navigating to a new page. Waiting for 5 seconds. Error: {str(e)}",
|
LOG.exception(
|
||||||
)
|
f"Failed to navigate to {url} after {retry_times} retries: {str(navigation_error)}",
|
||||||
retries += 1
|
url=url,
|
||||||
# Wait for 5 seconds before retrying
|
)
|
||||||
await asyncio.sleep(5)
|
if isinstance(navigation_error, Error):
|
||||||
if retries >= 3:
|
raise FailedToNavigateToUrl(url=url, error_message=str(navigation_error))
|
||||||
LOG.exception(f"Failed to create a new page after 3 retries: {str(e)}")
|
raise navigation_error
|
||||||
raise e
|
|
||||||
LOG.info(f"Retrying to create a new page. Retry count: {retries}")
|
|
||||||
|
|
||||||
async def get_working_page(self) -> Page | None:
|
async def get_working_page(self) -> Page | None:
|
||||||
# HACK: currently, assuming the last page is always the working page.
|
# HACK: currently, assuming the last page is always the working page.
|
||||||
|
|||||||
Reference in New Issue
Block a user