From 39bb4558bfb6ae2519b60cc335b011b44ac1556b Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Sun, 19 Jan 2025 19:24:39 -0800 Subject: [PATCH] introduce a way to reload the page (#1597) --- skyvern/forge/agent.py | 33 ++++++++++++++++++++++- skyvern/forge/sdk/core/skyvern_context.py | 1 + skyvern/webeye/actions/actions.py | 6 +++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 68bd3897..421ad466 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -58,10 +58,12 @@ from skyvern.forge.sdk.workflow.models.block import ActionBlock, BaseTaskBlock, from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, WorkflowRunStatus from skyvern.webeye.actions.actions import ( Action, + ActionStatus, ActionType, CompleteAction, CompleteVerifyResult, DecisiveAction, + ReloadPageAction, UserDefinedError, WebAction, ) @@ -69,7 +71,7 @@ from skyvern.webeye.actions.caching import retrieve_action_plan from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput from skyvern.webeye.actions.parse_actions import parse_actions -from skyvern.webeye.actions.responses import ActionResult +from skyvern.webeye.actions.responses import ActionResult, ActionSuccess from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website from skyvern.webeye.utils.page import SkyvernFrame @@ -800,6 +802,35 @@ class ForgeAgent: element_id_to_last_action: dict[str, int] = dict() for action_idx, action_node in enumerate(action_linked_list): + context = skyvern_context.ensure_context() + if context.refresh_working_page: + LOG.warning( + "Detected the signal to reload the page, going to reload and skip the rest of the actions", + task_id=task.task_id, + step_id=step.step_id, + step_order=step.order, + ) + await 
browser_state.reload_page() + context.refresh_working_page = False + action_result = ActionSuccess() + action_result.step_order = step.order + action_result.step_retry_number = step.retry_index + detailed_agent_step_output.actions_and_results[action_idx] = ( + ReloadPageAction( + reasoning="Something wrong with the current page, reload to continue", + status=ActionStatus.completed, + organization_id=task.organization_id, + workflow_run_id=task.workflow_run_id, + task_id=task.task_id, + step_id=step.step_id, + step_order=step.order, + action_order=action_idx, + ), + [action_result], + ) + await self.record_artifacts_after_action(task, step, browser_state) + break + action = action_node.action if isinstance(action, WebAction): previous_action_idx = element_id_to_last_action.get(action.element_id) diff --git a/skyvern/forge/sdk/core/skyvern_context.py b/skyvern/forge/sdk/core/skyvern_context.py index 994d2baf..bcbc305f 100644 --- a/skyvern/forge/sdk/core/skyvern_context.py +++ b/skyvern/forge/sdk/core/skyvern_context.py @@ -16,6 +16,7 @@ class SkyvernContext: totp_codes: dict[str, str | None] = field(default_factory=dict) log: list[dict] = field(default_factory=list) hashed_href_map: dict[str, str] = field(default_factory=dict) + refresh_working_page: bool = False def __repr__(self) -> str: return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})" diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py index 5e42f3b9..7e014948 100644 --- a/skyvern/webeye/actions/actions.py +++ b/skyvern/webeye/actions/actions.py @@ -25,6 +25,7 @@ class ActionType(StrEnum): SOLVE_CAPTCHA = "solve_captcha" TERMINATE = "terminate" COMPLETE = "complete" + RELOAD_PAGE = "reload_page" def is_web_action(self) -> bool: return self in [ @@ -161,6 +162,11 @@ class DecisiveAction(Action): 
errors: list[UserDefinedError] = [] +# TODO: consider implementing this as a WebAction in the future +class ReloadPageAction(Action): + action_type: ActionType = ActionType.RELOAD_PAGE + + class ClickAction(WebAction): action_type: ActionType = ActionType.CLICK file_url: str | None = None