From d19ff2bd6904272c680c9b0132f210ab770e5c54 Mon Sep 17 00:00:00 2001
From: Kerem Yilmaz
Date: Tue, 17 Sep 2024 18:59:40 -0700
Subject: [PATCH] Add complete action verification (#845)

---
Review notes (this section is ignored by git am):
- check_user_goal_success requires "user_goal_achieved" to be a real boolean,
  so a truthy string such as "false" can no longer trigger a complete action.
- check-user-goal.j2 closes its ```json fence.

 skyvern/exceptions.py                         |  6 ++
 skyvern/forge/agent.py                        | 96 ++++++++++++++++++-
 .../forge/prompts/skyvern/check-user-goal.j2  | 30 +++++++
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 skyvern/forge/prompts/skyvern/check-user-goal.j2

diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
index f6154602..8bf3a75d 100644
--- a/skyvern/exceptions.py
+++ b/skyvern/exceptions.py
@@ -502,3 +502,9 @@ class FailToFindAutocompleteOption(SkyvernException):
         super().__init__(
             f"Can't find a suitable auto completion for the current value, maybe retry with another reasonable value. current_value={current_value}"
         )
+
+
+class IllegitComplete(SkyvernException):
+    def __init__(self, data: dict | None = None) -> None:
+        data_str = f", data={data}" if data else ""
+        super().__init__(f"Illegit complete{data_str}")
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index b41f7279..2f96c9b8 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -50,7 +50,7 @@ from skyvern.webeye.actions.actions import (
     WebAction,
     parse_actions,
 )
-from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code
+from skyvern.webeye.actions.handler import ActionHandler, handle_complete_action, poll_verification_code
 from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
 from skyvern.webeye.actions.responses import ActionResult
 from skyvern.webeye.browser_factory import BrowserState
@@ -773,6 +773,36 @@ class ForgeAgent:
                 step_retry=step.retry_index,
                 action_results=action_results,
             )
+            if app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
+                "CHECK_USER_GOAL_SUCCESS_EVERY_STEP",
+                task.workflow_run_id or task.task_id,
+                properties={
+                    "organization_id": task.organization_id,
+                    "organization_created_at": str(organization.created_at) if organization else None,
+                },
+            ):
+                LOG.info("Checking if user goal is achieved after re-scraping the page")
+                # Check if navigation goal is achieved after re-scraping the page
+                new_scraped_page = await self._scrape_with_type(
+                    task=task,
+                    step=step,
+                    browser_state=browser_state,
+                    scrape_type=ScrapeType.NORMAL,
+                    organization=organization,
+                )
+                if new_scraped_page is None:
+                    LOG.warning("Failed to scrape the page before checking user goal success, skipping check...")
+                else:
+                    working_page = await browser_state.get_working_page()
+                    result_tuple = await self.check_user_goal_success(
+                        page=working_page,
+                        scraped_page=new_scraped_page,
+                        task=task,
+                        step=step,
+                    )
+                    if result_tuple is not None:
+                        complete_action, action_results = result_tuple
+                        detailed_agent_step_output.actions_and_results.append((complete_action, action_results))
             # If no action errors return the agent state and output
             completed_step = await self.update_step(
                 step=step,
@@ -811,6 +841,70 @@ class ForgeAgent:
             )
             return failed_step, detailed_agent_step_output.get_clean_detailed_output()
 
+    @staticmethod
+    async def check_user_goal_success(
+        page: Page, scraped_page: ScrapedPage, task: Task, step: Step
+    ) -> tuple[CompleteAction, list[ActionResult]] | None:
+        """
+        Ask the secondary LLM whether the user goal is already achieved on the current page.
+
+        Skipped when step.output already contains a CompleteAction. On a positive verdict a
+        CompleteAction is built from the LLM reasoning and run through handle_complete_action.
+
+        Returns (complete_action, action_results) when the goal is achieved; otherwise None,
+        including for an invalid LLM response or any exception (this check is best-effort).
+        """
+        try:
+            # Check if Skyvern already returned a complete action, if so, don't run verification
+            # NOTE(review): the caller invokes this before update_step(); confirm step.output is populated by then
+            if step.output and step.output.actions_and_results:
+                for action, _ in step.output.actions_and_results:
+                    if isinstance(action, CompleteAction):
+                        return None
+
+            verification_prompt = prompt_engine.load_prompt(
+                "check-user-goal",
+                navigation_goal=task.navigation_goal,
+                navigation_payload=task.navigation_payload,
+                elements=scraped_page.build_element_tree(ElementTreeFormat.HTML),
+            )
+            screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)
+
+            verification_llm_api_handler = app.SECONDARY_LLM_API_HANDLER
+
+            verification_response = await verification_llm_api_handler(
+                prompt=verification_prompt, step=step, screenshots=screenshots
+            )
+            if (
+                "user_goal_achieved" not in verification_response
+                or "reasoning" not in verification_response
+                # LLM-generated JSON: a string such as "false" is truthy, so require a real boolean
+                or not isinstance(verification_response["user_goal_achieved"], bool)
+            ):
+                LOG.error(
+                    "Invalid LLM response for user goal success verification, skipping verification",
+                    verification_response=verification_response,
+                )
+                return None
+
+            user_goal_achieved: bool = verification_response["user_goal_achieved"]
+            complete_action = CompleteAction(
+                reasoning=verification_response["reasoning"],
+                data_extraction_goal=task.data_extraction_goal,
+            )
+            # We don't want to return a complete action if the user goal is not achieved since we're checking at every step
+            if not user_goal_achieved:
+                return None
+
+            LOG.info("User goal achieved, executing complete action")
+            action_results = await handle_complete_action(complete_action, page, scraped_page, task, step)
+
+            return complete_action, action_results
+
+        except Exception:
+            LOG.error("LLM verification failed for complete action, skipping LLM verification", exc_info=True)
+            return None
+
     async def record_artifacts_after_action(self, task: Task, step: Step, browser_state: BrowserState) -> None:
         working_page = await browser_state.get_working_page()
         if not working_page:
diff --git a/skyvern/forge/prompts/skyvern/check-user-goal.j2 b/skyvern/forge/prompts/skyvern/check-user-goal.j2
new file mode 100644
index 00000000..6b2ddb96
--- /dev/null
+++ b/skyvern/forge/prompts/skyvern/check-user-goal.j2
@@ -0,0 +1,30 @@
+Based on the content of the screenshot and the elements on the page, determine whether the user goal has been successfully completed or not.
+
+The JSON object should be in this format:
+```json
+{
+    "reasoning": str, // Describe the state of the user goal and explain why it has been completed or not completed.
+    "user_goal_achieved": bool // True if the user goal has been completed, False otherwise.
+}
+```
+
+Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions based on the screenshot, return a response solely based on what you observe in the screenshot and nothing else.
+
+Examples:
+{
+    "reasoning": "The screenshot shows a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
+    "user_goal_achieved": true
+}
+{
+    "reasoning": "The screenshot shows a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
+    "user_goal_achieved": false
+}
+
+Elements on the page:
+{{ elements }}
+
+User Goal:
+{{ navigation_goal }}
+
+User Details:
+{{ navigation_payload }}
\ No newline at end of file