add action result, including downloaded files to step.output for both agent and caching runs (#4477)

This commit is contained in:
Shuchang Zheng
2026-01-18 11:17:02 -08:00
committed by GitHub
parent 6a32d8b696
commit bd041ed52f
8 changed files with 245 additions and 16 deletions

View File

@@ -1,20 +1,30 @@
from __future__ import annotations
import asyncio
import os
from pathlib import Path
from typing import Any, Callable
import structlog
from playwright.async_api import Page
from skyvern.config import settings
from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT
from skyvern.core.script_generations.real_skyvern_page_ai import RealSkyvernPageAi, render_template
from skyvern.core.script_generations.skyvern_page import ActionCall, ActionMetadata, RunContext, SkyvernPage
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
from skyvern.errors.errors import UserDefinedError
from skyvern.exceptions import ScriptTerminationException, WorkflowRunNotFound
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import (
check_downloading_files_and_wait_for_download_to_complete,
get_path_for_workflow_download_directory,
list_files_in_directory,
)
from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.schemas.steps import AgentStepOutput
from skyvern.services.otp_service import poll_otp_value
from skyvern.utils.url_validators import prepend_scheme_and_validate_url
from skyvern.webeye.actions.action_types import ActionType
@@ -22,6 +32,7 @@ from skyvern.webeye.actions.actions import (
Action,
ActionStatus,
CompleteAction,
DecisiveAction,
ExtractAction,
SelectOption,
SolveCaptchaAction,
@@ -32,6 +43,7 @@ from skyvern.webeye.actions.handler import (
get_actual_value_of_parameter_if_secret,
handle_complete_action,
)
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_state import BrowserState
from skyvern.webeye.scraper.scraped_page import ScrapedPage
@@ -122,6 +134,29 @@ class ScriptSkyvernPage(SkyvernPage):
support_empty_page=True,
)
async def _ensure_download_to_complete(
self,
download_dir: Path,
browser_session_id: str | None = None,
) -> None:
context = skyvern_context.current()
if not context or not context.organization_id:
return
if not download_dir.exists():
return
organization_id = context.organization_id
download_timeout = BROWSER_DOWNLOAD_TIMEOUT
if context.task_id:
task = await app.DATABASE.get_task(context.task_id, organization_id=organization_id)
if task and task.download_timeout:
download_timeout = task.download_timeout
await check_downloading_files_and_wait_for_download_to_complete(
download_dir=download_dir,
organization_id=organization_id,
browser_session_id=browser_session_id,
timeout=download_timeout,
)
async def _decorate_call(
self,
fn: Callable,
@@ -176,6 +211,28 @@ class ScriptSkyvernPage(SkyvernPage):
else:
print()
# Download detection for click actions
download_triggered: bool | None = None
downloaded_files: list[str] | None = None
files_before: list[str] = []
download_dir: Path | None = None
# Capture files before click action for download detection
if action == ActionType.CLICK and context and context.workflow_run_id:
try:
download_dir = get_path_for_workflow_download_directory(context.workflow_run_id)
if download_dir.exists():
files_before = list_files_in_directory(download_dir)
if context.browser_session_id and context.organization_id:
browser_session_downloaded_files = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=context.organization_id,
browser_session_id=context.browser_session_id,
)
files_before = files_before + browser_session_downloaded_files
except Exception:
pass # Don't block action execution if file listing fails
try:
call.result = await fn(self, *args, **kwargs)
@@ -198,23 +255,58 @@ class ScriptSkyvernPage(SkyvernPage):
# LLM fallback hook could go here ...
raise
finally:
# Add a small buffer between cached actions to give slow pages time to settle
if settings.CACHED_ACTION_DELAY_SECONDS > 0:
await asyncio.sleep(settings.CACHED_ACTION_DELAY_SECONDS)
# Check for downloaded files after click action
if action == ActionType.CLICK and context and context.workflow_run_id and download_dir:
try:
if download_dir.exists():
await self._ensure_download_to_complete(
download_dir=download_dir,
browser_session_id=context.browser_session_id,
)
files_after = list_files_in_directory(download_dir)
if context.browser_session_id and context.organization_id:
browser_session_downloaded_files = (
await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=context.organization_id,
browser_session_id=context.browser_session_id,
)
)
files_after = files_after + browser_session_downloaded_files
new_file_paths = set(files_after) - set(files_before)
if new_file_paths:
download_triggered = True
downloaded_files = [os.path.basename(fp) for fp in new_file_paths]
LOG.info(
"Script click action detected download",
downloaded_files=downloaded_files,
workflow_run_id=context.workflow_run_id,
)
else:
download_triggered = False
except Exception:
pass # Don't block if download detection fails
self._record(call)
# Auto-create action after execution
await self._create_action_after_execution(
# Auto-create action after execution and store result
await self._create_action_and_result_after_execution(
action_type=action,
intention=prompt,
status=action_status,
kwargs=kwargs,
call_result=call.result,
call_error=call.error,
download_triggered=download_triggered,
downloaded_files=downloaded_files,
)
# Auto-create screenshot artifact after execution
await self._create_screenshot_after_execution()
# Add a small buffer between cached actions to give slow pages time to settle
if settings.CACHED_ACTION_DELAY_SECONDS > 0:
await asyncio.sleep(settings.CACHED_ACTION_DELAY_SECONDS)
async def _update_action_reasoning(
self,
action_id: str,
@@ -265,20 +357,27 @@ class ScriptSkyvernPage(SkyvernPage):
)
return reasoning
async def _create_action_after_execution(
async def _create_action_and_result_after_execution(
self,
action_type: ActionType,
intention: str = "",
status: ActionStatus = ActionStatus.pending,
kwargs: dict[str, Any] | None = None,
call_result: Any | None = None,
) -> Action | None:
"""Create an action record in the database before execution if task_id and step_id are available."""
call_error: Exception | None = None,
download_triggered: bool | None = None,
downloaded_files: list[str] | None = None,
) -> tuple[Action | None, list[ActionResult]]:
"""Create an action record and result in the database after execution if task_id and step_id are available.
Returns a tuple of (Action, list[ActionResult]) similar to how the agent stores actions and results.
"""
results: list[ActionResult] = []
try:
context = skyvern_context.current()
if not context or not context.task_id or not context.step_id:
return None
return None, results
# Create action record. TODO: store more action fields
kwargs = kwargs or {}
@@ -320,6 +419,9 @@ class ScriptSkyvernPage(SkyvernPage):
file_url=file_url,
response=response,
xpath=xpath,
download=download_triggered,
download_triggered=download_triggered,
downloaded_files=downloaded_files,
created_by="script",
)
data_extraction_goal = None
@@ -363,11 +465,32 @@ class ScriptSkyvernPage(SkyvernPage):
context.action_order += 1
return created_action
# Create ActionResult based on success/failure
if call_error:
result = ActionFailure(exception=call_error, download_triggered=download_triggered)
else:
# For extract actions, include the extracted data in the result
result_data = None
if action_type == ActionType.EXTRACT and call_result:
result_data = call_result
result = ActionSuccess(
data=result_data,
download_triggered=download_triggered,
downloaded_files=downloaded_files,
)
results = [result]
# Store action and results in RunContext for step output
run_context = script_run_context_manager.get_run_context()
if run_context:
run_context.actions_and_results.append((created_action, results))
return created_action, results
except Exception:
# If action creation fails, don't block the actual action execution
return None
return None, results
@classmethod
async def _create_screenshot_after_execution(cls) -> None:
@@ -492,6 +615,15 @@ class ScriptSkyvernPage(SkyvernPage):
task = await app.DATABASE.get_task(context.task_id, context.organization_id)
step = await app.DATABASE.get_step(context.step_id, context.organization_id)
if task and step:
# CRITICAL: Update step.output with actions_and_results BEFORE validation
# This ensures complete_verify() can access action history (including download info)
# when checking if the goal was achieved
await self._update_step_output_before_complete(context)
# Refresh step to get updated output for validation
step = await app.DATABASE.get_step(context.step_id, context.organization_id)
if not step:
return
action = CompleteAction(
organization_id=context.organization_id,
task_id=context.task_id,
@@ -504,6 +636,48 @@ class ScriptSkyvernPage(SkyvernPage):
if result and result[-1].success is False:
raise ScriptTerminationException(result[-1].exception_message)
async def _update_step_output_before_complete(self, context: skyvern_context.SkyvernContext) -> None:
"""Update step.output with actions_and_results before complete validation.
This is critical for cached runs because complete_verify() reads action history
from step.output.actions_and_results to check if goals were achieved (e.g., file downloads).
Without this, the validation has no visibility into what actions were performed.
"""
# Validate required context fields
if not context.step_id or not context.task_id or not context.organization_id:
return
run_context = script_run_context_manager.get_run_context()
if not run_context or not run_context.actions_and_results:
return
# Extract errors from DecisiveActions (similar to agent flow)
errors: list[UserDefinedError] = []
for action, _ in run_context.actions_and_results:
if isinstance(action, DecisiveAction):
errors.extend(action.errors)
# Create AgentStepOutput similar to how agent does it
step_output = AgentStepOutput(
actions_and_results=run_context.actions_and_results,
action_results=[result for _, results in run_context.actions_and_results for result in results],
errors=errors,
)
await app.DATABASE.update_step(
step_id=context.step_id,
task_id=context.task_id,
organization_id=context.organization_id,
output=step_output,
)
LOG.info(
"Updated step output with cached actions before complete validation",
step_id=context.step_id,
task_id=context.task_id,
num_actions=len(run_context.actions_and_results),
)
class ScriptRunContextManager:
"""

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import asyncio
import copy
from dataclasses import dataclass
from typing import Any, Callable, Literal, overload
from typing import TYPE_CHECKING, Any, Callable, Literal, overload
import structlog
from playwright.async_api import Locator, Page
@@ -16,6 +16,10 @@ from skyvern.library.ai_locator import AILocator
from skyvern.webeye.actions import handler_utils
from skyvern.webeye.actions.action_types import ActionType
if TYPE_CHECKING:
from skyvern.webeye.actions.actions import Action
from skyvern.webeye.actions.responses import ActionResult
LOG = structlog.get_logger()
@@ -1007,3 +1011,5 @@ class RunContext:
self.parameters[key] = value
self.page = page
self.trace: list[ActionCall] = []
# Store actions and results for step output (similar to agent flow)
self.actions_and_results: list[tuple[Action, list[ActionResult]]] = []