Smarter select_option & input_text actions (#3440)

This commit is contained in:
Shuchang Zheng
2025-09-15 13:16:34 -07:00
committed by GitHub
parent 6f212ff327
commit 6ee329866b
10 changed files with 300 additions and 105 deletions

View File

@@ -80,9 +80,8 @@ from skyvern.forge.sdk.workflow.models.block import ActionBlock, BaseTaskBlock,
from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, WorkflowRunStatus
from skyvern.schemas.runs import CUA_ENGINES, RunEngine
from skyvern.schemas.steps import AgentStepOutput
from skyvern.services import run_service
from skyvern.services import run_service, service_utils
from skyvern.services.action_service import get_action_history
from skyvern.services.task_v1_service import is_cua_task
from skyvern.utils.image_resizer import Resolution
from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
from skyvern.webeye.actions.action_types import ActionType
@@ -1669,7 +1668,7 @@ class ForgeAgent:
)
scroll = True
llm_key_override = task.llm_key
if await is_cua_task(task=task):
if await service_utils.is_cua_task(task=task):
scroll = False
llm_key_override = None
@@ -2709,7 +2708,7 @@ class ForgeAgent:
steps_results.append(step_result)
scroll = True
if await is_cua_task(task=task):
if await service_utils.is_cua_task(task=task):
scroll = False
screenshots: list[bytes] = []
@@ -2971,7 +2970,7 @@ class ForgeAgent:
verification_code_check=False,
)
llm_key_override = task.llm_key
if await is_cua_task(task=task):
if await service_utils.is_cua_task(task=task):
llm_key_override = None
llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
llm_key_override, default=app.LLM_API_HANDLER

View File

@@ -1,18 +1,19 @@
You are an expert at analyzing user interface automation actions and generating meaningful field names for data structures.
Given a list of input_text actions with their intentions and text values, generate appropriate field names for a Pydantic BaseModel class called "GeneratedWorkflowParameters".
Given a list of input_text, upload_file and select_option actions with their intentions and values, generate appropriate field names for a Pydantic BaseModel class called "GeneratedWorkflowParameters".
## Rules:
1. Field names should be valid Python identifiers (snake_case, no spaces, no special characters except underscore)
2. Field names should be descriptive and based on the intention of the action
3. If multiple actions input the same text value, they should map to the same field name
3. If multiple actions use the same value, they should map to the same field name
4. Field names should be concise but clear about what data they represent
5. Avoid generic names like "field1", "input1" - use meaningful names based on the intention
## Input Actions:
{% for action in input_actions %}
## Actions:
{% for action in custom_field_actions %}
Action {{ loop.index }}:
- Text: "{{ action.text }}"
- Action type: "{{ action.action_type }}"
- Value: "{{ action.value }}"
- Intention: "{{ action.intention }}"
{% endfor %}

View File

@@ -0,0 +1,17 @@
# Goal
You are an expert in uploading files on a webpage. Help the user figure out the specific file URL to use to upload a file.
# Provided information:{% if goal %}
- User's overall goal: {{ goal }}{% endif %}
- Context and details: {{ data }}
- The question or the intention for this file upload action: {{ intention }}
# Output
- Your answer should be a valid URL to a file.
- YOUR RESPONSE HAS TO BE IN JSON FORMAT. DO NOT RETURN ANYTHING ELSE.
- DO NOT INCLUDE ANY UNRELATED INFORMATION OR UNNECESSARY DETAILS IN YOUR ANSWER.
EXAMPLE RESPONSE FORMAT:
{
"answer": "string"
}

View File

@@ -326,7 +326,7 @@ class AgentDB:
LOG.error("UnexpectedError", exc_info=True)
raise
async def get_step(self, task_id: str, step_id: str, organization_id: str | None = None) -> Step | None:
async def get_step(self, step_id: str, organization_id: str | None = None) -> Step | None:
try:
async with self.Session() as session:
if step := (
@@ -588,7 +588,7 @@ class AgentDB:
step.cached_token_count = incremental_cached_tokens + (step.cached_token_count or 0)
await session.commit()
updated_step = await self.get_step(task_id, step_id, organization_id)
updated_step = await self.get_step(step_id, organization_id)
if not updated_step:
raise NotFoundError("Step not found")
return updated_step