Smarter select_option & input_text actions (#3440)

2025-09-15 13:16:34 -07:00
parent 6f212ff327
commit 6ee329866b
10 changed files with 300 additions and 105 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -80,9 +80,8 @@ from skyvern.forge.sdk.workflow.models.block import ActionBlock, BaseTaskBlock,
 from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, WorkflowRunStatus
 from skyvern.schemas.runs import CUA_ENGINES, RunEngine
 from skyvern.schemas.steps import AgentStepOutput
-from skyvern.services import run_service
+from skyvern.services import run_service, service_utils
 from skyvern.services.action_service import get_action_history
-from skyvern.services.task_v1_service import is_cua_task
 from skyvern.utils.image_resizer import Resolution
 from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
 from skyvern.webeye.actions.action_types import ActionType
@@ -1669,7 +1668,7 @@ class ForgeAgent:
        )
        scroll = True
        llm_key_override = task.llm_key
-        if await is_cua_task(task=task):
+        if await service_utils.is_cua_task(task=task):
            scroll = False
            llm_key_override = None

@@ -2709,7 +2708,7 @@ class ForgeAgent:
                steps_results.append(step_result)

            scroll = True
-            if await is_cua_task(task=task):
+            if await service_utils.is_cua_task(task=task):
                scroll = False

            screenshots: list[bytes] = []
@@ -2971,7 +2970,7 @@ class ForgeAgent:
                verification_code_check=False,
            )
            llm_key_override = task.llm_key
-            if await is_cua_task(task=task):
+            if await service_utils.is_cua_task(task=task):
                llm_key_override = None
            llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
                llm_key_override, default=app.LLM_API_HANDLER
--- a/skyvern/forge/prompts/skyvern/generate-workflow-parameters.j2
+++ b/skyvern/forge/prompts/skyvern/generate-workflow-parameters.j2
@@ -1,18 +1,19 @@
 You are an expert at analyzing user interface automation actions and generating meaningful field names for data structures.

-Given a list of input_text actions with their intentions and text values, generate appropriate field names for a Pydantic BaseModel class called "GeneratedWorkflowParameters".
+Given a list of input_text, upload_file and select_option actions with their intentions and values, generate appropriate field names for a Pydantic BaseModel class called "GeneratedWorkflowParameters".

 ## Rules:
 1. Field names should be valid Python identifiers (snake_case, no spaces, no special characters except underscore)
 2. Field names should be descriptive and based on the intention of the action
-3. If multiple actions input the same text value, they should map to the same field name
+3. If multiple actions use the same value, they should map to the same field name
 4. Field names should be concise but clear about what data they represent
 5. Avoid generic names like "field1", "input1" - use meaningful names based on the intention

-## Input Actions:
-{% for action in input_actions %}
+## Actions:
+{% for action in custom_field_actions %}
 Action {{ loop.index }}:
- Text: "{{ action.text }}"
+- Action type: "{{ action.action_type }}"
+- Value: "{{ action.value }}"
 - Intention: "{{ action.intention }}"
 {% endfor %}

--- a/skyvern/forge/prompts/skyvern/script-generation-file-url-generation.j2
+++ b/skyvern/forge/prompts/skyvern/script-generation-file-url-generation.j2
@@ -0,0 +1,17 @@
+# Goal
+You are an expert in uploading files on a webpage. Help the user figure out the specific file URL to use to upload a file.
+
+# Provided information:{% if goal %}
+- User's overall goal: {{ goal }}{% endif %}
+- Context and details: {{ data }}
+- The question or the intention for this file upload action: {{ intention }}
+
+# Output
+- Your answer should be a valid URL to a file.
+- YOUR RESPONSE HAS TO BE IN JSON FORMAT. DO NOT RETURN ANYTHING ELSE.
+- DO NOT INCLUDE ANY UNRELATED INFORMATION OR UNNECESSARY DETAILS IN YOUR ANSWER.
+
+EXAMPLE RESPONSE FORMAT:
+{
+  "answer": "string"
+}
--- a/skyvern/forge/sdk/db/client.py
+++ b/skyvern/forge/sdk/db/client.py
@@ -326,7 +326,7 @@ class AgentDB:
            LOG.error("UnexpectedError", exc_info=True)
            raise

-    async def get_step(self, task_id: str, step_id: str, organization_id: str | None = None) -> Step | None:
+    async def get_step(self, step_id: str, organization_id: str | None = None) -> Step | None:
        try:
            async with self.Session() as session:
                if step := (
@@ -588,7 +588,7 @@ class AgentDB:
                        step.cached_token_count = incremental_cached_tokens + (step.cached_token_count or 0)

                    await session.commit()
-                    updated_step = await self.get_step(task_id, step_id, organization_id)
+                    updated_step = await self.get_step(step_id, organization_id)
                    if not updated_step:
                        raise NotFoundError("Step not found")
                    return updated_step