From 438a49f4703a6f57c5a60fc74472dfca0509cdac Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Fri, 11 Apr 2025 17:25:43 -0700 Subject: [PATCH] add cua cost to task step (#2131) --- skyvern/forge/agent.py | 32 ++++++++++++++++++++++ skyvern/forge/sdk/routes/agent_protocol.py | 7 +++-- skyvern/services/run_service.py | 7 +++-- skyvern/webeye/actions/parse_actions.py | 21 +++++++++++++- 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 5aee71e1..efbe4b11 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -1247,6 +1247,22 @@ class ForgeAgent: truncation="auto", ) previous_response = first_response + input_tokens = first_response.usage.input_tokens or 0 + output_tokens = first_response.usage.output_tokens or 0 + first_response.usage.total_tokens or 0 + cached_tokens = first_response.usage.input_tokens_details.cached_tokens or 0 + reasoning_tokens = first_response.usage.output_tokens_details.reasoning_tokens or 0 + llm_cost = (3.0 / 1000000) * input_tokens + (12.0 / 1000000) * output_tokens + await app.DATABASE.update_step( + task_id=task.task_id, + step_id=step.step_id, + organization_id=task.organization_id, + incremental_cost=llm_cost, + incremental_input_tokens=input_tokens if input_tokens > 0 else None, + incremental_output_tokens=output_tokens if output_tokens > 0 else None, + incremental_reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None, + incremental_cached_tokens=cached_tokens if cached_tokens > 0 else None, + ) computer_calls = [item for item in previous_response.output if item.type == "computer_call"] if not computer_calls: @@ -1284,6 +1300,22 @@ class ForgeAgent: }, truncation="auto", ) + input_tokens = current_response.usage.input_tokens or 0 + output_tokens = current_response.usage.output_tokens or 0 + current_response.usage.total_tokens or 0 + cached_tokens = current_response.usage.input_tokens_details.cached_tokens or 0 + reasoning_tokens = current_response.usage.output_tokens_details.reasoning_tokens or 0 + llm_cost = (3.0 / 1000000) * input_tokens + (12.0 / 1000000) * output_tokens + await app.DATABASE.update_step( + task_id=task.task_id, + step_id=step.step_id, + organization_id=task.organization_id, + incremental_cost=llm_cost, + incremental_input_tokens=input_tokens if input_tokens > 0 else None, + incremental_output_tokens=output_tokens if output_tokens > 0 else None, + incremental_reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None, + incremental_cached_tokens=cached_tokens if cached_tokens > 0 else None, + ) return parse_cua_actions(task, step, current_response), current_response diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 42a8ccf8..b63f83e3 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -1503,17 +1503,20 @@ async def run_task( request=request, background_tasks=background_tasks, ) + run_type = RunType.task_v1 + if run_request.engine == RunEngine.openai_cua: + run_type = RunType.openai_cua # build the task run response return TaskRunResponse( run_id=task_v1_response.task_id, - run_type=RunType.task_v1, + run_type=run_type, status=str(task_v1_response.status), output=task_v1_response.extracted_information, failure_reason=task_v1_response.failure_reason, created_at=task_v1_response.created_at, modified_at=task_v1_response.modified_at, run_request=TaskRunRequest( - engine=RunEngine.skyvern_v1, + engine=run_request.engine, prompt=task_v1_response.navigation_goal, url=task_v1_response.url, webhook_url=task_v1_response.webhook_callback_url, diff --git a/skyvern/services/run_service.py b/skyvern/services/run_service.py index bcbd4002..7022f401 100644 --- a/skyvern/services/run_service.py +++ b/skyvern/services/run_service.py @@ -13,11 +13,14 @@ async def get_run_response(run_id: str, organization_id: str | None = None) -> R if not run: return None - if run.task_run_type == RunType.task_v1: + if run.task_run_type == RunType.task_v1 or run.task_run_type == RunType.openai_cua: # fetch task v1 from db and transform to task run response task_v1 = await app.DATABASE.get_task(run.run_id, organization_id=organization_id) if not task_v1: return None + run_engine = RunEngine.skyvern_v1 + if run.task_run_type == RunType.openai_cua: + run_engine = RunEngine.openai_cua return TaskRunResponse( run_id=run.run_id, run_type=run.task_run_type, @@ -27,7 +30,7 @@ async def get_run_response(run_id: str, organization_id: str | None = None) -> R created_at=task_v1.created_at, modified_at=task_v1.modified_at, run_request=TaskRunRequest( - engine=RunEngine.skyvern_v1, + engine=run_engine, prompt=task_v1.navigation_goal, url=task_v1.url, webhook_url=task_v1.webhook_callback_url, diff --git a/skyvern/webeye/actions/parse_actions.py b/skyvern/webeye/actions/parse_actions.py index b379b35b..1d2f2802 100644 --- a/skyvern/webeye/actions/parse_actions.py +++ b/skyvern/webeye/actions/parse_actions.py @@ -297,5 +297,24 @@ def parse_cua_actions( ) break if not actions: - return [CompleteAction(reasoning="No actions generated", verified=True)] + LOG.info( + "Empty action returned by CUA", + task_id=task.task_id, + step_id=step.step_id, + organization_id=task.organization_id, + workflow_run_id=task.workflow_run_id, + response=response.dict(), + ) + complete_action = CompleteAction( + reasoning="No more actions to take", + verified=True, + data_extraction_goal=task.data_extraction_goal, + organization_id=task.organization_id, + workflow_run_id=task.workflow_run_id, + task_id=task.task_id, + step_id=step.step_id, + step_order=step.order, + action_order=0, + ) + return [complete_action] return actions