From 0b47dff89d0a57877f686a76f32cc0640e2fe6ce Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Thu, 20 Nov 2025 02:24:00 +0800 Subject: [PATCH] fix cua engine (#4036) --- .../forge/sdk/api/llm/api_handler_factory.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py index 547ccfe2..f16dbe3a 100644 --- a/skyvern/forge/sdk/api/llm/api_handler_factory.py +++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py @@ -1121,6 +1121,31 @@ class LLMCaller: thought_cost=call_stats.llm_cost, ) + organization_id = organization_id or ( + step.organization_id if step else (thought.organization_id if thought else None) + ) + # Track LLM API handler duration, token counts, and cost + duration_seconds = time.perf_counter() - start_time + LOG.info( + "LLM API handler duration metrics", + llm_key=self.llm_key, + prompt_name=prompt_name, + model=self.llm_config.model_name, + duration_seconds=duration_seconds, + step_id=step.step_id if step else None, + thought_id=thought.observer_thought_id if thought else None, + organization_id=organization_id, + input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None, + output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None, + reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None, + cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None, + llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None, + ) + + # Raw response is used for CUA engine LLM calls. + if raw_response: + return response.model_dump(exclude_none=True) + parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix) parsed_response_json = json.dumps(parsed_response, indent=2) if step and not is_speculative_step: @@ -1149,27 +1174,6 @@ class LLMCaller: ai_suggestion=ai_suggestion, ) - organization_id = organization_id or ( - step.organization_id if step else (thought.organization_id if thought else None) - ) - # Track LLM API handler duration, token counts, and cost - duration_seconds = time.perf_counter() - start_time - LOG.info( - "LLM API handler duration metrics", - llm_key=self.llm_key, - prompt_name=prompt_name, - model=self.llm_config.model_name, - duration_seconds=duration_seconds, - step_id=step.step_id if step else None, - thought_id=thought.observer_thought_id if thought else None, - organization_id=organization_id, - input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None, - output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None, - reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None, - cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None, - llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None, - ) - if step and is_speculative_step: step.speculative_llm_metadata = SpeculativeLLMMetadata( prompt=llm_prompt_value, @@ -1187,9 +1191,6 @@ class LLMCaller: llm_cost=call_stats.llm_cost, ) - if raw_response: - return response.model_dump(exclude_none=True) - return parsed_response def get_screenshot_resize_target_dimension(self, window_dimension: Resolution | None) -> Resolution: