fix cua engine (#4036)

LawyZheng
2025-11-20 02:24:00 +08:00
committed by GitHub
parent 31a79a1313
commit 0b47dff89d

@@ -1121,6 +1121,31 @@ class LLMCaller:
                thought_cost=call_stats.llm_cost,
            )
+        organization_id = organization_id or (
+            step.organization_id if step else (thought.organization_id if thought else None)
+        )
+        # Track LLM API handler duration, token counts, and cost
+        duration_seconds = time.perf_counter() - start_time
+        LOG.info(
+            "LLM API handler duration metrics",
+            llm_key=self.llm_key,
+            prompt_name=prompt_name,
+            model=self.llm_config.model_name,
+            duration_seconds=duration_seconds,
+            step_id=step.step_id if step else None,
+            thought_id=thought.observer_thought_id if thought else None,
+            organization_id=organization_id,
+            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
+            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
+            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
+            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
+            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
+        )
+        # Raw response is used for CUA engine LLM calls.
+        if raw_response:
+            return response.model_dump(exclude_none=True)
        parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix)
        parsed_response_json = json.dumps(parsed_response, indent=2)
        if step and not is_speculative_step:
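The block added above changes two things for CUA engine calls: the duration/token metrics are emitted before any response parsing, and a raw_response call returns response.model_dump(exclude_none=True) before parse_api_response runs. For reference, a minimal standalone sketch of the logging pattern, assuming structlog (which the keyword-argument style of LOG.info suggests); CallStats and timed_call are illustrative stand-ins, not Skyvern code:

    import time
    from dataclasses import dataclass

    import structlog

    LOG = structlog.get_logger()

    @dataclass
    class CallStats:
        # Stand-in for the diff's call_stats; fields assumed from the log kwargs.
        input_tokens: int | None = None
        output_tokens: int | None = None
        llm_cost: float | None = None

    def timed_call() -> CallStats:
        time.sleep(0.01)  # placeholder for the real LLM API call
        return CallStats(input_tokens=1200, output_tokens=300, llm_cost=0.0042)

    start_time = time.perf_counter()
    call_stats = timed_call()
    duration_seconds = time.perf_counter() - start_time

    # Every stats field is guarded so a missing call_stats (or an unset field)
    # logs as None instead of raising AttributeError, as in the diff.
    LOG.info(
        "LLM API handler duration metrics",
        duration_seconds=duration_seconds,
        input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
        output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
        llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
    )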
@@ -1149,27 +1174,6 @@ class LLMCaller:
                ai_suggestion=ai_suggestion,
            )
-        organization_id = organization_id or (
-            step.organization_id if step else (thought.organization_id if thought else None)
-        )
-        # Track LLM API handler duration, token counts, and cost
-        duration_seconds = time.perf_counter() - start_time
-        LOG.info(
-            "LLM API handler duration metrics",
-            llm_key=self.llm_key,
-            prompt_name=prompt_name,
-            model=self.llm_config.model_name,
-            duration_seconds=duration_seconds,
-            step_id=step.step_id if step else None,
-            thought_id=thought.observer_thought_id if thought else None,
-            organization_id=organization_id,
-            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
-            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
-            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
-            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
-            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
-        )
        if step and is_speculative_step:
            step.speculative_llm_metadata = SpeculativeLLMMetadata(
                prompt=llm_prompt_value,
@@ -1187,9 +1191,6 @@ class LLMCaller:
                llm_cost=call_stats.llm_cost,
            )
-        if raw_response:
-            return response.model_dump(exclude_none=True)
        return parsed_response

    def get_screenshot_resize_target_dimension(self, window_dimension: Resolution | None) -> Resolution:
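Net effect of the three hunks: before this commit the method ran parse_api_response first, did its step/thought bookkeeping, logged the metrics, and only then checked raw_response; now the metrics log and the raw_response early return come first, so a CUA engine call is recorded in the metrics and handed back as a plain Pydantic dump without first passing through parse_api_response, which is presumably where the engine was breaking. A minimal sketch of the reordered tail, assuming a pydantic v2 response model (FakeResponse and call_tail are illustrative, not Skyvern's API):

    from pydantic import BaseModel

    class FakeResponse(BaseModel):
        # Illustrative stand-in for the provider response model.
        output_text: str | None = None

    def call_tail(response: FakeResponse, raw_response: bool) -> dict | str:
        # After the commit: the raw (CUA) path returns before any parsing.
        if raw_response:
            return response.model_dump(exclude_none=True)
        # Non-CUA path: parse the JSON envelope (stubbed here).
        return response.output_text or ""

    print(call_tail(FakeResponse(output_text="hi"), raw_response=True))
    # -> {'output_text': 'hi'}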