fix cua engine (#4036)
@@ -1121,6 +1121,31 @@ class LLMCaller:
                 thought_cost=call_stats.llm_cost,
             )
 
+        organization_id = organization_id or (
+            step.organization_id if step else (thought.organization_id if thought else None)
+        )
+        # Track LLM API handler duration, token counts, and cost
+        duration_seconds = time.perf_counter() - start_time
+        LOG.info(
+            "LLM API handler duration metrics",
+            llm_key=self.llm_key,
+            prompt_name=prompt_name,
+            model=self.llm_config.model_name,
+            duration_seconds=duration_seconds,
+            step_id=step.step_id if step else None,
+            thought_id=thought.observer_thought_id if thought else None,
+            organization_id=organization_id,
+            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
+            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
+            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
+            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
+            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
+        )
+
+        # Raw response is used for CUA engine LLM calls.
+        if raw_response:
+            return response.model_dump(exclude_none=True)
+
         parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix)
         parsed_response_json = json.dumps(parsed_response, indent=2)
         if step and not is_speculative_step:
@@ -1149,27 +1174,6 @@ class LLMCaller:
                 ai_suggestion=ai_suggestion,
             )
 
-        organization_id = organization_id or (
-            step.organization_id if step else (thought.organization_id if thought else None)
-        )
-        # Track LLM API handler duration, token counts, and cost
-        duration_seconds = time.perf_counter() - start_time
-        LOG.info(
-            "LLM API handler duration metrics",
-            llm_key=self.llm_key,
-            prompt_name=prompt_name,
-            model=self.llm_config.model_name,
-            duration_seconds=duration_seconds,
-            step_id=step.step_id if step else None,
-            thought_id=thought.observer_thought_id if thought else None,
-            organization_id=organization_id,
-            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
-            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
-            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
-            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
-            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
-        )
-
         if step and is_speculative_step:
             step.speculative_llm_metadata = SpeculativeLLMMetadata(
                 prompt=llm_prompt_value,
@@ -1187,9 +1191,6 @@ class LLMCaller:
                 llm_cost=call_stats.llm_cost,
             )
 
-        if raw_response:
-            return response.model_dump(exclude_none=True)
-
         return parsed_response
 
     def get_screenshot_resize_target_dimension(self, window_dimension: Resolution | None) -> Resolution:
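For context, a minimal, self-contained sketch of the ordering this diff establishes at the tail of the call path: the duration/token/cost metrics are logged before the raw-response early return, so CUA engine calls (which skip parse_api_response and return the raw model dump) are still covered by the metrics log. Only names taken from the diff (raw_response, duration_seconds, parse_api_response) are real; the stub function and demo values below are hypothetical.

import time
from typing import Any


# Hypothetical stand-in for the real parse_api_response; only the ordering matters here.
def parse_api_response(response: dict[str, Any]) -> dict[str, Any]:
    return {"parsed": response}


def finish_llm_call(response: dict[str, Any], raw_response: bool, start_time: float) -> dict[str, Any]:
    # 1. Metrics first, so every call (including CUA engine calls) gets logged.
    duration_seconds = time.perf_counter() - start_time
    print(f"LLM API handler duration metrics: duration_seconds={duration_seconds:.3f}")

    # 2. CUA engine calls ask for the raw response and return before JSON parsing.
    if raw_response:
        return response

    # 3. All other callers still receive the parsed response.
    return parse_api_response(response)


if __name__ == "__main__":
    t0 = time.perf_counter()
    print(finish_llm_call({"output": "click(100, 200)"}, raw_response=True, start_time=t0))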