From 5055daad007e8ee640e29532188b45ff72817bba Mon Sep 17 00:00:00 2001
From: Shuchang Zheng
Date: Fri, 22 Aug 2025 13:02:15 -0700
Subject: [PATCH] GPT-5 Support + Better Logs (#3277)

---
 skyvern/config.py                             |  5 +++++
 .../forge/sdk/api/llm/api_handler_factory.py  | 29 ++++++++++++++++++++++++++---
 skyvern/forge/sdk/api/llm/config_registry.py  | 19 ++++++++++++++++++-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/skyvern/config.py b/skyvern/config.py
index 5c52b951..726ec12b 100644
--- a/skyvern/config.py
+++ b/skyvern/config.py
@@ -273,6 +273,11 @@ class Settings(BaseSettings):
     GROQ_MODEL: str | None = None
     GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
 
+    # MOONSHOT AI
+    ENABLE_MOONSHOT: bool = False
+    MOONSHOT_API_KEY: str | None = None
+    MOONSHOT_API_BASE: str = "https://api.moonshot.cn/v1"
+
     # TOTP Settings
     TOTP_LIFESPAN_MINUTES: int = 10
     VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index e4131d3a..95762bac 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -259,7 +259,7 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )
 
-            # Track LLM API handler duration
+            # Track LLM API handler duration, token counts, and cost
             duration_seconds = time.time() - start_time
             LOG.info(
                 "LLM API handler duration metrics",
@@ -270,6 +270,11 @@ class LLMAPIHandlerFactory:
                 step_id=step.step_id if step else None,
                 thought_id=thought.observer_thought_id if thought else None,
                 organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+                input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                output_tokens=completion_tokens if completion_tokens > 0 else None,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                llm_cost=llm_cost if llm_cost > 0 else None,
             )
             return parsed_response
 
@@ -403,6 +408,13 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )
 
+            prompt_tokens = 0
+            completion_tokens = 0
+            reasoning_tokens = 0
+            cached_tokens = 0
+            completion_token_detail = None
+            cached_token_detail = None
+            llm_cost = 0
             if step or thought:
                 try:
                     # FIXME: volcengine doesn't support litellm cost calculation.
@@ -464,7 +476,7 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )
 
-            # Track LLM API handler duration
+            # Track LLM API handler duration, token counts, and cost
             duration_seconds = time.time() - start_time
             LOG.info(
                 "LLM API handler duration metrics",
@@ -475,6 +487,11 @@ class LLMAPIHandlerFactory:
                 step_id=step.step_id if step else None,
                 thought_id=thought.observer_thought_id if thought else None,
                 organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+                input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                output_tokens=completion_tokens if completion_tokens > 0 else None,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                llm_cost=llm_cost if llm_cost > 0 else None,
             )
             return parsed_response
 
@@ -678,6 +695,7 @@ class LLMCaller:
             ai_suggestion=ai_suggestion,
         )
 
+        call_stats = None
        if step or thought:
            call_stats = await self.get_call_stats(response)
            if step:
@@ -701,7 +719,7 @@ class LLMCaller:
                    cached_token_count=call_stats.cached_tokens,
                    thought_cost=call_stats.llm_cost,
                )
-        # Track LLM API handler duration
+        # Track LLM API handler duration, token counts, and cost
         duration_seconds = time.perf_counter() - start_time
         LOG.info(
             "LLM API handler duration metrics",
@@ -712,6 +730,11 @@ class LLMCaller:
             step_id=step.step_id if step else None,
             thought_id=thought.observer_thought_id if thought else None,
             organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
+            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
+            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
+            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
+            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
         )
         if raw_response:
             return response.model_dump(exclude_none=True)
diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py
index f354f0df..508bfb2b 100644
--- a/skyvern/forge/sdk/api/llm/config_registry.py
+++ b/skyvern/forge/sdk/api/llm/config_registry.py
@@ -234,7 +234,6 @@ if settings.ENABLE_OPENAI:
         ),
     )
 
-
 if settings.ENABLE_ANTHROPIC:
     LLMConfigRegistry.register_config(
         "ANTHROPIC_CLAUDE3",
@@ -1205,6 +1204,24 @@ if settings.ENABLE_GROQ:
             ),
         ),
     )
+
+if settings.ENABLE_MOONSHOT:
+    LLMConfigRegistry.register_config(
+        "MOONSHOT_KIMI_K2",
+        LLMConfig(
+            "moonshot/kimi-k2",
+            ["MOONSHOT_API_KEY"],
+            supports_vision=True,
+            add_assistant_prefix=False,
+            max_completion_tokens=32768,
+            litellm_params=LiteLLMParams(
+                api_key=settings.MOONSHOT_API_KEY,
+                api_base=settings.MOONSHOT_API_BASE,
+                api_version=None,
+                model_info={"model_name": "moonshot/kimi-k2"},
+            ),
+        ),
+    )
 # Add support for dynamically configuring OpenAI-compatible LLM models
 # Based on liteLLM's support for OpenAI-compatible APIs
 # See documentation: https://docs.litellm.ai/docs/providers/openai_compatible