From 5055daad007e8ee640e29532188b45ff72817bba Mon Sep 17 00:00:00 2001
From: Shuchang Zheng
Date: Fri, 22 Aug 2025 13:02:15 -0700
Subject: [PATCH] GPT-5 Support + Better Logs (#3277)

---
 skyvern/config.py                             |  5 +++++
 .../forge/sdk/api/llm/api_handler_factory.py  | 29 ++++++++++++++++++++++++++---
 skyvern/forge/sdk/api/llm/config_registry.py  | 19 ++++++++++++++++++-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/skyvern/config.py b/skyvern/config.py
index 5c52b951..726ec12b 100644
--- a/skyvern/config.py
+++ b/skyvern/config.py
@@ -273,6 +273,11 @@ class Settings(BaseSettings):
     GROQ_MODEL: str | None = None
     GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
 
+    # MOONSHOT AI
+    ENABLE_MOONSHOT: bool = False
+    MOONSHOT_API_KEY: str | None = None
+    MOONSHOT_API_BASE: str = "https://api.moonshot.cn/v1"
+
     # TOTP Settings
     TOTP_LIFESPAN_MINUTES: int = 10
     VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index e4131d3a..95762bac 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -259,7 +259,7 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )
 
-            # Track LLM API handler duration
+            # Track LLM API handler duration, token counts, and cost
             duration_seconds = time.time() - start_time
             LOG.info(
                 "LLM API handler duration metrics",
@@ -270,6 +270,11 @@ class LLMAPIHandlerFactory:
                 step_id=step.step_id if step else None,
                 thought_id=thought.observer_thought_id if thought else None,
                 organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+                input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                output_tokens=completion_tokens if completion_tokens > 0 else None,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                llm_cost=llm_cost if llm_cost > 0 else None,
             )
             return parsed_response
 
@@ -403,6 +408,13 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )
 
+            prompt_tokens = 0
+            completion_tokens = 0
+            reasoning_tokens = 0
+            cached_tokens = 0
+            completion_token_detail = None
+            cached_token_detail = None
+            llm_cost = 0
             if step or thought:
                 try:
                     # FIXME: volcengine doesn't support litellm cost calculation.
@@ -464,7 +476,7 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )
 
-            # Track LLM API handler duration
+            # Track LLM API handler duration, token counts, and cost
             duration_seconds = time.time() - start_time
             LOG.info(
                 "LLM API handler duration metrics",
@@ -475,6 +487,11 @@ class LLMAPIHandlerFactory:
                 step_id=step.step_id if step else None,
                 thought_id=thought.observer_thought_id if thought else None,
                 organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+                input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                output_tokens=completion_tokens if completion_tokens > 0 else None,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                llm_cost=llm_cost if llm_cost > 0 else None,
             )
             return parsed_response
 
@@ -678,6 +695,7 @@ class LLMCaller:
             ai_suggestion=ai_suggestion,
         )
 
+        call_stats = None
        if step or thought:
            call_stats = await self.get_call_stats(response)
            if step:
@@ -701,7 +719,7 @@ class LLMCaller:
                    cached_token_count=call_stats.cached_tokens,
                    thought_cost=call_stats.llm_cost,
                )
-        # Track LLM API handler duration
+        # Track LLM API handler duration, token counts, and cost
         duration_seconds = time.perf_counter() - start_time
         LOG.info(
             "LLM API handler duration metrics",
@@ -712,6 +730,11 @@ class LLMCaller:
             step_id=step.step_id if step else None,
             thought_id=thought.observer_thought_id if thought else None,
             organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
+            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
+            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
+            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
+            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
         )
         if raw_response:
             return response.model_dump(exclude_none=True)
diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py
index f354f0df..508bfb2b 100644
--- a/skyvern/forge/sdk/api/llm/config_registry.py
+++ b/skyvern/forge/sdk/api/llm/config_registry.py
@@ -234,7 +234,6 @@ if settings.ENABLE_OPENAI:
         ),
     )
 
-
 if settings.ENABLE_ANTHROPIC:
     LLMConfigRegistry.register_config(
         "ANTHROPIC_CLAUDE3",
@@ -1205,6 +1204,24 @@ if settings.ENABLE_GROQ:
             ),
         ),
     )
+
+if settings.ENABLE_MOONSHOT:
+    LLMConfigRegistry.register_config(
+        "MOONSHOT_KIMI_K2",
+        LLMConfig(
+            "moonshot/kimi-k2",
+            ["MOONSHOT_API_KEY"],
+            supports_vision=True,
+            add_assistant_prefix=False,
+            max_completion_tokens=32768,
+            litellm_params=LiteLLMParams(
+                api_key=settings.MOONSHOT_API_KEY,
+                api_base=settings.MOONSHOT_API_BASE,
+                api_version=None,
+                model_info={"model_name": "moonshot/kimi-k2"},
+            ),
+        ),
+    )
 # Add support for dynamically configuring OpenAI-compatible LLM models
 # Based on liteLLM's support for OpenAI-compatible APIs
 # See documentation: https://docs.litellm.ai/docs/providers/openai_compatible