GPT-5 Support + Better Logs (#3277)
@@ -273,6 +273,11 @@ class Settings(BaseSettings):
     GROQ_MODEL: str | None = None
     GROQ_API_BASE: str = "https://api.groq.com/openai/v1"

+    # MOONSHOT AI
+    ENABLE_MOONSHOT: bool = False
+    MOONSHOT_API_KEY: str | None = None
+    MOONSHOT_API_BASE: str = "https://api.moonshot.cn/v1"
+
     # TOTP Settings
     TOTP_LIFESPAN_MINUTES: int = 10
     VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40

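Note: the new fields follow the same pydantic BaseSettings pattern as the
surrounding GROQ and TOTP settings, so Moonshot support can be toggled purely
through environment variables. A minimal standalone sketch of that pattern
(the env values are placeholders, not part of this commit; assumes the
pydantic-settings package):

    import os

    # Placeholder values; BaseSettings reads env vars case-insensitively.
    os.environ["ENABLE_MOONSHOT"] = "true"
    os.environ["MOONSHOT_API_KEY"] = "sk-placeholder"

    from pydantic_settings import BaseSettings

    class Settings(BaseSettings):
        ENABLE_MOONSHOT: bool = False
        MOONSHOT_API_KEY: str | None = None
        MOONSHOT_API_BASE: str = "https://api.moonshot.cn/v1"

    settings = Settings()
    assert settings.ENABLE_MOONSHOT is True  # parsed from the env var
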
@@ -259,7 +259,7 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )

-            # Track LLM API handler duration
+            # Track LLM API handler duration, token counts, and cost
             duration_seconds = time.time() - start_time
             LOG.info(
                 "LLM API handler duration metrics",

@@ -270,6 +270,11 @@ class LLMAPIHandlerFactory:
                 step_id=step.step_id if step else None,
                 thought_id=thought.observer_thought_id if thought else None,
                 organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+                input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                output_tokens=completion_tokens if completion_tokens > 0 else None,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                llm_cost=llm_cost if llm_cost > 0 else None,
             )

             return parsed_response

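The added keyword arguments reuse the same "value if value > 0 else None"
convention as the step_id and thought_id fields above, so a metric that was
never populated is logged as an explicit null instead of a misleading 0. A
minimal sketch of the pattern with a structlog-style logger (that LOG is
structlog is an assumption here):

    import structlog

    LOG = structlog.get_logger()

    prompt_tokens, completion_tokens = 1200, 0
    LOG.info(
        "LLM API handler duration metrics",
        input_tokens=prompt_tokens if prompt_tokens > 0 else None,
        output_tokens=completion_tokens if completion_tokens > 0 else None,  # -> None, not 0
    )
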
@@ -403,6 +408,13 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )

+            prompt_tokens = 0
+            completion_tokens = 0
+            reasoning_tokens = 0
+            cached_tokens = 0
+            completion_token_detail = None
+            cached_token_detail = None
+            llm_cost = 0
             if step or thought:
                 try:
                     # FIXME: volcengine doesn't support litellm cost calculation.

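The zero/None defaults guarantee that the LOG.info call further down never
touches an unbound name when neither step nor thought is set. The counters are
presumably filled from the litellm response's usage object inside the try
block; a hedged sketch of how such values are conventionally read (attribute
names follow litellm/OpenAI usage conventions and are guarded with getattr
because not every provider returns the detail objects):

    import litellm

    response = litellm.completion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hello",  # litellm test hook: no network call
    )
    usage = response.usage
    prompt_tokens = usage.prompt_tokens or 0
    completion_tokens = usage.completion_tokens or 0
    completion_token_detail = getattr(usage, "completion_tokens_details", None)
    reasoning_tokens = getattr(completion_token_detail, "reasoning_tokens", 0) or 0
    try:
        llm_cost = litellm.completion_cost(completion_response=response)
    except Exception:
        llm_cost = 0  # mirrors the FIXME above: some providers have no cost table
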
@@ -464,7 +476,7 @@ class LLMAPIHandlerFactory:
                 ai_suggestion=ai_suggestion,
             )

-            # Track LLM API handler duration
+            # Track LLM API handler duration, token counts, and cost
             duration_seconds = time.time() - start_time
             LOG.info(
                 "LLM API handler duration metrics",

@@ -475,6 +487,11 @@ class LLMAPIHandlerFactory:
                 step_id=step.step_id if step else None,
                 thought_id=thought.observer_thought_id if thought else None,
                 organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+                input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                output_tokens=completion_tokens if completion_tokens > 0 else None,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                llm_cost=llm_cost if llm_cost > 0 else None,
             )

             return parsed_response

@@ -678,6 +695,7 @@ class LLMCaller:
                 ai_suggestion=ai_suggestion,
             )

+        call_stats = None
         if step or thought:
             call_stats = await self.get_call_stats(response)
             if step:

@@ -701,7 +719,7 @@ class LLMCaller:
                     cached_token_count=call_stats.cached_tokens,
                     thought_cost=call_stats.llm_cost,
                 )
-        # Track LLM API handler duration
+        # Track LLM API handler duration, token counts, and cost
         duration_seconds = time.perf_counter() - start_time
         LOG.info(
             "LLM API handler duration metrics",

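Worth noting: this LLMCaller path times with time.perf_counter() while the
LLMAPIHandlerFactory hunks above use time.time(). perf_counter is the
monotonic clock intended for measuring elapsed intervals:

    import time

    start_time = time.perf_counter()
    time.sleep(0.01)  # stand-in for the awaited LLM call
    duration_seconds = time.perf_counter() - start_time
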
@@ -712,6 +730,11 @@ class LLMCaller:
             step_id=step.step_id if step else None,
             thought_id=thought.observer_thought_id if thought else None,
             organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+            input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
+            output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
+            reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
+            cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
+            llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
         )
         if raw_response:
             return response.model_dump(exclude_none=True)

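The call_stats = None default added in the earlier hunk is what makes the
"call_stats and call_stats.input_tokens" guards above safe: without it, a call
with neither step nor thought would hit a NameError at this LOG.info. A
self-contained sketch of the guard (LLMCallStats is a hypothetical stand-in
for the real stats type):

    from dataclasses import dataclass

    @dataclass
    class LLMCallStats:  # hypothetical stand-in
        input_tokens: int = 0

    call_stats = None  # only replaced when step or thought is present
    input_tokens = call_stats.input_tokens if call_stats and call_stats.input_tokens else None
    assert input_tokens is None  # no NameError; a 0 count would also map to None
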
@@ -234,7 +234,6 @@ if settings.ENABLE_OPENAI:
         ),
     )

-
 if settings.ENABLE_ANTHROPIC:
     LLMConfigRegistry.register_config(
         "ANTHROPIC_CLAUDE3",

@@ -1205,6 +1204,24 @@ if settings.ENABLE_GROQ:
             ),
         ),
     )
+
+if settings.ENABLE_MOONSHOT:
+    LLMConfigRegistry.register_config(
+        "MOONSHOT_KIMI_K2",
+        LLMConfig(
+            "moonshot/kimi-k2",
+            ["MOONSHOT_API_KEY"],
+            supports_vision=True,
+            add_assistant_prefix=False,
+            max_completion_tokens=32768,
+            litellm_params=LiteLLMParams(
+                api_key=settings.MOONSHOT_API_KEY,
+                api_base=settings.MOONSHOT_API_BASE,
+                api_version=None,
+                model_info={"model_name": "moonshot/kimi-k2"},
+            ),
+        ),
+    )
 # Add support for dynamically configuring OpenAI-compatible LLM models
 # Based on liteLLM's support for OpenAI-compatible APIs
 # See documentation: https://docs.litellm.ai/docs/providers/openai_compatible

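Per the linked liteLLM docs, any OpenAI-compatible endpoint can be reached by
overriding api_base, which is exactly what the LiteLLMParams block above does
for Moonshot. A hedged usage sketch (the key is a placeholder and
mock_response keeps the call offline):

    import litellm

    response = litellm.completion(
        model="moonshot/kimi-k2",
        api_key="sk-placeholder",               # MOONSHOT_API_KEY
        api_base="https://api.moonshot.cn/v1",  # MOONSHOT_API_BASE default
        messages=[{"role": "user", "content": "ping"}],
        mock_response="pong",  # litellm test hook: no network call
    )
    print(response.choices[0].message.content)  # -> pong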