GPT-5 Support + Better Logs (#3277)

This commit is contained in:
Shuchang Zheng
2025-08-22 13:02:15 -07:00
committed by GitHub
parent 9d307a2933
commit 5055daad00
3 changed files with 49 additions and 4 deletions

View File

@@ -273,6 +273,11 @@ class Settings(BaseSettings):
GROQ_MODEL: str | None = None
GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
# MOONSHOT AI
ENABLE_MOONSHOT: bool = False
MOONSHOT_API_KEY: str | None = None
MOONSHOT_API_BASE: str = "https://api.moonshot.cn/v1"
# TOTP Settings
TOTP_LIFESPAN_MINUTES: int = 10
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40

View File

@@ -259,7 +259,7 @@ class LLMAPIHandlerFactory:
ai_suggestion=ai_suggestion,
)
# Track LLM API handler duration
# Track LLM API handler duration, token counts, and cost
duration_seconds = time.time() - start_time
LOG.info(
"LLM API handler duration metrics",
@@ -270,6 +270,11 @@ class LLMAPIHandlerFactory:
step_id=step.step_id if step else None,
thought_id=thought.observer_thought_id if thought else None,
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
input_tokens=prompt_tokens if prompt_tokens > 0 else None,
output_tokens=completion_tokens if completion_tokens > 0 else None,
reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
cached_tokens=cached_tokens if cached_tokens > 0 else None,
llm_cost=llm_cost if llm_cost > 0 else None,
)
return parsed_response
@@ -403,6 +408,13 @@ class LLMAPIHandlerFactory:
ai_suggestion=ai_suggestion,
)
prompt_tokens = 0
completion_tokens = 0
reasoning_tokens = 0
cached_tokens = 0
completion_token_detail = None
cached_token_detail = None
llm_cost = 0
if step or thought:
try:
# FIXME: volcengine doesn't support litellm cost calculation.
@@ -464,7 +476,7 @@ class LLMAPIHandlerFactory:
ai_suggestion=ai_suggestion,
)
# Track LLM API handler duration
# Track LLM API handler duration, token counts, and cost
duration_seconds = time.time() - start_time
LOG.info(
"LLM API handler duration metrics",
@@ -475,6 +487,11 @@ class LLMAPIHandlerFactory:
step_id=step.step_id if step else None,
thought_id=thought.observer_thought_id if thought else None,
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
input_tokens=prompt_tokens if prompt_tokens > 0 else None,
output_tokens=completion_tokens if completion_tokens > 0 else None,
reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
cached_tokens=cached_tokens if cached_tokens > 0 else None,
llm_cost=llm_cost if llm_cost > 0 else None,
)
return parsed_response
@@ -678,6 +695,7 @@ class LLMCaller:
ai_suggestion=ai_suggestion,
)
call_stats = None
if step or thought:
call_stats = await self.get_call_stats(response)
if step:
@@ -701,7 +719,7 @@ class LLMCaller:
cached_token_count=call_stats.cached_tokens,
thought_cost=call_stats.llm_cost,
)
# Track LLM API handler duration
# Track LLM API handler duration, token counts, and cost
duration_seconds = time.perf_counter() - start_time
LOG.info(
"LLM API handler duration metrics",
@@ -712,6 +730,11 @@ class LLMCaller:
step_id=step.step_id if step else None,
thought_id=thought.observer_thought_id if thought else None,
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
)
if raw_response:
return response.model_dump(exclude_none=True)

View File

@@ -234,7 +234,6 @@ if settings.ENABLE_OPENAI:
),
)
if settings.ENABLE_ANTHROPIC:
LLMConfigRegistry.register_config(
"ANTHROPIC_CLAUDE3",
@@ -1205,6 +1204,24 @@ if settings.ENABLE_GROQ:
),
),
)
# Moonshot AI: register the Kimi K2 model under the "MOONSHOT_KIMI_K2" config key.
# The registration only runs when settings.ENABLE_MOONSHOT is truthy.
if settings.ENABLE_MOONSHOT:
LLMConfigRegistry.register_config(
"MOONSHOT_KIMI_K2",
LLMConfig(
"moonshot/kimi-k2",
# presumably the names of the settings/env vars that must be set for this config — verify against LLMConfig
["MOONSHOT_API_KEY"],
# NOTE(review): confirm that this model actually accepts image input before relying on this flag
supports_vision=True,
add_assistant_prefix=False,
max_completion_tokens=32768,
# Credentials and endpoint are taken from settings.MOONSHOT_API_KEY / settings.MOONSHOT_API_BASE.
litellm_params=LiteLLMParams(
api_key=settings.MOONSHOT_API_KEY,
api_base=settings.MOONSHOT_API_BASE,
api_version=None,
model_info={"model_name": "moonshot/kimi-k2"},
),
),
)
# Add support for dynamically configuring OpenAI-compatible LLM models
# Based on liteLLM's support for OpenAI-compatible APIs
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible