GPT-5 Support + Better Logs (#3277)
This commit is contained in:
@@ -273,6 +273,11 @@ class Settings(BaseSettings):
|
|||||||
GROQ_MODEL: str | None = None
|
GROQ_MODEL: str | None = None
|
||||||
GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
|
GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
|
||||||
|
|
||||||
|
# MOONSHOT AI
|
||||||
|
ENABLE_MOONSHOT: bool = False
|
||||||
|
MOONSHOT_API_KEY: str | None = None
|
||||||
|
MOONSHOT_API_BASE: str = "https://api.moonshot.cn/v1"
|
||||||
|
|
||||||
# TOTP Settings
|
# TOTP Settings
|
||||||
TOTP_LIFESPAN_MINUTES: int = 10
|
TOTP_LIFESPAN_MINUTES: int = 10
|
||||||
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
|
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
|
||||||
|
|||||||
@@ -259,7 +259,7 @@ class LLMAPIHandlerFactory:
|
|||||||
ai_suggestion=ai_suggestion,
|
ai_suggestion=ai_suggestion,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Track LLM API handler duration
|
# Track LLM API handler duration, token counts, and cost
|
||||||
duration_seconds = time.time() - start_time
|
duration_seconds = time.time() - start_time
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"LLM API handler duration metrics",
|
"LLM API handler duration metrics",
|
||||||
@@ -270,6 +270,11 @@ class LLMAPIHandlerFactory:
|
|||||||
step_id=step.step_id if step else None,
|
step_id=step.step_id if step else None,
|
||||||
thought_id=thought.observer_thought_id if thought else None,
|
thought_id=thought.observer_thought_id if thought else None,
|
||||||
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
|
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
|
||||||
|
input_tokens=prompt_tokens if prompt_tokens > 0 else None,
|
||||||
|
output_tokens=completion_tokens if completion_tokens > 0 else None,
|
||||||
|
reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
|
||||||
|
cached_tokens=cached_tokens if cached_tokens > 0 else None,
|
||||||
|
llm_cost=llm_cost if llm_cost > 0 else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
return parsed_response
|
return parsed_response
|
||||||
@@ -403,6 +408,13 @@ class LLMAPIHandlerFactory:
|
|||||||
ai_suggestion=ai_suggestion,
|
ai_suggestion=ai_suggestion,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
prompt_tokens = 0
|
||||||
|
completion_tokens = 0
|
||||||
|
reasoning_tokens = 0
|
||||||
|
cached_tokens = 0
|
||||||
|
completion_token_detail = None
|
||||||
|
cached_token_detail = None
|
||||||
|
llm_cost = 0
|
||||||
if step or thought:
|
if step or thought:
|
||||||
try:
|
try:
|
||||||
# FIXME: volcengine doesn't support litellm cost calculation.
|
# FIXME: volcengine doesn't support litellm cost calculation.
|
||||||
@@ -464,7 +476,7 @@ class LLMAPIHandlerFactory:
|
|||||||
ai_suggestion=ai_suggestion,
|
ai_suggestion=ai_suggestion,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Track LLM API handler duration
|
# Track LLM API handler duration, token counts, and cost
|
||||||
duration_seconds = time.time() - start_time
|
duration_seconds = time.time() - start_time
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"LLM API handler duration metrics",
|
"LLM API handler duration metrics",
|
||||||
@@ -475,6 +487,11 @@ class LLMAPIHandlerFactory:
|
|||||||
step_id=step.step_id if step else None,
|
step_id=step.step_id if step else None,
|
||||||
thought_id=thought.observer_thought_id if thought else None,
|
thought_id=thought.observer_thought_id if thought else None,
|
||||||
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
|
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
|
||||||
|
input_tokens=prompt_tokens if prompt_tokens > 0 else None,
|
||||||
|
output_tokens=completion_tokens if completion_tokens > 0 else None,
|
||||||
|
reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
|
||||||
|
cached_tokens=cached_tokens if cached_tokens > 0 else None,
|
||||||
|
llm_cost=llm_cost if llm_cost > 0 else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
return parsed_response
|
return parsed_response
|
||||||
@@ -678,6 +695,7 @@ class LLMCaller:
|
|||||||
ai_suggestion=ai_suggestion,
|
ai_suggestion=ai_suggestion,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
call_stats = None
|
||||||
if step or thought:
|
if step or thought:
|
||||||
call_stats = await self.get_call_stats(response)
|
call_stats = await self.get_call_stats(response)
|
||||||
if step:
|
if step:
|
||||||
@@ -701,7 +719,7 @@ class LLMCaller:
|
|||||||
cached_token_count=call_stats.cached_tokens,
|
cached_token_count=call_stats.cached_tokens,
|
||||||
thought_cost=call_stats.llm_cost,
|
thought_cost=call_stats.llm_cost,
|
||||||
)
|
)
|
||||||
# Track LLM API handler duration
|
# Track LLM API handler duration, token counts, and cost
|
||||||
duration_seconds = time.perf_counter() - start_time
|
duration_seconds = time.perf_counter() - start_time
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"LLM API handler duration metrics",
|
"LLM API handler duration metrics",
|
||||||
@@ -712,6 +730,11 @@ class LLMCaller:
|
|||||||
step_id=step.step_id if step else None,
|
step_id=step.step_id if step else None,
|
||||||
thought_id=thought.observer_thought_id if thought else None,
|
thought_id=thought.observer_thought_id if thought else None,
|
||||||
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
|
organization_id=step.organization_id if step else (thought.organization_id if thought else None),
|
||||||
|
input_tokens=call_stats.input_tokens if call_stats and call_stats.input_tokens else None,
|
||||||
|
output_tokens=call_stats.output_tokens if call_stats and call_stats.output_tokens else None,
|
||||||
|
reasoning_tokens=call_stats.reasoning_tokens if call_stats and call_stats.reasoning_tokens else None,
|
||||||
|
cached_tokens=call_stats.cached_tokens if call_stats and call_stats.cached_tokens else None,
|
||||||
|
llm_cost=call_stats.llm_cost if call_stats and call_stats.llm_cost else None,
|
||||||
)
|
)
|
||||||
if raw_response:
|
if raw_response:
|
||||||
return response.model_dump(exclude_none=True)
|
return response.model_dump(exclude_none=True)
|
||||||
|
|||||||
@@ -234,7 +234,6 @@ if settings.ENABLE_OPENAI:
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if settings.ENABLE_ANTHROPIC:
|
if settings.ENABLE_ANTHROPIC:
|
||||||
LLMConfigRegistry.register_config(
|
LLMConfigRegistry.register_config(
|
||||||
"ANTHROPIC_CLAUDE3",
|
"ANTHROPIC_CLAUDE3",
|
||||||
@@ -1205,6 +1204,24 @@ if settings.ENABLE_GROQ:
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if settings.ENABLE_MOONSHOT:
|
||||||
|
LLMConfigRegistry.register_config(
|
||||||
|
"MOONSHOT_KIMI_K2",
|
||||||
|
LLMConfig(
|
||||||
|
"moonshot/kimi-k2",
|
||||||
|
["MOONSHOT_API_KEY"],
|
||||||
|
supports_vision=True,
|
||||||
|
add_assistant_prefix=False,
|
||||||
|
max_completion_tokens=32768,
|
||||||
|
litellm_params=LiteLLMParams(
|
||||||
|
api_key=settings.MOONSHOT_API_KEY,
|
||||||
|
api_base=settings.MOONSHOT_API_BASE,
|
||||||
|
api_version=None,
|
||||||
|
model_info={"model_name": "moonshot/kimi-k2"},
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
# Add support for dynamically configuring OpenAI-compatible LLM models
|
# Add support for dynamically configuring OpenAI-compatible LLM models
|
||||||
# Based on liteLLM's support for OpenAI-compatible APIs
|
# Based on liteLLM's support for OpenAI-compatible APIs
|
||||||
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible
|
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible
|
||||||
|
|||||||
Reference in New Issue
Block a user