Support VolcEngine + migrate UI-TARS to VolcEngine (#2705)
This commit is contained in:
@@ -198,6 +198,7 @@ class LLMAPIHandlerFactory:
|
||||
)
|
||||
if step or thought:
|
||||
try:
|
||||
# FIXME: litellm cost calculation is not supported for volcengine models.
|
||||
llm_cost = litellm.completion_cost(completion_response=response)
|
||||
except Exception as e:
|
||||
LOG.debug("Failed to calculate LLM cost", error=str(e), exc_info=True)
|
||||
@@ -401,6 +402,7 @@ class LLMAPIHandlerFactory:
|
||||
|
||||
if step or thought:
|
||||
try:
|
||||
# FIXME: litellm cost calculation is not supported for volcengine models.
|
||||
llm_cost = litellm.completion_cost(completion_response=response)
|
||||
except Exception as e:
|
||||
LOG.debug("Failed to calculate LLM cost", error=str(e), exc_info=True)
|
||||
@@ -746,7 +748,7 @@ class LLMCaller:
|
||||
tools: list | None = None,
|
||||
timeout: float = settings.LLM_CONFIG_TIMEOUT,
|
||||
**active_parameters: dict[str, Any],
|
||||
) -> ModelResponse | CustomStreamWrapper | AnthropicMessage | Any:
|
||||
) -> ModelResponse | CustomStreamWrapper | AnthropicMessage | UITarsResponse:
|
||||
if self.llm_key and "ANTHROPIC" in self.llm_key:
|
||||
return await self._call_anthropic(messages, tools, timeout, **active_parameters)
|
||||
|
||||
@@ -802,14 +804,14 @@ class LLMCaller:
|
||||
tools: list | None = None,
|
||||
timeout: float = settings.LLM_CONFIG_TIMEOUT,
|
||||
**active_parameters: dict[str, Any],
|
||||
) -> Any:
|
||||
) -> UITarsResponse:
|
||||
"""Custom UI-TARS API call using OpenAI client with VolcEngine endpoint."""
|
||||
max_tokens = active_parameters.get("max_completion_tokens") or active_parameters.get("max_tokens") or 400
|
||||
model_name = self.llm_config.model_name
|
||||
model_name = self.llm_config.model_name.replace("volcengine/", "")
|
||||
|
||||
if not app.UI_TARS_CLIENT:
|
||||
raise ValueError(
|
||||
"UI_TARS_CLIENT not initialized. Please ensure ENABLE_UI_TARS=true and UI_TARS_API_KEY is set."
|
||||
"UI_TARS_CLIENT not initialized. Please ensure ENABLE_VOLCENGINE=true and VOLCENGINE_API_KEY is set."
|
||||
)
|
||||
|
||||
LOG.info(
|
||||
@@ -851,39 +853,18 @@ class LLMCaller:
|
||||
return response
|
||||
|
||||
async def get_call_stats(
|
||||
self, response: ModelResponse | CustomStreamWrapper | AnthropicMessage | dict[str, Any] | Any
|
||||
self, response: ModelResponse | CustomStreamWrapper | AnthropicMessage | UITarsResponse
|
||||
) -> LLMCallStats:
|
||||
empty_call_stats = LLMCallStats()
|
||||
|
||||
# Handle UI-TARS response (UITarsResponse object from _call_ui_tars)
|
||||
if hasattr(response, "usage") and hasattr(response, "choices") and hasattr(response, "model"):
|
||||
usage = response.usage
|
||||
# Use Doubao pricing: ¥0.8/1M input, ¥2/1M output (convert to USD: ~$0.11/$0.28)
|
||||
input_token_cost = (0.11 / 1000000) * usage.get("prompt_tokens", 0)
|
||||
output_token_cost = (0.28 / 1000000) * usage.get("completion_tokens", 0)
|
||||
llm_cost = input_token_cost + output_token_cost
|
||||
|
||||
if isinstance(response, UITarsResponse):
|
||||
ui_tars_usage = response.usage
|
||||
return LLMCallStats(
|
||||
llm_cost=llm_cost,
|
||||
input_tokens=usage.get("prompt_tokens", 0),
|
||||
output_tokens=usage.get("completion_tokens", 0),
|
||||
cached_tokens=0, # UI-TARS doesn't have cached tokens
|
||||
reasoning_tokens=0,
|
||||
)
|
||||
|
||||
# Handle UI-TARS response (dict format - fallback)
|
||||
if isinstance(response, dict) and "choices" in response and "usage" in response:
|
||||
usage = response["usage"]
|
||||
# Use Doubao pricing: ¥0.8/1M input, ¥2/1M output (convert to USD: ~$0.11/$0.28)
|
||||
input_token_cost = (0.11 / 1000000) * usage.get("prompt_tokens", 0)
|
||||
output_token_cost = (0.28 / 1000000) * usage.get("completion_tokens", 0)
|
||||
llm_cost = input_token_cost + output_token_cost
|
||||
|
||||
return LLMCallStats(
|
||||
llm_cost=llm_cost,
|
||||
input_tokens=usage.get("prompt_tokens", 0),
|
||||
output_tokens=usage.get("completion_tokens", 0),
|
||||
cached_tokens=0, # UI-TARS doesn't have cached tokens
|
||||
llm_cost=0, # TODO: calculate the cost according to the price: https://www.volcengine.com/docs/82379/1544106
|
||||
input_tokens=ui_tars_usage.get("prompt_tokens", 0),
|
||||
output_tokens=ui_tars_usage.get("completion_tokens", 0),
|
||||
cached_tokens=0, # only some models support cached tokens
|
||||
reasoning_tokens=0,
|
||||
)
|
||||
|
||||
|
||||
@@ -568,16 +568,46 @@ if settings.ENABLE_AZURE_O3:
|
||||
max_completion_tokens=100000,
|
||||
),
|
||||
)
|
||||
if settings.ENABLE_UI_TARS:
|
||||
if settings.ENABLE_VOLCENGINE:
|
||||
LLMConfigRegistry.register_config(
|
||||
"UI_TARS_SEED1_5_VL",
|
||||
"VOLCENGINE_DOUBAO_SEED_1_6",
|
||||
LLMConfig(
|
||||
settings.UI_TARS_MODEL,
|
||||
["UI_TARS_API_KEY"],
|
||||
"volcengine/doubao-seed-1.6-250615",
|
||||
["VOLCENGINE_API_KEY"],
|
||||
litellm_params=LiteLLMParams(
|
||||
api_base=settings.VOLCENGINE_API_BASE,
|
||||
api_key=settings.VOLCENGINE_API_KEY,
|
||||
),
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
),
|
||||
)
|
||||
|
||||
LLMConfigRegistry.register_config(
|
||||
"VOLCENGINE_DOUBAO_SEED_1_6_FLASH",
|
||||
LLMConfig(
|
||||
"volcengine/doubao-seed-1.6-flash-250615",
|
||||
["VOLCENGINE_API_KEY"],
|
||||
litellm_params=LiteLLMParams(
|
||||
api_base=settings.VOLCENGINE_API_BASE,
|
||||
api_key=settings.VOLCENGINE_API_KEY,
|
||||
),
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
),
|
||||
)
|
||||
|
||||
LLMConfigRegistry.register_config(
|
||||
"VOLCENGINE_DOUBAO_1_5_THINKING_VISION_PRO",
|
||||
LLMConfig(
|
||||
"volcengine/doubao-1-5-thinking-vision-pro-250428",
|
||||
["VOLCENGINE_API_KEY"],
|
||||
litellm_params=LiteLLMParams(
|
||||
api_base=settings.VOLCENGINE_API_BASE,
|
||||
api_key=settings.VOLCENGINE_API_KEY,
|
||||
),
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
max_tokens=400,
|
||||
temperature=0.0,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ class UITarsLLMCaller(LLMCaller):
|
||||
# Handle None case for navigation_goal
|
||||
instruction = task.navigation_goal or "Default navigation task"
|
||||
system_prompt = _build_system_prompt(instruction)
|
||||
self.message_history = [{"role": "user", "content": system_prompt}]
|
||||
self.message_history: list = [{"role": "user", "content": system_prompt}]
|
||||
self._conversation_initialized = True
|
||||
LOG.debug("Initialized UI-TARS conversation", task_id=task.task_id)
|
||||
|
||||
|
||||
@@ -3,21 +3,25 @@
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from anthropic import BaseModel
|
||||
|
||||
class UITarsResponse:
|
||||
|
||||
class Message:
|
||||
def __init__(self, content: str):
|
||||
self.content = content
|
||||
self.role = "assistant"
|
||||
|
||||
|
||||
class Choice:
|
||||
def __init__(self, content: str):
|
||||
self.message = Message(content)
|
||||
|
||||
|
||||
class UITarsResponse(BaseModel):
|
||||
"""A response object that mimics the ModelResponse interface for UI-TARS API responses."""
|
||||
|
||||
def __init__(self, content: str, model: str):
|
||||
# Create choice objects with proper nested structure for parse_api_response
|
||||
class Message:
|
||||
def __init__(self, content: str):
|
||||
self.content = content
|
||||
self.role = "assistant"
|
||||
|
||||
class Choice:
|
||||
def __init__(self, content: str):
|
||||
self.message = Message(content)
|
||||
|
||||
self.choices = [Choice(content)]
|
||||
self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
|
||||
self.model = model
|
||||
|
||||
Reference in New Issue
Block a user