add LLMCaller that supports message history (#2204)
@@ -19,7 +19,7 @@ from skyvern.forge.sdk.api.llm.exceptions import (
     LLMProviderErrorRetryableTask,
 )
 from skyvern.forge.sdk.api.llm.models import LLMAPIHandler, LLMConfig, LLMRouterConfig, dummy_llm_api_handler
-from skyvern.forge.sdk.api.llm.utils import llm_messages_builder, parse_api_response
+from skyvern.forge.sdk.api.llm.utils import llm_messages_builder, llm_messages_builder_with_history, parse_api_response
 from skyvern.forge.sdk.artifact.models import ArtifactType
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.models import Step
@@ -444,3 +444,199 @@ class LLMAPIHandlerFactory:
         if llm_key in cls._custom_handlers:
             raise DuplicateCustomLLMProviderError(llm_key)
         cls._custom_handlers[llm_key] = handler
+
+
+class LLMCaller:
+    """
+    An LLMCaller instance defines the LLM configs and keeps the chat history if needed.
+    """
+
+    def __init__(self, llm_key: str, base_parameters: dict[str, Any] | None = None):
+        self.llm_key = llm_key
+        self.llm_config = LLMConfigRegistry.get_config(llm_key)
+        self.base_parameters = base_parameters
+        self.message_history: list[dict[str, Any]] = []
+
+    async def call(
+        self,
+        prompt: str,
+        prompt_name: str,
+        step: Step | None = None,
+        task_v2: TaskV2 | None = None,
+        thought: Thought | None = None,
+        ai_suggestion: AISuggestion | None = None,
+        screenshots: list[bytes] | None = None,
+        parameters: dict[str, Any] | None = None,
+        tools: list | None = None,
+        use_message_history: bool = False,
+    ) -> dict[str, Any]:
+        start_time = time.perf_counter()
+        active_parameters = self.base_parameters or {}
+        if parameters is None:
+            parameters = LLMAPIHandlerFactory.get_api_parameters(self.llm_config)
+
+        active_parameters.update(parameters)
+        if self.llm_config.litellm_params:  # type: ignore
+            active_parameters.update(self.llm_config.litellm_params)  # type: ignore
+
+        context = skyvern_context.current()
+        if context and len(context.hashed_href_map) > 0:
+            await app.ARTIFACT_MANAGER.create_llm_artifact(
+                data=json.dumps(context.hashed_href_map, indent=2).encode("utf-8"),
+                artifact_type=ArtifactType.HASHED_HREF_MAP,
+                step=step,
+                task_v2=task_v2,
+                thought=thought,
+                ai_suggestion=ai_suggestion,
+            )
+
+        await app.ARTIFACT_MANAGER.create_llm_artifact(
+            data=prompt.encode("utf-8"),
+            artifact_type=ArtifactType.LLM_PROMPT,
+            screenshots=screenshots,
+            step=step,
+            task_v2=task_v2,
+            thought=thought,
+            ai_suggestion=ai_suggestion,
+        )
+
+        if not self.llm_config.supports_vision:
+            screenshots = None
+
+        if use_message_history:
+            # self.message_history will be updated in place
+            messages = await llm_messages_builder_with_history(prompt, screenshots, self.message_history)
+        else:
+            messages = await llm_messages_builder_with_history(prompt, screenshots)
+        await app.ARTIFACT_MANAGER.create_llm_artifact(
+            data=json.dumps(
+                {
+                    "model": self.llm_config.model_name,
+                    "messages": messages,
+                    # we're not using active_parameters here because it may contain sensitive information
+                    **parameters,
+                }
+            ).encode("utf-8"),
+            artifact_type=ArtifactType.LLM_REQUEST,
+            step=step,
+            task_v2=task_v2,
+            thought=thought,
+            ai_suggestion=ai_suggestion,
+        )
+        t_llm_request = time.perf_counter()
+        try:
+            response = await litellm.acompletion(
+                model=self.llm_config.model_name,
+                messages=messages,
+                tools=tools,
+                timeout=settings.LLM_CONFIG_TIMEOUT,
+                **active_parameters,
+            )
+            if use_message_history:
+                # only update message_history when the request is successful
+                self.message_history = messages
+        except litellm.exceptions.APIError as e:
+            raise LLMProviderErrorRetryableTask(self.llm_key) from e
+        except litellm.exceptions.ContextWindowExceededError as e:
+            LOG.exception(
+                "Context window exceeded",
+                llm_key=self.llm_key,
+                model=self.llm_config.model_name,
+            )
+            raise SkyvernContextWindowExceededError() from e
+        except CancelledError:
+            t_llm_cancelled = time.perf_counter()
+            LOG.error(
+                "LLM request got cancelled",
+                llm_key=self.llm_key,
+                model=self.llm_config.model_name,
+                duration=t_llm_cancelled - t_llm_request,
+            )
+            raise LLMProviderError(self.llm_key)
+        except Exception as e:
+            LOG.exception("LLM request failed unexpectedly", llm_key=self.llm_key)
+            raise LLMProviderError(self.llm_key) from e
+
+        await app.ARTIFACT_MANAGER.create_llm_artifact(
+            data=response.model_dump_json(indent=2).encode("utf-8"),
+            artifact_type=ArtifactType.LLM_RESPONSE,
+            step=step,
+            task_v2=task_v2,
+            thought=thought,
+            ai_suggestion=ai_suggestion,
+        )
+
+        if step or thought:
+            try:
+                llm_cost = litellm.completion_cost(completion_response=response)
+            except Exception as e:
+                LOG.debug("Failed to calculate LLM cost", error=str(e), exc_info=True)
+                llm_cost = 0
+            prompt_tokens = response.get("usage", {}).get("prompt_tokens", 0)
+            completion_tokens = response.get("usage", {}).get("completion_tokens", 0)
+            reasoning_tokens = 0
+            completion_token_detail = response.get("usage", {}).get("completion_tokens_details")
+            if completion_token_detail:
+                reasoning_tokens = completion_token_detail.reasoning_tokens or 0
+            cached_tokens = 0
+            cached_token_detail = response.get("usage", {}).get("prompt_tokens_details")
+            if cached_token_detail:
+                cached_tokens = cached_token_detail.cached_tokens or 0
+            if step:
+                await app.DATABASE.update_step(
+                    task_id=step.task_id,
+                    step_id=step.step_id,
+                    organization_id=step.organization_id,
+                    incremental_cost=llm_cost,
+                    incremental_input_tokens=prompt_tokens if prompt_tokens > 0 else None,
+                    incremental_output_tokens=completion_tokens if completion_tokens > 0 else None,
+                    incremental_reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+                    incremental_cached_tokens=cached_tokens if cached_tokens > 0 else None,
+                )
+            if thought:
+                await app.DATABASE.update_thought(
+                    thought_id=thought.observer_thought_id,
+                    organization_id=thought.organization_id,
+                    input_token_count=prompt_tokens if prompt_tokens > 0 else None,
+                    output_token_count=completion_tokens if completion_tokens > 0 else None,
+                    reasoning_token_count=reasoning_tokens if reasoning_tokens > 0 else None,
+                    cached_token_count=cached_tokens if cached_tokens > 0 else None,
+                    thought_cost=llm_cost,
+                )
+        parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix)
+        await app.ARTIFACT_MANAGER.create_llm_artifact(
+            data=json.dumps(parsed_response, indent=2).encode("utf-8"),
+            artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
+            step=step,
+            task_v2=task_v2,
+            thought=thought,
+            ai_suggestion=ai_suggestion,
+        )
+
+        if context and len(context.hashed_href_map) > 0:
+            llm_content = json.dumps(parsed_response)
+            rendered_content = Template(llm_content).render(context.hashed_href_map)
+            parsed_response = json.loads(rendered_content)
+            await app.ARTIFACT_MANAGER.create_llm_artifact(
+                data=json.dumps(parsed_response, indent=2).encode("utf-8"),
+                artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
+                step=step,
+                task_v2=task_v2,
+                thought=thought,
+                ai_suggestion=ai_suggestion,
+            )
+
+        # Track LLM API handler duration
+        duration_seconds = time.perf_counter() - start_time
+        LOG.info(
+            "LLM API handler duration metrics",
+            llm_key=self.llm_key,
+            prompt_name=prompt_name,
+            model=self.llm_config.model_name,
+            duration_seconds=duration_seconds,
+            step_id=step.step_id if step else None,
+            thought_id=thought.observer_thought_id if thought else None,
+            organization_id=step.organization_id if step else (thought.organization_id if thought else None),
+        )
+
+        return parsed_response
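
For context on how the new class is meant to be driven, here is a minimal usage sketch. It is not part of the diff: it assumes `LLMCaller` is importable from the same module as `LLMAPIHandlerFactory`, that a key such as `OPENAI_GPT4O` is registered, and that the call happens inside a running Skyvern step so artifact storage and cost tracking have a target; `review_page`, `step`, and `screenshots` are illustrative names.

```python
# Hypothetical usage sketch for the new LLMCaller; not part of this commit.
# Assumes LLMCaller lives alongside LLMAPIHandlerFactory and that "OPENAI_GPT4O"
# is a registered llm_key in this deployment.
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMCaller


async def review_page(step, screenshots: list[bytes]) -> tuple[dict, dict]:
    caller = LLMCaller(llm_key="OPENAI_GPT4O")

    # First turn: the prompt (and screenshots, if the model supports vision)
    # becomes the first user message and is stored in caller.message_history
    # once the request succeeds.
    summary = await caller.call(
        prompt="Summarize the visible form fields on this page.",
        prompt_name="summarize-form",
        step=step,
        screenshots=screenshots,
        use_message_history=True,
    )

    # Second turn: the follow-up prompt is appended to the stored history,
    # so the model sees the earlier user message as conversation context.
    required = await caller.call(
        prompt="Which of those fields are required?",
        prompt_name="required-fields",
        step=step,
        use_message_history=True,
    )
    return summary, required
```

Note that, as written in the diff, only the user-side messages are persisted into `message_history`; the assistant reply is returned through `parse_api_response` rather than appended to the stored history.
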
@@ -318,7 +318,7 @@ if settings.ENABLE_BEDROCK:
             ["AWS_REGION"],
             supports_vision=True,
             add_assistant_prefix=True,
-            max_completion_tokens=200000,
+            max_completion_tokens=64000,
         ),
     )

@@ -1,4 +1,5 @@
 import base64
+import copy
 import json
 import re
 from typing import Any
@@ -45,6 +46,36 @@ async def llm_messages_builder(
     return [{"role": "user", "content": messages}]
 
 
+async def llm_messages_builder_with_history(
+    prompt: str,
+    screenshots: list[bytes] | None = None,
+    message_history: list[dict[str, Any]] | None = None,
+) -> list[dict[str, Any]]:
+    messages: list[dict[str, Any]] = []
+    if message_history:
+        messages = copy.deepcopy(message_history)
+    current_user_messages: list[dict[str, Any]] = [
+        {
+            "type": "text",
+            "text": prompt,
+        }
+    ]
+
+    if screenshots:
+        for screenshot in screenshots:
+            encoded_image = base64.b64encode(screenshot).decode("utf-8")
+            current_user_messages.append(
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{encoded_image}",
+                    },
+                }
+            )
+    messages.append({"role": "user", "content": current_user_messages})
+    return messages
+
+
 def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bool = False) -> dict[str, Any]:
     content = None
     try:
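
To make the message shape concrete, here is a small illustration, not part of the diff, of what `llm_messages_builder_with_history` returns and how history accumulates when the previous result is passed back in. The screenshot bytes and the `main` wrapper are placeholders for illustration only.

```python
# Hypothetical illustration of llm_messages_builder_with_history; not part of this commit.
import asyncio

from skyvern.forge.sdk.api.llm.utils import llm_messages_builder_with_history


async def main() -> None:
    # First turn: no history yet, so the result is a single user message
    # containing one text part and one base64-encoded image part.
    first_turn = await llm_messages_builder_with_history(
        prompt="Describe this page.",
        screenshots=[b"\x89PNG..."],  # placeholder bytes standing in for a real screenshot
    )
    # first_turn == [
    #     {"role": "user", "content": [
    #         {"type": "text", "text": "Describe this page."},
    #         {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
    #     ]}
    # ]

    # Second turn: the prior messages are deep-copied and the new prompt is
    # appended as another user message, so the caller can keep passing the
    # growing list back in as message_history.
    second_turn = await llm_messages_builder_with_history(
        prompt="Now list the form fields.",
        message_history=first_turn,
    )
    assert len(second_turn) == 2  # previous user message + new user message


asyncio.run(main())
```
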