diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index c7fd8745..678edc79 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -517,11 +517,24 @@ class LLMCaller:
         if not self.llm_config.supports_vision:
             screenshots = None
 
+        message_pattern = "openai"
+        if "ANTHROPIC" in self.llm_key:
+            message_pattern = "anthropic"
+
         if use_message_history:
             # self.message_history will be updated in place
-            messages = await llm_messages_builder_with_history(prompt, screenshots, self.message_history)
+            messages = await llm_messages_builder_with_history(
+                prompt,
+                screenshots,
+                self.message_history,
+                message_pattern=message_pattern,
+            )
         else:
-            messages = await llm_messages_builder_with_history(prompt, screenshots)
+            messages = await llm_messages_builder_with_history(
+                prompt,
+                screenshots,
+                message_pattern=message_pattern,
+            )
         await app.ARTIFACT_MANAGER.create_llm_artifact(
             data=json.dumps(
                 {
@@ -680,11 +693,12 @@ class LLMCaller:
         max_tokens = active_parameters.get("max_completion_tokens") or active_parameters.get("max_tokens") or 4096
         model_name = self.llm_config.model_name.replace("bedrock/", "").replace("anthropic/", "")
         betas = active_parameters.get("betas", NOT_GIVEN)
+        LOG.info("Anthropic request", betas=betas, tools=tools, timeout=timeout)
         response = await app.ANTHROPIC_CLIENT.beta.messages.create(
             max_tokens=max_tokens,
             messages=messages,
             model=model_name,
-            tools=tools,
+            tools=tools or NOT_GIVEN,
             timeout=timeout,
             betas=betas,
         )
diff --git a/skyvern/forge/sdk/api/llm/utils.py b/skyvern/forge/sdk/api/llm/utils.py
index 811a2247..266ee05a 100644
--- a/skyvern/forge/sdk/api/llm/utils.py
+++ b/skyvern/forge/sdk/api/llm/utils.py
@@ -18,6 +18,7 @@ async def llm_messages_builder(
     prompt: str,
     screenshots: list[bytes] | None = None,
     add_assistant_prefix: bool = False,
+    message_pattern: str = "openai",
 ) -> list[dict[str, Any]]:
     messages: list[dict[str, Any]] = [
         {
@@ -29,14 +30,23 @@ async def llm_messages_builder(
     if screenshots:
         for screenshot in screenshots:
             encoded_image = base64.b64encode(screenshot).decode("utf-8")
-            messages.append(
-                {
+            if message_pattern == "anthropic":
+                message = {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/png",
+                        "data": encoded_image,
+                    },
+                }
+            else:
+                message = {
                     "type": "image_url",
                     "image_url": {
                         "url": f"data:image/png;base64,{encoded_image}",
                     },
                 }
-            )
+            messages.append(message)
     # Anthropic models seems to struggle to always output a valid json object so we need to prefill the response to force it:
     if add_assistant_prefix:
         return [
@@ -50,6 +60,7 @@ async def llm_messages_builder_with_history(
     prompt: str | None = None,
     screenshots: list[bytes] | None = None,
     message_history: list[dict[str, Any]] | None = None,
+    message_pattern: str = "openai",
 ) -> list[dict[str, Any]]:
     messages: list[dict[str, Any]] = []
     if message_history:
@@ -67,14 +78,23 @@ async def llm_messages_builder_with_history(
 
     if screenshots:
         for screenshot in screenshots:
             encoded_image = base64.b64encode(screenshot).decode("utf-8")
-            current_user_messages.append(
-                {
+            if message_pattern == "anthropic":
+                message = {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/png",
+                        "data": encoded_image,
+                    },
+                }
+            else:
+                message = {
                     "type": "image_url",
                     "image_url": {
                         "url": f"data:image/png;base64,{encoded_image}",
                     },
                 }
-            )
+            current_user_messages.append(message)
     messages.append({"role": "user", "content": current_user_messages})
     return messages