fix anthropic messages image input pattern (#2239)

This commit is contained in:
Shuchang Zheng
2025-04-28 23:12:45 +08:00
committed by GitHub
parent 1530338cad
commit 9e7906a083
2 changed files with 43 additions and 9 deletions

View File

@@ -517,11 +517,24 @@ class LLMCaller:
if not self.llm_config.supports_vision: if not self.llm_config.supports_vision:
screenshots = None screenshots = None
message_pattern = "openai"
if "ANTHROPIC" in self.llm_key:
message_pattern = "anthropic"
if use_message_history: if use_message_history:
# self.message_history will be updated in place # self.message_history will be updated in place
messages = await llm_messages_builder_with_history(prompt, screenshots, self.message_history) messages = await llm_messages_builder_with_history(
prompt,
screenshots,
self.message_history,
message_pattern=message_pattern,
)
else: else:
messages = await llm_messages_builder_with_history(prompt, screenshots) messages = await llm_messages_builder_with_history(
prompt,
screenshots,
message_pattern=message_pattern,
)
await app.ARTIFACT_MANAGER.create_llm_artifact( await app.ARTIFACT_MANAGER.create_llm_artifact(
data=json.dumps( data=json.dumps(
{ {
@@ -680,11 +693,12 @@ class LLMCaller:
max_tokens = active_parameters.get("max_completion_tokens") or active_parameters.get("max_tokens") or 4096 max_tokens = active_parameters.get("max_completion_tokens") or active_parameters.get("max_tokens") or 4096
model_name = self.llm_config.model_name.replace("bedrock/", "").replace("anthropic/", "") model_name = self.llm_config.model_name.replace("bedrock/", "").replace("anthropic/", "")
betas = active_parameters.get("betas", NOT_GIVEN) betas = active_parameters.get("betas", NOT_GIVEN)
LOG.info("Anthropic request", betas=betas, tools=tools, timeout=timeout)
response = await app.ANTHROPIC_CLIENT.beta.messages.create( response = await app.ANTHROPIC_CLIENT.beta.messages.create(
max_tokens=max_tokens, max_tokens=max_tokens,
messages=messages, messages=messages,
model=model_name, model=model_name,
tools=tools, tools=tools or NOT_GIVEN,
timeout=timeout, timeout=timeout,
betas=betas, betas=betas,
) )

View File

@@ -18,6 +18,7 @@ async def llm_messages_builder(
prompt: str, prompt: str,
screenshots: list[bytes] | None = None, screenshots: list[bytes] | None = None,
add_assistant_prefix: bool = False, add_assistant_prefix: bool = False,
message_pattern: str = "openai",
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
messages: list[dict[str, Any]] = [ messages: list[dict[str, Any]] = [
{ {
@@ -29,14 +30,23 @@ async def llm_messages_builder(
if screenshots: if screenshots:
for screenshot in screenshots: for screenshot in screenshots:
encoded_image = base64.b64encode(screenshot).decode("utf-8") encoded_image = base64.b64encode(screenshot).decode("utf-8")
messages.append( if message_pattern == "anthropic":
{ message = {
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": encoded_image,
},
}
else:
message = {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": f"data:image/png;base64,{encoded_image}", "url": f"data:image/png;base64,{encoded_image}",
}, },
} }
) messages.append(message)
# Anthropic models seem to struggle to always output a valid JSON object, so we need to prefill the response to force it: # Anthropic models seem to struggle to always output a valid JSON object, so we need to prefill the response to force it:
if add_assistant_prefix: if add_assistant_prefix:
return [ return [
@@ -50,6 +60,7 @@ async def llm_messages_builder_with_history(
prompt: str | None = None, prompt: str | None = None,
screenshots: list[bytes] | None = None, screenshots: list[bytes] | None = None,
message_history: list[dict[str, Any]] | None = None, message_history: list[dict[str, Any]] | None = None,
message_pattern: str = "openai",
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
messages: list[dict[str, Any]] = [] messages: list[dict[str, Any]] = []
if message_history: if message_history:
@@ -67,14 +78,23 @@ async def llm_messages_builder_with_history(
if screenshots: if screenshots:
for screenshot in screenshots: for screenshot in screenshots:
encoded_image = base64.b64encode(screenshot).decode("utf-8") encoded_image = base64.b64encode(screenshot).decode("utf-8")
current_user_messages.append( if message_pattern == "anthropic":
{ message = {
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": encoded_image,
},
}
else:
message = {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": f"data:image/png;base64,{encoded_image}", "url": f"data:image/png;base64,{encoded_image}",
}, },
} }
) current_user_messages.append(message)
messages.append({"role": "user", "content": current_user_messages}) messages.append({"role": "user", "content": current_user_messages})
return messages return messages