fix anthropic messages image input pattern (#2239)
This commit is contained in:
@@ -517,11 +517,24 @@ class LLMCaller:
|
|||||||
if not self.llm_config.supports_vision:
|
if not self.llm_config.supports_vision:
|
||||||
screenshots = None
|
screenshots = None
|
||||||
|
|
||||||
|
message_pattern = "openai"
|
||||||
|
if "ANTHROPIC" in self.llm_key:
|
||||||
|
message_pattern = "anthropic"
|
||||||
|
|
||||||
if use_message_history:
|
if use_message_history:
|
||||||
# self.message_history will be updated in place
|
# self.message_history will be updated in place
|
||||||
messages = await llm_messages_builder_with_history(prompt, screenshots, self.message_history)
|
messages = await llm_messages_builder_with_history(
|
||||||
|
prompt,
|
||||||
|
screenshots,
|
||||||
|
self.message_history,
|
||||||
|
message_pattern=message_pattern,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
messages = await llm_messages_builder_with_history(prompt, screenshots)
|
messages = await llm_messages_builder_with_history(
|
||||||
|
prompt,
|
||||||
|
screenshots,
|
||||||
|
message_pattern=message_pattern,
|
||||||
|
)
|
||||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||||
data=json.dumps(
|
data=json.dumps(
|
||||||
{
|
{
|
||||||
@@ -680,11 +693,12 @@ class LLMCaller:
|
|||||||
max_tokens = active_parameters.get("max_completion_tokens") or active_parameters.get("max_tokens") or 4096
|
max_tokens = active_parameters.get("max_completion_tokens") or active_parameters.get("max_tokens") or 4096
|
||||||
model_name = self.llm_config.model_name.replace("bedrock/", "").replace("anthropic/", "")
|
model_name = self.llm_config.model_name.replace("bedrock/", "").replace("anthropic/", "")
|
||||||
betas = active_parameters.get("betas", NOT_GIVEN)
|
betas = active_parameters.get("betas", NOT_GIVEN)
|
||||||
|
LOG.info("Anthropic request", betas=betas, tools=tools, timeout=timeout)
|
||||||
response = await app.ANTHROPIC_CLIENT.beta.messages.create(
|
response = await app.ANTHROPIC_CLIENT.beta.messages.create(
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
model=model_name,
|
model=model_name,
|
||||||
tools=tools,
|
tools=tools or NOT_GIVEN,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
betas=betas,
|
betas=betas,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ async def llm_messages_builder(
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
screenshots: list[bytes] | None = None,
|
screenshots: list[bytes] | None = None,
|
||||||
add_assistant_prefix: bool = False,
|
add_assistant_prefix: bool = False,
|
||||||
|
message_pattern: str = "openai",
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
messages: list[dict[str, Any]] = [
|
messages: list[dict[str, Any]] = [
|
||||||
{
|
{
|
||||||
@@ -29,14 +30,23 @@ async def llm_messages_builder(
|
|||||||
if screenshots:
|
if screenshots:
|
||||||
for screenshot in screenshots:
|
for screenshot in screenshots:
|
||||||
encoded_image = base64.b64encode(screenshot).decode("utf-8")
|
encoded_image = base64.b64encode(screenshot).decode("utf-8")
|
||||||
messages.append(
|
if message_pattern == "anthropic":
|
||||||
{
|
message = {
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "base64",
|
||||||
|
"media_type": "image/png",
|
||||||
|
"data": encoded_image,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
message = {
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {
|
||||||
"url": f"data:image/png;base64,{encoded_image}",
|
"url": f"data:image/png;base64,{encoded_image}",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
messages.append(message)
|
||||||
# Anthropic models seem to struggle to always output a valid JSON object, so we need to prefill the response to force it:
|
# Anthropic models seem to struggle to always output a valid JSON object, so we need to prefill the response to force it:
|
||||||
if add_assistant_prefix:
|
if add_assistant_prefix:
|
||||||
return [
|
return [
|
||||||
@@ -50,6 +60,7 @@ async def llm_messages_builder_with_history(
|
|||||||
prompt: str | None = None,
|
prompt: str | None = None,
|
||||||
screenshots: list[bytes] | None = None,
|
screenshots: list[bytes] | None = None,
|
||||||
message_history: list[dict[str, Any]] | None = None,
|
message_history: list[dict[str, Any]] | None = None,
|
||||||
|
message_pattern: str = "openai",
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
messages: list[dict[str, Any]] = []
|
messages: list[dict[str, Any]] = []
|
||||||
if message_history:
|
if message_history:
|
||||||
@@ -67,14 +78,23 @@ async def llm_messages_builder_with_history(
|
|||||||
if screenshots:
|
if screenshots:
|
||||||
for screenshot in screenshots:
|
for screenshot in screenshots:
|
||||||
encoded_image = base64.b64encode(screenshot).decode("utf-8")
|
encoded_image = base64.b64encode(screenshot).decode("utf-8")
|
||||||
current_user_messages.append(
|
if message_pattern == "anthropic":
|
||||||
{
|
message = {
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "base64",
|
||||||
|
"media_type": "image/png",
|
||||||
|
"data": encoded_image,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
message = {
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {
|
||||||
"url": f"data:image/png;base64,{encoded_image}",
|
"url": f"data:image/png;base64,{encoded_image}",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
current_user_messages.append(message)
|
||||||
messages.append({"role": "user", "content": current_user_messages})
|
messages.append({"role": "user", "content": current_user_messages})
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user