From b2fdb187fa0cdf898aa14ce94b4bb153ea219811 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Mon, 30 Dec 2024 11:31:29 -0800 Subject: [PATCH] Use json_repair (#1453) --- poetry.lock | 15 ++++++++-- pyproject.toml | 1 + skyvern/forge/sdk/api/llm/utils.py | 44 ++++++++++++++++++------------ 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/poetry.lock b/poetry.lock index 52eab935..75b5bcff 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "aioboto3" @@ -2164,6 +2164,17 @@ files = [ {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] +[[package]] +name = "json-repair" +version = "0.34.0" +description = "A package to repair broken json strings" +optional = false +python-versions = ">=3.9" +files = [ + {file = "json_repair-0.34.0-py3-none-any.whl", hash = "sha256:a0bb0d3993838b320adf6c82c11c92419d3df794901689bffb3abed208472adf"}, + {file = "json_repair-0.34.0.tar.gz", hash = "sha256:401d454e039e24425659cfb41e1a7a3800123abfb0d81653282585dc289862cb"}, +] + [[package]] name = "json5" version = "0.9.27" @@ -6171,4 +6182,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.11,<3.12" -content-hash = "1ea1b9d066616616dba8e886e4fd51fd274f22a3f93e2997ab35b91a4786d421" +content-hash = "908718e1dfd3b4a34106c187c59a8627f00b3491fead6b7fac6e48143afb6f17" diff --git a/pyproject.toml b/pyproject.toml index bf2f73e4..f25a33f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ posthog = "^3.7.0" aiofiles = "^24.1.0" pyotp = "^2.9.0" asyncpg = "^0.30.0" +json-repair = "^0.34.0" [tool.poetry.group.dev.dependencies] isort = "^5.13.2" diff --git a/skyvern/forge/sdk/api/llm/utils.py b/skyvern/forge/sdk/api/llm/utils.py index 1f7ba93d..91042088 100644 --- a/skyvern/forge/sdk/api/llm/utils.py +++ b/skyvern/forge/sdk/api/llm/utils.py @@ -4,6 +4,7 @@ import re from typing import Any import commentjson +import json_repair import litellm import structlog @@ -51,24 +52,33 @@ def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bo # Since we prefilled Anthropic response with "{" we need to add it back to the response to have a valid json object: if add_assistant_prefix: content = "{" + content - content = try_to_extract_json_from_markdown_format(content) - if not content: - raise EmptyLLMResponseError(str(response)) - return commentjson.loads(content) - except Exception as e: - if content: - LOG.warning( - "Failed to parse LLM response. Will retry auto-fixing the response for unescaped quotes.", - exc_info=True, - content=content, - ) - try: - return fix_and_parse_json_string(content) - except Exception as e2: - LOG.exception("Failed to auto-fix LLM response.", error=str(e2)) - raise InvalidLLMResponseFormat(str(response)) from e2 - raise InvalidLLMResponseFormat(str(response)) from e + return json_repair.loads(content) + + except Exception: + LOG.warning( + "Failed to parse LLM response using json_repair. Will retry auto-fixing the response for unescaped quotes.", + exc_info=True, + ) + try: + if not content: + raise EmptyLLMResponseError(str(response)) + content = try_to_extract_json_from_markdown_format(content) + return commentjson.loads(content) + except Exception as e: + if content: + LOG.warning( + "Failed to parse LLM response. Will retry auto-fixing the response for unescaped quotes.", + exc_info=True, + content=content, + ) + try: + return fix_and_parse_json_string(content) + except Exception as e2: + LOG.exception("Failed to auto-fix LLM response.", error=str(e2)) + raise InvalidLLMResponseFormat(str(response)) from e2 + + raise InvalidLLMResponseFormat(str(response)) from e def fix_cutoff_json(json_string: str, error_position: int) -> dict[str, Any]: