Use json_repair (#1453)

2024-12-30 11:31:29 -08:00
parent 625af80b89
commit b2fdb187fa
3 changed files with 41 additions and 19 deletions
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.

 [[package]]
 name = "aioboto3"
@@ -2164,6 +2164,17 @@ files = [
    {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
 ]

+[[package]]
+name = "json-repair"
+version = "0.34.0"
+description = "A package to repair broken json strings"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "json_repair-0.34.0-py3-none-any.whl", hash = "sha256:a0bb0d3993838b320adf6c82c11c92419d3df794901689bffb3abed208472adf"},
+    {file = "json_repair-0.34.0.tar.gz", hash = "sha256:401d454e039e24425659cfb41e1a7a3800123abfb0d81653282585dc289862cb"},
+]
+
 [[package]]
 name = "json5"
 version = "0.9.27"
@@ -6171,4 +6182,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11,<3.12"
-content-hash = "1ea1b9d066616616dba8e886e4fd51fd274f22a3f93e2997ab35b91a4786d421"
+content-hash = "908718e1dfd3b4a34106c187c59a8627f00b3491fead6b7fac6e48143afb6f17"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ posthog = "^3.7.0"
 aiofiles = "^24.1.0"
 pyotp = "^2.9.0"
 asyncpg = "^0.30.0"
+json-repair = "^0.34.0"

 [tool.poetry.group.dev.dependencies]
 isort = "^5.13.2"
--- a/skyvern/forge/sdk/api/llm/utils.py
+++ b/skyvern/forge/sdk/api/llm/utils.py
@@ -4,6 +4,7 @@ import re
 from typing import Any

 import commentjson
+import json_repair
 import litellm
 import structlog

@@ -51,24 +52,33 @@ def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bo
        # Since we prefilled Anthropic response with "{" we need to add it back to the response to have a valid json object:
        if add_assistant_prefix:
            content = "{" + content
-        content = try_to_extract_json_from_markdown_format(content)
-        if not content:
-            raise EmptyLLMResponseError(str(response))
-        return commentjson.loads(content)
-    except Exception as e:
-        if content:
-            LOG.warning(
-                "Failed to parse LLM response. Will retry auto-fixing the response for unescaped quotes.",
-                exc_info=True,
-                content=content,
-            )
-            try:
-                return fix_and_parse_json_string(content)
-            except Exception as e2:
-                LOG.exception("Failed to auto-fix LLM response.", error=str(e2))
-                raise InvalidLLMResponseFormat(str(response)) from e2

-        raise InvalidLLMResponseFormat(str(response)) from e
+        return json_repair.loads(content)
+
+    except Exception:
+        LOG.warning(
+            "Failed to parse LLM response using json_repair. Will retry auto-fixing the response for unescaped quotes.",
+            exc_info=True,
+        )
+        try:
+            if not content:
+                raise EmptyLLMResponseError(str(response))
+            content = try_to_extract_json_from_markdown_format(content)
+            return commentjson.loads(content)
+        except Exception as e:
+            if content:
+                LOG.warning(
+                    "Failed to parse LLM response. Will retry auto-fixing the response for unescaped quotes.",
+                    exc_info=True,
+                    content=content,
+                )
+                try:
+                    return fix_and_parse_json_string(content)
+                except Exception as e2:
+                    LOG.exception("Failed to auto-fix LLM response.", error=str(e2))
+                    raise InvalidLLMResponseFormat(str(response)) from e2
+
+            raise InvalidLLMResponseFormat(str(response)) from e


 def fix_cutoff_json(json_string: str, error_position: int) -> dict[str, Any]: