From b0724d278129fe7c6aad4f375c974f75cab229b9 Mon Sep 17 00:00:00 2001 From: Asher Foa Date: Thu, 12 Jun 2025 10:50:16 -0400 Subject: [PATCH] vendor and update commentjson since it is abandoned (#2689) --- poetry.lock | 33 ++--- pyproject.toml | 2 +- skyvern/forge/sdk/api/llm/commentjson.py | 167 +++++++++++++++++++++++ skyvern/forge/sdk/api/llm/utils.py | 2 +- 4 files changed, 182 insertions(+), 22 deletions(-) create mode 100644 skyvern/forge/sdk/api/llm/commentjson.py diff --git a/poetry.lock b/poetry.lock index 962b60b2..1f6ddd88 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "about-time" @@ -1179,20 +1179,6 @@ traitlets = ">=4" [package.extras] test = ["pytest"] -[[package]] -name = "commentjson" -version = "0.9.0" -description = "Add Python and JavaScript style comments in your JSON files." -optional = false -python-versions = "*" -groups = ["main"] -files = [ - {file = "commentjson-0.9.0.tar.gz", hash = "sha256:42f9f231d97d93aff3286a4dc0de39bfd91ae823d1d9eba9fa901fe0c7113dd4"}, -] - -[package.dependencies] -lark-parser = ">=0.7.1,<0.8.0" - [[package]] name = "cryptography" version = "44.0.2" @@ -3657,16 +3643,23 @@ test = ["pyfakefs", "pytest (>=6,!=8.1.*)"] type = ["pygobject-stubs", "pytest-mypy", "shtab", "types-pywin32"] [[package]] -name = "lark-parser" -version = "0.7.8" +name = "lark" +version = "1.2.2" description = "a modern parsing library" optional = false -python-versions = "*" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "lark-parser-0.7.8.tar.gz", hash = "sha256:26215ebb157e6fb2ee74319aa4445b9f3b7e456e26be215ce19fdaaa901c20a4"}, + {file = "lark-1.2.2-py3-none-any.whl", hash = "sha256:c2276486b02f0f1b90be155f2c8ba4a8e194d42775786db622faccd652d8e80c"}, + {file = "lark-1.2.2.tar.gz", hash = "sha256:ca807d0162cd16cef15a8feecb862d7319e7a09bdb13aef927968e45040fed80"}, ] +[package.extras] +atomic-cache = ["atomicwrites"] +interegular = ["interegular (>=0.3.1,<0.4.0)"] +nearley = ["js2py"] +regex = ["regex"] + [[package]] name = "lazy-object-proxy" version = "1.11.0" @@ -8759,4 +8752,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.14" -content-hash = "42bde1cb27d171911f966360781659a19ef5df896a220161c7d780c05a6a4758" +content-hash = "f4b8e2deae971c4cc7812d81bd636f3869c1ff2317e3092220af66bd1430f94f" diff --git a/pyproject.toml b/pyproject.toml index dbe43b45..80343ccf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,6 @@ alembic = "^1.12.1" python-jose = {extras = ["cryptography"], version = "^3.3.0"} cachetools = "^5.3.2" aioboto3 = "^14.3.0" -commentjson = "^0.9.0" asyncache = "^0.3.1" orjson = "^3.9.10" structlog = "^23.2.0" @@ -75,6 +74,7 @@ alive-progress = "^3.2.0" colorama = "^0.4.6" onepassword-sdk = "0.3.0" types-boto3 = {extras = ["full"], version = "^1.38.31"} +lark = "^1.2.2" [tool.poetry.group.dev.dependencies] isort = "^5.13.2" diff --git a/skyvern/forge/sdk/api/llm/commentjson.py b/skyvern/forge/sdk/api/llm/commentjson.py new file mode 100644 index 00000000..419b3e1a --- /dev/null +++ b/skyvern/forge/sdk/api/llm/commentjson.py @@ -0,0 +1,167 @@ +# vendored from https://github.com/vaidik/commentjson/blob/master/commentjson/commentjson.py since that project seems to be abandoned. + +import codecs +import json +import traceback +from typing import Any, TypeVar + +import lark +from lark import Lark +from lark.lexer import Token +from lark.reconstruct import Reconstructor +from lark.tree import Tree + +parser = Lark( + """ + ?start: value + ?value: object + | array + | string + | SIGNED_NUMBER -> number + | "true" -> true + | "false" -> false + | "null" -> null + array : "[" [value ("," value)*] TRAILING_COMMA? "]" + object : "{" [pair ("," pair)*] TRAILING_COMMA? "}" + pair : string ":" value + string : ESCAPED_STRING + + COMMENT: /(#|\\/\\/)[^\\n]*/ + TRAILING_COMMA: "," + + %import common.ESCAPED_STRING + %import common.SIGNED_NUMBER + %import common.WS + %ignore WS + %ignore COMMENT +""", + maybe_placeholders=False, + parser="lalr", +) + +serializer = Reconstructor(parser) + + +def detect_encoding(b: bytes) -> str: + """ + Taken from `json` package in CPython 3.7. + + Source can be found at https://bit.ly/2OHqCIK. + """ + + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return "utf-32" + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return "utf-16" + if bstartswith(codecs.BOM_UTF8): + return "utf-8-sig" + + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return "utf-16-be" if b[1] else "utf-32-be" + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return "utf-16-le" if b[2] or b[3] else "utf-32-le" + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return "utf-16-be" + if not b[1]: + # XX 00 - utf-16-le + return "utf-16-le" + # default + return "utf-8" + + +class BaseException(Exception): + """Base exception to be implemented and raised while handling exceptions + raised by libraries used in `commentjson`. + + Sets message of self in a way that it clearly calls out that the exception + was raised by another library, along with the entire stacktrace of the + exception raised by the other library. + """ + + library: str | None = None + message: str + + def __init__(self, exc: Exception) -> None: + if self.library is None: + raise NotImplementedError("Value of library must be set in the inherited exception class.") + + tb = traceback.format_exc() + tb = "\n".join(" " * 4 + line_ for line_ in tb.split("\n")) + + error = getattr(exc, "msg", None) or getattr(exc, "message", None) or str(exc) + self.message = "\n".join( + [ + "JSON Library Exception\n", + ("Exception thrown by library ({}): \033[4;37m{}\033[0m\n".format(self.library, error)), + "%s" % tb, + ] + ) + Exception.__init__(self, self.message) + + +class ParserException(BaseException): + """Exception raised when the `lark` raises an exception i.e. + the exception is not caused by `commentjson` and caused by the use of + `lark` in `commentjson`. + """ + + library = "lark" + + +class JSONLibraryException(BaseException): + """Exception raised when the `json` raises an exception i.e. + the exception is not caused by `commentjson` and caused by the use of + `json` in `commentjson`. + + .. note:: + + As of now, ``commentjson`` supports only standard library's ``json`` + module. It might start supporting other widely-used contributed JSON + libraries in the future. + """ + + library = "json" + + +T = TypeVar("T", Tree, Token) + + +def _remove_trailing_commas(tree: T) -> T: + if isinstance(tree, Tree): + tree.children = [ + _remove_trailing_commas(ch) + for ch in tree.children + if not (isinstance(ch, Token) and ch.type == "TRAILING_COMMA") + ] + return tree + + +def loads(text: str | bytes | bytearray, *args: Any, **kwargs: Any) -> Any: + """Deserialize `text` (a `str` or `unicode` instance containing a JSON + document with Python or JavaScript like comments) to a Python object. + + :param text: serialized JSON string with or without comments. + :param kwargs: all the arguments that `json.loads `_ accepts. + :returns: dict or list. + """ + + if isinstance(text, (bytes, bytearray)): + text = text.decode(detect_encoding(text), "surrogatepass") + + try: + parsed = _remove_trailing_commas(parser.parse(text)) + final_text = serializer.reconstruct(parsed) + except lark.exceptions.UnexpectedCharacters: + raise ValueError("Unable to parse text", text) + + return json.loads(final_text, *args, **kwargs) diff --git a/skyvern/forge/sdk/api/llm/utils.py b/skyvern/forge/sdk/api/llm/utils.py index c3a75a0e..b9e5a835 100644 --- a/skyvern/forge/sdk/api/llm/utils.py +++ b/skyvern/forge/sdk/api/llm/utils.py @@ -4,12 +4,12 @@ import json import re from typing import Any -import commentjson import json_repair import litellm import structlog from skyvern.constants import MAX_IMAGE_MESSAGES +from skyvern.forge.sdk.api.llm import commentjson from skyvern.forge.sdk.api.llm.exceptions import EmptyLLMResponseError, InvalidLLMResponseFormat LOG = structlog.get_logger()