vendor and update commentjson since it is abandoned (#2689)
This commit is contained in:
167
skyvern/forge/sdk/api/llm/commentjson.py
Normal file
167
skyvern/forge/sdk/api/llm/commentjson.py
Normal file
@@ -0,0 +1,167 @@
|
||||
# vendored from https://github.com/vaidik/commentjson/blob/master/commentjson/commentjson.py since that project seems to be abandoned.
|
||||
|
||||
import codecs
|
||||
import json
|
||||
import traceback
|
||||
from typing import Any, TypeVar
|
||||
|
||||
import lark
|
||||
from lark import Lark
|
||||
from lark.lexer import Token
|
||||
from lark.reconstruct import Reconstructor
|
||||
from lark.tree import Tree
|
||||
|
||||
parser = Lark(
|
||||
"""
|
||||
?start: value
|
||||
?value: object
|
||||
| array
|
||||
| string
|
||||
| SIGNED_NUMBER -> number
|
||||
| "true" -> true
|
||||
| "false" -> false
|
||||
| "null" -> null
|
||||
array : "[" [value ("," value)*] TRAILING_COMMA? "]"
|
||||
object : "{" [pair ("," pair)*] TRAILING_COMMA? "}"
|
||||
pair : string ":" value
|
||||
string : ESCAPED_STRING
|
||||
|
||||
COMMENT: /(#|\\/\\/)[^\\n]*/
|
||||
TRAILING_COMMA: ","
|
||||
|
||||
%import common.ESCAPED_STRING
|
||||
%import common.SIGNED_NUMBER
|
||||
%import common.WS
|
||||
%ignore WS
|
||||
%ignore COMMENT
|
||||
""",
|
||||
maybe_placeholders=False,
|
||||
parser="lalr",
|
||||
)
|
||||
|
||||
serializer = Reconstructor(parser)
|
||||
|
||||
|
||||
def detect_encoding(b: bytes) -> str:
|
||||
"""
|
||||
Taken from `json` package in CPython 3.7.
|
||||
|
||||
Source can be found at https://bit.ly/2OHqCIK.
|
||||
"""
|
||||
|
||||
bstartswith = b.startswith
|
||||
if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
|
||||
return "utf-32"
|
||||
if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
|
||||
return "utf-16"
|
||||
if bstartswith(codecs.BOM_UTF8):
|
||||
return "utf-8-sig"
|
||||
|
||||
if len(b) >= 4:
|
||||
if not b[0]:
|
||||
# 00 00 -- -- - utf-32-be
|
||||
# 00 XX -- -- - utf-16-be
|
||||
return "utf-16-be" if b[1] else "utf-32-be"
|
||||
if not b[1]:
|
||||
# XX 00 00 00 - utf-32-le
|
||||
# XX 00 00 XX - utf-16-le
|
||||
# XX 00 XX -- - utf-16-le
|
||||
return "utf-16-le" if b[2] or b[3] else "utf-32-le"
|
||||
elif len(b) == 2:
|
||||
if not b[0]:
|
||||
# 00 XX - utf-16-be
|
||||
return "utf-16-be"
|
||||
if not b[1]:
|
||||
# XX 00 - utf-16-le
|
||||
return "utf-16-le"
|
||||
# default
|
||||
return "utf-8"
|
||||
|
||||
|
||||
class BaseException(Exception):
|
||||
"""Base exception to be implemented and raised while handling exceptions
|
||||
raised by libraries used in `commentjson`.
|
||||
|
||||
Sets message of self in a way that it clearly calls out that the exception
|
||||
was raised by another library, along with the entire stacktrace of the
|
||||
exception raised by the other library.
|
||||
"""
|
||||
|
||||
library: str | None = None
|
||||
message: str
|
||||
|
||||
def __init__(self, exc: Exception) -> None:
|
||||
if self.library is None:
|
||||
raise NotImplementedError("Value of library must be set in the inherited exception class.")
|
||||
|
||||
tb = traceback.format_exc()
|
||||
tb = "\n".join(" " * 4 + line_ for line_ in tb.split("\n"))
|
||||
|
||||
error = getattr(exc, "msg", None) or getattr(exc, "message", None) or str(exc)
|
||||
self.message = "\n".join(
|
||||
[
|
||||
"JSON Library Exception\n",
|
||||
("Exception thrown by library ({}): \033[4;37m{}\033[0m\n".format(self.library, error)),
|
||||
"%s" % tb,
|
||||
]
|
||||
)
|
||||
Exception.__init__(self, self.message)
|
||||
|
||||
|
||||
class ParserException(BaseException):
|
||||
"""Exception raised when the `lark` raises an exception i.e.
|
||||
the exception is not caused by `commentjson` and caused by the use of
|
||||
`lark` in `commentjson`.
|
||||
"""
|
||||
|
||||
library = "lark"
|
||||
|
||||
|
||||
class JSONLibraryException(BaseException):
|
||||
"""Exception raised when the `json` raises an exception i.e.
|
||||
the exception is not caused by `commentjson` and caused by the use of
|
||||
`json` in `commentjson`.
|
||||
|
||||
.. note::
|
||||
|
||||
As of now, ``commentjson`` supports only standard library's ``json``
|
||||
module. It might start supporting other widely-used contributed JSON
|
||||
libraries in the future.
|
||||
"""
|
||||
|
||||
library = "json"
|
||||
|
||||
|
||||
T = TypeVar("T", Tree, Token)
|
||||
|
||||
|
||||
def _remove_trailing_commas(tree: T) -> T:
|
||||
if isinstance(tree, Tree):
|
||||
tree.children = [
|
||||
_remove_trailing_commas(ch)
|
||||
for ch in tree.children
|
||||
if not (isinstance(ch, Token) and ch.type == "TRAILING_COMMA")
|
||||
]
|
||||
return tree
|
||||
|
||||
|
||||
def loads(text: str | bytes | bytearray, *args: Any, **kwargs: Any) -> Any:
|
||||
"""Deserialize `text` (a `str` or `unicode` instance containing a JSON
|
||||
document with Python or JavaScript like comments) to a Python object.
|
||||
|
||||
:param text: serialized JSON string with or without comments.
|
||||
:param kwargs: all the arguments that `json.loads <http://docs.python.org/
|
||||
2/library/json.html#json.loads>`_ accepts.
|
||||
:returns: dict or list.
|
||||
"""
|
||||
|
||||
if isinstance(text, (bytes, bytearray)):
|
||||
text = text.decode(detect_encoding(text), "surrogatepass")
|
||||
|
||||
try:
|
||||
parsed = _remove_trailing_commas(parser.parse(text))
|
||||
final_text = serializer.reconstruct(parsed)
|
||||
except lark.exceptions.UnexpectedCharacters:
|
||||
raise ValueError("Unable to parse text", text)
|
||||
|
||||
return json.loads(final_text, *args, **kwargs)
|
||||
Reference in New Issue
Block a user