Files
Dorod-Sky/skyvern/forge/sdk/forge_log.py

344 lines
11 KiB
Python

import logging
from types import TracebackType
import structlog
from structlog.typing import EventDict
from skyvern._version import __version__
from skyvern.config import settings
from skyvern.forge.sdk.core import skyvern_context
LOGGING_LEVEL_MAP: dict[str, int] = {
"DEBUG": logging.DEBUG,
"INFO": logging.INFO,
"WARNING": logging.WARNING,
"ERROR": logging.ERROR,
"CRITICAL": logging.CRITICAL,
}
def add_kv_pairs_to_msg(logger: logging.Logger, method_name: str, event_dict: EventDict) -> EventDict:
"""
A custom processor to add key-value pairs to the 'msg' field.
"""
# Add context to the log
context = skyvern_context.current()
if context:
if context.request_id:
event_dict["request_id"] = context.request_id
if context.organization_id:
event_dict["organization_id"] = context.organization_id
if context.organization_name:
event_dict["organization_name"] = context.organization_name
if context.step_id:
event_dict["step_id"] = context.step_id
if context.task_id:
event_dict["task_id"] = context.task_id
if context.run_id:
event_dict["run_id"] = context.run_id
if context.workflow_id:
event_dict["workflow_id"] = context.workflow_id
if context.workflow_run_id:
event_dict["workflow_run_id"] = context.workflow_run_id
if context.workflow_permanent_id:
event_dict["workflow_permanent_id"] = context.workflow_permanent_id
if context.task_v2_id:
event_dict["task_v2_id"] = context.task_v2_id
if context.browser_session_id:
event_dict["browser_session_id"] = context.browser_session_id
if context.browser_container_ip:
event_dict["browser_container_ip"] = context.browser_container_ip
if context.browser_container_task_arn:
event_dict["browser_container_task_arn"] = context.browser_container_task_arn
# Add env to the log
event_dict["env"] = settings.ENV
event_dict["version"] = __version__
if method_name not in ["info", "warning", "error", "critical", "exception"]:
# Only modify the log for these log levels
return event_dict
# Assuming 'event' or 'msg' is the field to update
msg_field = event_dict.get("msg", "")
# Add key-value pairs
kv_pairs = {k: v for k, v in event_dict.items() if k not in ["msg", "timestamp", "level"]}
if kv_pairs:
additional_info = ", ".join(f"{k}={v}" for k, v in kv_pairs.items())
msg_field += f" | {additional_info}"
event_dict["msg"] = msg_field
return event_dict
def skyvern_logs_processor(logger: logging.Logger, method_name: str, event_dict: EventDict) -> EventDict:
"""
A custom processor to add skyvern logs to the context
"""
if method_name not in ["info", "warning", "error", "critical", "exception"]:
return event_dict
context = skyvern_context.current()
if context:
log_entry = dict(event_dict)
context.log.append(log_entry)
return event_dict
def add_filename_section(logger: logging.Logger, method_name: str, event_dict: EventDict) -> EventDict:
"""
Add a fixed-width, bracketed filename:lineno section after the log level for console logs.
"""
filename = event_dict.get("filename", "")
lineno = event_dict.get("lineno", "")
padded = f"[{filename:<30}:{lineno:<4}]" if filename else "[unknown ]"
event_dict["file"] = padded
event_dict.pop("filename", None)
event_dict.pop("lineno", None)
return event_dict
class CustomConsoleRenderer(structlog.dev.ConsoleRenderer):
"""
Show the bracketed filename:lineno section after the log level for console logs, and
colorize it.
"""
def __init__(self) -> None:
super().__init__(sort_keys=False)
def __call__(self, logger: logging.Logger, name: str, event_dict: EventDict) -> str:
file_section = event_dict.pop("file", "")
file_section_colored = f"\x1b[90m{file_section}\x1b[0m" if file_section else ""
rendered = super().__call__(logger, name, event_dict)
first_bracket = rendered.find("]")
if first_bracket != -1:
return rendered[: first_bracket + 1] + f" {file_section_colored}" + rendered[first_bracket + 1 :]
else:
return f"{file_section_colored} {rendered}"
def add_error_processor(logger: logging.Logger, method_name: str, event_dict: EventDict) -> EventDict:
"""
A custom processor extending error logs with additional info
"""
import sys # noqa: PLC0415
exc_info = event_dict.get("exc_info")
if exc_info:
if exc_info is True:
exc_info = sys.exc_info()
if isinstance(exc_info, tuple) and len(exc_info) >= 2:
exc_type = exc_info[0]
exc_traceback: TracebackType | None = exc_info[2] if len(exc_info) >= 3 else None
if exc_type is not None:
# Get the fully qualified exception name (module.ClassName)
error_type = (
f"{exc_type.__module__}.{exc_type.__name__}"
if hasattr(exc_type, "__module__")
else exc_type.__name__
)
event_dict["error_type"] = error_type
# Categorize the exception
category = _categorize_exception(exc_type, exc_type.__name__)
event_dict["error_category"] = category
# Generate exception hash from stack trace (stable identifier)
if exc_traceback is not None:
exc_hash = _generate_exception_hash(exc_type, exc_traceback)
event_dict["exception_hash"] = exc_hash
return event_dict
def _generate_exception_hash(exc_type: type, tb: TracebackType) -> str:
"""
Generate a stable hash for an exception based on:
- Exception type
- Stack trace (filename, line number, function name)
Excludes dynamic data like error messages to ensure the same
error from the same location always produces the same hash.
"""
import hashlib # noqa: PLC0415
from pathlib import Path # noqa: PLC0415
hasher = hashlib.sha256()
hasher.update(f"{exc_type.__module__}.{exc_type.__name__}".encode())
current_tb: TracebackType | None = tb
while current_tb is not None:
frame = current_tb.tb_frame
code = frame.f_code
filename = Path(code.co_filename).name
lineno = current_tb.tb_lineno
func_name = code.co_name
hasher.update(f"{filename}:{lineno}:{func_name}".encode())
current_tb = current_tb.tb_next
return hasher.hexdigest()[:16]
def _categorize_exception(exc_type: type, exc_name: str) -> str:
"""
Categorize an exception into TRANSIENT, BUG, or ERROR.
TRANSIENT: Network/IO errors that might succeed on retry
BUG: Programming errors indicating bugs
ERROR: Everything else
"""
# Check if it's a subclass of known exception types
# TRANSIENT - IO and network related errors
transient_exceptions = (
IOError,
OSError,
ConnectionError,
TimeoutError,
ConnectionRefusedError,
ConnectionAbortedError,
ConnectionResetError,
BrokenPipeError,
)
# BUG - Programming errors that indicate bugs
bug_exceptions = (
ZeroDivisionError,
AttributeError,
TypeError,
KeyError,
IndexError,
NameError,
AssertionError,
NotImplementedError,
RecursionError,
UnboundLocalError,
IndentationError,
SyntaxError,
)
# Check for common HTTP/network library exceptions by name
# (to avoid import dependencies)
transient_patterns = [
"HTTPError",
"RequestException",
"Timeout",
"ConnectionError",
"ConnectTimeout",
"ReadTimeout",
"ProxyError",
"SSLError",
"ChunkedEncodingError",
"ContentDecodingError",
"StreamConsumedError",
"RetryError",
"MaxRetryError",
"URLError",
"ProtocolError",
]
# Check if exception is a subclass of transient exceptions
try:
if issubclass(exc_type, transient_exceptions):
return "TRANSIENT"
except TypeError:
pass
# Check if exception is a subclass of bug exceptions
try:
if issubclass(exc_type, bug_exceptions):
return "BUG"
except TypeError:
pass
# Check exception name against patterns
for pattern in transient_patterns:
if pattern in exc_name:
return "TRANSIENT"
# Default to ERROR for everything else
return "ERROR"
def setup_logger() -> None:
"""
Setup the logger with the specified format
"""
# logging.config.dictConfig(logging_config)
renderer = structlog.processors.JSONRenderer() if settings.JSON_LOGGING else CustomConsoleRenderer()
additional_processors = (
[
structlog.processors.EventRenamer("msg"),
add_kv_pairs_to_msg,
structlog.processors.CallsiteParameterAdder(
{
structlog.processors.CallsiteParameter.PATHNAME,
structlog.processors.CallsiteParameter.FILENAME,
structlog.processors.CallsiteParameter.MODULE,
structlog.processors.CallsiteParameter.FUNC_NAME,
structlog.processors.CallsiteParameter.LINENO,
}
),
]
if settings.JSON_LOGGING
else [
structlog.processors.CallsiteParameterAdder(
{
structlog.processors.CallsiteParameter.FILENAME,
structlog.processors.CallsiteParameter.LINENO,
}
),
add_filename_section,
]
)
LOG_LEVEL_VAL = LOGGING_LEVEL_MAP.get(settings.LOG_LEVEL, logging.INFO)
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(LOG_LEVEL_VAL),
logger_factory=structlog.stdlib.LoggerFactory(),
processors=[
structlog.stdlib.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
add_error_processor,
structlog.processors.format_exc_info,
]
+ additional_processors
+ [skyvern_logs_processor, structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
)
handler = logging.StreamHandler()
handler.setFormatter(
structlog.stdlib.ProcessorFormatter(
processors=[
structlog.stdlib.add_log_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.format_exc_info,
renderer,
]
)
)
root_logger = logging.getLogger()
root_logger.handlers.clear()
root_logger.addHandler(handler)
# Root at WARNING so third-party loggers (temporalio, grpc, litellm, …)
# only surface warnings and errors. Our packages get the configured level.
root_logger.setLevel(logging.WARNING)
for name in ("skyvern", "cloud", "workers", "scripts", "browser_controller"):
logging.getLogger(name).setLevel(LOG_LEVEL_VAL)
uvicorn_error = logging.getLogger("uvicorn.error")
uvicorn_error.disabled = True
uvicorn_access = logging.getLogger("uvicorn.access")
uvicorn_access.disabled = True