Log response body in api.raw_request for all status codes (#SKY-7987) (#4779)

Co-authored-by: Suchintan Singh <suchintan@skyvern.com>
This commit is contained in:
Suchintan
2026-02-17 23:40:47 -05:00
committed by GitHub
parent 47cf46f956
commit f85af654b4
2 changed files with 355 additions and 9 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import time
import typing
@@ -24,7 +25,29 @@ _SENSITIVE_ENDPOINTS = {
"POST /v1/credentials/azure_credential/create",
}
# Hard cap on how many characters of a logged body are emitted per log line.
_MAX_BODY_LENGTH = 1000
# Bodies larger than this are never buffered/decoded for logging.
_MAX_RESPONSE_READ_BYTES = 1024 * 1024  # 1 MB — skip logging bodies larger than this
# Placeholder strings emitted in place of unloggable or masked content.
_BINARY_PLACEHOLDER = "<binary>"
_REDACTED = "****"
# Content-type prefixes whose bodies are treated as loggable text.
_LOGGABLE_CONTENT_TYPES = {"text/", "application/json"}
# Server-sent-event responses are streamed and must not be buffered for logging.
_STREAMING_CONTENT_TYPE = "text/event-stream"
# Exact field names that are always redacted. Use a set for O(1) lookup
# instead of regex substring matching to avoid false positives like
# credential_id, author, page_token, etc.
_SENSITIVE_FIELDS: set[str] = {
    "password",
    "secret",
    "token",
    "api_key",
    "apikey",
    "api-key",
    "credential",
    "access_key",
    "private_key",
    "auth",
    "authorization",
    "secret_key",
}
def _sanitize_headers(headers: typing.Mapping[str, str]) -> dict[str, str]:
@@ -38,7 +61,7 @@ def _sanitize_headers(headers: typing.Mapping[str, str]) -> dict[str, str]:
def _sanitize_body(request: Request, body: bytes, content_type: str | None) -> str:
if f"{request.method.upper()} {request.url.path.rstrip('/')}" in _SENSITIVE_ENDPOINTS:
return "****"
return _REDACTED
if not body:
return ""
if content_type and not (content_type.startswith("text/") or content_type.startswith("application/json")):
@@ -52,18 +75,76 @@ def _sanitize_body(request: Request, body: bytes, content_type: str | None) -> s
return text
async def _get_response_body_str(response: Response) -> str:
def _is_sensitive_key(key: str) -> bool:
    """Return True when *key* (compared case-insensitively) names a known sensitive field."""
    normalized = key.lower()
    return normalized in _SENSITIVE_FIELDS
def _redact_sensitive_fields(obj: typing.Any, _depth: int = 0) -> typing.Any:
    """Return a copy of *obj* with values under sensitive key names replaced.

    Matching is exact and case-insensitive on the key name (via
    ``_is_sensitive_key``) rather than substring-based, so fields such as
    ``credential_id``, ``author``, or ``page_token`` are left intact.
    """
    # Guard against pathologically deep structures: past the cutoff we stop
    # recursing, but sensitive keys at the current level are still masked.
    if _depth > 20:
        if isinstance(obj, dict):
            return {key: _REDACTED if _is_sensitive_key(key) else value for key, value in obj.items()}
        return obj
    if isinstance(obj, dict):
        redacted: dict = {}
        for key, value in obj.items():
            if _is_sensitive_key(key):
                redacted[key] = _REDACTED
            else:
                redacted[key] = _redact_sensitive_fields(value, _depth + 1)
        return redacted
    if isinstance(obj, list):
        return [_redact_sensitive_fields(entry, _depth + 1) for entry in obj]
    return obj
def _is_loggable_content_type(content_type: str | None) -> bool:
    """Whether a response with this content type may be logged as text.

    A missing/empty content-type header is optimistically treated as text.
    """
    if not content_type:
        return True
    for prefix in _LOGGABLE_CONTENT_TYPES:
        if content_type.startswith(prefix):
            return True
    return False
def _sanitize_response_body(request: Request, body_str: str | None, content_type: str | None) -> str:
    """Produce a log-safe representation of a response body.

    Bodies for sensitive endpoints are fully masked; unreadable/binary
    bodies (``body_str is None``) and non-text content types become a
    placeholder; JSON bodies have sensitive fields redacted; and the
    result is truncated to ``_MAX_BODY_LENGTH`` characters.
    """
    endpoint = f"{request.method.upper()} {request.url.path.rstrip('/')}"
    if endpoint in _SENSITIVE_ENDPOINTS:
        return _REDACTED
    if body_str is None:
        return _BINARY_PLACEHOLDER
    if not body_str:
        return ""
    if not _is_loggable_content_type(content_type):
        return _BINARY_PLACEHOLDER
    try:
        text = json.dumps(_redact_sensitive_fields(json.loads(body_str)))
    except (json.JSONDecodeError, TypeError):
        # Not JSON — log the raw text as-is (still subject to truncation).
        text = body_str
    if len(text) <= _MAX_BODY_LENGTH:
        return text
    return text[:_MAX_BODY_LENGTH] + "...[truncated]"
async def _get_response_body_str(response: Response) -> str | None:
    """Read and reconstitute the response body for logging.

    The streaming body iterator is fully consumed and then replaced with an
    equivalent single-chunk iterator so the client still receives the
    response. Returns ``None`` when the body exceeds
    ``_MAX_RESPONSE_READ_BYTES`` or cannot be decoded as UTF-8 (i.e. is
    binary), so callers can substitute a placeholder instead of logging it.
    """
    response_body = b""
    async for chunk in response.body_iterator:
        response_body += chunk
    # Restore the body so the response can still be sent to the client.
    response.body_iterator = iterate_in_threadpool(iter([response_body]))
    if len(response_body) > _MAX_RESPONSE_READ_BYTES:
        # Too large to buffer/decode purely for logging purposes.
        return None
    try:
        return response_body.decode("utf-8")
    except UnicodeDecodeError:
        # Undecodable bytes mean a binary payload; matches the documented
        # None-for-binary contract consumed by _sanitize_response_body.
        return None
async def log_raw_request_middleware(request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
@@ -88,13 +169,17 @@ async def log_raw_request_middleware(request: Request, call_next: Callable[[Requ
if response.status_code >= 500:
log_method = LOG.error
error_body = await _get_response_body_str(response)
elif response.status_code >= 400:
log_method = LOG.warning
error_body = await _get_response_body_str(response)
else:
log_method = LOG.info
error_body = None
resp_content_type = response.headers.get("content-type", "")
if _STREAMING_CONTENT_TYPE in resp_content_type:
response_body = "<streaming>"
else:
raw_response_body = await _get_response_body_str(response)
response_body = _sanitize_response_body(request, raw_response_body, resp_content_type)
log_method(
"api.raw_request",
@@ -103,7 +188,9 @@ async def log_raw_request_middleware(request: Request, call_next: Callable[[Requ
status_code=response.status_code,
body=body_text,
headers=sanitized_headers,
error_body=error_body,
response_body=response_body,
# backwards-compat: keep error_body for existing Datadog queries
error_body=response_body if response.status_code >= 400 else None,
duration_seconds=time.monotonic() - start_time,
)
return response