2025-06-17 23:34:39 -04:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2026-02-17 23:40:47 -05:00
|
|
|
import json
|
2026-01-08 19:58:07 -07:00
|
|
|
import time
|
2025-06-17 23:34:39 -04:00
|
|
|
import typing
|
|
|
|
|
|
|
|
|
|
import structlog
|
2025-11-28 09:59:37 -07:00
|
|
|
from starlette.concurrency import iterate_in_threadpool
|
2025-06-17 23:34:39 -04:00
|
|
|
|
|
|
|
|
from skyvern.config import settings
|
|
|
|
|
|
|
|
|
|
if typing.TYPE_CHECKING: # pragma: no cover - import only for type hints
|
|
|
|
|
from typing import Awaitable, Callable
|
|
|
|
|
|
|
|
|
|
from fastapi import Response
|
|
|
|
|
from starlette.requests import Request
|
|
|
|
|
|
|
|
|
|
# Module-level structured logger used by the middleware below.
LOG = structlog.get_logger()

# Header names (compared lower-cased) that are dropped entirely from the
# logged headers rather than redacted in place.
_SENSITIVE_HEADERS = {"authorization", "cookie", "x-api-key"}

# Keys of the form "<METHOD> <path-with-trailing-slash-stripped>" whose
# request AND response bodies are replaced wholesale with ``_REDACTED``.
_SENSITIVE_ENDPOINTS = {
    "POST /api/v1/credentials",
    "POST /v1/credentials",
    "POST /v1/credentials/onepassword/create",
    "POST /v1/credentials/azure_credential/create",
}

# Maximum number of characters of a body kept in a single log line.
_MAX_BODY_LENGTH = 1000

_MAX_RESPONSE_READ_BYTES = 1024 * 1024  # 1 MB — skip logging bodies larger than this

# Placeholder logged in place of non-text or undecodable payloads.
_BINARY_PLACEHOLDER = "<binary>"

# Placeholder substituted for redacted values and fully redacted bodies.
_REDACTED = "****"

# Content-type prefixes that are considered safe to log as text.
_LOGGABLE_CONTENT_TYPES = {"text/", "application/json"}

# Server-sent-events responses are never buffered for logging.
_STREAMING_CONTENT_TYPE = "text/event-stream"

# Exact field names that are always redacted. Use a set for O(1) lookup
# instead of regex substring matching to avoid false positives like
# credential_id, author, page_token, etc.
_SENSITIVE_FIELDS: set[str] = {
    "password",
    "secret",
    "token",
    "api_key",
    "apikey",
    "api-key",
    "credential",
    "access_key",
    "private_key",
    "auth",
    "authorization",
    "secret_key",
}
|
2025-06-17 23:34:39 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sanitize_headers(headers: typing.Mapping[str, str]) -> dict[str, str]:
    """Return a copy of *headers* safe for logging.

    Any header whose (lower-cased) name appears in ``_SENSITIVE_HEADERS`` is
    omitted entirely; all other headers are passed through unchanged.
    """
    return {name: value for name, value in headers.items() if name.lower() not in _SENSITIVE_HEADERS}
|
|
|
|
|
|
|
|
|
|
|
2025-10-10 19:56:47 -06:00
|
|
|
def _sanitize_body(request: Request, body: bytes, content_type: str | None) -> str:
    """Return a log-safe representation of a request body.

    - Bodies sent to endpoints in ``_SENSITIVE_ENDPOINTS`` are fully redacted.
    - Non-text/non-JSON content types become ``_BINARY_PLACEHOLDER``.
    - JSON bodies get field-level redaction via ``_redact_sensitive_fields``,
      mirroring ``_sanitize_response_body`` — previously request payloads
      containing e.g. ``password`` fields were logged in plaintext unless the
      whole endpoint was listed as sensitive.
    - The result is truncated to ``_MAX_BODY_LENGTH`` characters.
    """
    if f"{request.method.upper()} {request.url.path.rstrip('/')}" in _SENSITIVE_ENDPOINTS:
        return _REDACTED
    if not body:
        return ""
    if content_type and not (content_type.startswith("text/") or content_type.startswith("application/json")):
        return _BINARY_PLACEHOLDER
    try:
        text = body.decode("utf-8", errors="replace")
    except Exception:
        return _BINARY_PLACEHOLDER
    # Field-level redaction for JSON payloads: secrets sent to endpoints that
    # are NOT in _SENSITIVE_ENDPOINTS must not reach the logs either.
    # Non-JSON text falls through unchanged.
    try:
        text = json.dumps(_redact_sensitive_fields(json.loads(text)))
    except (json.JSONDecodeError, TypeError):
        pass
    if len(text) > _MAX_BODY_LENGTH:
        return text[:_MAX_BODY_LENGTH] + "...[truncated]"
    return text
|
|
|
|
|
|
|
|
|
|
|
2026-02-17 23:40:47 -05:00
|
|
|
def _is_sensitive_key(key: str) -> bool:
    """True when *key* names a known sensitive field (case-insensitive exact match)."""
    normalized = key.lower()
    return normalized in _SENSITIVE_FIELDS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _redact_sensitive_fields(obj: typing.Any, _depth: int = 0) -> typing.Any:
    """Redact dict values whose *key name* exactly matches a known sensitive field.

    Uses exact-match (case-insensitive) rather than substring/regex to avoid
    false positives on fields like ``credential_id``, ``author``, or
    ``page_token`` which contain sensitive substrings but are not secrets.

    Recursion stops after 20 levels: at that depth sensitive keys on the
    current dict are still redacted, but nested values are returned as-is.
    """
    too_deep = _depth > 20
    if isinstance(obj, dict):
        redacted: dict = {}
        for key, value in obj.items():
            if _is_sensitive_key(key):
                redacted[key] = _REDACTED
            elif too_deep:
                # Depth limit reached: keep the value without recursing.
                redacted[key] = value
            else:
                redacted[key] = _redact_sensitive_fields(value, _depth + 1)
        return redacted
    if too_deep:
        return obj
    if isinstance(obj, list):
        return [_redact_sensitive_fields(element, _depth + 1) for element in obj]
    return obj
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_loggable_content_type(content_type: str | None) -> bool:
    """Whether a body with this content type may be logged as text.

    A missing content-type header is assumed to be text.
    """
    if not content_type:
        return True
    return content_type.startswith(tuple(_LOGGABLE_CONTENT_TYPES))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sanitize_response_body(request: Request, body_str: str | None, content_type: str | None) -> str:
    """Return a log-safe representation of a response body.

    Bodies of ``_SENSITIVE_ENDPOINTS`` are fully redacted; ``None`` (binary
    or oversized payload, per ``_get_response_body_str``) becomes the binary
    placeholder; parseable JSON gets field-level redaction; and the result is
    truncated to ``_MAX_BODY_LENGTH`` characters.
    """
    endpoint_key = f"{request.method.upper()} {request.url.path.rstrip('/')}"
    if endpoint_key in _SENSITIVE_ENDPOINTS:
        return _REDACTED
    if body_str is None:
        return _BINARY_PLACEHOLDER
    if body_str == "":
        return ""
    if not _is_loggable_content_type(content_type):
        return _BINARY_PLACEHOLDER
    try:
        text = json.dumps(_redact_sensitive_fields(json.loads(body_str)))
    except (json.JSONDecodeError, TypeError):
        # Not JSON (or not serializable back) — log the raw text.
        text = body_str
    if len(text) <= _MAX_BODY_LENGTH:
        return text
    return text[:_MAX_BODY_LENGTH] + "...[truncated]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _get_response_body_str(response: Response) -> str | None:
    """Read and reconstitute the response body for logging.

    Drains ``response.body_iterator`` and then replaces it with a
    single-chunk iterator so the response can still be sent downstream.

    Returns ``None`` when the body is not valid UTF-8 or exceeds
    ``_MAX_RESPONSE_READ_BYTES`` to avoid logging large payloads.
    """
    # Accumulate chunks in a list and join once: repeated ``bytes +=`` is
    # quadratic in the number of chunks.
    chunks: list[bytes] = []
    async for chunk in response.body_iterator:
        chunks.append(chunk)
    response_body = b"".join(chunks)
    # Restore the body unconditionally — the caller must be able to send the
    # response even when we decline to log it.
    response.body_iterator = iterate_in_threadpool(iter([response_body]))

    if len(response_body) > _MAX_RESPONSE_READ_BYTES:
        return None

    try:
        return response_body.decode("utf-8")
    except UnicodeDecodeError:
        return None
|
2025-11-28 09:59:37 -07:00
|
|
|
|
|
|
|
|
|
2025-06-17 23:34:39 -04:00
|
|
|
async def log_raw_request_middleware(request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
    """Log each API request/response pair as a structured ``api.raw_request`` event.

    Disabled unless ``settings.LOG_RAW_API_REQUESTS`` is set. Headers and
    bodies are sanitized before logging; the log level follows the response
    status (info < 400 <= warning < 500 <= error). Exceptions raised by the
    handler are logged (without a response) and re-raised.
    """
    if not settings.LOG_RAW_API_REQUESTS:
        return await call_next(request)

    start_time = time.monotonic()
    body_bytes = await request.body()
    # ensure downstream handlers can access body again
    try:
        # NOTE(review): relies on Starlette's private body cache attribute —
        # confirm on Starlette upgrades.
        request._body = body_bytes  # type: ignore[attr-defined]
    except Exception:
        pass

    # Capture request metadata and sanitized payload up front so the
    # exception path below can log them even when call_next raises.
    url_path = request.url.path
    http_method = request.method
    sanitized_headers = _sanitize_headers(dict(request.headers))
    body_text = _sanitize_body(request, body_bytes, request.headers.get("content-type"))

    try:
        response = await call_next(request)

        # Pick log level from the response status code.
        if response.status_code >= 500:
            log_method = LOG.error
        elif response.status_code >= 400:
            log_method = LOG.warning
        else:
            log_method = LOG.info

        resp_content_type = response.headers.get("content-type", "")
        if _STREAMING_CONTENT_TYPE in resp_content_type:
            # Never buffer SSE streams just for logging.
            response_body = "<streaming>"
        else:
            # _get_response_body_str also restores response.body_iterator.
            raw_response_body = await _get_response_body_str(response)
            response_body = _sanitize_response_body(request, raw_response_body, resp_content_type)

        log_method(
            "api.raw_request",
            method=http_method,
            path=url_path,
            status_code=response.status_code,
            body=body_text,
            headers=sanitized_headers,
            response_body=response_body,
            # backwards-compat: keep error_body for existing Datadog queries
            error_body=response_body if response.status_code >= 400 else None,
            duration_seconds=time.monotonic() - start_time,
        )

        return response

    except Exception:
        # No response exists here; log the request side plus the traceback.
        LOG.error(
            "api.raw_request",
            method=http_method,
            path=url_path,
            body=body_text,
            headers=sanitized_headers,
            exc_info=True,
            duration_seconds=time.monotonic() - start_time,
        )
        raise
|