Webhook Replay Test URL (#3769)
Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
This commit is contained in:
@@ -1,8 +1,34 @@
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from __future__ import annotations
|
||||
|
||||
from skyvern.forge.sdk.schemas.tasks import TaskRequest, TaskResponse, TaskStatus
|
||||
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunResponseBase, WorkflowRunStatus
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from time import perf_counter
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from fastapi import status
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.exceptions import (
|
||||
BlockedHost,
|
||||
MissingApiKey,
|
||||
MissingWebhookTarget,
|
||||
SkyvernHTTPException,
|
||||
TaskNotFound,
|
||||
WebhookReplayError,
|
||||
WorkflowRunNotFound,
|
||||
)
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.sdk.core.security import generate_skyvern_webhook_headers
|
||||
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
|
||||
from skyvern.forge.sdk.schemas.task_v2 import TaskV2
|
||||
from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus
|
||||
from skyvern.forge.sdk.workflow.models.workflow import (
|
||||
WorkflowRun,
|
||||
WorkflowRunResponseBase,
|
||||
WorkflowRunStatus,
|
||||
)
|
||||
from skyvern.schemas.runs import (
|
||||
ProxyLocation,
|
||||
RunStatus,
|
||||
@@ -12,6 +38,13 @@ from skyvern.schemas.runs import (
|
||||
WorkflowRunRequest,
|
||||
WorkflowRunResponse,
|
||||
)
|
||||
from skyvern.schemas.webhooks import RunWebhookPreviewResponse, RunWebhookReplayResponse
|
||||
from skyvern.services import run_service, task_v2_service
|
||||
from skyvern.utils.url_validators import validate_url
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
RESPONSE_BODY_TRUNCATION_LIMIT = 2048
|
||||
|
||||
|
||||
def _now() -> datetime:
|
||||
@@ -163,3 +196,326 @@ def build_sample_workflow_run_payload(run_id: str | None = None) -> str:
|
||||
|
||||
payload_dict.update(json.loads(workflow_run_response.model_dump_json(exclude_unset=True)))
|
||||
return json.dumps(payload_dict, separators=(",", ":"), ensure_ascii=False)
|
||||
|
||||
|
||||
@dataclass
class _WebhookPayload:
    """Serialized webhook body for one run, plus the metadata needed to send it."""

    # Identifier of the run the payload describes.
    run_id: str
    # Run type rendered as a plain string.
    run_type: str
    # JSON text of the webhook body (the builders in this module produce it with json.dumps).
    payload: str
    # Webhook callback URL stored on the underlying run record, if one was set.
    default_webhook_url: str | None
|
||||
|
||||
|
||||
async def build_run_preview(organization_id: str, run_id: str) -> RunWebhookPreviewResponse:
    """Describe the webhook request a replay of ``run_id`` would send, without sending it.

    The payload comes from ``_build_webhook_payload`` and the headers from
    ``generate_skyvern_webhook_headers`` using the organization's API key.
    Any exception raised by those helpers (for example ``MissingApiKey``)
    propagates to the caller.
    """
    webhook = await _build_webhook_payload(organization_id=organization_id, run_id=run_id)
    org_api_key = await _get_api_key(organization_id=organization_id)

    return RunWebhookPreviewResponse(
        run_id=webhook.run_id,
        run_type=webhook.run_type,
        default_webhook_url=webhook.default_webhook_url,
        payload=webhook.payload,
        headers=generate_skyvern_webhook_headers(payload=webhook.payload, api_key=org_api_key),
    )
|
||||
|
||||
|
||||
async def replay_run_webhook(organization_id: str, run_id: str, target_url: str | None) -> RunWebhookReplayResponse:
    """Re-send the webhook for a run and report how the receiver responded.

    Args:
        organization_id: Organization that owns the run.
        run_id: Identifier of the run whose webhook is replayed.
        target_url: Optional override destination. When empty or ``None`` the
            webhook URL stored on the run is used instead.

    Returns:
        A ``RunWebhookReplayResponse`` carrying the payload and headers that
        were sent, together with the status code, latency, response body and
        error string reported by ``_deliver_webhook``.

    Raises:
        MissingWebhookTarget: Neither an override nor a stored URL is available.
        SkyvernHTTPException: The destination URL fails validation.
    """
    webhook = await _build_webhook_payload(organization_id=organization_id, run_id=run_id)
    org_api_key = await _get_api_key(organization_id=organization_id)
    request_headers = generate_skyvern_webhook_headers(payload=webhook.payload, api_key=org_api_key)

    # A non-empty override wins; otherwise fall back to the URL stored on the run.
    destination = target_url or webhook.default_webhook_url
    if not destination:
        raise MissingWebhookTarget()
    destination = _validate_target_url(destination)

    delivery = await _deliver_webhook(url=destination, payload=webhook.payload, headers=request_headers)
    status_code, latency_ms, response_body, error = delivery

    return RunWebhookReplayResponse(
        run_id=webhook.run_id,
        run_type=webhook.run_type,
        default_webhook_url=webhook.default_webhook_url,
        target_webhook_url=destination,
        payload=webhook.payload,
        headers=request_headers,
        status_code=status_code,
        latency_ms=latency_ms,
        response_body=response_body,
        error=error,
    )
|
||||
|
||||
|
||||
async def _build_webhook_payload(organization_id: str, run_id: str) -> _WebhookPayload:
    """Look up ``run_id`` and build the webhook payload matching its run type.

    Raises:
        SkyvernHTTPException: 404 when no run, task v2 or workflow run matches,
            or when a task v2 run has no task v2 record.
        WebhookReplayError: The run type is not one this module can replay.
    """
    run_record = await app.DATABASE.get_run(run_id, organization_id=organization_id)
    if not run_record:
        return await _build_payload_without_run_record(organization_id=organization_id, run_id=run_id)

    kind = run_record.task_run_type
    kind_label = _as_run_type_str(kind)
    # Run types that are all handled by the single-task payload builder.
    single_task_kinds = {
        RunType.task_v1,
        RunType.openai_cua,
        RunType.anthropic_cua,
        RunType.ui_tars,
    }

    if kind in single_task_kinds:
        return await _build_task_payload(
            organization_id=organization_id,
            run_id=run_record.run_id,
            run_type_str=kind_label,
        )

    if kind == RunType.task_v2:
        task_v2_record = await app.DATABASE.get_task_v2(run_record.run_id, organization_id=organization_id)
        if task_v2_record:
            return await _build_task_v2_payload(task_v2_record)
        raise SkyvernHTTPException(
            f"Task v2 run {run_id} missing task record",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    if kind == RunType.workflow_run:
        return await _build_workflow_payload(organization_id=organization_id, workflow_run_id=run_record.run_id)

    raise WebhookReplayError(f"Run type {kind_label} is not supported for webhook replay.")


async def _build_payload_without_run_record(organization_id: str, run_id: str) -> _WebhookPayload:
    """Resolve ``run_id`` directly as a task v2 or workflow run when ``get_run`` found nothing."""
    # Attempt to resolve task v2 runs that may not yet be in the runs table.
    task_v2_record = await app.DATABASE.get_task_v2(run_id, organization_id=organization_id)
    if task_v2_record:
        return await _build_task_v2_payload(task_v2_record)

    workflow_run_record = await app.DATABASE.get_workflow_run(
        workflow_run_id=run_id,
        organization_id=organization_id,
    )
    if not workflow_run_record:
        raise SkyvernHTTPException(
            f"Run {run_id} not found",
            status_code=status.HTTP_404_NOT_FOUND,
        )
    return await _build_workflow_payload(
        organization_id=organization_id,
        workflow_run_id=run_id,
    )
|
||||
|
||||
|
||||
async def _build_task_payload(organization_id: str, run_id: str, run_type_str: str) -> _WebhookPayload:
    """Build the webhook payload for a single-task run.

    The body starts from the task response (without its ``request`` field).
    When the run service returns a ``TaskRunResponse`` for the run, that
    response (without ``run_request``) is merged on top, so its keys win.

    Raises:
        TaskNotFound: No task exists for ``run_id`` in the organization.
        WebhookReplayError: The task or its run response is not in a final state.
    """
    task_record: Task | None = await app.DATABASE.get_task(run_id, organization_id=organization_id)
    if not task_record:
        raise TaskNotFound(task_id=run_id)

    if not task_record.status.is_final():
        LOG.warning(
            "Webhook replay requested for non-terminal task run",
            run_id=run_id,
            status=task_record.status,
        )
        raise WebhookReplayError(f"Run {run_id} has not reached a terminal state (status={task_record.status}).")

    last_step = await app.DATABASE.get_latest_step(run_id, organization_id=organization_id)
    legacy_response = await app.agent.build_task_response(task=task_record, last_step=last_step)
    body = json.loads(legacy_response.model_dump_json(exclude={"request"}))

    run_view = await run_service.get_run_response(run_id=run_id, organization_id=organization_id)
    if isinstance(run_view, TaskRunResponse):
        if not run_view.status.is_final():
            LOG.warning(
                "Webhook replay requested for non-terminal task run response",
                run_id=run_id,
                status=run_view.status,
            )
            raise WebhookReplayError(f"Run {run_id} has not reached a terminal state (status={run_view.status}).")
        # Keys from the run response overwrite same-named keys from the task response.
        body.update(json.loads(run_view.model_dump_json(exclude={"run_request"})))

    return _WebhookPayload(
        run_id=run_id,
        run_type=run_type_str,
        payload=json.dumps(body, separators=(",", ":"), ensure_ascii=False),
        default_webhook_url=task_record.webhook_callback_url,
    )
|
||||
|
||||
|
||||
async def _build_task_v2_payload(task_v2: TaskV2) -> _WebhookPayload:
    """Build the webhook payload for a task v2 run.

    The body is the run response from ``task_v2_service.build_task_v2_run_response``
    with ``run_request`` excluded, re-serialized as compact JSON.

    Raises:
        WebhookReplayError: The task v2 record or its run response is not in a final state.
    """
    run_id = task_v2.observer_cruise_id

    if not task_v2.status.is_final():
        LOG.warning(
            "Webhook replay requested for non-terminal task v2 run",
            run_id=run_id,
            status=task_v2.status,
        )
        raise WebhookReplayError(f"Run {run_id} has not reached a terminal state (status={task_v2.status}).")

    run_view = await task_v2_service.build_task_v2_run_response(task_v2)
    if not run_view.status.is_final():
        LOG.warning(
            "Webhook replay requested for non-terminal task v2 run response",
            run_id=run_id,
            status=run_view.status,
        )
        raise WebhookReplayError(f"Run {run_id} has not reached a terminal state (status={run_view.status}).")

    # Round-trip through json so the output uses this module's compact separators.
    body = json.loads(run_view.model_dump_json(exclude={"run_request"}))

    return _WebhookPayload(
        run_id=run_id,
        run_type=RunType.task_v2.value,
        payload=json.dumps(body, separators=(",", ":"), ensure_ascii=False),
        default_webhook_url=task_v2.webhook_callback_url,
    )
|
||||
|
||||
|
||||
async def _build_workflow_payload(
    organization_id: str,
    workflow_run_id: str,
) -> _WebhookPayload:
    """Build the webhook payload for a workflow run.

    The body starts from the workflow status response (minus the webhook, TOTP
    and extra-header fields excluded below). A ``WorkflowRunResponse`` built
    from the same status data (minus ``run_request``) is merged on top, so its
    keys win.

    Raises:
        WorkflowRunNotFound: No workflow run matches ``workflow_run_id``.
        WebhookReplayError: The workflow run or its status response is not in a final state.
    """
    wf_run: WorkflowRun | None = await app.DATABASE.get_workflow_run(
        workflow_run_id=workflow_run_id,
        organization_id=organization_id,
    )
    if not wf_run:
        raise WorkflowRunNotFound(workflow_run_id=workflow_run_id)

    if not wf_run.status.is_final():
        LOG.warning(
            "Webhook replay requested for non-terminal workflow run",
            workflow_run_id=workflow_run_id,
            status=wf_run.status,
        )
        raise WebhookReplayError(f"Run {workflow_run_id} has not reached a terminal state (status={wf_run.status}).")

    status_view = await app.WORKFLOW_SERVICE.build_workflow_run_status_response(
        workflow_permanent_id=wf_run.workflow_permanent_id,
        workflow_run_id=wf_run.workflow_run_id,
        organization_id=wf_run.organization_id,
    )
    if not status_view.status.is_final():
        LOG.warning(
            "Webhook replay requested for non-terminal workflow run response",
            workflow_run_id=workflow_run_id,
            status=status_view.status,
        )
        raise WebhookReplayError(
            f"Run {workflow_run_id} has not reached a terminal state (status={status_view.status})."
        )

    base_url = settings.SKYVERN_APP_URL.rstrip("/")
    run_link = f"{base_url}/workflows/{wf_run.workflow_permanent_id}/{wf_run.workflow_run_id}"

    run_view = WorkflowRunResponse(
        run_id=wf_run.workflow_run_id,
        run_type=RunType.workflow_run,
        status=RunStatus(status_view.status),
        output=status_view.outputs,
        downloaded_files=status_view.downloaded_files,
        recording_url=status_view.recording_url,
        screenshot_urls=status_view.screenshot_urls,
        failure_reason=status_view.failure_reason,
        app_url=run_link,
        script_run=status_view.script_run,
        created_at=status_view.created_at,
        modified_at=status_view.modified_at,
        errors=status_view.errors,
    )

    # Fields of the status response that are left out of the webhook body.
    omitted_fields = {
        "webhook_callback_url",
        "totp_verification_url",
        "totp_identifier",
        "extra_http_headers",
    }
    body = json.loads(status_view.model_dump_json(exclude=omitted_fields))
    # Keys from the run response overwrite same-named keys from the status response.
    body.update(json.loads(run_view.model_dump_json(exclude={"run_request"})))

    return _WebhookPayload(
        run_id=wf_run.workflow_run_id,
        run_type=RunType.workflow_run.value,
        payload=json.dumps(body, separators=(",", ":"), ensure_ascii=False),
        default_webhook_url=wf_run.webhook_callback_url,
    )
|
||||
|
||||
|
||||
async def _get_api_key(organization_id: str) -> str:
    """Return the organization's valid API-type auth token string.

    Raises:
        MissingApiKey: No valid token record exists, or its token is empty.
    """
    token_record = await app.DATABASE.get_valid_org_auth_token(
        organization_id,
        OrganizationAuthTokenType.api.value,
    )
    token = token_record.token if token_record else None
    if not token:
        raise MissingApiKey()
    return token
|
||||
|
||||
|
||||
async def _deliver_webhook(
    url: str, payload: str, headers: dict[str, str], timeout_seconds: float = 10.0
) -> tuple[int | None, int, str | None, str | None]:
    """POST ``payload`` to ``url`` and summarize the outcome without raising.

    Args:
        url: Destination of the request.
        payload: Request body, sent as-is via ``content=``.
        headers: HTTP headers to send with the request.
        timeout_seconds: Timeout handed to ``httpx.Timeout``. Defaults to 10
            seconds; the timeout error message reports this same value.

    Returns:
        A tuple ``(status_code, latency_ms, response_body, error)``:

        * ``status_code`` - HTTP status of the response, or ``None`` if no
          response was received.
        * ``latency_ms`` - elapsed time for the whole attempt, in whole milliseconds.
        * ``response_body`` - response text, cut to
          ``RESPONSE_BODY_TRUNCATION_LIMIT`` characters with a truncation
          marker appended when longer; ``None`` when empty or on failure.
        * ``error`` - human-readable failure description, or ``None`` on success.

    Failures derived from ``Exception`` are logged and reported through
    ``error`` instead of being raised.
    """
    started_at = perf_counter()
    status_code: int | None = None
    response_body: str | None = None
    error: str | None = None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                url,
                content=payload,
                headers=headers,
                timeout=httpx.Timeout(timeout_seconds),
            )
            status_code = response.status_code
            body_text = response.text or ""
            if len(body_text) > RESPONSE_BODY_TRUNCATION_LIMIT:
                response_body = f"{body_text[:RESPONSE_BODY_TRUNCATION_LIMIT]}\n... (truncated)"
            else:
                response_body = body_text or None
    except httpx.TimeoutException:
        # ":g" renders 10.0 as "10", keeping the default message unchanged.
        error = f"Request timed out after {timeout_seconds:g} seconds."
        LOG.warning("Webhook replay timed out", url=url)
    except httpx.NetworkError as exc:
        error = f"Could not reach URL: {exc}"
        LOG.warning("Webhook replay network error", url=url, error=str(exc))
    except Exception as exc:  # pragma: no cover - defensive guard
        error = f"Unexpected error: {exc}"
        LOG.error("Webhook replay unexpected error", url=url, error=str(exc), exc_info=True)

    # Latency covers the whole attempt, including failed ones.
    latency_ms = int((perf_counter() - started_at) * 1000)
    return status_code, latency_ms, response_body, error
|
||||
|
||||
|
||||
def _as_run_type_str(run_type: RunType | str | None) -> str:
    """Render a run type as a plain string.

    A ``RunType`` member yields its ``.value``, a string is returned as-is,
    and anything else (including ``None``) yields ``"unknown"``.
    """
    # Check RunType before str so an enum member always yields its value.
    if isinstance(run_type, RunType):
        return run_type.value
    return run_type if isinstance(run_type, str) else "unknown"
|
||||
|
||||
|
||||
def _validate_target_url(url: str) -> str:
    """Validate a webhook destination with ``validate_url`` and return the validated URL.

    Raises:
        SkyvernHTTPException: Always with status 400 when raised here - the
            host is blocked (``BlockedHost``), ``validate_url`` returns a
            falsy value, or validation fails unexpectedly. A
            ``SkyvernHTTPException`` raised by ``validate_url`` itself is
            propagated unchanged.
    """
    try:
        validated_url = validate_url(url)
    except BlockedHost as exc:
        # Must precede the SkyvernHTTPException handler so a blocked host gets
        # the actionable message even if BlockedHost derives from it.
        raise SkyvernHTTPException(
            message=(
                f"This URL is blocked by SSRF protection. {str(exc)} "
                "Add the host to ALLOWED_HOSTS to test internal endpoints or use an external receiver "
                "such as webhook.site or requestbin.com."
            ),
            status_code=status.HTTP_400_BAD_REQUEST,
        ) from exc
    except SkyvernHTTPException:
        raise
    except Exception as exc:  # pragma: no cover - defensive guard
        # Include the traceback, matching the catch-all handler in _deliver_webhook.
        LOG.error("Unexpected error validating webhook URL", url=url, error=str(exc), exc_info=True)
        raise SkyvernHTTPException(
            "Unexpected error while validating the webhook URL.",
            status_code=status.HTTP_400_BAD_REQUEST,
        ) from exc

    if not validated_url:
        raise SkyvernHTTPException("Invalid webhook URL.", status_code=status.HTTP_400_BAD_REQUEST)
    return validated_url
|
||||
|
||||
Reference in New Issue
Block a user