[Backend] Add City and State targeting for Massive geo-targeting (#4133)

This commit is contained in:
Marc Kelechava
2025-11-28 14:24:44 -08:00
committed by GitHub
parent 793d5d350d
commit b23fea86be
12 changed files with 213 additions and 47 deletions

View File

@@ -115,7 +115,7 @@ from skyvern.forge.sdk.workflow.models.workflow import (
WorkflowRunParameter,
WorkflowRunStatus,
)
from skyvern.schemas.runs import ProxyLocation, RunEngine, RunType
from skyvern.schemas.runs import GeoTarget, ProxyLocation, ProxyLocationInput, RunEngine, RunType
from skyvern.schemas.scripts import Script, ScriptBlock, ScriptFile, ScriptStatus, WorkflowScript
from skyvern.schemas.steps import AgentStepOutput
from skyvern.schemas.workflows import BlockStatus, BlockType, WorkflowStatus
@@ -123,6 +123,34 @@ from skyvern.webeye.actions.actions import Action
LOG = structlog.get_logger()
def _serialize_proxy_location(proxy_location: ProxyLocationInput) -> str | None:
    """
    Serialize proxy_location for database storage.

    Conversion rules:
    - None -> None
    - GeoTarget -> JSON string of ``proxy_location.model_dump()``
    - dict -> JSON string of the dict
    - Enum member (e.g. ProxyLocation) -> the member's ``value`` as a string
    - anything else -> ``str(proxy_location)``

    Args:
        proxy_location: The proxy location supplied by the caller, or None.

    Returns:
        The string to persist, or None when no proxy location was given.
    """
    # Imported locally so this helper does not depend on the module's import block.
    from enum import Enum

    result: str | None
    if proxy_location is None:
        result = None
    elif isinstance(proxy_location, GeoTarget):
        result = json.dumps(proxy_location.model_dump())
    elif isinstance(proxy_location, dict):
        result = json.dumps(proxy_location)
    elif isinstance(proxy_location, Enum):
        # Store the member's value rather than str(member): str() only yields the
        # value when the enum overrides __str__ (as StrEnum does). For other enum
        # flavours it is "ClassName.MEMBER", which the enum constructor cannot
        # parse back when the row is read.
        result = str(proxy_location.value)
    else:
        # Not an enum member (e.g. a plain string) - store its string form.
        result = str(proxy_location)

    LOG.debug(
        "Serializing proxy_location for DB",
        input_type=type(proxy_location).__name__,
        input_value=str(proxy_location),
        serialized_value=result,
    )
    return result
DB_CONNECT_ARGS: dict[str, Any] = {}
if "postgresql+psycopg" in settings.DATABASE_STRING:
@@ -161,7 +189,7 @@ class AgentDB:
totp_verification_url: str | None = None,
totp_identifier: str | None = None,
organization_id: str | None = None,
proxy_location: ProxyLocation | None = None,
proxy_location: ProxyLocationInput = None,
extracted_information_schema: dict[str, Any] | list | str | None = None,
workflow_run_id: str | None = None,
order: int | None = None,
@@ -194,7 +222,7 @@ class AgentDB:
data_extraction_goal=data_extraction_goal,
navigation_payload=navigation_payload,
organization_id=organization_id,
proxy_location=proxy_location,
proxy_location=_serialize_proxy_location(proxy_location),
extracted_information_schema=extracted_information_schema,
workflow_run_id=workflow_run_id,
order=order,
@@ -1390,7 +1418,7 @@ class AgentDB:
workflow_definition: dict[str, Any],
organization_id: str | None = None,
description: str | None = None,
proxy_location: ProxyLocation | None = None,
proxy_location: ProxyLocationInput = None,
webhook_callback_url: str | None = None,
max_screenshot_scrolling_times: int | None = None,
extra_http_headers: dict[str, str] | None = None,
@@ -1415,7 +1443,7 @@ class AgentDB:
title=title,
description=description,
workflow_definition=workflow_definition,
proxy_location=proxy_location,
proxy_location=_serialize_proxy_location(proxy_location),
webhook_callback_url=webhook_callback_url,
totp_verification_url=totp_verification_url,
totp_identifier=totp_identifier,
@@ -2259,7 +2287,7 @@ class AgentDB:
organization_id: str,
browser_session_id: str | None = None,
browser_profile_id: str | None = None,
proxy_location: ProxyLocation | None = None,
proxy_location: ProxyLocationInput = None,
webhook_callback_url: str | None = None,
totp_verification_url: str | None = None,
totp_identifier: str | None = None,
@@ -2281,7 +2309,7 @@ class AgentDB:
organization_id=organization_id,
browser_session_id=browser_session_id,
browser_profile_id=browser_profile_id,
proxy_location=proxy_location,
proxy_location=_serialize_proxy_location(proxy_location),
status="created",
webhook_callback_url=webhook_callback_url,
totp_verification_url=totp_verification_url,
@@ -3565,7 +3593,7 @@ class AgentDB:
prompt: str | None = None,
url: str | None = None,
organization_id: str | None = None,
proxy_location: ProxyLocation | None = None,
proxy_location: ProxyLocationInput = None,
totp_identifier: str | None = None,
totp_verification_url: str | None = None,
webhook_callback_url: str | None = None,
@@ -3584,7 +3612,7 @@ class AgentDB:
workflow_permanent_id=workflow_permanent_id,
prompt=prompt,
url=url,
proxy_location=proxy_location,
proxy_location=_serialize_proxy_location(proxy_location),
totp_identifier=totp_identifier,
totp_verification_url=totp_verification_url,
webhook_callback_url=webhook_callback_url,
@@ -4190,7 +4218,7 @@ class AgentDB:
runnable_type: str | None = None,
runnable_id: str | None = None,
timeout_minutes: int | None = None,
proxy_location: ProxyLocation | None = ProxyLocation.RESIDENTIAL,
proxy_location: ProxyLocationInput = ProxyLocation.RESIDENTIAL,
) -> PersistentBrowserSession:
"""Create a new persistent browser session."""
try:
@@ -4200,7 +4228,7 @@ class AgentDB:
runnable_type=runnable_type,
runnable_id=runnable_id,
timeout_minutes=timeout_minutes,
proxy_location=proxy_location,
proxy_location=_serialize_proxy_location(proxy_location),
)
session.add(browser_session)
await session.commit()

View File

@@ -55,7 +55,7 @@ from skyvern.forge.sdk.workflow.models.workflow import (
WorkflowRunStatus,
WorkflowStatus,
)
from skyvern.schemas.runs import ProxyLocation, ScriptRunResponse
from skyvern.schemas.runs import GeoTarget, ProxyLocation, ProxyLocationInput, ScriptRunResponse
from skyvern.schemas.scripts import Script, ScriptBlock, ScriptFile
from skyvern.schemas.workflows import BlockStatus, BlockType
from skyvern.webeye.actions.actions import (
@@ -85,6 +85,50 @@ from skyvern.webeye.actions.actions import (
LOG = structlog.get_logger()
def _deserialize_proxy_location(value: str | None) -> ProxyLocationInput:
"""
Deserialize proxy_location from database storage.
Handles:
- None -> None
- ProxyLocation enum string (e.g., "RESIDENTIAL") -> ProxyLocation enum
- JSON string (e.g., '{"country": "US", ...}') -> GeoTarget object
"""
if value is None:
return None
result: ProxyLocationInput = None
# Try to parse as JSON first (for GeoTarget)
if value.startswith("{"):
try:
data = json.loads(value)
result = GeoTarget.model_validate(data)
LOG.info(
"Deserialized proxy_location as GeoTarget",
db_value=value,
result=str(result),
)
return result
except (json.JSONDecodeError, ValueError):
pass
# Try as ProxyLocation enum
try:
result = ProxyLocation(value)
LOG.info(
"Deserialized proxy_location as ProxyLocation enum",
db_value=value,
result=str(result),
)
return result
except ValueError:
# If all else fails, return as-is (shouldn't happen with valid data)
LOG.warning("Failed to deserialize proxy_location", db_value=value)
return None
# Mapping of action types to their corresponding action classes
ACTION_TYPE_TO_CLASS = {
ActionType.CLICK: ClickAction,
@@ -142,7 +186,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p
extracted_information=task_obj.extracted_information,
failure_reason=task_obj.failure_reason,
organization_id=task_obj.organization_id,
proxy_location=(ProxyLocation(task_obj.proxy_location) if task_obj.proxy_location else None),
proxy_location=_deserialize_proxy_location(task_obj.proxy_location),
extracted_information_schema=task_obj.extracted_information_schema,
extra_http_headers=task_obj.extra_http_headers,
workflow_run_id=task_obj.workflow_run_id,
@@ -272,7 +316,7 @@ def convert_to_workflow(workflow_model: WorkflowModel, debug_enabled: bool = Fal
totp_identifier=workflow_model.totp_identifier,
persist_browser_session=workflow_model.persist_browser_session,
model=workflow_model.model,
proxy_location=(ProxyLocation(workflow_model.proxy_location) if workflow_model.proxy_location else None),
proxy_location=_deserialize_proxy_location(workflow_model.proxy_location),
max_screenshot_scrolls=workflow_model.max_screenshot_scrolling_times,
version=workflow_model.version,
is_saved_task=workflow_model.is_saved_task,
@@ -312,9 +356,7 @@ def convert_to_workflow_run(
browser_profile_id=workflow_run_model.browser_profile_id,
status=WorkflowRunStatus[workflow_run_model.status],
failure_reason=workflow_run_model.failure_reason,
proxy_location=(
ProxyLocation(workflow_run_model.proxy_location) if workflow_run_model.proxy_location else None
),
proxy_location=_deserialize_proxy_location(workflow_run_model.proxy_location),
webhook_callback_url=workflow_run_model.webhook_callback_url,
webhook_failure_reason=workflow_run_model.webhook_failure_reason,
totp_verification_url=workflow_run_model.totp_verification_url,