SDK: file uploading (public url only) (#3867)

This commit is contained in:
Stanislav Novosad
2025-11-07 15:21:40 -07:00
committed by GitHub
parent 581d6e5332
commit d1d0c9414b
14 changed files with 379 additions and 12 deletions

View File

@@ -273,6 +273,7 @@ if typing.TYPE_CHECKING:
RunSdkActionRequestAction_AiClick,
RunSdkActionRequestAction_AiInputText,
RunSdkActionRequestAction_AiSelectOption,
RunSdkActionRequestAction_AiUploadFile,
RunSdkActionRequestAction_Extract,
RunSdkActionResponse,
RunStatus,
@@ -327,6 +328,8 @@ if typing.TYPE_CHECKING:
ThoughtType,
TotpCode,
TotpType,
UploadFileAction,
UploadFileActionData,
UploadToS3Block,
UploadToS3BlockYaml,
UrlBlock,
@@ -740,6 +743,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"RunSdkActionRequestAction_AiClick": ".types",
"RunSdkActionRequestAction_AiInputText": ".types",
"RunSdkActionRequestAction_AiSelectOption": ".types",
"RunSdkActionRequestAction_AiUploadFile": ".types",
"RunSdkActionRequestAction_Extract": ".types",
"RunSdkActionResponse": ".types",
"RunStatus": ".types",
@@ -797,6 +801,8 @@ _dynamic_imports: typing.Dict[str, str] = {
"TotpCode": ".types",
"TotpType": ".types",
"UnprocessableEntityError": ".errors",
"UploadFileAction": ".types",
"UploadFileActionData": ".types",
"UploadToS3Block": ".types",
"UploadToS3BlockYaml": ".types",
"UrlBlock": ".types",
@@ -1231,6 +1237,7 @@ __all__ = [
"RunSdkActionRequestAction_AiClick",
"RunSdkActionRequestAction_AiInputText",
"RunSdkActionRequestAction_AiSelectOption",
"RunSdkActionRequestAction_AiUploadFile",
"RunSdkActionRequestAction_Extract",
"RunSdkActionResponse",
"RunStatus",
@@ -1288,6 +1295,8 @@ __all__ = [
"TotpCode",
"TotpType",
"UnprocessableEntityError",
"UploadFileAction",
"UploadFileActionData",
"UploadToS3Block",
"UploadToS3BlockYaml",
"UrlBlock",

View File

@@ -299,6 +299,7 @@ if typing.TYPE_CHECKING:
RunSdkActionRequestAction_AiClick,
RunSdkActionRequestAction_AiInputText,
RunSdkActionRequestAction_AiSelectOption,
RunSdkActionRequestAction_AiUploadFile,
RunSdkActionRequestAction_Extract,
)
from .run_sdk_action_response import RunSdkActionResponse
@@ -358,6 +359,8 @@ if typing.TYPE_CHECKING:
from .thought_type import ThoughtType
from .totp_code import TotpCode
from .totp_type import TotpType
from .upload_file_action import UploadFileAction
from .upload_file_action_data import UploadFileActionData
from .upload_to_s3block import UploadToS3Block
from .upload_to_s3block_yaml import UploadToS3BlockYaml
from .url_block import UrlBlock
@@ -775,6 +778,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"RunSdkActionRequestAction_AiClick": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_AiInputText": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_AiSelectOption": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_AiUploadFile": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_Extract": ".run_sdk_action_request_action",
"RunSdkActionResponse": ".run_sdk_action_response",
"RunStatus": ".run_status",
@@ -829,6 +833,8 @@ _dynamic_imports: typing.Dict[str, str] = {
"ThoughtType": ".thought_type",
"TotpCode": ".totp_code",
"TotpType": ".totp_type",
"UploadFileAction": ".upload_file_action",
"UploadFileActionData": ".upload_file_action_data",
"UploadToS3Block": ".upload_to_s3block",
"UploadToS3BlockYaml": ".upload_to_s3block_yaml",
"UrlBlock": ".url_block",
@@ -1256,6 +1262,7 @@ __all__ = [
"RunSdkActionRequestAction_AiClick",
"RunSdkActionRequestAction_AiInputText",
"RunSdkActionRequestAction_AiSelectOption",
"RunSdkActionRequestAction_AiUploadFile",
"RunSdkActionRequestAction_Extract",
"RunSdkActionResponse",
"RunStatus",
@@ -1310,6 +1317,8 @@ __all__ = [
"ThoughtType",
"TotpCode",
"TotpType",
"UploadFileAction",
"UploadFileActionData",
"UploadToS3Block",
"UploadToS3BlockYaml",
"UrlBlock",

View File

@@ -12,6 +12,7 @@ from .extract_action_data import ExtractActionData
from .extract_action_extract_schema import ExtractActionExtractSchema
from .input_text_action_data import InputTextActionData
from .select_option_action_data import SelectOptionActionData
from .upload_file_action_data import UploadFileActionData
class RunSdkActionRequestAction_AiAct(UniversalBaseModel):
@@ -100,6 +101,28 @@ class RunSdkActionRequestAction_AiSelectOption(UniversalBaseModel):
extra = pydantic.Extra.allow
# File-upload member of the RunSdkActionRequestAction union.
# The literal `type` value "ai_upload_file" is what sets it apart from the
# other members (ai_click, ai_input_text, ai_select_option, extract, ...).
class RunSdkActionRequestAction_AiUploadFile(UniversalBaseModel):
"""
The action to execute with its specific parameters
"""
# Fixed tag for this variant; callers normally leave it at its default.
type: typing.Literal["ai_upload_file"] = "ai_upload_file"
# Every remaining field is optional and defaults to None.
selector: typing.Optional[str] = None
file_url: typing.Optional[str] = None
intention: typing.Optional[str] = None
# Either a plain string or a string-keyed dict (see UploadFileActionData).
data: typing.Optional[UploadFileActionData] = None
timeout: typing.Optional[float] = None
# Same model settings under both pydantic major versions: instances are
# frozen and unknown extra fields are allowed.
if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
else:
class Config:
frozen = True
smart_union = True
extra = pydantic.Extra.allow
class RunSdkActionRequestAction_Extract(UniversalBaseModel):
"""
The action to execute with its specific parameters
@@ -127,5 +150,6 @@ RunSdkActionRequestAction = typing.Union[
RunSdkActionRequestAction_AiClick,
RunSdkActionRequestAction_AiInputText,
RunSdkActionRequestAction_AiSelectOption,
RunSdkActionRequestAction_AiUploadFile,
RunSdkActionRequestAction_Extract,
]

View File

@@ -11,6 +11,7 @@ from .extract_action_data import ExtractActionData
from .extract_action_extract_schema import ExtractActionExtractSchema
from .input_text_action_data import InputTextActionData
from .select_option_action_data import SelectOptionActionData
from .upload_file_action_data import UploadFileActionData
class SdkAction_AiClick(UniversalBaseModel):
@@ -68,6 +69,24 @@ class SdkAction_AiSelectOption(UniversalBaseModel):
extra = pydantic.Extra.allow
# File-upload member of the client-side SdkAction union, tagged by the
# literal `type` value "ai_upload_file".
class SdkAction_AiUploadFile(UniversalBaseModel):
# Fixed tag for this variant; callers normally leave it at its default.
type: typing.Literal["ai_upload_file"] = "ai_upload_file"
# Every remaining field is optional and defaults to None.
selector: typing.Optional[str] = None
file_url: typing.Optional[str] = None
intention: typing.Optional[str] = None
# Either a plain string or a string-keyed dict (see UploadFileActionData).
data: typing.Optional[UploadFileActionData] = None
timeout: typing.Optional[float] = None
# Same model settings under both pydantic major versions: instances are
# frozen and unknown extra fields are allowed.
if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
else:
class Config:
frozen = True
smart_union = True
extra = pydantic.Extra.allow
class SdkAction_AiAct(UniversalBaseModel):
type: typing.Literal["ai_act"] = "ai_act"
intention: typing.Optional[str] = None
@@ -101,5 +120,10 @@ class SdkAction_Extract(UniversalBaseModel):
SdkAction = typing.Union[
SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_AiAct, SdkAction_Extract
SdkAction_AiClick,
SdkAction_AiInputText,
SdkAction_AiSelectOption,
SdkAction_AiUploadFile,
SdkAction_AiAct,
SdkAction_Extract,
]

View File

@@ -0,0 +1,47 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .upload_file_action_data import UploadFileActionData
# Client-side model of the upload-file action parameters.
# All five fields are optional and default to None; each field is described
# by the string literal that follows it.
class UploadFileAction(UniversalBaseModel):
"""
Upload file action parameters.
"""
selector: typing.Optional[str] = pydantic.Field(default=None)
"""
CSS selector for the element
"""
file_url: typing.Optional[str] = pydantic.Field(default=None)
"""
File URL for upload
"""
intention: typing.Optional[str] = pydantic.Field(default=None)
"""
The intention or goal of the upload
"""
data: typing.Optional[UploadFileActionData] = pydantic.Field(default=None)
"""
Additional context data
"""
timeout: typing.Optional[float] = pydantic.Field(default=None)
"""
Timeout in milliseconds
"""
# Same model settings under both pydantic major versions: instances are
# frozen and unknown extra fields are allowed.
if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
else:
class Config:
frozen = True
smart_union = True
extra = pydantic.Extra.allow

View File

@@ -0,0 +1,5 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Type of the `data` field on upload-file actions: either a plain string or a
# dict with string keys whose values may be anything, including None.
UploadFileActionData = typing.Union[str, typing.Dict[str, typing.Optional[typing.Any]]]

View File

@@ -13,6 +13,7 @@ from skyvern.constants import SPECIAL_FIELD_VERIFICATION_CODE
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import validate_download_url
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.schemas.totp_codes import OTPType
from skyvern.services.otp_service import poll_otp_value
@@ -322,10 +323,11 @@ class RealSkyvernPageAi(SkyvernPageAi):
async def ai_upload_file(
self,
selector: str | None,
files: str,
files: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
public_url_only: bool = False,
) -> str:
"""Upload a file using AI to process the file URL."""
@@ -410,6 +412,9 @@ class RealSkyvernPageAi(SkyvernPageAi):
except Exception:
LOG.exception(f"Failed to adapt value for upload file action on selector={selector}, file={files}")
if public_url_only and not validate_download_url(files):
raise Exception("Only public URLs are allowed")
if action and organization_id and task and step:
result = await handle_upload_file_action(action, self.page, self.scraped_page, task, step)
if result and result[-1].success is False:

View File

@@ -411,13 +411,38 @@ class SkyvernPage:
await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value
@overload
async def upload_file(
self,
selector: str,
files: str,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str: ...
@overload
async def upload_file(
self,
*,
prompt: str,
files: str | None = None,
selector: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str: ...
@action_wrap(ActionType.UPLOAD_FILE)
async def upload_file(
self,
selector: str | None,
files: str,
ai: str | None = "fallback",
selector: str | None = None,
files: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
@@ -433,8 +458,11 @@ class SkyvernPage:
if context and context.ai_mode_override:
ai = context.ai_mode_override
if ai == "fallback":
if not files and not prompt:
raise ValueError("Missing input: files should be provided explicitly or in prompt")
error_to_raise = None
if selector:
if selector and files:
try:
file_path = await download_file(files)
locator = self.page.locator(selector)
@@ -453,6 +481,8 @@ class SkyvernPage:
)
if error_to_raise:
raise error_to_raise
elif not files:
raise ValueError("Parameter 'files' is required but was not provided")
else:
return files
elif ai == "proactive" and prompt:
@@ -466,6 +496,8 @@ class SkyvernPage:
if not selector:
raise ValueError("Selector is required but was not provided")
if not files:
raise ValueError("Parameter 'files' is required but was not provided")
file_path = await download_file(files)
locator = self.page.locator(selector)

View File

@@ -34,10 +34,11 @@ class SkyvernPageAi(Protocol):
async def ai_upload_file(
self,
selector: str | None,
files: str,
files: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
public_url_only: bool = False,
) -> str:
"""Upload a file using AI to process the file URL."""
...

View File

@@ -69,6 +69,56 @@ def is_valid_mime_type(file_path: str) -> bool:
return mime_type is not None
def validate_download_url(url: str) -> bool:
    """Return whether ``url`` is a location that downloads may be fetched from.

    Accepted locations:

    - any ``http`` / ``https`` URL (no host filtering is performed here);
    - ``s3://`` URIs that start with
      ``s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_``;
    - ``file://`` URIs, only when ``settings.ENV`` is ``"local"`` and the
      referenced path lies inside ``{REPO_ROOT_DIR}/downloads``.

    Everything else is rejected, including empty or non-string input and
    any URL that cannot be processed.

    Args:
        url: The URL to validate.

    Returns:
        True if the URL is allowed, False otherwise. Errors raised while
        inspecting the URL are treated as a rejection rather than propagated.
    """
    import os

    # Callers may hand over an optional value; reject it explicitly instead
    # of relying on urlparse's handling of None / bytes.
    if not isinstance(url, str) or not url:
        return False

    try:
        scheme = urlparse(url).scheme.lower()

        # Allow http/https URLs (includes Google Drive which uses https)
        if scheme in ("http", "https"):
            return True

        # Allow S3 URIs for the Skyvern uploads bucket only
        if scheme == "s3":
            return url.startswith(f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_")

        # Allow file:// URLs only in the local environment
        if scheme == "file":
            if settings.ENV != "local":
                return False
            try:
                file_path = parse_uri_to_path(url)
                # Compare normalized paths component-wise. A raw string
                # prefix test would accept siblings such as
                # ".../downloads_evil/x" and traversals such as
                # ".../downloads/../secret".
                # NOTE(review): symlinks are not resolved here.
                allowed_dir = os.path.normpath(f"{REPO_ROOT_DIR}/downloads")
                candidate = os.path.normpath(file_path)
                # commonpath raises ValueError for absolute/relative mixes
                # or different drives; both mean "not inside allowed_dir".
                return os.path.commonpath([allowed_dir, candidate]) == allowed_dir
            except ValueError:
                return False

        # Reject unsupported schemes
        return False
    except Exception:
        # A validator must fail closed: anything unexpected means "not allowed".
        return False
async def download_file(url: str, max_size_mb: int | None = None) -> str:
try:
# Check if URL is a Google Drive link

View File

@@ -6,6 +6,7 @@ from fastapi import Depends, HTTPException, status
from skyvern.core.script_generations.real_skyvern_page_ai import RealSkyvernPageAi
from skyvern.core.script_generations.script_skyvern_page import ScriptSkyvernPage
from skyvern.forge import app
from skyvern.forge.sdk.api.files import validate_download_url
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.forge.sdk.routes.routers import base_router
@@ -95,8 +96,8 @@ async def run_sdk_action(
task = await app.DATABASE.create_task(
organization_id=organization_id,
url=action_request.url,
navigation_goal=action.intention,
navigation_payload=action.data,
navigation_goal=action.get_navigation_goal(),
navigation_payload=action.get_navigation_payload(),
data_extraction_goal=None,
title=f"SDK Action Task: {action_request.action.type}",
workflow_run_id=workflow_run.workflow_run_id,
@@ -174,6 +175,16 @@ async def run_sdk_action(
data=action.data,
timeout=action.timeout,
)
elif action.type == "ai_upload_file":
if action.file_url and not validate_download_url(action.file_url):
raise HTTPException(status_code=400, detail="Unsupported file url")
result = await page_ai.ai_upload_file(
selector=action.selector,
files=action.file_url,
intention=action.intention,
data=action.data,
timeout=action.timeout,
)
elif action.type == "ai_act":
await page_ai.ai_act(
prompt=action.intention,

View File

@@ -12,6 +12,7 @@ class SdkActionType(str, Enum):
AI_CLICK = "ai_click"
AI_INPUT_TEXT = "ai_input_text"
AI_SELECT_OPTION = "ai_select_option"
AI_UPLOAD_FILE = "ai_upload_file"
AI_ACT = "ai_act"
EXTRACT = "extract"
@@ -22,6 +23,12 @@ class SdkActionBase(BaseModel):
type: str = Field(..., description="The type of action")
# Base-class default: an action has no navigation goal unless a subclass
# overrides this. The value is passed as `navigation_goal` when the SDK
# action task is created.
def get_navigation_goal(self) -> str | None:
return None
# Base-class default: an action has no navigation payload unless a subclass
# overrides this. The value is passed as `navigation_payload` when the SDK
# action task is created.
def get_navigation_payload(self) -> dict[str, Any] | None:
return None
# Specific action types
class ClickAction(SdkActionBase):
@@ -33,6 +40,12 @@ class ClickAction(SdkActionBase):
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")
# The click action's navigation goal is its `intention`, returned unchanged.
def get_navigation_goal(self) -> str | None:
return self.intention
def get_navigation_payload(self) -> dict[str, Any] | None:
return self.data if isinstance(self.data, dict) else None
class InputTextAction(SdkActionBase):
"""Input text action parameters."""
@@ -46,6 +59,12 @@ class InputTextAction(SdkActionBase):
totp_url: str | None = Field(None, description="TOTP URL for input_text actions")
timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")
# The input-text action's navigation goal is its `intention`, returned unchanged.
def get_navigation_goal(self) -> str | None:
return self.intention
def get_navigation_payload(self) -> dict[str, Any] | None:
return self.data if isinstance(self.data, dict) else None
class SelectOptionAction(SdkActionBase):
"""Select option action parameters."""
@@ -57,6 +76,35 @@ class SelectOptionAction(SdkActionBase):
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")
# The select-option action's navigation goal is its `intention`, returned unchanged.
def get_navigation_goal(self) -> str | None:
return self.intention
def get_navigation_payload(self) -> dict[str, Any] | None:
return self.data if isinstance(self.data, dict) else None
class UploadFileAction(SdkActionBase):
    """Upload file action parameters.

    ``data`` may be free text, a dict of extra context, or absent.
    ``file_url`` is the file to upload; it is also surfaced in the
    navigation payload under the ``"files"`` key.
    """

    type: Literal["ai_upload_file"] = "ai_upload_file"
    selector: str | None = Field(default="", description="CSS selector for the element")
    file_url: str | None = Field(default="", description="File URL for upload")
    intention: str = Field(default="", description="The intention or goal of the upload")
    data: str | dict[str, Any] | None = Field(None, description="Additional context data")
    timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")

    def get_navigation_goal(self) -> str | None:
        """Return the upload intention, unchanged, as the navigation goal."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Build the navigation payload for this upload.

        Returns:
            ``None`` when ``data`` is a non-empty non-dict value (for
            example free text). Otherwise a new dict holding the entries of
            ``data`` (if any) plus a ``"files"`` entry set to ``file_url``
            unless ``data`` already provides one.
        """
        if self.data and not isinstance(self.data, dict):
            return None
        # Work on a copy: writing "files" into self.data would silently change
        # the request model, whose `data` is later passed on to the upload call.
        payload: dict[str, Any] = dict(self.data) if self.data else {}
        payload.setdefault("files", self.file_url)
        return payload
class ActAction(SdkActionBase):
"""AI act action parameters."""
@@ -65,6 +113,12 @@ class ActAction(SdkActionBase):
intention: str = Field(default="", description="Natural language prompt for the action")
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
# The act action's navigation goal is its `intention` (the natural-language
# prompt), returned unchanged.
def get_navigation_goal(self) -> str | None:
return self.intention
def get_navigation_payload(self) -> dict[str, Any] | None:
return self.data if isinstance(self.data, dict) else None
class ExtractAction(SdkActionBase):
"""Extract data action parameters."""
@@ -76,10 +130,16 @@ class ExtractAction(SdkActionBase):
intention: str | None = Field(None, description="The intention or goal of the extraction")
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
# The extract action's navigation goal is its `intention`, returned unchanged
# (it may be None, since the field is optional on this action).
def get_navigation_goal(self) -> str | None:
return self.intention
def get_navigation_payload(self) -> dict[str, Any] | None:
return self.data if isinstance(self.data, dict) else None
# Discriminated union of all action types
SdkAction = Annotated[
Union[ClickAction, InputTextAction, SelectOptionAction, ActAction, ExtractAction],
Union[ClickAction, InputTextAction, SelectOptionAction, UploadFileAction, ActAction, ExtractAction],
Field(discriminator="type"),
]

View File

@@ -7,6 +7,7 @@ from skyvern.client.types.sdk_action import (
SdkAction_AiClick,
SdkAction_AiInputText,
SdkAction_AiSelectOption,
SdkAction_AiUploadFile,
SdkAction_Extract,
)
from skyvern.config import settings
@@ -113,12 +114,30 @@ class SdkSkyvernPageAi(SkyvernPageAi):
async def ai_upload_file(
self,
selector: str | None,
files: str,
files: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
public_url_only: bool = False,
) -> str:
raise NotImplementedError("Upload is not supported yet")
"""Upload a file using AI via API call."""
await self._browser.sdk.ensure_has_server()
response = await self._browser.client.run_sdk_action(
url=self._page.url,
action=SdkAction_AiUploadFile(
selector=selector,
file_url=files,
intention=intention,
data=data,
timeout=timeout,
),
browser_session_id=self._browser.browser_session_id,
browser_address=self._browser.browser_address,
workflow_run_id=self._browser.workflow_run_id,
)
self._browser.workflow_run_id = response.workflow_run_id
return response.result if response.result else files or ""
async def ai_extract(
self,