From d1d0c9414b789169a8f951658a5571ffb05d2909 Mon Sep 17 00:00:00 2001 From: Stanislav Novosad Date: Fri, 7 Nov 2025 15:21:40 -0700 Subject: [PATCH] SDK: file uploading (public url only) (#3867) --- fern/openapi/skyvern_openapi.json | 71 +++++++++++++++++++ skyvern/client/__init__.py | 9 +++ skyvern/client/types/__init__.py | 9 +++ .../types/run_sdk_action_request_action.py | 24 +++++++ skyvern/client/types/sdk_action.py | 26 ++++++- skyvern/client/types/upload_file_action.py | 47 ++++++++++++ .../client/types/upload_file_action_data.py | 5 ++ .../real_skyvern_page_ai.py | 7 +- .../core/script_generations/skyvern_page.py | 40 +++++++++-- .../script_generations/skyvern_page_ai.py | 3 +- skyvern/forge/sdk/api/files.py | 50 +++++++++++++ skyvern/forge/sdk/routes/sdk.py | 15 +++- skyvern/forge/sdk/schemas/sdk_actions.py | 62 +++++++++++++++- skyvern/library/skyvern_browser_page_ai.py | 23 +++++- 14 files changed, 379 insertions(+), 12 deletions(-) create mode 100644 skyvern/client/types/upload_file_action.py create mode 100644 skyvern/client/types/upload_file_action_data.py diff --git a/fern/openapi/skyvern_openapi.json b/fern/openapi/skyvern_openapi.json index 1eff9fa3..21993fe8 100644 --- a/fern/openapi/skyvern_openapi.json +++ b/fern/openapi/skyvern_openapi.json @@ -10408,6 +10408,9 @@ { "$ref": "#/components/schemas/SelectOptionAction" }, + { + "$ref": "#/components/schemas/UploadFileAction" + }, { "$ref": "#/components/schemas/ActAction" }, @@ -10424,6 +10427,7 @@ "ai_click": "#/components/schemas/ClickAction", "ai_input_text": "#/components/schemas/InputTextAction", "ai_select_option": "#/components/schemas/SelectOptionAction", + "ai_upload_file": "#/components/schemas/UploadFileAction", "extract": "#/components/schemas/ExtractAction" } } @@ -10691,6 +10695,73 @@ "title": "SelectOptionAction", "description": "Select option action parameters." }, + "UploadFileAction": { + "properties": { + "type": { + "type": "string", + "const": "ai_upload_file", + "title": "Type", + "default": "ai_upload_file" + }, + "selector": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Selector", + "description": "CSS selector for the element", + "default": "" + }, + "file_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Url", + "description": "File URL for upload", + "default": "" + }, + "intention": { + "type": "string", + "title": "Intention", + "description": "The intention or goal of the upload", + "default": "" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "Additional context data" + }, + "timeout": { + "type": "number", + "title": "Timeout", + "description": "Timeout in milliseconds", + "default": 10000 + } + }, + "type": "object", + "title": "UploadFileAction", + "description": "Upload file action parameters." + }, "SendEmailBlock": { "properties": { "label": { diff --git a/skyvern/client/__init__.py b/skyvern/client/__init__.py index 66e38fbc..fb393f7b 100644 --- a/skyvern/client/__init__.py +++ b/skyvern/client/__init__.py @@ -273,6 +273,7 @@ if typing.TYPE_CHECKING: RunSdkActionRequestAction_AiClick, RunSdkActionRequestAction_AiInputText, RunSdkActionRequestAction_AiSelectOption, + RunSdkActionRequestAction_AiUploadFile, RunSdkActionRequestAction_Extract, RunSdkActionResponse, RunStatus, @@ -327,6 +328,8 @@ if typing.TYPE_CHECKING: ThoughtType, TotpCode, TotpType, + UploadFileAction, + UploadFileActionData, UploadToS3Block, UploadToS3BlockYaml, UrlBlock, @@ -740,6 +743,7 @@ _dynamic_imports: typing.Dict[str, str] = { "RunSdkActionRequestAction_AiClick": ".types", "RunSdkActionRequestAction_AiInputText": ".types", "RunSdkActionRequestAction_AiSelectOption": ".types", + "RunSdkActionRequestAction_AiUploadFile": ".types", "RunSdkActionRequestAction_Extract": ".types", "RunSdkActionResponse": ".types", "RunStatus": ".types", @@ -797,6 +801,8 @@ _dynamic_imports: typing.Dict[str, str] = { "TotpCode": ".types", "TotpType": ".types", "UnprocessableEntityError": ".errors", + "UploadFileAction": ".types", + "UploadFileActionData": ".types", "UploadToS3Block": ".types", "UploadToS3BlockYaml": ".types", "UrlBlock": ".types", @@ -1231,6 +1237,7 @@ __all__ = [ "RunSdkActionRequestAction_AiClick", "RunSdkActionRequestAction_AiInputText", "RunSdkActionRequestAction_AiSelectOption", + "RunSdkActionRequestAction_AiUploadFile", "RunSdkActionRequestAction_Extract", "RunSdkActionResponse", "RunStatus", @@ -1288,6 +1295,8 @@ __all__ = [ "TotpCode", "TotpType", "UnprocessableEntityError", + "UploadFileAction", + "UploadFileActionData", "UploadToS3Block", "UploadToS3BlockYaml", "UrlBlock", diff --git a/skyvern/client/types/__init__.py b/skyvern/client/types/__init__.py index 97f7b0ec..2ddc9898 100644 --- a/skyvern/client/types/__init__.py +++ b/skyvern/client/types/__init__.py @@ -299,6 +299,7 @@ if typing.TYPE_CHECKING: RunSdkActionRequestAction_AiClick, RunSdkActionRequestAction_AiInputText, RunSdkActionRequestAction_AiSelectOption, + RunSdkActionRequestAction_AiUploadFile, RunSdkActionRequestAction_Extract, ) from .run_sdk_action_response import RunSdkActionResponse @@ -358,6 +359,8 @@ if typing.TYPE_CHECKING: from .thought_type import ThoughtType from .totp_code import TotpCode from .totp_type import TotpType + from .upload_file_action import UploadFileAction + from .upload_file_action_data import UploadFileActionData from .upload_to_s3block import UploadToS3Block from .upload_to_s3block_yaml import UploadToS3BlockYaml from .url_block import UrlBlock @@ -775,6 +778,7 @@ _dynamic_imports: typing.Dict[str, str] = { "RunSdkActionRequestAction_AiClick": ".run_sdk_action_request_action", "RunSdkActionRequestAction_AiInputText": ".run_sdk_action_request_action", "RunSdkActionRequestAction_AiSelectOption": ".run_sdk_action_request_action", + "RunSdkActionRequestAction_AiUploadFile": ".run_sdk_action_request_action", "RunSdkActionRequestAction_Extract": ".run_sdk_action_request_action", "RunSdkActionResponse": ".run_sdk_action_response", "RunStatus": ".run_status", @@ -829,6 +833,8 @@ _dynamic_imports: typing.Dict[str, str] = { "ThoughtType": ".thought_type", "TotpCode": ".totp_code", "TotpType": ".totp_type", + "UploadFileAction": ".upload_file_action", + "UploadFileActionData": ".upload_file_action_data", "UploadToS3Block": ".upload_to_s3block", "UploadToS3BlockYaml": ".upload_to_s3block_yaml", "UrlBlock": ".url_block", @@ -1256,6 +1262,7 @@ __all__ = [ "RunSdkActionRequestAction_AiClick", "RunSdkActionRequestAction_AiInputText", "RunSdkActionRequestAction_AiSelectOption", + "RunSdkActionRequestAction_AiUploadFile", "RunSdkActionRequestAction_Extract", "RunSdkActionResponse", "RunStatus", @@ -1310,6 +1317,8 @@ __all__ = [ "ThoughtType", "TotpCode", "TotpType", + "UploadFileAction", + "UploadFileActionData", "UploadToS3Block", "UploadToS3BlockYaml", "UrlBlock", diff --git a/skyvern/client/types/run_sdk_action_request_action.py b/skyvern/client/types/run_sdk_action_request_action.py index 4b4df74e..b9834c11 100644 --- a/skyvern/client/types/run_sdk_action_request_action.py +++ b/skyvern/client/types/run_sdk_action_request_action.py @@ -12,6 +12,7 @@ from .extract_action_data import ExtractActionData from .extract_action_extract_schema import ExtractActionExtractSchema from .input_text_action_data import InputTextActionData from .select_option_action_data import SelectOptionActionData +from .upload_file_action_data import UploadFileActionData class RunSdkActionRequestAction_AiAct(UniversalBaseModel): @@ -100,6 +101,28 @@ class RunSdkActionRequestAction_AiSelectOption(UniversalBaseModel): extra = pydantic.Extra.allow +class RunSdkActionRequestAction_AiUploadFile(UniversalBaseModel): + """ + The action to execute with its specific parameters + """ + + type: typing.Literal["ai_upload_file"] = "ai_upload_file" + selector: typing.Optional[str] = None + file_url: typing.Optional[str] = None + intention: typing.Optional[str] = None + data: typing.Optional[UploadFileActionData] = None + timeout: typing.Optional[float] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + class RunSdkActionRequestAction_Extract(UniversalBaseModel): """ The action to execute with its specific parameters @@ -127,5 +150,6 @@ RunSdkActionRequestAction = typing.Union[ RunSdkActionRequestAction_AiClick, RunSdkActionRequestAction_AiInputText, RunSdkActionRequestAction_AiSelectOption, + RunSdkActionRequestAction_AiUploadFile, RunSdkActionRequestAction_Extract, ] diff --git a/skyvern/client/types/sdk_action.py b/skyvern/client/types/sdk_action.py index 41a15e2b..fff9c3cc 100644 --- a/skyvern/client/types/sdk_action.py +++ b/skyvern/client/types/sdk_action.py @@ -11,6 +11,7 @@ from .extract_action_data import ExtractActionData from .extract_action_extract_schema import ExtractActionExtractSchema from .input_text_action_data import InputTextActionData from .select_option_action_data import SelectOptionActionData +from .upload_file_action_data import UploadFileActionData class SdkAction_AiClick(UniversalBaseModel): @@ -68,6 +69,24 @@ class SdkAction_AiSelectOption(UniversalBaseModel): extra = pydantic.Extra.allow +class SdkAction_AiUploadFile(UniversalBaseModel): + type: typing.Literal["ai_upload_file"] = "ai_upload_file" + selector: typing.Optional[str] = None + file_url: typing.Optional[str] = None + intention: typing.Optional[str] = None + data: typing.Optional[UploadFileActionData] = None + timeout: typing.Optional[float] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + class SdkAction_AiAct(UniversalBaseModel): type: typing.Literal["ai_act"] = "ai_act" intention: typing.Optional[str] = None @@ -101,5 +120,10 @@ class SdkAction_Extract(UniversalBaseModel): SdkAction = typing.Union[ - SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_AiAct, SdkAction_Extract + SdkAction_AiClick, + SdkAction_AiInputText, + SdkAction_AiSelectOption, + SdkAction_AiUploadFile, + SdkAction_AiAct, + SdkAction_Extract, ] diff --git a/skyvern/client/types/upload_file_action.py b/skyvern/client/types/upload_file_action.py new file mode 100644 index 00000000..ac64f7f2 --- /dev/null +++ b/skyvern/client/types/upload_file_action.py @@ -0,0 +1,47 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .upload_file_action_data import UploadFileActionData + + +class UploadFileAction(UniversalBaseModel): + """ + Upload file action parameters. + """ + + selector: typing.Optional[str] = pydantic.Field(default=None) + """ + CSS selector for the element + """ + + file_url: typing.Optional[str] = pydantic.Field(default=None) + """ + File URL for upload + """ + + intention: typing.Optional[str] = pydantic.Field(default=None) + """ + The intention or goal of the upload + """ + + data: typing.Optional[UploadFileActionData] = pydantic.Field(default=None) + """ + Additional context data + """ + + timeout: typing.Optional[float] = pydantic.Field(default=None) + """ + Timeout in milliseconds + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyvern/client/types/upload_file_action_data.py b/skyvern/client/types/upload_file_action_data.py new file mode 100644 index 00000000..939adc16 --- /dev/null +++ b/skyvern/client/types/upload_file_action_data.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +UploadFileActionData = typing.Union[str, typing.Dict[str, typing.Optional[typing.Any]]] diff --git a/skyvern/core/script_generations/real_skyvern_page_ai.py b/skyvern/core/script_generations/real_skyvern_page_ai.py index 3698c5a4..1cebd69d 100644 --- a/skyvern/core/script_generations/real_skyvern_page_ai.py +++ b/skyvern/core/script_generations/real_skyvern_page_ai.py @@ -13,6 +13,7 @@ from skyvern.constants import SPECIAL_FIELD_VERIFICATION_CODE from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi from skyvern.forge import app from skyvern.forge.prompts import prompt_engine +from skyvern.forge.sdk.api.files import validate_download_url from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.schemas.totp_codes import OTPType from skyvern.services.otp_service import poll_otp_value @@ -322,10 +323,11 @@ class RealSkyvernPageAi(SkyvernPageAi): async def ai_upload_file( self, selector: str | None, - files: str, + files: str | None, intention: str, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + public_url_only: bool = False, ) -> str: """Upload a file using AI to process the file URL.""" @@ -410,6 +412,9 @@ class RealSkyvernPageAi(SkyvernPageAi): except Exception: LOG.exception(f"Failed to adapt value for upload file action on selector={selector}, file={files}") + if public_url_only and not validate_download_url(files): + raise Exception("Only public URLs are allowed") + if action and organization_id and task and step: result = await handle_upload_file_action(action, self.page, self.scraped_page, task, step) if result and result[-1].success is False: diff --git a/skyvern/core/script_generations/skyvern_page.py b/skyvern/core/script_generations/skyvern_page.py index 0d508f20..4c480246 100644 --- a/skyvern/core/script_generations/skyvern_page.py +++ b/skyvern/core/script_generations/skyvern_page.py @@ -411,13 +411,38 @@ class SkyvernPage: await handler_utils.input_sequentially(locator, value, timeout=timeout) return value + @overload + async def upload_file( + self, + selector: str, + files: str, + *, + prompt: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str: ... + + @overload + async def upload_file( + self, + *, + prompt: str, + files: str | None = None, + selector: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str: ... + @action_wrap(ActionType.UPLOAD_FILE) async def upload_file( self, - selector: str | None, - files: str, - ai: str | None = "fallback", + selector: str | None = None, + files: str | None = None, + *, prompt: str | None = None, + ai: str | None = "fallback", data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, intention: str | None = None, # backward compatibility @@ -433,8 +458,11 @@ class SkyvernPage: if context and context.ai_mode_override: ai = context.ai_mode_override if ai == "fallback": + if not files and not prompt: + raise ValueError("Missing input: files should be provided explicitly or in prompt") + error_to_raise = None - if selector: + if selector and files: try: file_path = await download_file(files) locator = self.page.locator(selector) @@ -453,6 +481,8 @@ class SkyvernPage: ) if error_to_raise: raise error_to_raise + elif not files: + raise ValueError("Parameter 'files' is required but was not provided") else: return files elif ai == "proactive" and prompt: @@ -466,6 +496,8 @@ class SkyvernPage: if not selector: raise ValueError("Selector is required but was not provided") + if not files: + raise ValueError("Parameter 'files' is required but was not provided") file_path = await download_file(files) locator = self.page.locator(selector) diff --git a/skyvern/core/script_generations/skyvern_page_ai.py b/skyvern/core/script_generations/skyvern_page_ai.py index 15e75437..08a35f14 100644 --- a/skyvern/core/script_generations/skyvern_page_ai.py +++ b/skyvern/core/script_generations/skyvern_page_ai.py @@ -34,10 +34,11 @@ class SkyvernPageAi(Protocol): async def ai_upload_file( self, selector: str | None, - files: str, + files: str | None, intention: str, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + public_url_only: bool = False, ) -> str: """Upload a file using AI to process the file URL.""" ... diff --git a/skyvern/forge/sdk/api/files.py b/skyvern/forge/sdk/api/files.py index 5692c871..9a5e2108 100644 --- a/skyvern/forge/sdk/api/files.py +++ b/skyvern/forge/sdk/api/files.py @@ -69,6 +69,56 @@ def is_valid_mime_type(file_path: str) -> bool: return mime_type is not None +def validate_download_url(url: str) -> bool: + """Validate if a URL is supported for downloading. + + Security validation for URL downloads to prevent: + - File system access outside allowed directories + - Access to local file system in non-local environments + - Unsupported or dangerous URL schemes + + Args: + url: The URL to validate + + Returns: + True if valid, False otherwise. + """ + try: + parsed_url = urlparse(url) + scheme = parsed_url.scheme.lower() + + # Allow http/https URLs (includes Google Drive which uses https) + if scheme in ("http", "https"): + return True + + # Allow S3 URIs for Skyvern uploads bucket + if scheme == "s3": + if url.startswith(f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_"): + return True + return False + + # Allow file:// URLs only in local environment + if scheme == "file": + if settings.ENV != "local": + return False + + # Validate the file path is within allowed directories + try: + file_path = parse_uri_to_path(url) + allowed_prefix = f"{REPO_ROOT_DIR}/downloads" + if not file_path.startswith(allowed_prefix): + return False + return True + except ValueError: + return False + + # Reject unsupported schemes + return False + + except Exception: + return False + + async def download_file(url: str, max_size_mb: int | None = None) -> str: try: # Check if URL is a Google Drive link diff --git a/skyvern/forge/sdk/routes/sdk.py b/skyvern/forge/sdk/routes/sdk.py index 27b0f03a..c9632e4a 100644 --- a/skyvern/forge/sdk/routes/sdk.py +++ b/skyvern/forge/sdk/routes/sdk.py @@ -6,6 +6,7 @@ from fastapi import Depends, HTTPException, status from skyvern.core.script_generations.real_skyvern_page_ai import RealSkyvernPageAi from skyvern.core.script_generations.script_skyvern_page import ScriptSkyvernPage from skyvern.forge import app +from skyvern.forge.sdk.api.files import validate_download_url from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core.skyvern_context import SkyvernContext from skyvern.forge.sdk.routes.routers import base_router @@ -95,8 +96,8 @@ async def run_sdk_action( task = await app.DATABASE.create_task( organization_id=organization_id, url=action_request.url, - navigation_goal=action.intention, - navigation_payload=action.data, + navigation_goal=action.get_navigation_goal(), + navigation_payload=action.get_navigation_payload(), data_extraction_goal=None, title=f"SDK Action Task: {action_request.action.type}", workflow_run_id=workflow_run.workflow_run_id, @@ -174,6 +175,16 @@ async def run_sdk_action( data=action.data, timeout=action.timeout, ) + elif action.type == "ai_upload_file": + if action.file_url and not validate_download_url(action.file_url): + raise HTTPException(status_code=400, detail="Unsupported file url") + result = await page_ai.ai_upload_file( + selector=action.selector, + files=action.file_url, + intention=action.intention, + data=action.data, + timeout=action.timeout, + ) elif action.type == "ai_act": await page_ai.ai_act( prompt=action.intention, diff --git a/skyvern/forge/sdk/schemas/sdk_actions.py b/skyvern/forge/sdk/schemas/sdk_actions.py index 16400544..e83b6cc6 100644 --- a/skyvern/forge/sdk/schemas/sdk_actions.py +++ b/skyvern/forge/sdk/schemas/sdk_actions.py @@ -12,6 +12,7 @@ class SdkActionType(str, Enum): AI_CLICK = "ai_click" AI_INPUT_TEXT = "ai_input_text" AI_SELECT_OPTION = "ai_select_option" + AI_UPLOAD_FILE = "ai_upload_file" AI_ACT = "ai_act" EXTRACT = "extract" @@ -22,6 +23,12 @@ class SdkActionBase(BaseModel): type: str = Field(..., description="The type of action") + def get_navigation_goal(self) -> str | None: + return None + + def get_navigation_payload(self) -> dict[str, Any] | None: + return None + # Specific action types class ClickAction(SdkActionBase): @@ -33,6 +40,12 @@ class ClickAction(SdkActionBase): data: str | dict[str, Any] | None = Field(None, description="Additional context data") timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds") + def get_navigation_goal(self) -> str | None: + return self.intention + + def get_navigation_payload(self) -> dict[str, Any] | None: + return self.data if isinstance(self.data, dict) else None + class InputTextAction(SdkActionBase): """Input text action parameters.""" @@ -46,6 +59,12 @@ class InputTextAction(SdkActionBase): totp_url: str | None = Field(None, description="TOTP URL for input_text actions") timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds") + def get_navigation_goal(self) -> str | None: + return self.intention + + def get_navigation_payload(self) -> dict[str, Any] | None: + return self.data if isinstance(self.data, dict) else None + class SelectOptionAction(SdkActionBase): """Select option action parameters.""" @@ -57,6 +76,35 @@ class SelectOptionAction(SdkActionBase): data: str | dict[str, Any] | None = Field(None, description="Additional context data") timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds") + def get_navigation_goal(self) -> str | None: + return self.intention + + def get_navigation_payload(self) -> dict[str, Any] | None: + return self.data if isinstance(self.data, dict) else None + + +class UploadFileAction(SdkActionBase): + """Upload file action parameters.""" + + type: Literal["ai_upload_file"] = "ai_upload_file" + selector: str | None = Field(default="", description="CSS selector for the element") + file_url: str | None = Field(default="", description="File URL for upload") + intention: str = Field(default="", description="The intention or goal of the upload") + data: str | dict[str, Any] | None = Field(None, description="Additional context data") + timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds") + + def get_navigation_goal(self) -> str | None: + return self.intention + + def get_navigation_payload(self) -> dict[str, Any] | None: + if self.data and not isinstance(self.data, dict): + return None + + data = self.data or {} + if "files" not in data: + data["files"] = self.file_url + return data + class ActAction(SdkActionBase): """AI act action parameters.""" @@ -65,6 +113,12 @@ class ActAction(SdkActionBase): intention: str = Field(default="", description="Natural language prompt for the action") data: str | dict[str, Any] | None = Field(None, description="Additional context data") + def get_navigation_goal(self) -> str | None: + return self.intention + + def get_navigation_payload(self) -> dict[str, Any] | None: + return self.data if isinstance(self.data, dict) else None + class ExtractAction(SdkActionBase): """Extract data action parameters.""" @@ -76,10 +130,16 @@ class ExtractAction(SdkActionBase): intention: str | None = Field(None, description="The intention or goal of the extraction") data: str | dict[str, Any] | None = Field(None, description="Additional context data") + def get_navigation_goal(self) -> str | None: + return self.intention + + def get_navigation_payload(self) -> dict[str, Any] | None: + return self.data if isinstance(self.data, dict) else None + # Discriminated union of all action types SdkAction = Annotated[ - Union[ClickAction, InputTextAction, SelectOptionAction, ActAction, ExtractAction], + Union[ClickAction, InputTextAction, SelectOptionAction, UploadFileAction, ActAction, ExtractAction], Field(discriminator="type"), ] diff --git a/skyvern/library/skyvern_browser_page_ai.py b/skyvern/library/skyvern_browser_page_ai.py index 6f165b13..10f359e4 100644 --- a/skyvern/library/skyvern_browser_page_ai.py +++ b/skyvern/library/skyvern_browser_page_ai.py @@ -7,6 +7,7 @@ from skyvern.client.types.sdk_action import ( SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, + SdkAction_AiUploadFile, SdkAction_Extract, ) from skyvern.config import settings @@ -113,12 +114,30 @@ class SdkSkyvernPageAi(SkyvernPageAi): async def ai_upload_file( self, selector: str | None, - files: str, + files: str | None, intention: str, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + public_url_only: bool = False, ) -> str: - raise NotImplementedError("Upload is not supported yet") + """Upload a file using AI via API call.""" + + await self._browser.sdk.ensure_has_server() + response = await self._browser.client.run_sdk_action( + url=self._page.url, + action=SdkAction_AiUploadFile( + selector=selector, + file_url=files, + intention=intention, + data=data, + timeout=timeout, + ), + browser_session_id=self._browser.browser_session_id, + browser_address=self._browser.browser_address, + workflow_run_id=self._browser.workflow_run_id, + ) + self._browser.workflow_run_id = response.workflow_run_id + return response.result if response.result else files or "" async def ai_extract( self,