diff --git a/skyvern-ts/client/src/library/Skyvern.ts b/skyvern-ts/client/src/library/Skyvern.ts index bb313f98..0e1e5dcb 100644 --- a/skyvern-ts/client/src/library/Skyvern.ts +++ b/skyvern-ts/client/src/library/Skyvern.ts @@ -33,6 +33,26 @@ export interface DownloadFilesOptions extends SkyvernApi.DownloadFilesRequest { timeout?: number; } +/** + * Main entry point for the Skyvern SDK. + * + * This class provides methods to launch and connect to browsers (both local and cloud-hosted), + * and access the Skyvern API client for task and workflow management. It combines browser + * automation capabilities with AI-powered task execution. + * + * @example + * ```typescript + * // Remote mode: Connect to Skyvern Cloud (API key required) + * const skyvern = new Skyvern({ apiKey: "your-api-key" }); + * + * // Launch a cloud browser + * const browser = await skyvern.launchCloudBrowser(); + * const page = await browser.getWorkingPage(); + * + * // Execute AI-powered tasks + * await page.agent.runTask("Fill out the form and submit it"); + * ``` + */ export class Skyvern extends SkyvernClient { private readonly _apiKey: string; private readonly _environment: SkyvernEnvironment | string; @@ -156,6 +176,19 @@ export class Skyvern extends SkyvernClient { return response; } + /** + * Launch a new cloud-hosted browser session. + * + * This creates a new browser session in Skyvern's cloud infrastructure and connects to it. + * + * @param options - Optional configuration + * @param options.timeout - Timeout in minutes for the session. Timeout is applied after the session is started. + * Must be between 5 and 1440. Defaults to 60. + * @param options.proxyLocation - Geographic proxy location to route the browser traffic through. + * This is only available in Skyvern Cloud. + * + * @returns SkyvernBrowser instance connected to the new cloud session. 
+ */ async launchCloudBrowser(options?: { timeout?: number; proxyLocation?: SkyvernApi.ProxyLocation; @@ -172,6 +205,13 @@ export class Skyvern extends SkyvernClient { return this._connectToCloudBrowserSession(browserSession); } + /** + * Connect to an existing cloud-hosted browser session by ID. + * + * @param browserSessionId - The ID of the cloud browser session to connect to. + * + * @returns SkyvernBrowser instance connected to the cloud session. + */ async connectToCloudBrowserSession(browserSessionId: string): Promise { this._ensureCloudEnvironment(); @@ -182,6 +222,21 @@ export class Skyvern extends SkyvernClient { return this._connectToCloudBrowserSession(browserSession); } + /** + * Get or create a cloud browser session. + * + * This method attempts to reuse the most recent available cloud browser session. + * If no session exists, it creates a new one. This is useful for cost efficiency + * and session persistence. + * + * @param options - Optional configuration + * @param options.timeout - Timeout in minutes for the session. Timeout is applied after the session is started. + * Must be between 5 and 1440. Defaults to 60. Only used when creating a new session. + * @param options.proxyLocation - Geographic proxy location to route the browser traffic through. + * This is only available in Skyvern Cloud. Only used when creating a new session. + * + * @returns SkyvernBrowser instance connected to an existing or new cloud session. + */ async useCloudBrowser(options?: { timeout?: number; proxyLocation?: ProxyLocation }): Promise { this._ensureCloudEnvironment(); @@ -204,6 +259,16 @@ export class Skyvern extends SkyvernClient { return this._connectToCloudBrowserSession(browserSession); } + /** + * Connect to an existing browser instance via Chrome DevTools Protocol (CDP). + * + * Use this to connect to a browser that's already running with CDP enabled, + * whether local or remote. + * + * @param cdpUrl - The CDP endpoint to connect to, given as an HTTP URL (e.g., "http://localhost:9222") or a WebSocket URL.
+ * + * @returns SkyvernBrowser instance connected to the existing browser. + */ async connectToBrowserOverCdp(cdpUrl: string): Promise { const browser = await chromium.connectOverCDP(cdpUrl); const browserContext = browser.contexts()[0] ?? (await browser.newContext()); @@ -213,6 +278,9 @@ export class Skyvern extends SkyvernClient { return skyvernBrowser; } + /** + * Close all browsers and release resources. + */ async close(): Promise { await Promise.all(Array.from(this._browsers).map((browser) => browser.close())); this._browsers.clear(); diff --git a/skyvern-ts/client/src/library/SkyvernBrowser.ts b/skyvern-ts/client/src/library/SkyvernBrowser.ts index 85c15df0..74114f7c 100644 --- a/skyvern-ts/client/src/library/SkyvernBrowser.ts +++ b/skyvern-ts/client/src/library/SkyvernBrowser.ts @@ -2,6 +2,26 @@ import type { Browser, BrowserContext, Page } from "playwright"; import type { Skyvern } from "./Skyvern.js"; import { SkyvernBrowserPageCore, type SkyvernBrowserPage } from "./SkyvernBrowserPage.js"; +/** + * A browser context wrapper that creates Skyvern-enabled pages. + * + * This class wraps a Playwright BrowserContext and provides methods to create + * SkyvernBrowserPage instances that combine traditional browser automation with + * AI-powered task execution capabilities. It manages browser session state and + * enables persistent browser sessions across multiple pages. + * + * @example + * ```typescript + * const skyvern = new Skyvern({ apiKey: "your-api-key" }); + * const browser = await skyvern.launchCloudBrowser(); + * + * // Get or create the working page + * const page = await browser.getWorkingPage(); + * + * // Create a new page + * const newPage = await browser.newPage(); + * ``` + */ export class SkyvernBrowser { private readonly _skyvern: Skyvern; private readonly _browserContext: BrowserContext; @@ -43,12 +63,29 @@ export class SkyvernBrowser { return this._browserContext; } + /** + * Get the most recent page or create a new one if none exists.
+ * + * This method returns the last page in the browser context, or creates a new page + * if the context has no pages. This is useful for continuing work on an existing + * page without creating unnecessary new tabs. + * + * @returns The most recent page, or a newly created one if the context had none, wrapped with Skyvern capabilities. + */ async getWorkingPage(): Promise { const pages = this._browserContext.pages(); const page = pages.length > 0 ? pages[pages.length - 1] : await this._browserContext.newPage(); return this._createSkyvernPage(page); } + /** + * Create a new page (tab) in the browser context. + * + * This method always creates a new page, similar to opening a new tab in a browser. + * The new page will have both Playwright's standard API and Skyvern's AI capabilities. + * + * @returns A new page wrapped with Skyvern capabilities. + */ async newPage(): Promise { const page = await this._browserContext.newPage(); return this._createSkyvernPage(page); @@ -58,6 +95,20 @@ export class SkyvernBrowser { return this._browserContext.pages().map((page) => SkyvernBrowserPageCore.create(this, page)); } + /** + * Close the browser and, for cloud sessions, the associated browser session. + * + * This method closes the browser context. If the browser is associated with a + * cloud browser session (has a browserSessionId), it will also close the + * browser session via the API, marking it as completed. + * + * @example + * ```typescript + * const browser = await skyvern.launchCloudBrowser(); + * // ... use the browser ...
+ * await browser.close(); // Closes both browser and cloud session + * ``` + */ async close(): Promise { if (this._browser) { await this._browser.close(); diff --git a/skyvern-ts/client/src/library/SkyvernBrowserPage.ts b/skyvern-ts/client/src/library/SkyvernBrowserPage.ts index d870808b..c0c35cdf 100644 --- a/skyvern-ts/client/src/library/SkyvernBrowserPage.ts +++ b/skyvern-ts/client/src/library/SkyvernBrowserPage.ts @@ -3,6 +3,25 @@ import type { SkyvernBrowser } from "./SkyvernBrowser.js"; import { SkyvernBrowserPageAgent } from "./SkyvernBrowserPageAgent.js"; import { SkyvernBrowserPageAi } from "./SkyvernBrowserPageAi.js"; +/** + * A browser page wrapper that combines Playwright's page API with Skyvern's AI capabilities. + * + * This class provides a unified interface for both traditional browser automation (via Playwright) + * and AI-powered task execution (via Skyvern). It exposes standard page methods like click, fill, + * goto, etc., while also providing access to Skyvern's task and workflow execution through the + * `agent` attribute. + * + * @example + * ```typescript + * // Use standard Playwright methods + * await page.goto("https://example.com"); + * await page.fill("#username", "user@example.com"); + * await page.click("#login-button"); + * + * // Or use Skyvern's AI capabilities + * await page.agent.runTask("Fill out the contact form and submit it"); + * ``` + */ export class SkyvernBrowserPageCore { private readonly _browser: SkyvernBrowser; private readonly _page: Page; @@ -115,6 +134,17 @@ export class SkyvernBrowserPageCore { } } + /** + * Perform an action on the page using AI based on a natural language prompt. + * + * @param prompt - Natural language description of the action to perform. 
+ * + * @example + * ```typescript + * // Simple action + * await page.act("Click the login button"); + * ``` + */ async act(prompt: string): Promise { return this._ai.aiAct(prompt); } diff --git a/skyvern-ts/client/src/library/SkyvernBrowserPageAgent.ts b/skyvern-ts/client/src/library/SkyvernBrowserPageAgent.ts index 5666e323..c0deac34 100644 --- a/skyvern-ts/client/src/library/SkyvernBrowserPageAgent.ts +++ b/skyvern-ts/client/src/library/SkyvernBrowserPageAgent.ts @@ -9,6 +9,13 @@ function getAppUrlForRun(runId: string): string { return `https://app.skyvern.com/runs/${runId}`; } +/** + * Provides methods to run Skyvern tasks and workflows in the context of a browser page. + * + * This class enables executing AI-powered browser automation tasks while sharing the + * context of an existing browser page. It supports running custom tasks, login workflows, + * and pre-defined workflows with automatic waiting for completion. + */ export class SkyvernBrowserPageAgent { private readonly _browser: SkyvernBrowser; private readonly _page: Page; @@ -18,6 +25,25 @@ export class SkyvernBrowserPageAgent { this._page = page; } + /** + * Run a task in the context of this page and wait for it to finish. + * + * @param prompt - Natural language description of the task to perform. + * @param options - Optional configuration + * @param options.engine - The execution engine to use. Defaults to skyvern_v2. + * @param options.model - LLM model configuration options. + * @param options.url - URL to navigate to. If not provided, uses the current page URL. + * @param options.webhookUrl - URL to receive webhook notifications about task progress. + * @param options.totpIdentifier - Identifier for TOTP (Time-based One-Time Password) authentication. + * @param options.totpUrl - URL to fetch TOTP codes from. + * @param options.title - Human-readable title for this task run. + * @param options.errorCodeMapping - Mapping of error codes to custom error messages. 
+ * @param options.dataExtractionSchema - Schema defining what data to extract from the page. + * @param options.maxSteps - Maximum number of steps the agent can take. + * @param options.timeout - Maximum time in seconds to wait for task completion. + * + * @returns TaskRunResponse containing the task execution results. + */ async runTask( prompt: string, options?: { @@ -70,6 +96,26 @@ export class SkyvernBrowserPageAgent { return completedRun as Skyvern.TaskRunResponse; } + /** + * Run a login task in the context of this page and wait for it to finish. + * + * @param credentialType - Type of credential store to use (e.g., skyvern, bitwarden, onepassword). + * @param options - Optional configuration + * @param options.url - URL to navigate to for login. If not provided, uses the current page URL. + * @param options.credentialId - ID of the credential to use. + * @param options.bitwardenCollectionId - Bitwarden collection ID containing the credentials. + * @param options.bitwardenItemId - Bitwarden item ID for the credentials. + * @param options.onepasswordVaultId - 1Password vault ID containing the credentials. + * @param options.onepasswordItemId - 1Password item ID for the credentials. + * @param options.prompt - Additional instructions for the login process. + * @param options.webhookUrl - URL to receive webhook notifications about login progress. + * @param options.totpIdentifier - Identifier for TOTP authentication. + * @param options.totpUrl - URL to fetch TOTP codes from. + * @param options.extraHttpHeaders - Additional HTTP headers to include in requests. + * @param options.timeout - Maximum time in seconds to wait for login completion. + * + * @returns WorkflowRunResponse containing the login workflow execution results. 
+ */ async login( credentialType: string, options?: { @@ -126,6 +172,23 @@ export class SkyvernBrowserPageAgent { return completedRun as Skyvern.WorkflowRunResponse; } + /** + * Run a file download task in the context of this page and wait for it to finish. + * + * @param prompt - Instructions for navigating to and downloading the file. + * @param options - Optional configuration + * @param options.url - URL to navigate to for file download. If not provided, uses the current page URL. + * @param options.downloadSuffix - Suffix or complete filename for the downloaded file. + * @param options.downloadTimeout - Timeout in seconds for the download operation. + * @param options.maxStepsPerRun - Maximum number of steps to execute. + * @param options.webhookUrl - URL to receive webhook notifications about download progress. + * @param options.totpIdentifier - Identifier for TOTP authentication. + * @param options.totpUrl - URL to fetch TOTP codes from. + * @param options.extraHttpHeaders - Additional HTTP headers to include in requests. + * @param options.timeout - Maximum time in seconds to wait for download completion. + * + * @returns WorkflowRunResponse containing the file download workflow execution results. + */ async downloadFiles( prompt: string, options?: { @@ -172,6 +235,21 @@ export class SkyvernBrowserPageAgent { return completedRun as Skyvern.WorkflowRunResponse; } + /** + * Run a workflow in the context of this page and wait for it to finish. + * + * @param workflowId - ID of the workflow to execute. + * @param options - Optional configuration + * @param options.parameters - Dictionary of parameters to pass to the workflow. + * @param options.template - Whether this is a workflow template. + * @param options.title - Human-readable title for this workflow run. + * @param options.webhookUrl - URL to receive webhook notifications about workflow progress. + * @param options.totpUrl - URL to fetch TOTP codes from. 
+ * @param options.totpIdentifier - Identifier for TOTP authentication. + * @param options.timeout - Maximum time in seconds to wait for workflow completion. + * + * @returns WorkflowRunResponse containing the workflow execution results. + */ async runWorkflow( workflowId: string, options?: { diff --git a/skyvern-ts/client/src/library/SkyvernBrowserPageAi.ts b/skyvern-ts/client/src/library/SkyvernBrowserPageAi.ts index 7a4d7315..2484002c 100644 --- a/skyvern-ts/client/src/library/SkyvernBrowserPageAi.ts +++ b/skyvern-ts/client/src/library/SkyvernBrowserPageAi.ts @@ -12,6 +12,9 @@ export class SkyvernBrowserPageAi { this._page = page; } + /** + * Click an element using AI via API call. + */ async aiClick(options: { selector?: string; intention: string; @@ -41,6 +44,9 @@ export class SkyvernBrowserPageAi { return response.result ? String(response.result) : options.selector || null; } + /** + * Input text into an element using AI via API call. + */ async aiInputText(options: { selector?: string; value?: string; @@ -76,6 +82,9 @@ export class SkyvernBrowserPageAi { return response.result ? String(response.result) : options.value || ""; } + /** + * Select an option from a dropdown using AI via API call. + */ async aiSelectOption(options: { selector?: string; value?: string; @@ -107,6 +116,9 @@ export class SkyvernBrowserPageAi { return response.result ? String(response.result) : options.value || ""; } + /** + * Upload a file using AI via API call. + */ async aiUploadFile(options: { selector?: string; fileUrl?: string; @@ -138,6 +150,9 @@ export class SkyvernBrowserPageAi { return response.result ? String(response.result) : options.fileUrl || ""; } + /** + * Extract information from the page using AI via API call. 
+ */ async aiExtract(options: { prompt: string; extractSchema?: Record | unknown[] | string; @@ -169,6 +184,9 @@ export class SkyvernBrowserPageAi { return (response.result as Record | unknown[] | string) || null; } + /** + * Validate the current page state using AI via API call. + */ async aiValidate(options: { prompt: string; model?: Record }): Promise { LOG.info("AI validate", { prompt: options.prompt, model: options.model, workflow_run_id: this._browser.workflowRunId }); @@ -191,6 +209,9 @@ export class SkyvernBrowserPageAi { return response.result != null ? Boolean(response.result) : false; } + /** + * Perform an action on the page using AI via API call. + */ async aiAct(prompt: string): Promise { LOG.info("AI act", { prompt, workflow_run_id: this._browser.workflowRunId }); @@ -210,6 +231,13 @@ export class SkyvernBrowserPageAi { } } + /** + * Locate an element on the page using AI and return its XPath selector via API call. + * + * @param prompt - Natural language description of the element to locate (e.g., 'find "download invoices" button') + * + * @returns XPath selector string (e.g., 'xpath=//button[@id="download"]') or null if not found + */ async aiLocateElement(prompt: string): Promise { LOG.info("AI locate element", { prompt, workflow_run_id: this._browser.workflowRunId }); @@ -235,6 +263,9 @@ export class SkyvernBrowserPageAi { return null; } + /** + * Send a prompt to the LLM and get a response based on the provided schema via API call. + */ async aiPrompt(options: { prompt: string; schema?: Record;