TypeScript SDK: docs (#4264)

This commit is contained in:
Stanislav Novosad
2025-12-10 18:27:40 -07:00
committed by GitHub
parent a9058d1292
commit c0fd36e84f
5 changed files with 258 additions and 0 deletions

View File

@@ -33,6 +33,26 @@ export interface DownloadFilesOptions extends SkyvernApi.DownloadFilesRequest {
timeout?: number; timeout?: number;
} }
/**
* Main entry point for the Skyvern SDK.
*
* This class provides methods to launch and connect to browsers (both local and cloud-hosted),
* and access the Skyvern API client for task and workflow management. It combines browser
* automation capabilities with AI-powered task execution.
*
* @example
* ```typescript
* // Remote mode: Connect to Skyvern Cloud (API key required)
* const skyvern = new Skyvern({ apiKey: "your-api-key" });
*
* // Launch a cloud browser
* const browser = await skyvern.launchCloudBrowser();
* const page = await browser.getWorkingPage();
*
* // Execute AI-powered tasks
* await page.agent.runTask("Fill out the form and submit it");
* ```
*/
export class Skyvern extends SkyvernClient { export class Skyvern extends SkyvernClient {
private readonly _apiKey: string; private readonly _apiKey: string;
private readonly _environment: SkyvernEnvironment | string; private readonly _environment: SkyvernEnvironment | string;
@@ -156,6 +176,19 @@ export class Skyvern extends SkyvernClient {
return response; return response;
} }
/**
* Launch a new cloud-hosted browser session.
*
* This creates a new browser session in Skyvern's cloud infrastructure and connects to it.
*
* @param options - Optional configuration
* @param options.timeout - Timeout in minutes for the session. Timeout is applied after the session is started.
* Must be between 5 and 1440. Defaults to 60.
* @param options.proxyLocation - Geographic proxy location to route the browser traffic through.
* This is only available in Skyvern Cloud.
*
* @returns SkyvernBrowser instance connected to the new cloud session.
*/
async launchCloudBrowser(options?: { async launchCloudBrowser(options?: {
timeout?: number; timeout?: number;
proxyLocation?: SkyvernApi.ProxyLocation; proxyLocation?: SkyvernApi.ProxyLocation;
@@ -172,6 +205,13 @@ export class Skyvern extends SkyvernClient {
return this._connectToCloudBrowserSession(browserSession); return this._connectToCloudBrowserSession(browserSession);
} }
/**
* Connect to an existing cloud-hosted browser session by ID.
*
* @param browserSessionId - The ID of the cloud browser session to connect to.
*
* @returns SkyvernBrowser instance connected to the cloud session.
*/
async connectToCloudBrowserSession(browserSessionId: string): Promise<SkyvernBrowser> { async connectToCloudBrowserSession(browserSessionId: string): Promise<SkyvernBrowser> {
this._ensureCloudEnvironment(); this._ensureCloudEnvironment();
@@ -182,6 +222,21 @@ export class Skyvern extends SkyvernClient {
return this._connectToCloudBrowserSession(browserSession); return this._connectToCloudBrowserSession(browserSession);
} }
/**
* Get or create a cloud browser session.
*
* This method attempts to reuse the most recent available cloud browser session.
* If no session exists, it creates a new one. This is useful for cost efficiency
* and session persistence.
*
* @param options - Optional configuration
* @param options.timeout - Timeout in minutes for the session. Timeout is applied after the session is started.
* Must be between 5 and 1440. Defaults to 60. Only used when creating a new session.
* @param options.proxyLocation - Geographic proxy location to route the browser traffic through.
* This is only available in Skyvern Cloud. Only used when creating a new session.
*
* @returns SkyvernBrowser instance connected to an existing or new cloud session.
*/
async useCloudBrowser(options?: { timeout?: number; proxyLocation?: ProxyLocation }): Promise<SkyvernBrowser> { async useCloudBrowser(options?: { timeout?: number; proxyLocation?: ProxyLocation }): Promise<SkyvernBrowser> {
this._ensureCloudEnvironment(); this._ensureCloudEnvironment();
@@ -204,6 +259,16 @@ export class Skyvern extends SkyvernClient {
return this._connectToCloudBrowserSession(browserSession); return this._connectToCloudBrowserSession(browserSession);
} }
/**
* Connect to an existing browser instance via Chrome DevTools Protocol (CDP).
*
* Use this to connect to a browser that's already running with CDP enabled,
* whether local or remote.
*
* @param cdpUrl - The CDP endpoint URL: either an HTTP URL (e.g., "http://localhost:9222") or a "ws://" WebSocket URL.
*
* @returns SkyvernBrowser instance connected to the existing browser.
*/
async connectToBrowserOverCdp(cdpUrl: string): Promise<SkyvernBrowser> { async connectToBrowserOverCdp(cdpUrl: string): Promise<SkyvernBrowser> {
const browser = await chromium.connectOverCDP(cdpUrl); const browser = await chromium.connectOverCDP(cdpUrl);
const browserContext = browser.contexts()[0] ?? (await browser.newContext()); const browserContext = browser.contexts()[0] ?? (await browser.newContext());
@@ -213,6 +278,9 @@ export class Skyvern extends SkyvernClient {
return skyvernBrowser; return skyvernBrowser;
} }
/**
* Close all browsers and release resources.
*/
async close(): Promise<void> { async close(): Promise<void> {
await Promise.all(Array.from(this._browsers).map((browser) => browser.close())); await Promise.all(Array.from(this._browsers).map((browser) => browser.close()));
this._browsers.clear(); this._browsers.clear();

View File

@@ -2,6 +2,26 @@ import type { Browser, BrowserContext, Page } from "playwright";
import type { Skyvern } from "./Skyvern.js"; import type { Skyvern } from "./Skyvern.js";
import { SkyvernBrowserPageCore, type SkyvernBrowserPage } from "./SkyvernBrowserPage.js"; import { SkyvernBrowserPageCore, type SkyvernBrowserPage } from "./SkyvernBrowserPage.js";
/**
* A browser context wrapper that creates Skyvern-enabled pages.
*
* This class wraps a Playwright BrowserContext and provides methods to create
* SkyvernBrowserPage instances that combine traditional browser automation with
* AI-powered task execution capabilities. It manages browser session state and
* enables persistent browser sessions across multiple pages.
*
* @example
* ```typescript
* const skyvern = new Skyvern({ apiKey: "your-api-key" });
* const browser = await skyvern.launchCloudBrowser();
*
* // Get or create the working page
* const page = await browser.getWorkingPage();
*
* // Create a new page
* const newPage = await browser.newPage();
* ```
*/
export class SkyvernBrowser { export class SkyvernBrowser {
private readonly _skyvern: Skyvern; private readonly _skyvern: Skyvern;
private readonly _browserContext: BrowserContext; private readonly _browserContext: BrowserContext;
@@ -43,12 +63,29 @@ export class SkyvernBrowser {
return this._browserContext; return this._browserContext;
} }
/**
* Get the most recent page or create a new one if none exists.
*
* This method returns the last page in the browser context, or creates a new page
* if the context has no pages. This is useful for continuing work on an existing
* page without creating unnecessary new tabs.
*
* @returns The most recent page (or a newly created one) wrapped with Skyvern capabilities.
*/
async getWorkingPage(): Promise<SkyvernBrowserPage> { async getWorkingPage(): Promise<SkyvernBrowserPage> {
const pages = this._browserContext.pages(); const pages = this._browserContext.pages();
// Reuse the last page in the context when one exists; otherwise open a new page.
const page = pages.length > 0 ? pages[pages.length - 1] : await this._browserContext.newPage(); const page = pages.length > 0 ? pages[pages.length - 1] : await this._browserContext.newPage();
return this._createSkyvernPage(page); return this._createSkyvernPage(page);
} }
/**
* Create a new page (tab) in the browser context.
*
* This method always creates a new page, similar to opening a new tab in a browser.
* The new page will have both Playwright's standard API and Skyvern's AI capabilities.
*
* @returns A new page wrapped with Skyvern capabilities.
*/
async newPage(): Promise<SkyvernBrowserPage> { async newPage(): Promise<SkyvernBrowserPage> {
const page = await this._browserContext.newPage(); const page = await this._browserContext.newPage();
return this._createSkyvernPage(page); return this._createSkyvernPage(page);
@@ -58,6 +95,20 @@ export class SkyvernBrowser {
return this._browserContext.pages().map((page) => SkyvernBrowserPageCore.create(this, page)); return this._browserContext.pages().map((page) => SkyvernBrowserPageCore.create(this, page));
} }
/**
* Close the browser and, if one is attached, the cloud browser session.
*
* This method closes the browser context. If the browser is associated with a
* cloud browser session (has a browserSessionId), it will also close the
* browser session via the API, marking it as completed.
*
* @example
* ```typescript
* const browser = await skyvern.launchCloudBrowser();
* // ... use the browser ...
* await browser.close(); // Closes both browser and cloud session
* ```
*/
async close(): Promise<void> { async close(): Promise<void> {
if (this._browser) { if (this._browser) {
await this._browser.close(); await this._browser.close();

View File

@@ -3,6 +3,25 @@ import type { SkyvernBrowser } from "./SkyvernBrowser.js";
import { SkyvernBrowserPageAgent } from "./SkyvernBrowserPageAgent.js"; import { SkyvernBrowserPageAgent } from "./SkyvernBrowserPageAgent.js";
import { SkyvernBrowserPageAi } from "./SkyvernBrowserPageAi.js"; import { SkyvernBrowserPageAi } from "./SkyvernBrowserPageAi.js";
/**
* A browser page wrapper that combines Playwright's page API with Skyvern's AI capabilities.
*
* This class provides a unified interface for both traditional browser automation (via Playwright)
* and AI-powered task execution (via Skyvern). It exposes standard page methods like click, fill,
* goto, etc., while also providing access to Skyvern's task and workflow execution through the
* `agent` property.
*
* @example
* ```typescript
* // Use standard Playwright methods
* await page.goto("https://example.com");
* await page.fill("#username", "user@example.com");
* await page.click("#login-button");
*
* // Or use Skyvern's AI capabilities
* await page.agent.runTask("Fill out the contact form and submit it");
* ```
*/
export class SkyvernBrowserPageCore { export class SkyvernBrowserPageCore {
private readonly _browser: SkyvernBrowser; private readonly _browser: SkyvernBrowser;
private readonly _page: Page; private readonly _page: Page;
@@ -115,6 +134,17 @@ export class SkyvernBrowserPageCore {
} }
} }
/**
* Perform an action on the page using AI based on a natural language prompt.
*
* This is a thin convenience wrapper that delegates to the page's AI helper (`aiAct`).
*
* @param prompt - Natural language description of the action to perform.
*
* @returns A promise that resolves once the delegated `aiAct` call completes.
*
* @example
* ```typescript
* // Simple action
* await page.act("Click the login button");
* ```
*/
async act(prompt: string): Promise<void> { async act(prompt: string): Promise<void> {
return this._ai.aiAct(prompt); return this._ai.aiAct(prompt);
} }

View File

@@ -9,6 +9,13 @@ function getAppUrlForRun(runId: string): string {
return `https://app.skyvern.com/runs/${runId}`; return `https://app.skyvern.com/runs/${runId}`;
} }
/**
* Provides methods to run Skyvern tasks and workflows in the context of a browser page.
*
* This class enables executing AI-powered browser automation tasks while sharing the
* context of an existing browser page. It supports running custom tasks, login workflows,
* and pre-defined workflows with automatic waiting for completion.
*/
export class SkyvernBrowserPageAgent { export class SkyvernBrowserPageAgent {
private readonly _browser: SkyvernBrowser; private readonly _browser: SkyvernBrowser;
private readonly _page: Page; private readonly _page: Page;
@@ -18,6 +25,25 @@ export class SkyvernBrowserPageAgent {
this._page = page; this._page = page;
} }
/**
* Run a task in the context of this page and wait for it to finish.
*
* @param prompt - Natural language description of the task to perform.
* @param options - Optional configuration
* @param options.engine - The execution engine to use. Defaults to skyvern_v2.
* @param options.model - LLM model configuration options.
* @param options.url - URL to navigate to. If not provided, uses the current page URL.
* @param options.webhookUrl - URL to receive webhook notifications about task progress.
* @param options.totpIdentifier - Identifier for TOTP (Time-based One-Time Password) authentication.
* @param options.totpUrl - URL to fetch TOTP codes from.
* @param options.title - Human-readable title for this task run.
* @param options.errorCodeMapping - Mapping of error codes to custom error messages.
* @param options.dataExtractionSchema - Schema defining what data to extract from the page.
* @param options.maxSteps - Maximum number of steps the agent can take.
* @param options.timeout - Maximum time in seconds to wait for task completion.
*
* @returns TaskRunResponse containing the task execution results.
*/
async runTask( async runTask(
prompt: string, prompt: string,
options?: { options?: {
@@ -70,6 +96,26 @@ export class SkyvernBrowserPageAgent {
return completedRun as Skyvern.TaskRunResponse; return completedRun as Skyvern.TaskRunResponse;
} }
/**
* Run a login task in the context of this page and wait for it to finish.
*
* @param credentialType - Type of credential store to use (e.g., skyvern, bitwarden, onepassword).
* @param options - Optional configuration
* @param options.url - URL to navigate to for login. If not provided, uses the current page URL.
* @param options.credentialId - ID of the credential to use.
* @param options.bitwardenCollectionId - Bitwarden collection ID containing the credentials.
* @param options.bitwardenItemId - Bitwarden item ID for the credentials.
* @param options.onepasswordVaultId - 1Password vault ID containing the credentials.
* @param options.onepasswordItemId - 1Password item ID for the credentials.
* @param options.prompt - Additional instructions for the login process.
* @param options.webhookUrl - URL to receive webhook notifications about login progress.
* @param options.totpIdentifier - Identifier for TOTP authentication.
* @param options.totpUrl - URL to fetch TOTP codes from.
* @param options.extraHttpHeaders - Additional HTTP headers to include in requests.
* @param options.timeout - Maximum time in seconds to wait for login completion.
*
* @returns WorkflowRunResponse containing the login workflow execution results.
*/
async login( async login(
credentialType: string, credentialType: string,
options?: { options?: {
@@ -126,6 +172,23 @@ export class SkyvernBrowserPageAgent {
return completedRun as Skyvern.WorkflowRunResponse; return completedRun as Skyvern.WorkflowRunResponse;
} }
/**
* Run a file download task in the context of this page and wait for it to finish.
*
* @param prompt - Instructions for navigating to and downloading the file.
* @param options - Optional configuration
* @param options.url - URL to navigate to for file download. If not provided, uses the current page URL.
* @param options.downloadSuffix - Suffix or complete filename for the downloaded file.
* @param options.downloadTimeout - Timeout in seconds for the download operation.
* @param options.maxStepsPerRun - Maximum number of steps to execute.
* @param options.webhookUrl - URL to receive webhook notifications about download progress.
* @param options.totpIdentifier - Identifier for TOTP authentication.
* @param options.totpUrl - URL to fetch TOTP codes from.
* @param options.extraHttpHeaders - Additional HTTP headers to include in requests.
* @param options.timeout - Maximum time in seconds to wait for download completion.
*
* @returns WorkflowRunResponse containing the file download workflow execution results.
*/
async downloadFiles( async downloadFiles(
prompt: string, prompt: string,
options?: { options?: {
@@ -172,6 +235,21 @@ export class SkyvernBrowserPageAgent {
return completedRun as Skyvern.WorkflowRunResponse; return completedRun as Skyvern.WorkflowRunResponse;
} }
/**
* Run a workflow in the context of this page and wait for it to finish.
*
* @param workflowId - ID of the workflow to execute.
* @param options - Optional configuration
* @param options.parameters - Key-value map of parameters to pass to the workflow.
* @param options.template - Whether this is a workflow template.
* @param options.title - Human-readable title for this workflow run.
* @param options.webhookUrl - URL to receive webhook notifications about workflow progress.
* @param options.totpUrl - URL to fetch TOTP codes from.
* @param options.totpIdentifier - Identifier for TOTP authentication.
* @param options.timeout - Maximum time in seconds to wait for workflow completion.
*
* @returns WorkflowRunResponse containing the workflow execution results.
*/
async runWorkflow( async runWorkflow(
workflowId: string, workflowId: string,
options?: { options?: {

View File

@@ -12,6 +12,9 @@ export class SkyvernBrowserPageAi {
this._page = page; this._page = page;
} }
/**
* Click an element using AI via API call.
*/
async aiClick(options: { async aiClick(options: {
selector?: string; selector?: string;
intention: string; intention: string;
@@ -41,6 +44,9 @@ export class SkyvernBrowserPageAi {
return response.result ? String(response.result) : options.selector || null; return response.result ? String(response.result) : options.selector || null;
} }
/**
* Input text into an element using AI via API call.
*/
async aiInputText(options: { async aiInputText(options: {
selector?: string; selector?: string;
value?: string; value?: string;
@@ -76,6 +82,9 @@ export class SkyvernBrowserPageAi {
return response.result ? String(response.result) : options.value || ""; return response.result ? String(response.result) : options.value || "";
} }
/**
* Select an option from a dropdown using AI via API call.
*/
async aiSelectOption(options: { async aiSelectOption(options: {
selector?: string; selector?: string;
value?: string; value?: string;
@@ -107,6 +116,9 @@ export class SkyvernBrowserPageAi {
return response.result ? String(response.result) : options.value || ""; return response.result ? String(response.result) : options.value || "";
} }
/**
* Upload a file using AI via API call.
*/
async aiUploadFile(options: { async aiUploadFile(options: {
selector?: string; selector?: string;
fileUrl?: string; fileUrl?: string;
@@ -138,6 +150,9 @@ export class SkyvernBrowserPageAi {
return response.result ? String(response.result) : options.fileUrl || ""; return response.result ? String(response.result) : options.fileUrl || "";
} }
/**
* Extract information from the page using AI via API call.
*/
async aiExtract(options: { async aiExtract(options: {
prompt: string; prompt: string;
extractSchema?: Record<string, unknown> | unknown[] | string; extractSchema?: Record<string, unknown> | unknown[] | string;
@@ -169,6 +184,9 @@ export class SkyvernBrowserPageAi {
return (response.result as Record<string, unknown> | unknown[] | string) || null; return (response.result as Record<string, unknown> | unknown[] | string) || null;
} }
/**
* Validate the current page state using AI via API call.
*/
async aiValidate(options: { prompt: string; model?: Record<string, unknown> }): Promise<boolean> { async aiValidate(options: { prompt: string; model?: Record<string, unknown> }): Promise<boolean> {
LOG.info("AI validate", { prompt: options.prompt, model: options.model, workflow_run_id: this._browser.workflowRunId }); LOG.info("AI validate", { prompt: options.prompt, model: options.model, workflow_run_id: this._browser.workflowRunId });
@@ -191,6 +209,9 @@ export class SkyvernBrowserPageAi {
return response.result != null ? Boolean(response.result) : false; return response.result != null ? Boolean(response.result) : false;
} }
/**
* Perform an action on the page using AI via API call.
*/
async aiAct(prompt: string): Promise<void> { async aiAct(prompt: string): Promise<void> {
LOG.info("AI act", { prompt, workflow_run_id: this._browser.workflowRunId }); LOG.info("AI act", { prompt, workflow_run_id: this._browser.workflowRunId });
@@ -210,6 +231,13 @@ export class SkyvernBrowserPageAi {
} }
} }
/**
* Locate an element on the page using AI and return its XPath selector via API call.
*
* @param prompt - Natural language description of the element to locate (e.g., 'find "download invoices" button')
*
* @returns XPath selector string (e.g., 'xpath=//button[@id="download"]') or null if not found
*/
async aiLocateElement(prompt: string): Promise<string | null> { async aiLocateElement(prompt: string): Promise<string | null> {
LOG.info("AI locate element", { prompt, workflow_run_id: this._browser.workflowRunId }); LOG.info("AI locate element", { prompt, workflow_run_id: this._browser.workflowRunId });
@@ -235,6 +263,9 @@ export class SkyvernBrowserPageAi {
return null; return null;
} }
/**
* Send a prompt to the LLM and get a response based on the provided schema via API call.
*/
async aiPrompt(options: { async aiPrompt(options: {
prompt: string; prompt: string;
schema?: Record<string, unknown>; schema?: Record<string, unknown>;