SDK: docs and improvements (#4310)

This commit is contained in:
Stanislav Novosad
2025-12-17 14:11:39 -07:00
committed by GitHub
parent 0d6a070a80
commit 5d2bb07371
32 changed files with 1274 additions and 291 deletions

View File

@@ -9,6 +9,10 @@ import type { GetRunResponse, ProxyLocation } from "../api/index.js";
import { LOG } from "./logger.js";
import * as core from "../core/index.js";
/**
 * Build the app.skyvern.com URL for a browser session.
 *
 * The id is percent-encoded so that an id containing characters with special
 * meaning in a URL (`/`, `?`, `#`, whitespace, ...) still yields a single,
 * well-formed path segment. Ids made only of unreserved characters are
 * returned unchanged.
 *
 * @param browserSessionId - Identifier of the browser session.
 * @returns The URL `https://app.skyvern.com/browser-session/<encoded id>`.
 */
function _getBrowserSessionUrl(browserSessionId: string): string {
    return `https://app.skyvern.com/browser-session/${encodeURIComponent(browserSessionId)}`;
}
/**
 * Options type for the SDK: every field of `BaseClientOptions` plus a
 * required `apiKey`.
 */
export interface SkyvernOptions extends BaseClientOptions {
/**
 * API key (required).
 * NOTE(review): presumably used to authenticate requests to the Skyvern API —
 * confirm against the code that consumes these options.
 */
apiKey: string;
}
@@ -200,7 +204,11 @@ export class Skyvern extends SkyvernClient {
proxy_location: options?.proxyLocation,
});
LOG.info("Launched new cloud browser session", { browser_session_id: browserSession.browser_session_id });
if (this._environment === SkyvernEnvironment.Cloud) {
LOG.info("Launched new cloud browser session", { url: _getBrowserSessionUrl(browserSession.browser_session_id) });
} else {
LOG.info("Launched new cloud browser session", { browser_session_id: browserSession.browser_session_id });
}
return this._connectToCloudBrowserSession(browserSession);
}
@@ -217,7 +225,11 @@ export class Skyvern extends SkyvernClient {
const browserSession = await this.getBrowserSession(browserSessionId);
LOG.info("Connecting to existing cloud browser session", { browser_session_id: browserSession.browser_session_id });
if (this._environment === SkyvernEnvironment.Cloud) {
LOG.info("Connecting to existing cloud browser session", { url: _getBrowserSessionUrl(browserSession.browser_session_id) });
} else {
LOG.info("Connecting to existing cloud browser session", { browser_session_id: browserSession.browser_session_id });
}
return this._connectToCloudBrowserSession(browserSession);
}
@@ -254,7 +266,11 @@ export class Skyvern extends SkyvernClient {
return this.launchCloudBrowser(options);
}
LOG.info("Reusing existing cloud browser session", { browser_session_id: browserSession.browser_session_id });
if (this._environment === SkyvernEnvironment.Cloud) {
LOG.info("Reusing existing cloud browser session", { url: _getBrowserSessionUrl(browserSession.browser_session_id) });
} else {
LOG.info("Reusing existing cloud browser session", { browser_session_id: browserSession.browser_session_id });
}
return this._connectToCloudBrowserSession(browserSession);
}

View File

@@ -63,74 +63,295 @@ export class SkyvernBrowserPageCore {
return this._browser;
}
/**
 * Click an element using a CSS selector, AI-powered prompt matching, or both.
 *
 * This method supports three modes:
 * - **Selector-based**: Click the element matching the CSS selector
 * - **AI-powered**: Use natural language to describe which element to click
 * - **Fallback mode**: Try the selector first, fall back to AI if it fails
 *
 * @param selector - CSS selector for the target element.
 * @param options - Click options including prompt.
 * @param options.prompt - Natural language description of which element to click.
 * @param options.timeout - Forwarded to both the selector click and the AI click.
 *
 * @throws Error("Missing input: pass a selector and/or a prompt.") when neither
 *   a selector nor a prompt is supplied.
 * @throws Whatever the selector click threw, when it fails and no prompt is
 *   available for the AI fallback.
 *
 * @example
 * ```typescript
 * // Click using a CSS selector
 * await page.click("#open-invoice-button");
 *
 * // Click using AI with natural language
 * await page.click({ prompt: "Click on the 'Open Invoice' button" });
 *
 * // Try selector first, fall back to AI if selector fails
 * await page.click("#open-invoice-button", { prompt: "Click on the 'Open Invoice' button" });
 * ```
 */
async click(selector: string, options?: Parameters<Page["click"]>[1]): Promise<void>;
async click(options: { prompt: string } & Partial<Parameters<Page["click"]>[1]>): Promise<void>;
async click(
    selector: string,
    options: { prompt: string } & Partial<Parameters<Page["click"]>[1]>,
): Promise<void>;
async click(
    selectorOrOptions?: string | ({ prompt: string } & Partial<Parameters<Page["click"]>[1]>),
    options?: Parameters<Page["click"]>[1] | ({ prompt?: string } & Partial<Parameters<Page["click"]>[1]>),
): Promise<void> {
    let selector: string | undefined;
    let prompt: string | undefined;
    let clickOptions: Partial<Parameters<Page["click"]>[1]> = {};
    let timeout: number | undefined;

    // Normalise the two call shapes, (selector, options?) and (options), into
    // selector / prompt / timeout / remaining click options.
    if (typeof selectorOrOptions === "string") {
        selector = selectorOrOptions;
        if (options) {
            const {
                prompt: promptFromOptions,
                timeout: timeoutFromOptions,
                ...rest
            } = options as {
                prompt?: string;
                timeout?: number;
            } & Partial<Parameters<Page["click"]>[1]>;
            prompt = promptFromOptions;
            timeout = timeoutFromOptions;
            clickOptions = rest;
        }
    } else if (selectorOrOptions && typeof selectorOrOptions === "object") {
        const { prompt: promptFromOptions, timeout: timeoutFromOptions, ...rest } = selectorOrOptions;
        prompt = promptFromOptions;
        timeout = timeoutFromOptions;
        clickOptions = rest;
    }

    if (!selector && !prompt) {
        throw new Error("Missing input: pass a selector and/or a prompt.");
    }

    // Try the caller-supplied selector first.
    if (selector) {
        try {
            await this._page.click(selector, { ...clickOptions, timeout });
            return;
        } catch (error) {
            // Without a prompt there is nothing to fall back to, so surface
            // the selector failure unchanged.
            if (!prompt) {
                throw error;
            }
        }
    }

    // Either no selector was given or the selector click failed: use the AI click.
    if (prompt) {
        await this._ai.aiClick({
            intention: prompt,
            data: Object.keys(clickOptions).length > 0 ? clickOptions : undefined,
            timeout,
        });
    }
}
/**
 * Fill an input field using a CSS selector, AI-powered prompt matching, or both.
 *
 * This method supports three modes:
 * - **Selector-based**: Fill the input field with a value using CSS selector
 * - **AI-powered**: Use natural language prompt (AI extracts value from prompt or uses provided value)
 * - **Fallback mode**: Try the selector first, fall back to AI if it fails
 *
 * @param selector - CSS selector for the target input element.
 * @param value - The text value to input into the field.
 * @param options - Fill options including prompt.
 * @param options.prompt - Natural language description of which field to fill and what value.
 * @param options.value - Value to fill when calling with an options object only.
 * @param options.timeout - Forwarded to both the selector fill and the AI fill.
 *
 * @throws Error("Missing input: pass a selector and/or a prompt.") when neither
 *   a selector nor a prompt is supplied.
 * @throws Error("value is required when selector is provided") when a selector
 *   is given without a value and there is no prompt to fall back to.
 * @throws Whatever the selector fill threw, when it fails and no prompt is
 *   available for the AI fallback.
 *
 * @example
 * ```typescript
 * // Fill using selector and value
 * await page.fill("#email-input", "user@example.com");
 *
 * // Fill using AI with natural language
 * await page.fill({ prompt: "Fill 'user@example.com' in the email address field" });
 *
 * // Try selector first, fall back to AI if selector fails
 * await page.fill("#email-input", "user@example.com", { prompt: "Fill the email address" });
 * ```
 */
async fill(selector: string, value: string, options?: Parameters<Page["fill"]>[2]): Promise<void>;
async fill(options: { prompt: string; value?: string } & Partial<Parameters<Page["fill"]>[2]>): Promise<void>;
async fill(
    selector: string,
    value: string,
    options: { prompt: string } & Partial<Parameters<Page["fill"]>[2]>,
): Promise<void>;
async fill(
    selectorOrOptions?: string | ({ prompt: string; value?: string } & Partial<Parameters<Page["fill"]>[2]>),
    value?: string,
    options?: Parameters<Page["fill"]>[2] | ({ prompt?: string } & Partial<Parameters<Page["fill"]>[2]>),
): Promise<void> {
    let selector: string | undefined;
    let fillValue: string | undefined;
    let prompt: string | undefined;
    let fillOptions: Partial<Parameters<Page["fill"]>[2]> = {};
    let timeout: number | undefined;

    // Normalise the two call shapes, (selector, value, options?) and (options),
    // into selector / value / prompt / timeout / remaining fill options.
    if (typeof selectorOrOptions === "string") {
        selector = selectorOrOptions;
        fillValue = value;
        if (options) {
            const {
                prompt: promptFromOptions,
                timeout: timeoutFromOptions,
                ...rest
            } = options as {
                prompt?: string;
                timeout?: number;
            } & Partial<Parameters<Page["fill"]>[2]>;
            prompt = promptFromOptions;
            timeout = timeoutFromOptions;
            fillOptions = rest;
        }
    } else if (selectorOrOptions && typeof selectorOrOptions === "object") {
        const {
            prompt: promptFromOptions,
            value: valueFromOptions,
            timeout: timeoutFromOptions,
            ...rest
        } = selectorOrOptions;
        prompt = promptFromOptions;
        fillValue = valueFromOptions;
        timeout = timeoutFromOptions;
        fillOptions = rest;
    }

    if (!selector && !prompt) {
        throw new Error("Missing input: pass a selector and/or a prompt.");
    }
    // A selector with no value and no prompt can do nothing; fail loudly
    // instead of resolving without having touched the page.
    if (selector && fillValue === undefined && !prompt) {
        throw new Error("value is required when selector is provided");
    }

    // Try the caller-supplied selector first (only possible with a concrete value).
    if (selector && fillValue !== undefined) {
        try {
            await this._page.fill(selector, fillValue, { ...fillOptions, timeout });
            return;
        } catch (error) {
            // Without a prompt there is nothing to fall back to, so surface
            // the selector failure unchanged.
            if (!prompt) {
                throw error;
            }
        }
    }

    // No usable selector path, or it failed: use the AI text input.
    // `fillValue` may be undefined here; it is forwarded as-is.
    if (prompt) {
        await this._ai.aiInputText({
            value: fillValue,
            intention: prompt,
            data: Object.keys(fillOptions).length > 0 ? fillOptions : undefined,
            timeout,
        });
    }
}
/**
 * Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
 *
 * This method supports three modes:
 * - **Selector-based**: Select the option with a value using CSS selector
 * - **AI-powered**: Use natural language prompt (AI extracts value from prompt or uses provided value)
 * - **Fallback mode**: Try the selector first, fall back to AI if it fails
 *
 * @param selector - CSS selector for the target select/dropdown element.
 * @param values - The option value, or list of option values, to select.
 * @param options - Select options including prompt.
 * @param options.prompt - Natural language description of which option to select.
 * @param options.value - Value to select when calling with an options object only.
 * @param options.timeout - Forwarded to both the selector-based and the AI selection.
 *
 * @remarks
 * When the AI path is used and `values` is an array, only its first element
 * is forwarded to the AI call.
 *
 * @throws Error("Missing input: pass a selector and/or a prompt.") when neither
 *   a selector nor a prompt is supplied.
 * @throws Error("value is required when selector is provided") when a selector
 *   is given without values and there is no prompt to fall back to.
 * @throws Whatever the selector-based selection threw, when it fails and no
 *   prompt is available for the AI fallback.
 *
 * @example
 * ```typescript
 * // Select using selector and value
 * await page.selectOption("#country", "us");
 *
 * // Select using AI with natural language
 * await page.selectOption({ prompt: "Select 'United States' from the country dropdown" });
 *
 * // Try selector first, fall back to AI if selector fails
 * await page.selectOption("#country", "us", { prompt: "Select United States from country" });
 * ```
 */
async selectOption(
    selector: string,
    values: string | string[],
    options?: Parameters<Page["selectOption"]>[2],
): Promise<void>;
async selectOption(
    options: { prompt: string; value?: string } & Partial<Parameters<Page["selectOption"]>[2]>,
): Promise<void>;
async selectOption(
    selector: string,
    values: string | string[],
    options: { prompt: string } & Partial<Parameters<Page["selectOption"]>[2]>,
): Promise<void>;
async selectOption(
    selectorOrOptions?:
        | string
        | ({ prompt: string; value?: string } & Partial<Parameters<Page["selectOption"]>[2]>),
    values?: string | string[],
    options?: Parameters<Page["selectOption"]>[2] | ({ prompt?: string } & Partial<Parameters<Page["selectOption"]>[2]>),
): Promise<void> {
    let selector: string | undefined;
    let selectValue: string | string[] | undefined;
    let prompt: string | undefined;
    let selectOptions: Partial<Parameters<Page["selectOption"]>[2]> = {};
    let timeout: number | undefined;

    // Normalise the two call shapes, (selector, values, options?) and (options),
    // into selector / value / prompt / timeout / remaining select options.
    if (typeof selectorOrOptions === "string") {
        selector = selectorOrOptions;
        selectValue = values;
        if (options) {
            const {
                prompt: promptFromOptions,
                timeout: timeoutFromOptions,
                ...rest
            } = options as {
                prompt?: string;
                timeout?: number;
            } & Partial<Parameters<Page["selectOption"]>[2]>;
            prompt = promptFromOptions;
            timeout = timeoutFromOptions;
            selectOptions = rest;
        }
    } else if (selectorOrOptions && typeof selectorOrOptions === "object") {
        const {
            prompt: promptFromOptions,
            value: valueFromOptions,
            timeout: timeoutFromOptions,
            ...rest
        } = selectorOrOptions;
        prompt = promptFromOptions;
        selectValue = valueFromOptions;
        timeout = timeoutFromOptions;
        selectOptions = rest;
    }

    if (!selector && !prompt) {
        throw new Error("Missing input: pass a selector and/or a prompt.");
    }
    // A selector with no values and no prompt can do nothing; fail loudly
    // instead of resolving without having touched the page.
    if (selector && selectValue === undefined && !prompt) {
        throw new Error("value is required when selector is provided");
    }

    // Try the caller-supplied selector first (only possible with concrete values).
    if (selector && selectValue !== undefined) {
        try {
            await this._page.selectOption(selector, selectValue, { ...selectOptions, timeout });
            return;
        } catch (error) {
            // Without a prompt there is nothing to fall back to, so surface
            // the selector failure unchanged.
            if (!prompt) {
                throw error;
            }
        }
    }

    // No usable selector path, or it failed: use the AI selection.
    if (prompt) {
        await this._ai.aiSelectOption({
            // The AI call receives a single value, so a list is reduced to its first entry.
            value: Array.isArray(selectValue) ? selectValue[0] : selectValue,
            intention: prompt,
            data: Object.keys(selectOptions).length > 0 ? selectOptions : undefined,
            timeout,
        });
    }
}

View File

@@ -99,32 +99,110 @@ export class SkyvernBrowserPageAgent {
/**
* Run a login task in the context of this page and wait for it to finish.
*
* @param credentialType - Type of credential store to use (e.g., skyvern, bitwarden, onepassword).
* @param options - Optional configuration
* @param options.url - URL to navigate to for login. If not provided, uses the current page URL.
* @param options.credentialId - ID of the credential to use.
* @param options.bitwardenCollectionId - Bitwarden collection ID containing the credentials.
* @param options.bitwardenItemId - Bitwarden item ID for the credentials.
* @param options.onepasswordVaultId - 1Password vault ID containing the credentials.
* @param options.onepasswordItemId - 1Password item ID for the credentials.
* @param options.prompt - Additional instructions for the login process.
* @param options.webhookUrl - URL to receive webhook notifications about login progress.
* @param options.totpIdentifier - Identifier for TOTP authentication.
* @param options.totpUrl - URL to fetch TOTP codes from.
* @param options.extraHttpHeaders - Additional HTTP headers to include in requests.
* @param options.timeout - Maximum time in seconds to wait for login completion.
* This method has multiple overloaded signatures for different credential types:
*
* @returns WorkflowRunResponse containing the login workflow execution results.
* 1. Skyvern credentials:
* ```typescript
* await page.agent.login("skyvern", {
* credentialId: "cred_123"
* });
* ```
*
* 2. Bitwarden credentials:
* ```typescript
* await page.agent.login("bitwarden", {
* bitwardenItemId: "item_id",
* bitwardenCollectionId: "collection_id"
* });
* ```
*
* 3. 1Password credentials:
* ```typescript
* await page.agent.login("1password", {
* onepasswordVaultId: "vault_id",
* onepasswordItemId: "item_id"
* });
* ```
*
* 4. Azure Vault credentials:
* ```typescript
* await page.agent.login("azure_vault", {
* azureVaultName: "vault_name",
* azureVaultUsernameKey: "username_key",
* azureVaultPasswordKey: "password_key",
* });
* ```
*/
async login(
credentialType: string,
options?: {
credentialType: "skyvern",
options: {
credentialId: string;
url?: string;
prompt?: string;
webhookUrl?: string;
totpIdentifier?: string;
totpUrl?: string;
extraHttpHeaders?: Record<string, string>;
timeout?: number;
},
): Promise<Skyvern.WorkflowRunResponse>;
async login(
credentialType: "bitwarden",
options: {
bitwardenItemId: string;
bitwardenCollectionId?: string;
url?: string;
prompt?: string;
webhookUrl?: string;
totpIdentifier?: string;
totpUrl?: string;
extraHttpHeaders?: Record<string, string>;
timeout?: number;
},
): Promise<Skyvern.WorkflowRunResponse>;
async login(
credentialType: "1password",
options: {
onepasswordVaultId: string;
onepasswordItemId: string;
url?: string;
prompt?: string;
webhookUrl?: string;
totpIdentifier?: string;
totpUrl?: string;
extraHttpHeaders?: Record<string, string>;
timeout?: number;
},
): Promise<Skyvern.WorkflowRunResponse>;
async login(
credentialType: "azure_vault",
options: {
azureVaultName: string;
azureVaultUsernameKey: string;
azureVaultPasswordKey: string;
azureVaultTotpSecretKey?: string;
url?: string;
prompt?: string;
webhookUrl?: string;
totpIdentifier?: string;
totpUrl?: string;
extraHttpHeaders?: Record<string, string>;
timeout?: number;
},
): Promise<Skyvern.WorkflowRunResponse>;
async login(
credentialType: Skyvern.SkyvernSchemasRunBlocksCredentialType,
options: {
url?: string;
credentialId?: string;
bitwardenCollectionId?: string;
bitwardenItemId?: string;
onepasswordVaultId?: string;
onepasswordItemId?: string;
azureVaultName?: string;
azureVaultUsernameKey?: string;
azureVaultPasswordKey?: string;
azureVaultTotpSecretKey?: string;
prompt?: string;
webhookUrl?: string;
totpIdentifier?: string;
@@ -137,20 +215,24 @@ export class SkyvernBrowserPageAgent {
const workflowRun = await this._browser.skyvern.login(
{
credential_type: credentialType as Skyvern.SkyvernSchemasRunBlocksCredentialType,
url: options?.url ?? this._getPageUrl(),
credential_id: options?.credentialId,
bitwarden_collection_id: options?.bitwardenCollectionId,
bitwarden_item_id: options?.bitwardenItemId,
onepassword_vault_id: options?.onepasswordVaultId,
onepassword_item_id: options?.onepasswordItemId,
prompt: options?.prompt,
webhook_url: options?.webhookUrl,
totp_identifier: options?.totpIdentifier,
totp_url: options?.totpUrl,
credential_type: credentialType,
url: options.url ?? this._getPageUrl(),
credential_id: options.credentialId,
bitwarden_collection_id: options.bitwardenCollectionId,
bitwarden_item_id: options.bitwardenItemId,
onepassword_vault_id: options.onepasswordVaultId,
onepassword_item_id: options.onepasswordItemId,
azure_vault_name: options.azureVaultName,
azure_vault_username_key: options.azureVaultUsernameKey,
azure_vault_password_key: options.azureVaultPasswordKey,
azure_vault_totp_secret_key: options.azureVaultTotpSecretKey,
prompt: options.prompt,
webhook_url: options.webhookUrl,
totp_identifier: options.totpIdentifier,
totp_url: options.totpUrl,
browser_session_id: this._browser.browserSessionId,
browser_address: this._browser.browserAddress,
extra_http_headers: options?.extraHttpHeaders,
extra_http_headers: options.extraHttpHeaders,
},
{
headers: { "x-user-agent": "skyvern-sdk" },
@@ -158,15 +240,15 @@ export class SkyvernBrowserPageAgent {
);
if (this._browser.skyvern.environment === SkyvernEnvironment.Cloud) {
LOG.info("AI login workflow is running, this may take a while", { url: getAppUrlForRun(workflowRun.run_id), run_id: workflowRun.run_id });
LOG.info("AI login workflow is running, this may take a while", {
url: getAppUrlForRun(workflowRun.run_id),
run_id: workflowRun.run_id,
});
} else {
LOG.info("AI login workflow is running, this may take a while", { run_id: workflowRun.run_id });
}
const completedRun = await this._waitForRunCompletion(
workflowRun.run_id,
options?.timeout ?? DEFAULT_AGENT_TIMEOUT,
);
const completedRun = await this._waitForRunCompletion(workflowRun.run_id, options.timeout ?? DEFAULT_AGENT_TIMEOUT);
LOG.info("AI login workflow finished", { run_id: completedRun.run_id, status: completedRun.status });
return completedRun as Skyvern.WorkflowRunResponse;