Files
Dorod-Sky/skyvern-ts/client/src/library/SkyvernBrowserPage.ts
2025-12-17 21:11:39 +00:00

401 lines
15 KiB
TypeScript

import type { Page } from "playwright";
import type { SkyvernBrowser } from "./SkyvernBrowser.js";
import { SkyvernBrowserPageAgent } from "./SkyvernBrowserPageAgent.js";
import { SkyvernBrowserPageAi } from "./SkyvernBrowserPageAi.js";
/**
* A browser page wrapper that combines Playwright's page API with Skyvern's AI capabilities.
*
* This class provides a unified interface for both traditional browser automation (via Playwright)
* and AI-powered task execution (via Skyvern). It exposes standard page methods like click, fill,
* goto, etc., while also providing access to Skyvern's task and workflow execution through the
* `agent` attribute.
*
* @example
* ```typescript
* // Use standard Playwright methods
* await page.goto("https://example.com");
* await page.fill("#username", "user@example.com");
* await page.click("#login-button");
*
* // Or use Skyvern's AI capabilities
* await page.agent.runTask("Fill out the contact form and submit it");
* ```
*/
export class SkyvernBrowserPageCore {
    private readonly _browser: SkyvernBrowser;
    private readonly _page: Page;
    private readonly _ai: SkyvernBrowserPageAi;
    /** Agent bound to the same browser and page as this wrapper. */
    public readonly agent: SkyvernBrowserPageAgent;
    private readonly _proxy: SkyvernBrowserPage;

    /**
     * Builds the wrapper together with the proxy that callers actually receive.
     *
     * The constructor is private; use {@link SkyvernBrowserPageCore.create} so that
     * the proxy (and not the raw core object) is what gets handed out.
     *
     * @param browser - The owning Skyvern browser.
     * @param page - The Playwright page being wrapped.
     */
    private constructor(browser: SkyvernBrowser, page: Page) {
        this._browser = browser;
        this._page = page;
        this._ai = new SkyvernBrowserPageAi(browser, page);
        this.agent = new SkyvernBrowserPageAgent(browser, page);
        this._proxy = new Proxy(this, {
            get(target, prop, receiver) {
                // Anything defined on the wrapper (own fields or prototype members) wins.
                if (prop in target) {
                    return Reflect.get(target, prop, receiver);
                }
                // Everything else is read from the Playwright page. Functions are bound
                // to the page so they do not run with the proxy as `this`.
                const value = Reflect.get(target._page, prop, target._page);
                if (typeof value === "function") {
                    return value.bind(target._page);
                }
                return value;
            },
        }) as unknown as SkyvernBrowserPage;
    }

    /**
     * Create a page wrapper and return its proxy.
     *
     * @param browser - The owning Skyvern browser.
     * @param page - The Playwright page to wrap.
     * @returns A proxy exposing both the wrapper's members and the Playwright page's members.
     */
    static create(browser: SkyvernBrowser, page: Page): SkyvernBrowserPage {
        const instance = new SkyvernBrowserPageCore(browser, page);
        return instance._proxy;
    }

    /** The underlying Playwright page. */
    get page(): Page {
        return this._page;
    }

    /** The Skyvern browser this page was created with. */
    get browser(): SkyvernBrowser {
        return this._browser;
    }

    /**
     * Shared "selector first, AI second" strategy used by click, fill and selectOption.
     *
     * - If `viaSelector` is given and succeeds, nothing else runs.
     * - If it fails (or is not given) and `viaPrompt` is given, the AI action runs and
     *   its outcome (including any error it throws) is the result.
     * - If the selector attempt failed and there is no AI action, the selector error
     *   is rethrown as-is.
     *
     * The selector failure is kept in a wrapper object so that it is rethrown even
     * when the thrown value itself is falsy.
     *
     * @param viaSelector - Performs the action through Playwright, or `undefined` to skip.
     * @param viaPrompt - Performs the action through the AI helper, or `undefined` to skip.
     */
    private async _runWithFallback(
        viaSelector: (() => Promise<void>) | undefined,
        viaPrompt: (() => Promise<void>) | undefined,
    ): Promise<void> {
        let selectorFailure: { error: unknown } | undefined;
        if (viaSelector) {
            try {
                await viaSelector();
                return;
            } catch (error) {
                selectorFailure = { error };
            }
        }
        if (viaPrompt) {
            await viaPrompt();
            return;
        }
        if (selectorFailure) {
            throw selectorFailure.error;
        }
    }

    /**
     * Click an element using a CSS selector, AI-powered prompt matching, or both.
     *
     * This method supports three modes:
     * - **Selector-based**: Click the element matching the CSS selector
     * - **AI-powered**: Use natural language to describe which element to click
     * - **Fallback mode**: Try the selector first, fall back to AI if it fails
     *
     * @param selector - CSS selector for the target element.
     * @param options - Click options including prompt.
     * @param options.prompt - Natural language description of which element to click.
     * @throws Error if neither a selector nor a prompt is supplied.
     * @throws The Playwright error from the selector click when it fails and no prompt was given.
     *
     * @example
     * ```typescript
     * // Click using a CSS selector
     * await page.click("#open-invoice-button");
     *
     * // Click using AI with natural language
     * await page.click({ prompt: "Click on the 'Open Invoice' button" });
     *
     * // Try selector first, fall back to AI if selector fails
     * await page.click("#open-invoice-button", { prompt: "Click on the 'Open Invoice' button" });
     * ```
     */
    async click(selector: string, options?: Parameters<Page["click"]>[1]): Promise<void>;
    async click(options: { prompt: string } & Partial<Parameters<Page["click"]>[1]>): Promise<void>;
    async click(
        selector: string,
        options: { prompt: string } & Partial<Parameters<Page["click"]>[1]>,
    ): Promise<void>;
    async click(
        selectorOrOptions?: string | ({ prompt: string } & Partial<Parameters<Page["click"]>[1]>),
        options?: Parameters<Page["click"]>[1] | ({ prompt?: string } & Partial<Parameters<Page["click"]>[1]>),
    ): Promise<void> {
        let selector: string | undefined;
        let prompt: string | undefined;
        let clickOptions: Partial<Parameters<Page["click"]>[1]> = {};
        let timeout: number | undefined;

        // Parse arguments: `prompt` and `timeout` are split off, the rest is passed through.
        if (typeof selectorOrOptions === "string") {
            selector = selectorOrOptions;
            if (options && typeof options === "object") {
                const { prompt: p, timeout: t, ...rest } = options as {
                    prompt?: string;
                    timeout?: number;
                } & Partial<Parameters<Page["click"]>[1]>;
                prompt = p;
                timeout = t;
                clickOptions = rest;
            }
        } else if (selectorOrOptions && typeof selectorOrOptions === "object") {
            const { prompt: p, timeout: t, ...rest } = selectorOrOptions;
            prompt = p;
            timeout = t;
            clickOptions = rest;
        }

        if (!selector && !prompt) {
            throw new Error("Missing input: pass a selector and/or a prompt.");
        }

        // Const snapshots so the closures below keep the narrowed types.
        const targetSelector = selector;
        const intention = prompt;
        const passthrough = clickOptions;
        const timeoutMs = timeout;

        await this._runWithFallback(
            targetSelector
                ? async () => {
                      await this._page.click(targetSelector, { ...passthrough, timeout: timeoutMs });
                  }
                : undefined,
            intention
                ? async () => {
                      await this._ai.aiClick({
                          intention,
                          data: Object.keys(passthrough).length > 0 ? passthrough : undefined,
                          timeout: timeoutMs,
                      });
                  }
                : undefined,
        );
    }

    /**
     * Fill an input field using a CSS selector, AI-powered prompt matching, or both.
     *
     * This method supports three modes:
     * - **Selector-based**: Fill the input field with a value using CSS selector
     * - **AI-powered**: Use natural language prompt (AI extracts value from prompt or uses provided value)
     * - **Fallback mode**: Try the selector first, fall back to AI if it fails
     *
     * When a selector is given without a value, the selector attempt is skipped and
     * only the prompt (if any) is used.
     *
     * @param selector - CSS selector for the target input element.
     * @param value - The text value to input into the field.
     * @param options - Fill options including prompt.
     * @param options.prompt - Natural language description of which field to fill and what value.
     * @throws Error if neither a selector nor a prompt is supplied, or if a selector is
     *   supplied with neither a value nor a prompt (there would be nothing to do).
     * @throws The Playwright error from the selector fill when it fails and no prompt was given.
     *
     * @example
     * ```typescript
     * // Fill using selector and value
     * await page.fill("#email-input", "user@example.com");
     *
     * // Fill using AI with natural language
     * await page.fill({ prompt: "Fill 'user@example.com' in the email address field" });
     *
     * // Try selector first, fall back to AI if selector fails
     * await page.fill("#email-input", "user@example.com", { prompt: "Fill the email address" });
     * ```
     */
    async fill(selector: string, value: string, options?: Parameters<Page["fill"]>[2]): Promise<void>;
    async fill(options: { prompt: string; value?: string } & Partial<Parameters<Page["fill"]>[2]>): Promise<void>;
    async fill(
        selector: string,
        value: string,
        options: { prompt: string } & Partial<Parameters<Page["fill"]>[2]>,
    ): Promise<void>;
    async fill(
        selectorOrOptions?: string | ({ prompt: string; value?: string } & Partial<Parameters<Page["fill"]>[2]>),
        value?: string,
        options?: Parameters<Page["fill"]>[2] | ({ prompt?: string } & Partial<Parameters<Page["fill"]>[2]>),
    ): Promise<void> {
        let selector: string | undefined;
        let fillValue: string | undefined;
        let prompt: string | undefined;
        let fillOptions: Partial<Parameters<Page["fill"]>[2]> = {};
        let timeout: number | undefined;

        // Parse arguments: `prompt`, `value` and `timeout` are split off, the rest is passed through.
        if (typeof selectorOrOptions === "string") {
            selector = selectorOrOptions;
            fillValue = value;
            if (options && typeof options === "object") {
                const { prompt: p, timeout: t, ...rest } = options as {
                    prompt?: string;
                    timeout?: number;
                } & Partial<Parameters<Page["fill"]>[2]>;
                prompt = p;
                timeout = t;
                fillOptions = rest;
            }
        } else if (selectorOrOptions && typeof selectorOrOptions === "object") {
            const { prompt: p, value: v, timeout: t, ...rest } = selectorOrOptions;
            prompt = p;
            fillValue = v;
            timeout = t;
            fillOptions = rest;
        }

        if (!selector && !prompt) {
            throw new Error("Missing input: pass a selector and/or a prompt.");
        }
        // A selector alone cannot be filled: without this check the call would resolve
        // successfully while doing nothing at all.
        if (fillValue === undefined && !prompt) {
            throw new Error("Missing input: pass a value to fill and/or a prompt.");
        }

        // Const snapshots so the closures below keep the narrowed types.
        const targetSelector = selector;
        const text = fillValue;
        const intention = prompt;
        const passthrough = fillOptions;
        const timeoutMs = timeout;

        await this._runWithFallback(
            targetSelector && text !== undefined
                ? async () => {
                      await this._page.fill(targetSelector, text, { ...passthrough, timeout: timeoutMs });
                  }
                : undefined,
            intention
                ? async () => {
                      await this._ai.aiInputText({
                          value: text,
                          intention,
                          data: Object.keys(passthrough).length > 0 ? passthrough : undefined,
                          timeout: timeoutMs,
                      });
                  }
                : undefined,
        );
    }

    /**
     * Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
     *
     * This method supports three modes:
     * - **Selector-based**: Select the option with a value using CSS selector
     * - **AI-powered**: Use natural language prompt (AI extracts value from prompt or uses provided value)
     * - **Fallback mode**: Try the selector first, fall back to AI if it fails
     *
     * Notes:
     * - This method resolves with no value (Playwright's own `selectOption` resolves
     *   with the selected option values).
     * - When several values are passed and the AI path is used, only the first value
     *   is forwarded to the AI helper.
     * - When a selector is given without values, the selector attempt is skipped and
     *   only the prompt (if any) is used.
     *
     * @param selector - CSS selector for the target select/dropdown element.
     * @param values - The option value(s) to select.
     * @param options - Select options including prompt.
     * @param options.prompt - Natural language description of which option to select.
     * @throws Error if neither a selector nor a prompt is supplied, or if a selector is
     *   supplied with neither values nor a prompt (there would be nothing to do).
     * @throws The Playwright error from the selector attempt when it fails and no prompt was given.
     *
     * @example
     * ```typescript
     * // Select using selector and value
     * await page.selectOption("#country", "us");
     *
     * // Select using AI with natural language
     * await page.selectOption({ prompt: "Select 'United States' from the country dropdown" });
     *
     * // Try selector first, fall back to AI if selector fails
     * await page.selectOption("#country", "us", { prompt: "Select United States from country" });
     * ```
     */
    async selectOption(
        selector: string,
        values: string | string[],
        options?: Parameters<Page["selectOption"]>[2],
    ): Promise<void>;
    async selectOption(
        options: { prompt: string; value?: string } & Partial<Parameters<Page["selectOption"]>[2]>,
    ): Promise<void>;
    async selectOption(
        selector: string,
        values: string | string[],
        options: { prompt: string } & Partial<Parameters<Page["selectOption"]>[2]>,
    ): Promise<void>;
    async selectOption(
        selectorOrOptions?:
            | string
            | ({ prompt: string; value?: string } & Partial<Parameters<Page["selectOption"]>[2]>),
        values?: string | string[],
        options?: Parameters<Page["selectOption"]>[2] | ({ prompt?: string } & Partial<Parameters<Page["selectOption"]>[2]>),
    ): Promise<void> {
        let selector: string | undefined;
        let selectValue: string | string[] | undefined;
        let prompt: string | undefined;
        let selectOptions: Partial<Parameters<Page["selectOption"]>[2]> = {};
        let timeout: number | undefined;

        // Parse arguments: `prompt`, `value` and `timeout` are split off, the rest is passed through.
        if (typeof selectorOrOptions === "string") {
            selector = selectorOrOptions;
            selectValue = values;
            if (options && typeof options === "object") {
                const { prompt: p, timeout: t, ...rest } = options as {
                    prompt?: string;
                    timeout?: number;
                } & Partial<Parameters<Page["selectOption"]>[2]>;
                prompt = p;
                timeout = t;
                selectOptions = rest;
            }
        } else if (selectorOrOptions && typeof selectorOrOptions === "object") {
            const { prompt: p, value: v, timeout: t, ...rest } = selectorOrOptions;
            prompt = p;
            selectValue = v;
            timeout = t;
            selectOptions = rest;
        }

        if (!selector && !prompt) {
            throw new Error("Missing input: pass a selector and/or a prompt.");
        }
        // A selector alone cannot select anything: without this check the call would
        // resolve successfully while doing nothing at all.
        if (selectValue === undefined && !prompt) {
            throw new Error("Missing input: pass the option value(s) to select and/or a prompt.");
        }

        // Const snapshots so the closures below keep the narrowed types.
        const targetSelector = selector;
        const chosen = selectValue;
        const intention = prompt;
        const passthrough = selectOptions;
        const timeoutMs = timeout;

        await this._runWithFallback(
            targetSelector && chosen !== undefined
                ? async () => {
                      await this._page.selectOption(targetSelector, chosen, { ...passthrough, timeout: timeoutMs });
                  }
                : undefined,
            intention
                ? async () => {
                      await this._ai.aiSelectOption({
                          // The AI helper receives a single value: the string itself, or the
                          // first entry when an array was supplied.
                          value: typeof chosen === "string" ? chosen : chosen?.[0],
                          intention,
                          data: Object.keys(passthrough).length > 0 ? passthrough : undefined,
                          timeout: timeoutMs,
                      });
                  }
                : undefined,
        );
    }

    /**
     * Perform an action on the page using AI based on a natural language prompt.
     *
     * @param prompt - Natural language description of the action to perform.
     *
     * @example
     * ```typescript
     * // Simple action
     * await page.act("Click the login button");
     * ```
     */
    async act(prompt: string): Promise<void> {
        return this._ai.aiAct(prompt);
    }

    /**
     * Extract data from the page using AI.
     *
     * The options object is forwarded unchanged to the AI helper's `aiExtract`.
     *
     * @param options - Extraction request.
     * @param options.prompt - Natural language description of what to extract.
     * @param options.schema - Optional schema for the result (presumably describes the
     *   expected output shape; confirm against the AI helper).
     * @param options.errorCodeMapping - Optional error-code mapping, forwarded as-is.
     * @param options.intention - Optional intention, forwarded as-is.
     * @param options.data - Optional extra data, forwarded as-is.
     * @returns Whatever `aiExtract` resolves with: an object, an array, a string, or `null`.
     */
    async extract(options: {
        prompt: string;
        schema?: Record<string, unknown> | unknown[] | string;
        errorCodeMapping?: Record<string, string>;
        intention?: string;
        data?: string | Record<string, unknown>;
    }): Promise<Record<string, unknown> | unknown[] | string | null> {
        return this._ai.aiExtract(options);
    }

    /**
     * Ask the AI helper to validate a statement, resolving to a boolean.
     *
     * @param prompt - Natural language description of the condition to validate.
     * @param model - Optional model configuration. A string is shorthand for
     *   `{ modelName: <string> }`; an object is forwarded unchanged.
     * @returns The boolean produced by the AI helper's `aiValidate`.
     */
    async validate(prompt: string, model?: Record<string, unknown> | string): Promise<boolean> {
        const normalizedModel: Record<string, unknown> | undefined =
            typeof model === "string" ? { modelName: model } : model;
        return this._ai.aiValidate({ prompt, model: normalizedModel });
    }

    /**
     * Send a free-form prompt to the AI helper and return its response.
     *
     * @param prompt - The prompt text.
     * @param schema - Optional schema for the response, forwarded as-is.
     * @param model - Optional model configuration. A string is shorthand for
     *   `{ modelName: <string> }`; an object is forwarded unchanged.
     * @returns Whatever `aiPrompt` resolves with: an object, an array, a string, or `null`.
     */
    async prompt(
        prompt: string,
        schema?: Record<string, unknown>,
        model?: Record<string, unknown> | string,
    ): Promise<Record<string, unknown> | unknown[] | string | null> {
        const normalizedModel: Record<string, unknown> | undefined =
            typeof model === "string" ? { modelName: model } : model;
        return this._ai.aiPrompt({ prompt, schema, model: normalizedModel });
    }
}
/**
 * Static type of the object returned by `SkyvernBrowserPageCore.create`.
 *
 * It is the intersection of the wrapper class and Playwright's `Page`: at runtime
 * the returned proxy serves any property found on the wrapper from the wrapper,
 * and reads every other property from the underlying Playwright page.
 */
export type SkyvernBrowserPage = SkyvernBrowserPageCore & Page;