current viewport screenshot and scrolling n screenshot (#2716)

Co-authored-by: lawyzheng <lawyzheng1106@gmail.com>
This commit is contained in:
Shuchang Zheng
2025-06-13 23:59:50 -07:00
committed by GitHub
parent 11288817af
commit 775da18878
39 changed files with 452 additions and 35 deletions

View File

@@ -0,0 +1,37 @@
"""add_screenshot_scroll_number
Revision ID: 2c6b27e8e961
Revises: 1517a4ba63fa
Create Date: 2025-06-14 06:49:43.628471+00:00
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "2c6b27e8e961"
down_revision: Union[str, None] = "1517a4ba63fa"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add a nullable integer ``max_screenshot_scrolling_times`` column to four tables."""
    # Tables are processed in the same order as the auto-generated Alembic commands.
    for table_name in ("observer_cruises", "tasks", "workflow_runs", "workflows"):
        op.add_column(
            table_name,
            sa.Column("max_screenshot_scrolling_times", sa.Integer(), nullable=True),
        )
def downgrade() -> None:
    """Drop the ``max_screenshot_scrolling_times`` column from the four tables altered by ``upgrade``."""
    # Tables are processed in the same order as the auto-generated Alembic commands.
    for table_name in ("workflows", "workflow_runs", "tasks", "observer_cruises"):
        op.drop_column(table_name, "max_screenshot_scrolling_times")

View File

@@ -141,6 +141,7 @@ export type CreateTaskRequest = {
totp_identifier?: string | null; totp_identifier?: string | null;
application?: string | null; application?: string | null;
include_action_history_in_verification?: boolean | null; include_action_history_in_verification?: boolean | null;
max_screenshot_scrolling_times?: number | null;
}; };
export type User = { export type User = {
@@ -293,6 +294,7 @@ export type WorkflowRunStatusApiResponse = {
total_cost: number | null; total_cost: number | null;
task_v2: TaskV2 | null; task_v2: TaskV2 | null;
workflow_title: string | null; workflow_title: string | null;
max_screenshot_scrolling_times: number | null;
}; };
export type TaskGenerationApiResponse = { export type TaskGenerationApiResponse = {

View File

@@ -39,6 +39,7 @@ import {
} from "./taskFormTypes"; } from "./taskFormTypes";
import { ProxySelector } from "@/components/ProxySelector"; import { ProxySelector } from "@/components/ProxySelector";
import { Switch } from "@/components/ui/switch"; import { Switch } from "@/components/ui/switch";
import { MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT } from "@/routes/workflows/editor/nodes/Taskv2Node/types";
type Props = { type Props = {
initialValues: CreateNewTaskFormValues; initialValues: CreateNewTaskFormValues;
}; };
@@ -80,6 +81,7 @@ function createTaskRequestObject(
extracted_information_schema: extractedInformationSchema, extracted_information_schema: extractedInformationSchema,
totp_identifier: transform(formValues.totpIdentifier), totp_identifier: transform(formValues.totpIdentifier),
error_code_mapping: errorCodeMapping, error_code_mapping: errorCodeMapping,
max_screenshot_scrolling_times: formValues.maxScreenshotScrollingTimes,
include_action_history_in_verification: include_action_history_in_verification:
formValues.includeActionHistoryInVerification, formValues.includeActionHistoryInVerification,
}; };
@@ -114,6 +116,8 @@ function CreateNewTaskForm({ initialValues }: Props) {
...initialValues, ...initialValues,
maxStepsOverride: initialValues.maxStepsOverride ?? null, maxStepsOverride: initialValues.maxStepsOverride ?? null,
proxyLocation: initialValues.proxyLocation ?? ProxyLocation.Residential, proxyLocation: initialValues.proxyLocation ?? ProxyLocation.Residential,
maxScreenshotScrollingTimes:
initialValues.maxScreenshotScrollingTimes ?? null,
}, },
}); });
const { errors } = useFormState({ control: form.control }); const { errors } = useFormState({ control: form.control });
@@ -557,6 +561,45 @@ function CreateNewTaskForm({ initialValues }: Props) {
); );
}} }}
/> />
<FormField
control={form.control}
name="maxScreenshotScrollingTimes"
render={({ field }) => (
<FormItem>
<div className="flex gap-16">
<FormLabel>
<div className="w-72">
<h1 className="text-lg">
Max Scrolling Screenshots
</h1>
<h2 className="text-base text-slate-400">
{`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
</h2>
</div>
</FormLabel>
<div className="w-full">
<FormControl>
<Input
{...field}
type="number"
min={0}
value={field.value ?? ""}
placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
onChange={(event) => {
const value =
event.target.value === ""
? null
: Number(event.target.value);
field.onChange(value);
}}
/>
</FormControl>
<FormMessage />
</div>
</div>
</FormItem>
)}
/>
<Separator /> <Separator />
<FormField <FormField
control={form.control} control={form.control}

View File

@@ -62,6 +62,7 @@ function CreateNewTaskFormPage() {
webhookCallbackUrl: null, webhookCallbackUrl: null,
proxyLocation: null, proxyLocation: null,
includeActionHistoryInVerification: null, includeActionHistoryInVerification: null,
maxScreenshotScrollingTimes: null,
}} }}
/> />
</div> </div>
@@ -131,6 +132,7 @@ function CreateNewTaskFormPage() {
includeActionHistoryInVerification: includeActionHistoryInVerification:
data.workflow_definition.blocks[0] data.workflow_definition.blocks[0]
.include_action_history_in_verification, .include_action_history_in_verification,
maxScreenshotScrollingTimes: data.max_screenshot_scrolling_times,
}} }}
/> />
</div> </div>

View File

@@ -43,7 +43,10 @@ import {
generateUniqueEmail, generateUniqueEmail,
} from "../data/sampleTaskData"; } from "../data/sampleTaskData";
import { ExampleCasePill } from "./ExampleCasePill"; import { ExampleCasePill } from "./ExampleCasePill";
import { MAX_STEPS_DEFAULT } from "@/routes/workflows/editor/nodes/Taskv2Node/types"; import {
MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT,
MAX_STEPS_DEFAULT,
} from "@/routes/workflows/editor/nodes/Taskv2Node/types";
function createTemplateTaskFromTaskGenerationParameters( function createTemplateTaskFromTaskGenerationParameters(
values: TaskGenerationApiResponse, values: TaskGenerationApiResponse,
@@ -153,6 +156,8 @@ function PromptBox() {
const [publishWorkflow, setPublishWorkflow] = useState(false); const [publishWorkflow, setPublishWorkflow] = useState(false);
const [totpIdentifier, setTotpIdentifier] = useState(""); const [totpIdentifier, setTotpIdentifier] = useState("");
const [maxStepsOverride, setMaxStepsOverride] = useState<string | null>(null); const [maxStepsOverride, setMaxStepsOverride] = useState<string | null>(null);
const [maxScreenshotScrollingTimes, setMaxScreenshotScrollingTimes] =
useState<string | null>(null);
const [showAdvancedSettings, setShowAdvancedSettings] = useState(false); const [showAdvancedSettings, setShowAdvancedSettings] = useState(false);
const [dataSchema, setDataSchema] = useState<string | null>(null); const [dataSchema, setDataSchema] = useState<string | null>(null);
@@ -167,6 +172,7 @@ function PromptBox() {
proxy_location: proxyLocation, proxy_location: proxyLocation,
totp_identifier: totpIdentifier, totp_identifier: totpIdentifier,
publish_workflow: publishWorkflow, publish_workflow: publishWorkflow,
max_screenshot_scrolling_times: maxScreenshotScrollingTimes,
extracted_information_schema: dataSchema extracted_information_schema: dataSchema
? (() => { ? (() => {
try { try {
@@ -438,6 +444,21 @@ function PromptBox() {
/> />
</div> </div>
</div> </div>
<div className="flex gap-16">
<div className="w-48 shrink-0">
<div className="text-sm">Max Scrolling Screenshots</div>
<div className="text-xs text-slate-400">
{`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
</div>
</div>
<Input
value={maxScreenshotScrollingTimes ?? ""}
placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
onChange={(event) => {
setMaxScreenshotScrollingTimes(event.target.value);
}}
/>
</div>
</div> </div>
</div> </div>
) : null} ) : null}

View File

@@ -44,6 +44,8 @@ function RetryTask() {
proxyLocation: task.request.proxy_location ?? null, proxyLocation: task.request.proxy_location ?? null,
includeActionHistoryInVerification: includeActionHistoryInVerification:
task.request.include_action_history_in_verification ?? false, task.request.include_action_history_in_verification ?? false,
maxScreenshotScrollingTimes:
task.request.max_screenshot_scrolling_times ?? null,
}} }}
/> />
</div> </div>

View File

@@ -15,6 +15,7 @@ const createNewTaskFormSchemaBase = z.object({
errorCodeMapping: z.string().or(z.null()), errorCodeMapping: z.string().or(z.null()),
proxyLocation: z.nativeEnum(ProxyLocation).or(z.null()), proxyLocation: z.nativeEnum(ProxyLocation).or(z.null()),
includeActionHistoryInVerification: z.boolean().or(z.null()).default(false), includeActionHistoryInVerification: z.boolean().or(z.null()).default(false),
maxScreenshotScrollingTimes: z.number().or(z.null()).default(null),
}); });
const savedTaskFormSchemaBase = createNewTaskFormSchemaBase.extend({ const savedTaskFormSchemaBase = createNewTaskFormSchemaBase.extend({

View File

@@ -28,12 +28,14 @@ import { WorkflowParameter } from "./types/workflowTypes";
import { WorkflowParameterInput } from "./WorkflowParameterInput"; import { WorkflowParameterInput } from "./WorkflowParameterInput";
import { AxiosError } from "axios"; import { AxiosError } from "axios";
import { getLabelForWorkflowParameterType } from "./editor/workflowEditorUtils"; import { getLabelForWorkflowParameterType } from "./editor/workflowEditorUtils";
import { MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT } from "./editor/nodes/Taskv2Node/types";
type Props = { type Props = {
workflowParameters: Array<WorkflowParameter>; workflowParameters: Array<WorkflowParameter>;
initialValues: Record<string, unknown>; initialValues: Record<string, unknown>;
initialSettings: { initialSettings: {
proxyLocation: ProxyLocation; proxyLocation: ProxyLocation;
webhookCallbackUrl: string; webhookCallbackUrl: string;
maxScreenshotScrollingTimes: number | null;
}; };
}; };
@@ -73,14 +75,20 @@ type RunWorkflowRequestBody = {
proxy_location: ProxyLocation | null; proxy_location: ProxyLocation | null;
webhook_callback_url?: string | null; webhook_callback_url?: string | null;
browser_session_id: string | null; browser_session_id: string | null;
max_screenshot_scrolling_times?: number | null;
}; };
function getRunWorkflowRequestBody( function getRunWorkflowRequestBody(
values: RunWorkflowFormType, values: RunWorkflowFormType,
workflowParameters: Array<WorkflowParameter>, workflowParameters: Array<WorkflowParameter>,
): RunWorkflowRequestBody { ): RunWorkflowRequestBody {
const { webhookCallbackUrl, proxyLocation, browserSessionId, ...parameters } = const {
values; webhookCallbackUrl,
proxyLocation,
browserSessionId,
maxScreenshotScrollingTimes,
...parameters
} = values;
const parsedParameters = parseValuesForWorkflowRun( const parsedParameters = parseValuesForWorkflowRun(
parameters, parameters,
@@ -95,6 +103,10 @@ function getRunWorkflowRequestBody(
browser_session_id: bsi, browser_session_id: bsi,
}; };
if (maxScreenshotScrollingTimes) {
body.max_screenshot_scrolling_times = maxScreenshotScrollingTimes;
}
if (webhookCallbackUrl) { if (webhookCallbackUrl) {
body.webhook_callback_url = webhookCallbackUrl; body.webhook_callback_url = webhookCallbackUrl;
} }
@@ -106,6 +118,7 @@ type RunWorkflowFormType = Record<string, unknown> & {
webhookCallbackUrl: string; webhookCallbackUrl: string;
proxyLocation: ProxyLocation; proxyLocation: ProxyLocation;
browserSessionId: string | null; browserSessionId: string | null;
maxScreenshotScrollingTimes: number | null;
}; };
function RunWorkflowForm({ function RunWorkflowForm({
@@ -127,6 +140,7 @@ function RunWorkflowForm({
webhookCallbackUrl: initialSettings.webhookCallbackUrl, webhookCallbackUrl: initialSettings.webhookCallbackUrl,
proxyLocation: initialSettings.proxyLocation, proxyLocation: initialSettings.proxyLocation,
browserSessionId: browserSessionIdDefault, browserSessionId: browserSessionIdDefault,
maxScreenshotScrollingTimes: initialSettings.maxScreenshotScrollingTimes,
}, },
}); });
const apiCredential = useApiCredential(); const apiCredential = useApiCredential();
@@ -177,6 +191,7 @@ function RunWorkflowForm({
webhookCallbackUrl, webhookCallbackUrl,
proxyLocation, proxyLocation,
browserSessionId, browserSessionId,
maxScreenshotScrollingTimes,
...parameters ...parameters
} = values; } = values;
@@ -189,6 +204,7 @@ function RunWorkflowForm({
webhookCallbackUrl, webhookCallbackUrl,
proxyLocation, proxyLocation,
browserSessionId, browserSessionId,
maxScreenshotScrollingTimes,
}); });
} }
@@ -392,6 +408,48 @@ function RunWorkflowForm({
); );
}} }}
/> />
<FormField
key="maxScreenshotScrollingTimes"
control={form.control}
name="maxScreenshotScrollingTimes"
render={({ field }) => {
return (
<FormItem>
<div className="flex gap-16">
<FormLabel>
<div className="w-72">
<div className="flex items-center gap-2 text-lg">
Max Scrolling Screenshots
</div>
<h2 className="text-sm text-slate-400">
{`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
</h2>
</div>
</FormLabel>
<div className="w-full space-y-2">
<FormControl>
<Input
{...field}
type="number"
min={0}
value={field.value ?? ""}
placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
onChange={(event) => {
const value =
event.target.value === ""
? null
: Number(event.target.value);
field.onChange(value);
}}
/>
</FormControl>
<FormMessage />
</div>
</div>
</FormItem>
);
}}
/>
</div> </div>
<div className="flex justify-end gap-2"> <div className="flex justify-end gap-2">
<CopyApiCommandDropdown <CopyApiCommandDropdown

View File

@@ -100,6 +100,8 @@ function WorkflowRun() {
const parameters = workflowRun?.parameters ?? {}; const parameters = workflowRun?.parameters ?? {};
const proxyLocation = const proxyLocation =
workflowRun?.proxy_location ?? ProxyLocation.Residential; workflowRun?.proxy_location ?? ProxyLocation.Residential;
const maxScreenshotScrollingTimes =
workflowRun?.max_screenshot_scrolling_times ?? null;
const title = workflowIsLoading ? ( const title = workflowIsLoading ? (
<Skeleton className="h-9 w-48" /> <Skeleton className="h-9 w-48" />
@@ -244,6 +246,7 @@ function WorkflowRun() {
data: parameters, data: parameters,
proxyLocation, proxyLocation,
webhookCallbackUrl: workflowRun?.webhook_callback_url ?? "", webhookCallbackUrl: workflowRun?.webhook_callback_url ?? "",
maxScreenshotScrollingTimes,
}} }}
> >
<PlayIcon className="mr-2 h-4 w-4" /> <PlayIcon className="mr-2 h-4 w-4" />

View File

@@ -30,6 +30,8 @@ function WorkflowRunParameters() {
const proxyLocation = location.state const proxyLocation = location.state
? (location.state.proxyLocation as ProxyLocation) ? (location.state.proxyLocation as ProxyLocation)
: null; : null;
const maxScreenshotScrollingTimes =
location.state?.maxScreenshotScrollingTimes ?? null;
const webhookCallbackUrl = location.state const webhookCallbackUrl = location.state
? (location.state.webhookCallbackUrl as string) ? (location.state.webhookCallbackUrl as string)
@@ -109,6 +111,10 @@ function WorkflowRunParameters() {
ProxyLocation.Residential, ProxyLocation.Residential,
webhookCallbackUrl: webhookCallbackUrl:
webhookCallbackUrl ?? workflow.webhook_callback_url ?? "", webhookCallbackUrl ?? workflow.webhook_callback_url ?? "",
maxScreenshotScrollingTimes:
maxScreenshotScrollingTimes ??
workflow.max_screenshot_scrolling_times ??
null,
}} }}
/> />
</div> </div>

View File

@@ -289,6 +289,8 @@ function FlowRenderer({
webhook_callback_url: data.settings.webhookCallbackUrl, webhook_callback_url: data.settings.webhookCallbackUrl,
persist_browser_session: data.settings.persistBrowserSession, persist_browser_session: data.settings.persistBrowserSession,
model: data.settings.model, model: data.settings.model,
max_screenshot_scrolling_times:
data.settings.maxScreenshotScrollingTimes,
totp_verification_url: workflow.totp_verification_url, totp_verification_url: workflow.totp_verification_url,
workflow_definition: { workflow_definition: {
parameters: data.parameters, parameters: data.parameters,

View File

@@ -60,6 +60,7 @@ function WorkflowEditor() {
proxyLocation: workflow.proxy_location, proxyLocation: workflow.proxy_location,
webhookCallbackUrl: workflow.webhook_callback_url, webhookCallbackUrl: workflow.webhook_callback_url,
model: workflow.model, model: workflow.model,
maxScreenshotScrollingTimes: workflow.max_screenshot_scrolling_times,
}; };
const elements = getElements( const elements = getElements(

View File

@@ -20,6 +20,7 @@ import { Separator } from "@/components/ui/separator";
import { ModelsResponse } from "@/api/types"; import { ModelsResponse } from "@/api/types";
import { ModelSelector } from "@/components/ModelSelector"; import { ModelSelector } from "@/components/ModelSelector";
import { WorkflowModel } from "@/routes/workflows/types/workflowTypes"; import { WorkflowModel } from "@/routes/workflows/types/workflowTypes";
import { MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT } from "../Taskv2Node/types";
function StartNode({ id, data }: NodeProps<StartNode>) { function StartNode({ id, data }: NodeProps<StartNode>) {
const credentialGetter = useCredentialGetter(); const credentialGetter = useCredentialGetter();
@@ -51,6 +52,9 @@ function StartNode({ id, data }: NodeProps<StartNode>) {
? data.persistBrowserSession ? data.persistBrowserSession
: false, : false,
model: data.withWorkflowSettings ? data.model : workflowModel, model: data.withWorkflowSettings ? data.model : workflowModel,
maxScreenshotScrollingTimes: data.withWorkflowSettings
? data.maxScreenshotScrollingTimes
: null,
}); });
function handleChange(key: string, value: unknown) { function handleChange(key: string, value: unknown) {
@@ -130,6 +134,26 @@ function StartNode({ id, data }: NodeProps<StartNode>) {
/> />
</div> </div>
</div> </div>
<div className="space-y-2">
<div className="flex items-center gap-2">
<Label>Max Scrolling Screenshots</Label>
<HelpTooltip
content={`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
/>
</div>
<Input
value={inputs.maxScreenshotScrollingTimes ?? ""}
placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
onChange={(event) => {
const value =
event.target.value === ""
? null
: Number(event.target.value);
handleChange("maxScreenshotScrollingTimes", value);
}}
/>
</div>
</div> </div>
</AccordionContent> </AccordionContent>
</AccordionItem> </AccordionItem>

View File

@@ -9,6 +9,7 @@ export type WorkflowStartNodeData = {
proxyLocation: ProxyLocation; proxyLocation: ProxyLocation;
persistBrowserSession: boolean; persistBrowserSession: boolean;
model: WorkflowModel | null; model: WorkflowModel | null;
maxScreenshotScrollingTimes: number | null;
editable: boolean; editable: boolean;
}; };

View File

@@ -2,6 +2,7 @@ import { Node } from "@xyflow/react";
import { NodeBaseData } from "../types"; import { NodeBaseData } from "../types";
export const MAX_STEPS_DEFAULT = 25; export const MAX_STEPS_DEFAULT = 25;
export const MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT = 3;
export type Taskv2NodeData = NodeBaseData & { export type Taskv2NodeData = NodeBaseData & {
prompt: string; prompt: string;
@@ -9,6 +10,7 @@ export type Taskv2NodeData = NodeBaseData & {
totpVerificationUrl: string | null; totpVerificationUrl: string | null;
totpIdentifier: string | null; totpIdentifier: string | null;
maxSteps: number | null; maxSteps: number | null;
maxScreenshotScrollingTimes: number | null;
}; };
export type Taskv2Node = Node<Taskv2NodeData, "taskv2">; export type Taskv2Node = Node<Taskv2NodeData, "taskv2">;
@@ -23,6 +25,7 @@ export const taskv2NodeDefaultData: Taskv2NodeData = {
totpVerificationUrl: null, totpVerificationUrl: null,
maxSteps: MAX_STEPS_DEFAULT, maxSteps: MAX_STEPS_DEFAULT,
model: null, model: null,
maxScreenshotScrollingTimes: null,
}; };
export function isTaskV2Node(node: Node): node is Taskv2Node { export function isTaskV2Node(node: Node): node is Taskv2Node {

View File

@@ -251,6 +251,7 @@ function convertToNode(
maxSteps: block.max_steps, maxSteps: block.max_steps,
totpIdentifier: block.totp_identifier, totpIdentifier: block.totp_identifier,
totpVerificationUrl: block.totp_verification_url, totpVerificationUrl: block.totp_verification_url,
maxScreenshotScrollingTimes: null,
}, },
}; };
} }
@@ -662,6 +663,7 @@ function getElements(
proxyLocation: settings.proxyLocation ?? ProxyLocation.Residential, proxyLocation: settings.proxyLocation ?? ProxyLocation.Residential,
webhookCallbackUrl: settings.webhookCallbackUrl ?? "", webhookCallbackUrl: settings.webhookCallbackUrl ?? "",
model: settings.model, model: settings.model,
maxScreenshotScrollingTimes: settings.maxScreenshotScrollingTimes,
editable, editable,
}), }),
); );
@@ -1322,6 +1324,7 @@ function getWorkflowSettings(nodes: Array<AppNode>): WorkflowSettings {
proxyLocation: ProxyLocation.Residential, proxyLocation: ProxyLocation.Residential,
webhookCallbackUrl: null, webhookCallbackUrl: null,
model: null, model: null,
maxScreenshotScrollingTimes: null,
}; };
const startNodes = nodes.filter(isStartNode); const startNodes = nodes.filter(isStartNode);
const startNodeWithWorkflowSettings = startNodes.find( const startNodeWithWorkflowSettings = startNodes.find(
@@ -1337,6 +1340,7 @@ function getWorkflowSettings(nodes: Array<AppNode>): WorkflowSettings {
proxyLocation: data.proxyLocation, proxyLocation: data.proxyLocation,
webhookCallbackUrl: data.webhookCallbackUrl, webhookCallbackUrl: data.webhookCallbackUrl,
model: data.model, model: data.model,
maxScreenshotScrollingTimes: data.maxScreenshotScrollingTimes,
}; };
} }
return defaultSettings; return defaultSettings;
@@ -1992,6 +1996,7 @@ function convert(workflow: WorkflowApiResponse): WorkflowCreateYAMLRequest {
persist_browser_session: workflow.persist_browser_session, persist_browser_session: workflow.persist_browser_session,
model: workflow.model, model: workflow.model,
totp_verification_url: workflow.totp_verification_url, totp_verification_url: workflow.totp_verification_url,
max_screenshot_scrolling_times: workflow.max_screenshot_scrolling_times,
workflow_definition: { workflow_definition: {
parameters: convertParametersToParameterYAML(userParameters), parameters: convertParametersToParameterYAML(userParameters),
blocks: convertBlocksToBlockYAML(workflow.workflow_definition.blocks), blocks: convertBlocksToBlockYAML(workflow.workflow_definition.blocks),

View File

@@ -470,6 +470,7 @@ export type WorkflowApiResponse = {
model: WorkflowModel | null; model: WorkflowModel | null;
totp_verification_url: string | null; totp_verification_url: string | null;
totp_identifier: string | null; totp_identifier: string | null;
max_screenshot_scrolling_times: number | null;
created_at: string; created_at: string;
modified_at: string; modified_at: string;
deleted_at: string | null; deleted_at: string | null;
@@ -480,6 +481,7 @@ export type WorkflowSettings = {
webhookCallbackUrl: string | null; webhookCallbackUrl: string | null;
persistBrowserSession: boolean; persistBrowserSession: boolean;
model: WorkflowModel | null; model: WorkflowModel | null;
maxScreenshotScrollingTimes: number | null;
}; };
export type WorkflowModel = JsonObjectExtendable<{ model_name: string }>; export type WorkflowModel = JsonObjectExtendable<{ model_name: string }>;

View File

@@ -12,6 +12,7 @@ export type WorkflowCreateYAMLRequest = {
totp_verification_url?: string | null; totp_verification_url?: string | null;
workflow_definition: WorkflowDefinitionYAML; workflow_definition: WorkflowDefinitionYAML;
is_saved_task?: boolean; is_saved_task?: boolean;
max_screenshot_scrolling_times?: number | null;
}; };
export type WorkflowDefinitionYAML = { export type WorkflowDefinitionYAML = {

View File

@@ -20,6 +20,7 @@ AUTO_COMPLETION_POTENTIAL_VALUES_COUNT = 3
DROPDOWN_MENU_MAX_DISTANCE = 100 DROPDOWN_MENU_MAX_DISTANCE = 100
BROWSER_DOWNLOADING_SUFFIX = ".crdownload" BROWSER_DOWNLOADING_SUFFIX = ".crdownload"
MAX_UPLOAD_FILE_COUNT = 50 MAX_UPLOAD_FILE_COUNT = 50
DEFAULT_MAX_SCREENSHOT_SCROLLING_TIMES = 3
# reserved fields for navigation payload # reserved fields for navigation payload
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"

View File

@@ -19,6 +19,7 @@ from skyvern import analytics
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import ( from skyvern.constants import (
BROWSER_DOWNLOADING_SUFFIX, BROWSER_DOWNLOADING_SUFFIX,
DEFAULT_MAX_SCREENSHOT_SCROLLING_TIMES,
GET_DOWNLOADED_FILES_TIMEOUT, GET_DOWNLOADED_FILES_TIMEOUT,
SAVE_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT,
SCRAPE_TYPE_ORDER, SCRAPE_TYPE_ORDER,
@@ -181,6 +182,7 @@ class ForgeAgent:
error_code_mapping=task_block.error_code_mapping, error_code_mapping=task_block.error_code_mapping,
include_action_history_in_verification=task_block.include_action_history_in_verification, include_action_history_in_verification=task_block.include_action_history_in_verification,
model=task_block.model, model=task_block.model,
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
) )
LOG.info( LOG.info(
"Created a new task for workflow run", "Created a new task for workflow run",
@@ -237,6 +239,7 @@ class ForgeAgent:
application=task_request.application, application=task_request.application,
include_action_history_in_verification=task_request.include_action_history_in_verification, include_action_history_in_verification=task_request.include_action_history_in_verification,
model=task_request.model, model=task_request.model,
max_screenshot_scrolling_times=task_request.max_screenshot_scrolling_times,
) )
LOG.info( LOG.info(
"Created new task", "Created new task",
@@ -1650,12 +1653,22 @@ class ForgeAgent:
if not working_page: if not working_page:
raise BrowserStateMissingPage() raise BrowserStateMissingPage()
fullpage_screenshot = True context = skyvern_context.ensure_context()
scrolling_number = context.max_screenshot_scrolling_times
if scrolling_number is None:
scrolling_number = DEFAULT_MAX_SCREENSHOT_SCROLLING_TIMES
if engine in CUA_ENGINES: if engine in CUA_ENGINES:
fullpage_screenshot = False scrolling_number = 0
try: try:
screenshot = await browser_state.take_screenshot(full_page=fullpage_screenshot) screenshot = await browser_state.take_post_action_screenshot(
scrolling_number=scrolling_number,
use_playwright_fullpage=app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
"ENABLE_PLAYWRIGHT_FULLPAGE",
str(task.organization_id),
),
)
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(
step=step, step=step,
artifact_type=ArtifactType.SCREENSHOT_ACTION, artifact_type=ArtifactType.SCREENSHOT_ACTION,
@@ -2135,7 +2148,12 @@ class ForgeAgent:
browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id) browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id)
if browser_state is not None and await browser_state.get_working_page() is not None: if browser_state is not None and await browser_state.get_working_page() is not None:
try: try:
screenshot = await browser_state.take_screenshot(full_page=True) screenshot = await browser_state.take_fullpage_screenshot(
use_playwright_fullpage=app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
"ENABLE_PLAYWRIGHT_FULLPAGE",
str(task.organization_id),
)
)
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(
step=last_step, step=last_step,
artifact_type=ArtifactType.SCREENSHOT_FINAL, artifact_type=ArtifactType.SCREENSHOT_FINAL,

View File

@@ -23,6 +23,7 @@ class SkyvernContext:
hashed_href_map: dict[str, str] = field(default_factory=dict) hashed_href_map: dict[str, str] = field(default_factory=dict)
refresh_working_page: bool = False refresh_working_page: bool = False
frame_index_map: dict[Frame, int] = field(default_factory=dict) frame_index_map: dict[Frame, int] = field(default_factory=dict)
max_screenshot_scrolling_times: int | None = None
def __repr__(self) -> str: def __repr__(self) -> str:
return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, task_v2_id={self.task_v2_id}, max_steps_override={self.max_steps_override})" return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, task_v2_id={self.task_v2_id}, max_steps_override={self.max_steps_override})"

View File

@@ -149,6 +149,7 @@ class AgentDB:
application: str | None = None, application: str | None = None,
include_action_history_in_verification: bool | None = None, include_action_history_in_verification: bool | None = None,
model: dict[str, Any] | None = None, model: dict[str, Any] | None = None,
max_screenshot_scrolling_times: int | None = None,
) -> Task: ) -> Task:
try: try:
async with self.Session() as session: async with self.Session() as session:
@@ -176,6 +177,7 @@ class AgentDB:
application=application, application=application,
include_action_history_in_verification=include_action_history_in_verification, include_action_history_in_verification=include_action_history_in_verification,
model=model, model=model,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
) )
session.add(new_task) session.add(new_task)
await session.commit() await session.commit()
@@ -1217,6 +1219,7 @@ class AgentDB:
description: str | None = None, description: str | None = None,
proxy_location: ProxyLocation | None = None, proxy_location: ProxyLocation | None = None,
webhook_callback_url: str | None = None, webhook_callback_url: str | None = None,
max_screenshot_scrolling_times: int | None = None,
totp_verification_url: str | None = None, totp_verification_url: str | None = None,
totp_identifier: str | None = None, totp_identifier: str | None = None,
persist_browser_session: bool = False, persist_browser_session: bool = False,
@@ -1236,6 +1239,7 @@ class AgentDB:
webhook_callback_url=webhook_callback_url, webhook_callback_url=webhook_callback_url,
totp_verification_url=totp_verification_url, totp_verification_url=totp_verification_url,
totp_identifier=totp_identifier, totp_identifier=totp_identifier,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
persist_browser_session=persist_browser_session, persist_browser_session=persist_browser_session,
model=model, model=model,
is_saved_task=is_saved_task, is_saved_task=is_saved_task,
@@ -1479,6 +1483,7 @@ class AgentDB:
totp_verification_url: str | None = None, totp_verification_url: str | None = None,
totp_identifier: str | None = None, totp_identifier: str | None = None,
parent_workflow_run_id: str | None = None, parent_workflow_run_id: str | None = None,
max_screenshot_scrolling_times: int | None = None,
) -> WorkflowRun: ) -> WorkflowRun:
try: try:
async with self.Session() as session: async with self.Session() as session:
@@ -1492,6 +1497,7 @@ class AgentDB:
totp_verification_url=totp_verification_url, totp_verification_url=totp_verification_url,
totp_identifier=totp_identifier, totp_identifier=totp_identifier,
parent_workflow_run_id=parent_workflow_run_id, parent_workflow_run_id=parent_workflow_run_id,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
) )
session.add(workflow_run) session.add(workflow_run)
await session.commit() await session.commit()
@@ -2436,6 +2442,7 @@ class AgentDB:
extracted_information_schema: dict | list | str | None = None, extracted_information_schema: dict | list | str | None = None,
error_code_mapping: dict | None = None, error_code_mapping: dict | None = None,
model: dict[str, Any] | None = None, model: dict[str, Any] | None = None,
max_screenshot_scrolling_times: int | None = None,
) -> TaskV2: ) -> TaskV2:
async with self.Session() as session: async with self.Session() as session:
new_task_v2 = TaskV2Model( new_task_v2 = TaskV2Model(
@@ -2452,6 +2459,7 @@ class AgentDB:
error_code_mapping=error_code_mapping, error_code_mapping=error_code_mapping,
organization_id=organization_id, organization_id=organization_id,
model=model, model=model,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
) )
session.add(new_task_v2) session.add(new_task_v2)
await session.commit() await session.commit()

View File

@@ -88,6 +88,7 @@ class TaskModel(Base):
queued_at = Column(DateTime, nullable=True) queued_at = Column(DateTime, nullable=True)
started_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True)
finished_at = Column(DateTime, nullable=True) finished_at = Column(DateTime, nullable=True)
max_screenshot_scrolling_times = Column(Integer, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True) created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
modified_at = Column( modified_at = Column(
DateTime, DateTime,
@@ -218,6 +219,7 @@ class WorkflowModel(Base):
workflow_definition = Column(JSON, nullable=False) workflow_definition = Column(JSON, nullable=False)
proxy_location = Column(String) proxy_location = Column(String)
webhook_callback_url = Column(String) webhook_callback_url = Column(String)
max_screenshot_scrolling_times = Column(Integer, nullable=True)
totp_verification_url = Column(String) totp_verification_url = Column(String)
totp_identifier = Column(String) totp_identifier = Column(String)
persist_browser_session = Column(Boolean, default=False, nullable=False) persist_browser_session = Column(Boolean, default=False, nullable=False)
@@ -254,6 +256,7 @@ class WorkflowRunModel(Base):
webhook_callback_url = Column(String) webhook_callback_url = Column(String)
totp_verification_url = Column(String) totp_verification_url = Column(String)
totp_identifier = Column(String) totp_identifier = Column(String)
max_screenshot_scrolling_times = Column(Integer, nullable=True)
queued_at = Column(DateTime, nullable=True) queued_at = Column(DateTime, nullable=True)
started_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True)
@@ -621,6 +624,7 @@ class TaskV2Model(Base):
extracted_information_schema = Column(JSON, nullable=True) extracted_information_schema = Column(JSON, nullable=True)
error_code_mapping = Column(JSON, nullable=True) error_code_mapping = Column(JSON, nullable=True)
max_steps = Column(Integer, nullable=True) max_steps = Column(Integer, nullable=True)
max_screenshot_scrolling_times = Column(Integer, nullable=True)
queued_at = Column(DateTime, nullable=True) queued_at = Column(DateTime, nullable=True)
started_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True)

View File

@@ -142,6 +142,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p
queued_at=task_obj.queued_at, queued_at=task_obj.queued_at,
started_at=task_obj.started_at, started_at=task_obj.started_at,
finished_at=task_obj.finished_at, finished_at=task_obj.finished_at,
max_screenshot_scrolling_times=task_obj.max_screenshot_scrolling_times,
) )
return task return task
@@ -238,6 +239,7 @@ def convert_to_workflow(workflow_model: WorkflowModel, debug_enabled: bool = Fal
persist_browser_session=workflow_model.persist_browser_session, persist_browser_session=workflow_model.persist_browser_session,
model=workflow_model.model, model=workflow_model.model,
proxy_location=(ProxyLocation(workflow_model.proxy_location) if workflow_model.proxy_location else None), proxy_location=(ProxyLocation(workflow_model.proxy_location) if workflow_model.proxy_location else None),
max_screenshot_scrolling_times=workflow_model.max_screenshot_scrolling_times,
version=workflow_model.version, version=workflow_model.version,
is_saved_task=workflow_model.is_saved_task, is_saved_task=workflow_model.is_saved_task,
description=workflow_model.description, description=workflow_model.description,
@@ -278,6 +280,7 @@ def convert_to_workflow_run(
created_at=workflow_run_model.created_at, created_at=workflow_run_model.created_at,
modified_at=workflow_run_model.modified_at, modified_at=workflow_run_model.modified_at,
workflow_title=workflow_title, workflow_title=workflow_title,
max_screenshot_scrolling_times=workflow_run_model.max_screenshot_scrolling_times,
) )

View File

@@ -107,6 +107,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
context.task_id = task.task_id context.task_id = task.task_id
context.organization_id = organization_id context.organization_id = organization_id
context.max_steps_override = max_steps_override context.max_steps_override = max_steps_override
context.max_screenshot_scrolling_times = task.max_screenshot_scrolling_times
if background_tasks: if background_tasks:
await initialize_skyvern_state_file(task_id=task_id, organization_id=organization_id) await initialize_skyvern_state_file(task_id=task_id, organization_id=organization_id)

View File

@@ -166,6 +166,7 @@ async def run_task(
totp_identifier=run_request.totp_identifier, totp_identifier=run_request.totp_identifier,
include_action_history_in_verification=run_request.include_action_history_in_verification, include_action_history_in_verification=run_request.include_action_history_in_verification,
model=run_request.model, model=run_request.model,
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
) )
task_v1_response = await task_v1_service.run_task( task_v1_response = await task_v1_service.run_task(
task=task_v1_request, task=task_v1_request,
@@ -203,6 +204,7 @@ async def run_task(
data_extraction_schema=task_v1_response.extracted_information_schema, data_extraction_schema=task_v1_response.extracted_information_schema,
error_code_mapping=task_v1_response.error_code_mapping, error_code_mapping=task_v1_response.error_code_mapping,
browser_session_id=run_request.browser_session_id, browser_session_id=run_request.browser_session_id,
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
), ),
) )
if run_request.engine == RunEngine.skyvern_v2: if run_request.engine == RunEngine.skyvern_v2:
@@ -221,6 +223,7 @@ async def run_task(
error_code_mapping=run_request.error_code_mapping, error_code_mapping=run_request.error_code_mapping,
create_task_run=True, create_task_run=True,
model=run_request.model, model=run_request.model,
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
) )
except MissingBrowserAddressError as e: except MissingBrowserAddressError as e:
raise HTTPException(status_code=400, detail=str(e)) from e raise HTTPException(status_code=400, detail=str(e)) from e
@@ -263,6 +266,7 @@ async def run_task(
error_code_mapping=task_v2.error_code_mapping, error_code_mapping=task_v2.error_code_mapping,
data_extraction_schema=task_v2.extracted_information_schema, data_extraction_schema=task_v2.extracted_information_schema,
publish_workflow=run_request.publish_workflow, publish_workflow=run_request.publish_workflow,
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
), ),
) )
LOG.error("Invalid agent engine", engine=run_request.engine, organization_id=current_org.organization_id) LOG.error("Invalid agent engine", engine=run_request.engine, organization_id=current_org.organization_id)
@@ -318,6 +322,7 @@ async def run_workflow(
totp_identifier=workflow_run_request.totp_identifier, totp_identifier=workflow_run_request.totp_identifier,
totp_url=workflow_run_request.totp_url, totp_url=workflow_run_request.totp_url,
browser_session_id=workflow_run_request.browser_session_id, browser_session_id=workflow_run_request.browser_session_id,
max_screenshot_scrolling_times=workflow_run_request.max_screenshot_scrolling_times,
) )
try: try:
@@ -1765,6 +1770,7 @@ async def run_task_v2(
create_task_run=True, create_task_run=True,
extracted_information_schema=data.extracted_information_schema, extracted_information_schema=data.extracted_information_schema,
error_code_mapping=data.error_code_mapping, error_code_mapping=data.error_code_mapping,
max_screenshot_scrolling_times=data.max_screenshot_scrolling_times,
) )
except MissingBrowserAddressError as e: except MissingBrowserAddressError as e:
raise HTTPException(status_code=400, detail=str(e)) from e raise HTTPException(status_code=400, detail=str(e)) from e

View File

@@ -48,6 +48,8 @@ class TaskV2(BaseModel):
queued_at: datetime | None = None queued_at: datetime | None = None
started_at: datetime | None = None started_at: datetime | None = None
finished_at: datetime | None = None finished_at: datetime | None = None
max_screenshot_scrolling_times: int | None = None
created_at: datetime created_at: datetime
modified_at: datetime modified_at: datetime
@@ -147,6 +149,7 @@ class TaskV2Request(BaseModel):
publish_workflow: bool = False publish_workflow: bool = False
extracted_information_schema: dict | list | str | None = None extracted_information_schema: dict | list | str | None = None
error_code_mapping: dict[str, str] | None = None error_code_mapping: dict[str, str] | None = None
max_screenshot_scrolling_times: int | None = None
@field_validator("url", "webhook_callback_url", "totp_verification_url") @field_validator("url", "webhook_callback_url", "totp_verification_url")
@classmethod @classmethod

View File

@@ -96,6 +96,11 @@ class TaskBase(BaseModel):
description="Whether to include the action history when verifying the task is complete", description="Whether to include the action history when verifying the task is complete",
examples=[True, False], examples=[True, False],
) )
max_screenshot_scrolling_times: int | None = Field(
default=None,
description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's None or 0, it takes the current viewpoint screenshot.",
examples=[10],
)
class TaskRequest(TaskBase): class TaskRequest(TaskBase):
@@ -314,6 +319,7 @@ class Task(TaskBase):
errors=self.errors, errors=self.errors,
max_steps_per_run=self.max_steps_per_run, max_steps_per_run=self.max_steps_per_run,
workflow_run_id=self.workflow_run_id, workflow_run_id=self.workflow_run_id,
max_screenshot_scrolling_times=self.max_screenshot_scrolling_times,
) )
@@ -337,6 +343,7 @@ class TaskResponse(BaseModel):
queued_at: datetime | None = None queued_at: datetime | None = None
started_at: datetime | None = None started_at: datetime | None = None
finished_at: datetime | None = None finished_at: datetime | None = None
max_screenshot_scrolling_times: int | None = None
class TaskOutput(BaseModel): class TaskOutput(BaseModel):

View File

@@ -307,7 +307,12 @@ class Block(BaseModel, abc.ABC):
if not browser_state: if not browser_state:
LOG.warning("No browser state found when creating workflow_run_block", workflow_run_id=workflow_run_id) LOG.warning("No browser state found when creating workflow_run_block", workflow_run_id=workflow_run_id)
else: else:
screenshot = await browser_state.take_screenshot(full_page=True) screenshot = await browser_state.take_fullpage_screenshot(
use_playwright_fullpage=app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
"ENABLE_PLAYWRIGHT_FULLPAGE",
str(organization_id),
)
)
if screenshot: if screenshot:
await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact( await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
workflow_run_block=workflow_run_block, workflow_run_block=workflow_run_block,
@@ -569,8 +574,15 @@ class BaseTaskBlock(Block):
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id
) )
# assert that the browser state is not None, otherwise we can't go through typing
assert browser_state is not None
# add screenshot artifact for the first task # add screenshot artifact for the first task
screenshot = await browser_state.take_screenshot(full_page=True) screenshot = await browser_state.take_fullpage_screenshot(
use_playwright_fullpage=app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
"ENABLE_PLAYWRIGHT_FULLPAGE",
str(organization_id),
)
)
if screenshot: if screenshot:
await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact( await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
workflow_run_block=workflow_run_block, workflow_run_block=workflow_run_block,
@@ -2486,6 +2498,7 @@ class TaskV2Block(Block):
proxy_location=workflow_run.proxy_location, proxy_location=workflow_run.proxy_location,
totp_identifier=self.totp_identifier, totp_identifier=self.totp_identifier,
totp_verification_url=self.totp_verification_url, totp_verification_url=self.totp_verification_url,
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
) )
await app.DATABASE.update_task_v2( await app.DATABASE.update_task_v2(
task_v2.observer_cruise_id, status=TaskV2Status.queued, organization_id=organization_id task_v2.observer_cruise_id, status=TaskV2Status.queued, organization_id=organization_id
@@ -2517,6 +2530,7 @@ class TaskV2Block(Block):
workflow_permanent_id=workflow_run.workflow_permanent_id, workflow_permanent_id=workflow_run.workflow_permanent_id,
workflow_run_id=workflow_run_id, workflow_run_id=workflow_run_id,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
) )
) )
result_dict = None result_dict = None

View File

@@ -22,6 +22,7 @@ class WorkflowRequestBody(BaseModel):
totp_verification_url: str | None = None totp_verification_url: str | None = None
totp_identifier: str | None = None totp_identifier: str | None = None
browser_session_id: str | None = None browser_session_id: str | None = None
max_screenshot_scrolling_times: int | None = None
@field_validator("webhook_callback_url", "totp_verification_url") @field_validator("webhook_callback_url", "totp_verification_url")
@classmethod @classmethod
@@ -76,6 +77,7 @@ class Workflow(BaseModel):
persist_browser_session: bool = False persist_browser_session: bool = False
model: dict[str, Any] | None = None model: dict[str, Any] | None = None
status: WorkflowStatus = WorkflowStatus.published status: WorkflowStatus = WorkflowStatus.published
max_screenshot_scrolling_times: int | None = None
created_at: datetime created_at: datetime
modified_at: datetime modified_at: datetime
@@ -115,6 +117,7 @@ class WorkflowRun(BaseModel):
failure_reason: str | None = None failure_reason: str | None = None
parent_workflow_run_id: str | None = None parent_workflow_run_id: str | None = None
workflow_title: str | None = None workflow_title: str | None = None
max_screenshot_scrolling_times: int | None = None
queued_at: datetime | None = None queued_at: datetime | None = None
started_at: datetime | None = None started_at: datetime | None = None
@@ -162,3 +165,4 @@ class WorkflowRunResponseBase(BaseModel):
task_v2: TaskV2 | None = None task_v2: TaskV2 | None = None
workflow_title: str | None = None workflow_title: str | None = None
browser_session_id: str | None = None browser_session_id: str | None = None
max_screenshot_scrolling_times: int | None = None

View File

@@ -424,4 +424,5 @@ class WorkflowCreateYAMLRequest(BaseModel):
model: dict[str, Any] | None = None model: dict[str, Any] | None = None
workflow_definition: WorkflowDefinitionYAML workflow_definition: WorkflowDefinitionYAML
is_saved_task: bool = False is_saved_task: bool = False
max_screenshot_scrolling_times: int | None = None
status: WorkflowStatus = WorkflowStatus.published status: WorkflowStatus = WorkflowStatus.published

View File

@@ -169,6 +169,7 @@ class WorkflowService:
organization_id=workflow.organization_id, organization_id=workflow.organization_id,
proxy_location=workflow_request.proxy_location, proxy_location=workflow_request.proxy_location,
webhook_callback_url=workflow_request.webhook_callback_url, webhook_callback_url=workflow_request.webhook_callback_url,
max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times,
) )
skyvern_context.set( skyvern_context.set(
SkyvernContext( SkyvernContext(
@@ -178,6 +179,7 @@ class WorkflowService:
workflow_id=workflow_id, workflow_id=workflow_id,
workflow_run_id=workflow_run.workflow_run_id, workflow_run_id=workflow_run.workflow_run_id,
max_steps_override=max_steps_override, max_steps_override=max_steps_override,
max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times,
) )
) )
@@ -577,6 +579,7 @@ class WorkflowService:
workflow_definition: WorkflowDefinition, workflow_definition: WorkflowDefinition,
description: str | None = None, description: str | None = None,
proxy_location: ProxyLocation | None = None, proxy_location: ProxyLocation | None = None,
max_screenshot_scrolling_times: int | None = None,
webhook_callback_url: str | None = None, webhook_callback_url: str | None = None,
totp_verification_url: str | None = None, totp_verification_url: str | None = None,
totp_identifier: str | None = None, totp_identifier: str | None = None,
@@ -594,6 +597,7 @@ class WorkflowService:
description=description, description=description,
proxy_location=proxy_location, proxy_location=proxy_location,
webhook_callback_url=webhook_callback_url, webhook_callback_url=webhook_callback_url,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
totp_verification_url=totp_verification_url, totp_verification_url=totp_verification_url,
totp_identifier=totp_identifier, totp_identifier=totp_identifier,
persist_browser_session=persist_browser_session, persist_browser_session=persist_browser_session,
@@ -767,6 +771,7 @@ class WorkflowService:
totp_verification_url=workflow_request.totp_verification_url, totp_verification_url=workflow_request.totp_verification_url,
totp_identifier=workflow_request.totp_identifier, totp_identifier=workflow_request.totp_identifier,
parent_workflow_run_id=parent_workflow_run_id, parent_workflow_run_id=parent_workflow_run_id,
max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times,
) )
async def mark_workflow_run_as_completed(self, workflow_run_id: str) -> WorkflowRun: async def mark_workflow_run_as_completed(self, workflow_run_id: str) -> WorkflowRun:
@@ -1180,6 +1185,7 @@ class WorkflowService:
total_steps=total_steps, total_steps=total_steps,
total_cost=total_cost, total_cost=total_cost,
workflow_title=workflow.title, workflow_title=workflow.title,
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
) )
async def clean_up_workflow( async def clean_up_workflow(
@@ -1453,6 +1459,7 @@ class WorkflowService:
totp_identifier=request.totp_identifier, totp_identifier=request.totp_identifier,
persist_browser_session=request.persist_browser_session, persist_browser_session=request.persist_browser_session,
model=request.model, model=request.model,
max_screenshot_scrolling_times=request.max_screenshot_scrolling_times,
workflow_permanent_id=workflow_permanent_id, workflow_permanent_id=workflow_permanent_id,
version=existing_version + 1, version=existing_version + 1,
is_saved_task=request.is_saved_task, is_saved_task=request.is_saved_task,
@@ -1470,6 +1477,7 @@ class WorkflowService:
totp_identifier=request.totp_identifier, totp_identifier=request.totp_identifier,
persist_browser_session=request.persist_browser_session, persist_browser_session=request.persist_browser_session,
model=request.model, model=request.model,
max_screenshot_scrolling_times=request.max_screenshot_scrolling_times,
is_saved_task=request.is_saved_task, is_saved_task=request.is_saved_task,
status=request.status, status=request.status,
) )

View File

@@ -279,6 +279,10 @@ class TaskRunRequest(BaseModel):
include_action_history_in_verification: bool | None = Field( include_action_history_in_verification: bool | None = Field(
default=False, description="Whether to include action history when verifying that the task is complete" default=False, description="Whether to include action history when verifying that the task is complete"
) )
max_screenshot_scrolling_times: int | None = Field(
default=None,
description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's None or 0, it takes the current viewpoint screenshot.",
)
@field_validator("url", "webhook_url", "totp_url") @field_validator("url", "webhook_url", "totp_url")
@classmethod @classmethod
@@ -326,6 +330,10 @@ class WorkflowRunRequest(BaseModel):
default=None, default=None,
description="ID of a Skyvern browser session to reuse, having it continue from the current screen state", description="ID of a Skyvern browser session to reuse, having it continue from the current screen state",
) )
max_screenshot_scrolling_times: int | None = Field(
default=None,
description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's None or 0, it takes the current viewpoint screenshot.",
)
@field_validator("webhook_url", "totp_url") @field_validator("webhook_url", "totp_url")
@classmethod @classmethod
@@ -368,9 +376,11 @@ class BaseRunResponse(BaseModel):
examples=["https://app.skyvern.com/tasks/tsk_123", "https://app.skyvern.com/workflows/wpid_123/wr_123"], examples=["https://app.skyvern.com/tasks/tsk_123", "https://app.skyvern.com/workflows/wpid_123/wr_123"],
) )
browser_session_id: str | None = Field( browser_session_id: str | None = Field(
default=None, description="ID of the Skyvern persistent browser session used for this run", examples=["pbs_123"]
)
max_screenshot_scrolling_times: int | None = Field(
default=None, default=None,
description="ID of the Skyvern persistent browser session used for this run", description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's NONE or 0, it takes the current view point screenshot.",
examples=["pbs_123"],
) )

View File

@@ -67,6 +67,7 @@ async def get_run_response(run_id: str, organization_id: str | None = None) -> R
max_steps=task_v1_response.max_steps_per_run, max_steps=task_v1_response.max_steps_per_run,
data_extraction_schema=task_v1_response.request.extracted_information_schema, data_extraction_schema=task_v1_response.request.extracted_information_schema,
error_code_mapping=task_v1_response.request.error_code_mapping, error_code_mapping=task_v1_response.request.error_code_mapping,
max_screenshot_scrolling_times=task_v1_response.request.max_screenshot_scrolling_times,
), ),
) )
elif run.task_run_type == RunType.task_v2: elif run.task_run_type == RunType.task_v2:

View File

@@ -165,6 +165,7 @@ async def initialize_task_v2(
error_code_mapping: dict | None = None, error_code_mapping: dict | None = None,
create_task_run: bool = False, create_task_run: bool = False,
model: dict[str, Any] | None = None, model: dict[str, Any] | None = None,
max_screenshot_scrolling_times: int | None = None,
) -> TaskV2: ) -> TaskV2:
task_v2 = await app.DATABASE.create_task_v2( task_v2 = await app.DATABASE.create_task_v2(
prompt=user_prompt, prompt=user_prompt,
@@ -176,11 +177,13 @@ async def initialize_task_v2(
extracted_information_schema=extracted_information_schema, extracted_information_schema=extracted_information_schema,
error_code_mapping=error_code_mapping, error_code_mapping=error_code_mapping,
model=model, model=model,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
) )
# set task_v2_id in context # set task_v2_id in context
context = skyvern_context.current() context = skyvern_context.current()
if context: if context:
context.task_v2_id = task_v2.observer_cruise_id context.task_v2_id = task_v2.observer_cruise_id
context.max_screenshot_scrolling_times = max_screenshot_scrolling_times
thought = await app.DATABASE.create_thought( thought = await app.DATABASE.create_thought(
task_v2_id=task_v2.observer_cruise_id, task_v2_id=task_v2.observer_cruise_id,
@@ -221,7 +224,9 @@ async def initialize_task_v2(
) )
workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run( workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run(
request_id=None, request_id=None,
workflow_request=WorkflowRequestBody(), workflow_request=WorkflowRequestBody(
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
),
workflow_permanent_id=new_workflow.workflow_permanent_id, workflow_permanent_id=new_workflow.workflow_permanent_id,
organization=organization, organization=organization,
version=None, version=None,
@@ -454,6 +459,7 @@ async def run_task_v2_helper(
request_id=request_id, request_id=request_id,
task_v2_id=task_v2_id, task_v2_id=task_v2_id,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
max_screenshot_scrolling_times=task_v2.max_screenshot_scrolling_times,
) )
) )
@@ -771,6 +777,7 @@ async def run_task_v2_helper(
proxy_location=task_v2.proxy_location or ProxyLocation.RESIDENTIAL, proxy_location=task_v2.proxy_location or ProxyLocation.RESIDENTIAL,
workflow_definition=workflow_definition_yaml, workflow_definition=workflow_definition_yaml,
status=workflow.status, status=workflow.status,
max_screenshot_scrolling_times=task_v2.max_screenshot_scrolling_times,
) )
LOG.info("Creating workflow from request", workflow_create_request=workflow_create_request) LOG.info("Creating workflow from request", workflow_create_request=workflow_create_request)
workflow = await app.WORKFLOW_SERVICE.create_workflow_from_request( workflow = await app.WORKFLOW_SERVICE.create_workflow_from_request(

View File

@@ -97,6 +97,7 @@ async def get_workflow_run_response(
webhook_url=workflow_run.webhook_callback_url or None, webhook_url=workflow_run.webhook_callback_url or None,
totp_url=workflow_run.totp_verification_url or None, totp_url=workflow_run.totp_verification_url or None,
totp_identifier=workflow_run.totp_identifier, totp_identifier=workflow_run.totp_identifier,
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
# TODO: add browser session id # TODO: add browser session id
), ),
) )

View File

@@ -35,7 +35,7 @@ from skyvern.exceptions import (
from skyvern.forge.sdk.api.files import get_download_dir, make_temp_directory from skyvern.forge.sdk.api.files import get_download_dir, make_temp_directory
from skyvern.forge.sdk.core.skyvern_context import current, ensure_context from skyvern.forge.sdk.core.skyvern_context import current, ensure_context
from skyvern.schemas.runs import ProxyLocation, get_tzinfo_from_proxy from skyvern.schemas.runs import ProxyLocation, get_tzinfo_from_proxy
from skyvern.webeye.utils.page import SkyvernFrame from skyvern.webeye.utils.page import ScreenshotMode, SkyvernFrame
LOG = structlog.get_logger() LOG = structlog.get_logger()
@@ -865,6 +865,30 @@ class BrowserState:
except asyncio.TimeoutError: except asyncio.TimeoutError:
LOG.error("Timeout to close playwright, might leave the broswer opening forever") LOG.error("Timeout to close playwright, might leave the broswer opening forever")
async def take_screenshot(self, full_page: bool = False, file_path: str | None = None) -> bytes: async def take_fullpage_screenshot(
self,
file_path: str | None = None,
use_playwright_fullpage: bool = False, # TODO: THIS IS ONLY FOR EXPERIMENT. will be removed after experiment.
) -> bytes:
page = await self.__assert_page() page = await self.__assert_page()
return await SkyvernFrame.take_screenshot(page=page, full_page=full_page, file_path=file_path) return await SkyvernFrame.take_scrolling_screenshot(
page=page,
file_path=file_path,
mode=ScreenshotMode.LITE,
use_playwright_fullpage=use_playwright_fullpage,
)
async def take_post_action_screenshot(
    self,
    scrolling_number: int,
    file_path: str | None = None,
    use_playwright_fullpage: bool = False,  # TODO: THIS IS ONLY FOR EXPERIMENT. will be removed after experiment.
) -> bytes:
    """Capture a LITE-mode scrolling screenshot of this browser state's page.

    Args:
        scrolling_number: Forwarded unchanged as ``scrolling_number`` to
            ``SkyvernFrame.take_scrolling_screenshot``. Presumably the number
            of scroll steps merged into the screenshot -- confirm against
            that helper.
        file_path: Forwarded unchanged to the scrolling screenshot helper.
        use_playwright_fullpage: Experiment toggle, forwarded unchanged.

    Returns:
        Whatever ``SkyvernFrame.take_scrolling_screenshot`` returns for the
        page (annotated here as ``bytes``).
    """
    # Resolve the page first, before the capture call is made.
    active_page = await self.__assert_page()
    screenshot = await SkyvernFrame.take_scrolling_screenshot(
        page=active_page,
        scrolling_number=scrolling_number,
        mode=ScreenshotMode.LITE,
        use_playwright_fullpage=use_playwright_fullpage,
        file_path=file_path,
    )
    return screenshot

View File

@@ -2115,13 +2115,14 @@ async function scrollToNextPage(
draw_boxes, draw_boxes,
frame = "main.frame", frame = "main.frame",
frame_index = undefined, frame_index = undefined,
need_overlap = true,
) { ) {
// remove bounding boxes, scroll to next page with 200px overlap, then draw bounding boxes again // remove bounding boxes, scroll to next page with 200px overlap, then draw bounding boxes again
// return true if there is a next page, false otherwise // return true if there is a next page, false otherwise
removeBoundingBoxes(); removeBoundingBoxes();
window.scrollBy({ window.scrollBy({
left: 0, left: 0,
top: window.innerHeight - 200, top: need_overlap ? window.innerHeight - 200 : window.innerHeight,
behavior: "instant", behavior: "instant",
}); });
if (draw_boxes) { if (draw_boxes) {

View File

@@ -2,9 +2,12 @@ from __future__ import annotations
import asyncio import asyncio
import time import time
from enum import StrEnum
from io import BytesIO
from typing import Any from typing import Any
import structlog import structlog
from PIL import Image
from playwright._impl._errors import TimeoutError from playwright._impl._errors import TimeoutError
from playwright.async_api import ElementHandle, Frame, Page from playwright.async_api import ElementHandle, Frame, Page
@@ -31,17 +34,24 @@ def load_js_script() -> str:
JS_FUNCTION_DEFS = load_js_script() JS_FUNCTION_DEFS = load_js_script()
class ScreenshotMode(StrEnum):
    """Capture profile accepted by the screenshot helpers in this module."""

    # The viewport helper skips the load-state wait for this mode; the
    # scrolling helper forces draw_boxes off and scrolls without overlap.
    LITE = "lite"
    # Default for both helpers: wait for the page load state before capturing
    # and keep the overlap between consecutive scrolled screenshots.
    DETAILED = "detailed"
async def _current_viewpoint_screenshot_helper( async def _current_viewpoint_screenshot_helper(
page: Page, page: Page,
file_path: str | None = None, file_path: str | None = None,
full_page: bool = False, full_page: bool = False,
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS, timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
mode: ScreenshotMode = ScreenshotMode.DETAILED,
) -> bytes: ) -> bytes:
if page.is_closed(): if page.is_closed():
raise FailedToTakeScreenshot(error_message="Page is closed") raise FailedToTakeScreenshot(error_message="Page is closed")
try: try:
await page.wait_for_load_state(timeout=settings.BROWSER_LOADING_TIMEOUT_MS) if mode == ScreenshotMode.DETAILED:
LOG.debug("Page is fully loaded, agent is about to take screenshots") await page.wait_for_load_state(timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
LOG.debug("Page is fully loaded, agent is about to take screenshots")
start_time = time.time() start_time = time.time()
screenshot: bytes = b"" screenshot: bytes = b""
if file_path: if file_path:
@@ -77,6 +87,7 @@ async def _scrolling_screenshots_helper(
url: str | None = None, url: str | None = None,
draw_boxes: bool = False, draw_boxes: bool = False,
max_number: int = settings.MAX_NUM_SCREENSHOTS, max_number: int = settings.MAX_NUM_SCREENSHOTS,
mode: ScreenshotMode = ScreenshotMode.DETAILED,
) -> list[bytes]: ) -> list[bytes]:
skyvern_page = await SkyvernFrame.create_instance(frame=page) skyvern_page = await SkyvernFrame.create_instance(frame=page)
# page is the main frame and the index must be 0 # page is the main frame and the index must be 0
@@ -84,6 +95,11 @@ async def _scrolling_screenshots_helper(
frame = "main.frame" frame = "main.frame"
frame_index = 0 frame_index = 0
# when mode is lite, we don't draw bounding boxes
# since draw_boxes impacts the performance of processing
if mode == ScreenshotMode.LITE:
draw_boxes = False
screenshots: list[bytes] = [] screenshots: list[bytes] = []
if await skyvern_page.is_window_scrollable(): if await skyvern_page.is_window_scrollable():
scroll_y_px_old = -30.0 scroll_y_px_old = -30.0
@@ -92,12 +108,15 @@ async def _scrolling_screenshots_helper(
# We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the # We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the
# page. If the difference is less than 25, we assume we have reached the end of the page. # page. If the difference is less than 25, we assume we have reached the end of the page.
while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number: while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number:
screenshot = await _current_viewpoint_screenshot_helper(page=skyvern_page.frame) screenshot = await _current_viewpoint_screenshot_helper(page=skyvern_page.frame, mode=mode)
screenshots.append(screenshot) screenshots.append(screenshot)
scroll_y_px_old = scroll_y_px scroll_y_px_old = scroll_y_px
LOG.debug("Scrolling to next page", url=url, num_screenshots=len(screenshots)) LOG.debug("Scrolling to next page", url=url, num_screenshots=len(screenshots))
scroll_y_px = await skyvern_page.scroll_to_next_page( scroll_y_px = await skyvern_page.scroll_to_next_page(
draw_boxes=draw_boxes, frame=frame, frame_index=frame_index draw_boxes=draw_boxes,
frame=frame,
frame_index=frame_index,
need_overlap=(mode == ScreenshotMode.DETAILED),
) )
LOG.debug( LOG.debug(
"Scrolled to next page", "Scrolled to next page",
@@ -107,15 +126,17 @@ async def _scrolling_screenshots_helper(
if draw_boxes: if draw_boxes:
await skyvern_page.remove_bounding_boxes() await skyvern_page.remove_bounding_boxes()
await skyvern_page.scroll_to_top(draw_boxes=False, frame=frame, frame_index=frame_index) await skyvern_page.scroll_to_top(draw_boxes=False, frame=frame, frame_index=frame_index)
# wait until animation ends, which is triggered by scrolling
LOG.debug("Waiting for 2 seconds until animation ends.") if mode == ScreenshotMode.DETAILED:
await asyncio.sleep(2) # wait until animation ends, which is triggered by scrolling
LOG.debug("Waiting for 2 seconds until animation ends.")
await asyncio.sleep(2)
else: else:
if draw_boxes: if draw_boxes:
await skyvern_page.build_elements_and_draw_bounding_boxes(frame=frame, frame_index=frame_index) await skyvern_page.build_elements_and_draw_bounding_boxes(frame=frame, frame_index=frame_index)
LOG.debug("Page is not scrollable", url=url, num_screenshots=len(screenshots)) LOG.debug("Page is not scrollable", url=url, num_screenshots=len(screenshots))
screenshot = await _current_viewpoint_screenshot_helper(page=skyvern_page.frame) screenshot = await _current_viewpoint_screenshot_helper(page=skyvern_page.frame, mode=mode)
screenshots.append(screenshot) screenshots.append(screenshot)
if draw_boxes: if draw_boxes:
@@ -144,28 +165,85 @@ class SkyvernFrame:
return await SkyvernFrame.evaluate(frame=frame, expression="() => document.location.href") return await SkyvernFrame.evaluate(frame=frame, expression="() => document.location.href")
@staticmethod
async def take_scrolling_screenshot(
    page: Page,
    file_path: str | None = None,
    timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
    mode: ScreenshotMode = ScreenshotMode.DETAILED,
    scrolling_number: int = settings.MAX_NUM_SCREENSHOTS,
    use_playwright_fullpage: bool = False,  # TODO: THIS IS ONLY FOR EXPERIMENT. will be removed after experiment.
) -> bytes:
    """
    Capture the page as one PNG by scrolling and stitching viewport screenshots.

    :param page: Page to capture.
    :param file_path: If given, the resulting image bytes are also written to this path.
    :param timeout: Time budget in milliseconds. It is forwarded unchanged to the
        single-screenshot helper and converted to seconds for the scrolling capture.
    :param mode: Screenshot mode forwarded to the underlying helpers.
    :param scrolling_number: Maximum number of viewport screenshots to stitch.
        Values <= 0 return a single current-viewpoint screenshot; values above
        ``settings.MAX_NUM_SCREENSHOTS`` are clamped to that limit.
    :param use_playwright_fullpage: Experimental switch that requests a ``full_page``
        screenshot from the single-screenshot helper instead of stitching.
    :return: PNG-encoded bytes when stitching; otherwise whatever the
        single-screenshot helper returns.
    :raises FailedToTakeScreenshot: If the scrolling helper yields no screenshots.
    :raises TimeoutError: Built-in timeout raised by ``asyncio.timeout`` when the
        scrolling capture exceeds the budget.
    """
    if scrolling_number <= 0:
        return await _current_viewpoint_screenshot_helper(
            page=page, file_path=file_path, timeout=timeout, mode=mode
        )

    if use_playwright_fullpage:
        return await _current_viewpoint_screenshot_helper(
            page=page, file_path=file_path, timeout=timeout, full_page=True
        )

    if scrolling_number > settings.MAX_NUM_SCREENSHOTS:
        LOG.warning(
            "scrolling_number is greater than the max number of screenshots, setting it to the max number of screenshots",
            scrolling_number=scrolling_number,
            max_number=settings.MAX_NUM_SCREENSHOTS,
        )
        scrolling_number = settings.MAX_NUM_SCREENSHOTS

    # Use split screenshots stitched together instead of the full-page screenshot from Playwright.
    LOG.debug("Page is fully loaded, agent is about to generate the full page screenshot")
    start_time = time.time()

    # `timeout` is expressed in milliseconds, while asyncio.timeout() expects seconds.
    # Only the awaited capture can be interrupted, so the synchronous stitching
    # below does not need to sit inside the timeout scope.
    async with asyncio.timeout(timeout / 1000):
        screenshots = await _scrolling_screenshots_helper(page=page, mode=mode, max_number=scrolling_number)

    if not screenshots:
        # Without this guard max() below would fail with an opaque ValueError.
        raise FailedToTakeScreenshot(error_message="No screenshots were captured while scrolling")

    images = []
    for screenshot in screenshots:
        with Image.open(BytesIO(screenshot)) as img:
            # Pillow decodes lazily; force the pixel data in before the source is released.
            img.load()
            images.append(img)

    total_height = sum(img.height for img in images)
    max_width = max(img.width for img in images)

    # White canvas so narrower screenshots do not leave undefined pixels on the right.
    merged_img = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))

    current_y = 0
    for img in images:
        merged_img.paste(img, (0, current_y))
        current_y += img.height

    buffer = BytesIO()
    merged_img.save(buffer, format="PNG")
    img_data = buffer.getvalue()

    if file_path is not None:
        with open(file_path, "wb") as f:
            f.write(img_data)

    end_time = time.time()
    LOG.debug(
        "Full page screenshot taking time",
        screenshot_time=end_time - start_time,
        file_path=file_path,
    )
    return img_data
@staticmethod
async def take_split_screenshots(
    page: Page,
    url: str | None = None,
    draw_boxes: bool = False,
    max_number: int = settings.MAX_NUM_SCREENSHOTS,
    scroll: bool = True,
) -> list[bytes]:
    """
    Take one or more viewport screenshots of the page in detailed mode.

    :param page: Page to capture.
    :param url: Forwarded to the scrolling helper, which uses it in its log messages.
    :param draw_boxes: Forwarded to the scrolling helper; unused when ``scroll`` is False.
    :param max_number: Upper bound on screenshots taken while scrolling.
    :param scroll: When False, only the current viewpoint is captured.
    :return: List of screenshots; exactly one element when ``scroll`` is False.
    """
    detailed = ScreenshotMode.DETAILED

    if scroll:
        return await _scrolling_screenshots_helper(
            page=page,
            url=url,
            max_number=max_number,
            draw_boxes=draw_boxes,
            mode=detailed,
        )

    viewpoint_shot = await _current_viewpoint_screenshot_helper(page=page, mode=detailed)
    return [viewpoint_shot]
@classmethod @classmethod
async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame: async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame:
@@ -235,19 +313,21 @@ class SkyvernFrame:
) )
return scroll_y_px return scroll_y_px
async def scroll_to_next_page(
    self, draw_boxes: bool, frame: str, frame_index: int, need_overlap: bool = True
) -> float:
    """
    Scroll this frame to its next page by calling the page-side ``scrollToNextPage`` function.

    :param draw_boxes: Passed through to ``scrollToNextPage``.
    :param frame: Frame identifier passed through to ``scrollToNextPage``.
    :param frame_index: Frame index passed through to ``scrollToNextPage``.
    :param need_overlap: Passed through to ``scrollToNextPage``; presumably controls
        whether consecutive pages overlap -- confirm against the JS implementation.
    :return: The value ``scrollToNextPage`` resolves to, treated by callers as the
        new vertical scroll position in pixels.
    """
    script_args = [draw_boxes, frame, frame_index, need_overlap]
    return await self.evaluate(
        frame=self.frame,
        expression="async ([draw_boxes, frame, frame_index, need_overlap]) => await scrollToNextPage(draw_boxes, frame, frame_index, need_overlap)",
        timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
        arg=script_args,
    )