current viewport screenshot and scrolling n screenshots (#2716)

Co-authored-by: lawyzheng <lawyzheng1106@gmail.com>
This commit is contained in:
Shuchang Zheng
2025-06-13 23:59:50 -07:00
committed by GitHub
parent 11288817af
commit 775da18878
39 changed files with 452 additions and 35 deletions

View File

@@ -141,6 +141,7 @@ export type CreateTaskRequest = {
totp_identifier?: string | null;
application?: string | null;
include_action_history_in_verification?: boolean | null;
max_screenshot_scrolling_times?: number | null;
};
export type User = {
@@ -293,6 +294,7 @@ export type WorkflowRunStatusApiResponse = {
total_cost: number | null;
task_v2: TaskV2 | null;
workflow_title: string | null;
max_screenshot_scrolling_times: number | null;
};
export type TaskGenerationApiResponse = {

View File

@@ -39,6 +39,7 @@ import {
} from "./taskFormTypes";
import { ProxySelector } from "@/components/ProxySelector";
import { Switch } from "@/components/ui/switch";
import { MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT } from "@/routes/workflows/editor/nodes/Taskv2Node/types";
/**
 * Props accepted by `CreateNewTaskForm`.
 */
type Props = {
// Values used to seed the form. The component substitutes fallbacks for
// nullish `maxStepsOverride`, `proxyLocation` and
// `maxScreenshotScrollingTimes` before using them.
initialValues: CreateNewTaskFormValues;
};
@@ -80,6 +81,7 @@ function createTaskRequestObject(
extracted_information_schema: extractedInformationSchema,
totp_identifier: transform(formValues.totpIdentifier),
error_code_mapping: errorCodeMapping,
max_screenshot_scrolling_times: formValues.maxScreenshotScrollingTimes,
include_action_history_in_verification:
formValues.includeActionHistoryInVerification,
};
@@ -114,6 +116,8 @@ function CreateNewTaskForm({ initialValues }: Props) {
...initialValues,
maxStepsOverride: initialValues.maxStepsOverride ?? null,
proxyLocation: initialValues.proxyLocation ?? ProxyLocation.Residential,
maxScreenshotScrollingTimes:
initialValues.maxScreenshotScrollingTimes ?? null,
},
});
const { errors } = useFormState({ control: form.control });
@@ -557,6 +561,45 @@ function CreateNewTaskForm({ initialValues }: Props) {
);
}}
/>
<FormField
control={form.control}
name="maxScreenshotScrollingTimes"
render={({ field }) => (
<FormItem>
<div className="flex gap-16">
<FormLabel>
<div className="w-72">
<h1 className="text-lg">
Max Scrolling Screenshots
</h1>
<h2 className="text-base text-slate-400">
{`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
</h2>
</div>
</FormLabel>
<div className="w-full">
<FormControl>
<Input
{...field}
type="number"
min={0}
value={field.value ?? ""}
placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
onChange={(event) => {
const value =
event.target.value === ""
? null
: Number(event.target.value);
field.onChange(value);
}}
/>
</FormControl>
<FormMessage />
</div>
</div>
</FormItem>
)}
/>
<Separator />
<FormField
control={form.control}

View File

@@ -62,6 +62,7 @@ function CreateNewTaskFormPage() {
webhookCallbackUrl: null,
proxyLocation: null,
includeActionHistoryInVerification: null,
maxScreenshotScrollingTimes: null,
}}
/>
</div>
@@ -131,6 +132,7 @@ function CreateNewTaskFormPage() {
includeActionHistoryInVerification:
data.workflow_definition.blocks[0]
.include_action_history_in_verification,
maxScreenshotScrollingTimes: data.max_screenshot_scrolling_times,
}}
/>
</div>

View File

@@ -43,7 +43,10 @@ import {
generateUniqueEmail,
} from "../data/sampleTaskData";
import { ExampleCasePill } from "./ExampleCasePill";
import { MAX_STEPS_DEFAULT } from "@/routes/workflows/editor/nodes/Taskv2Node/types";
import {
MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT,
MAX_STEPS_DEFAULT,
} from "@/routes/workflows/editor/nodes/Taskv2Node/types";
function createTemplateTaskFromTaskGenerationParameters(
values: TaskGenerationApiResponse,
@@ -153,6 +156,8 @@ function PromptBox() {
const [publishWorkflow, setPublishWorkflow] = useState(false);
const [totpIdentifier, setTotpIdentifier] = useState("");
const [maxStepsOverride, setMaxStepsOverride] = useState<string | null>(null);
const [maxScreenshotScrollingTimes, setMaxScreenshotScrollingTimes] =
useState<string | null>(null);
const [showAdvancedSettings, setShowAdvancedSettings] = useState(false);
const [dataSchema, setDataSchema] = useState<string | null>(null);
@@ -167,6 +172,7 @@ function PromptBox() {
proxy_location: proxyLocation,
totp_identifier: totpIdentifier,
publish_workflow: publishWorkflow,
max_screenshot_scrolling_times: maxScreenshotScrollingTimes,
extracted_information_schema: dataSchema
? (() => {
try {
@@ -438,6 +444,21 @@ function PromptBox() {
/>
</div>
</div>
<div className="flex gap-16">
  <div className="w-48 shrink-0">
    <div className="text-sm">Max Scrolling Screenshots</div>
    <div className="text-xs text-slate-400">
      {`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
    </div>
  </div>
  <Input
    type="number"
    min={0}
    value={maxScreenshotScrollingTimes ?? ""}
    placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
    onChange={(event) => {
      // A numeric input reports "" when it is cleared or holds invalid text.
      // Store null in that case so the request carries "not set" rather than
      // an empty string, matching how the other forms handle this field.
      const rawValue = event.target.value;
      setMaxScreenshotScrollingTimes(rawValue === "" ? null : rawValue);
    }}
  />
</div>
</div>
</div>
) : null}

View File

@@ -44,6 +44,8 @@ function RetryTask() {
proxyLocation: task.request.proxy_location ?? null,
includeActionHistoryInVerification:
task.request.include_action_history_in_verification ?? false,
maxScreenshotScrollingTimes:
task.request.max_screenshot_scrolling_times ?? null,
}}
/>
</div>

View File

@@ -15,6 +15,7 @@ const createNewTaskFormSchemaBase = z.object({
errorCodeMapping: z.string().or(z.null()),
proxyLocation: z.nativeEnum(ProxyLocation).or(z.null()),
includeActionHistoryInVerification: z.boolean().or(z.null()).default(false),
maxScreenshotScrollingTimes: z.number().or(z.null()).default(null),
});
const savedTaskFormSchemaBase = createNewTaskFormSchemaBase.extend({

View File

@@ -28,12 +28,14 @@ import { WorkflowParameter } from "./types/workflowTypes";
import { WorkflowParameterInput } from "./WorkflowParameterInput";
import { AxiosError } from "axios";
import { getLabelForWorkflowParameterType } from "./editor/workflowEditorUtils";
import { MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT } from "./editor/nodes/Taskv2Node/types";
/**
 * Props for the run-workflow form declared below in this file.
 */
type Props = {
workflowParameters: Array<WorkflowParameter>;
initialValues: Record<string, unknown>;
// Run-level settings copied into the form's starting values.
initialSettings: {
proxyLocation: ProxyLocation;
webhookCallbackUrl: string;
// null leaves the input empty, so only the "Default: N" placeholder shows.
maxScreenshotScrollingTimes: number | null;
};
};
@@ -73,14 +75,20 @@ type RunWorkflowRequestBody = {
proxy_location: ProxyLocation | null;
webhook_callback_url?: string | null;
browser_session_id: string | null;
max_screenshot_scrolling_times?: number | null;
};
function getRunWorkflowRequestBody(
values: RunWorkflowFormType,
workflowParameters: Array<WorkflowParameter>,
): RunWorkflowRequestBody {
const { webhookCallbackUrl, proxyLocation, browserSessionId, ...parameters } =
values;
const {
webhookCallbackUrl,
proxyLocation,
browserSessionId,
maxScreenshotScrollingTimes,
...parameters
} = values;
const parsedParameters = parseValuesForWorkflowRun(
parameters,
@@ -95,6 +103,10 @@ function getRunWorkflowRequestBody(
browser_session_id: bsi,
};
// 0 is a meaningful setting ("take the current viewport screenshot"), so a
// truthiness test would silently drop it. Only skip the field when no usable
// number was provided (null/undefined/NaN).
if (
  typeof maxScreenshotScrollingTimes === "number" &&
  !Number.isNaN(maxScreenshotScrollingTimes)
) {
  body.max_screenshot_scrolling_times = maxScreenshotScrollingTimes;
}
if (webhookCallbackUrl) {
body.webhook_callback_url = webhookCallbackUrl;
}
@@ -106,6 +118,7 @@ type RunWorkflowFormType = Record<string, unknown> & {
webhookCallbackUrl: string;
proxyLocation: ProxyLocation;
browserSessionId: string | null;
maxScreenshotScrollingTimes: number | null;
};
function RunWorkflowForm({
@@ -127,6 +140,7 @@ function RunWorkflowForm({
webhookCallbackUrl: initialSettings.webhookCallbackUrl,
proxyLocation: initialSettings.proxyLocation,
browserSessionId: browserSessionIdDefault,
maxScreenshotScrollingTimes: initialSettings.maxScreenshotScrollingTimes,
},
});
const apiCredential = useApiCredential();
@@ -177,6 +191,7 @@ function RunWorkflowForm({
webhookCallbackUrl,
proxyLocation,
browserSessionId,
maxScreenshotScrollingTimes,
...parameters
} = values;
@@ -189,6 +204,7 @@ function RunWorkflowForm({
webhookCallbackUrl,
proxyLocation,
browserSessionId,
maxScreenshotScrollingTimes,
});
}
@@ -392,6 +408,48 @@ function RunWorkflowForm({
);
}}
/>
<FormField
key="maxScreenshotScrollingTimes"
control={form.control}
name="maxScreenshotScrollingTimes"
render={({ field }) => {
return (
<FormItem>
<div className="flex gap-16">
<FormLabel>
<div className="w-72">
<div className="flex items-center gap-2 text-lg">
Max Scrolling Screenshots
</div>
<h2 className="text-sm text-slate-400">
{`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
</h2>
</div>
</FormLabel>
<div className="w-full space-y-2">
<FormControl>
<Input
{...field}
type="number"
min={0}
value={field.value ?? ""}
placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
onChange={(event) => {
const value =
event.target.value === ""
? null
: Number(event.target.value);
field.onChange(value);
}}
/>
</FormControl>
<FormMessage />
</div>
</div>
</FormItem>
);
}}
/>
</div>
<div className="flex justify-end gap-2">
<CopyApiCommandDropdown

View File

@@ -100,6 +100,8 @@ function WorkflowRun() {
const parameters = workflowRun?.parameters ?? {};
const proxyLocation =
workflowRun?.proxy_location ?? ProxyLocation.Residential;
const maxScreenshotScrollingTimes =
workflowRun?.max_screenshot_scrolling_times ?? null;
const title = workflowIsLoading ? (
<Skeleton className="h-9 w-48" />
@@ -244,6 +246,7 @@ function WorkflowRun() {
data: parameters,
proxyLocation,
webhookCallbackUrl: workflowRun?.webhook_callback_url ?? "",
maxScreenshotScrollingTimes,
}}
>
<PlayIcon className="mr-2 h-4 w-4" />

View File

@@ -30,6 +30,8 @@ function WorkflowRunParameters() {
const proxyLocation = location.state
? (location.state.proxyLocation as ProxyLocation)
: null;
const maxScreenshotScrollingTimes =
location.state?.maxScreenshotScrollingTimes ?? null;
const webhookCallbackUrl = location.state
? (location.state.webhookCallbackUrl as string)
@@ -109,6 +111,10 @@ function WorkflowRunParameters() {
ProxyLocation.Residential,
webhookCallbackUrl:
webhookCallbackUrl ?? workflow.webhook_callback_url ?? "",
maxScreenshotScrollingTimes:
maxScreenshotScrollingTimes ??
workflow.max_screenshot_scrolling_times ??
null,
}}
/>
</div>

View File

@@ -289,6 +289,8 @@ function FlowRenderer({
webhook_callback_url: data.settings.webhookCallbackUrl,
persist_browser_session: data.settings.persistBrowserSession,
model: data.settings.model,
max_screenshot_scrolling_times:
data.settings.maxScreenshotScrollingTimes,
totp_verification_url: workflow.totp_verification_url,
workflow_definition: {
parameters: data.parameters,

View File

@@ -60,6 +60,7 @@ function WorkflowEditor() {
proxyLocation: workflow.proxy_location,
webhookCallbackUrl: workflow.webhook_callback_url,
model: workflow.model,
maxScreenshotScrollingTimes: workflow.max_screenshot_scrolling_times,
};
const elements = getElements(

View File

@@ -20,6 +20,7 @@ import { Separator } from "@/components/ui/separator";
import { ModelsResponse } from "@/api/types";
import { ModelSelector } from "@/components/ModelSelector";
import { WorkflowModel } from "@/routes/workflows/types/workflowTypes";
import { MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT } from "../Taskv2Node/types";
function StartNode({ id, data }: NodeProps<StartNode>) {
const credentialGetter = useCredentialGetter();
@@ -51,6 +52,9 @@ function StartNode({ id, data }: NodeProps<StartNode>) {
? data.persistBrowserSession
: false,
model: data.withWorkflowSettings ? data.model : workflowModel,
maxScreenshotScrollingTimes: data.withWorkflowSettings
? data.maxScreenshotScrollingTimes
: null,
});
function handleChange(key: string, value: unknown) {
@@ -130,6 +134,26 @@ function StartNode({ id, data }: NodeProps<StartNode>) {
/>
</div>
</div>
<div className="space-y-2">
  <div className="flex items-center gap-2">
    <Label>Max Scrolling Screenshots</Label>
    <HelpTooltip
      content={`The maximum number of times to scroll down the page to take merged screenshots after action. Default is ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}. If it's set to 0, it will take the current viewport screenshot.`}
    />
  </div>
  <Input
    type="number"
    min={0}
    value={inputs.maxScreenshotScrollingTimes ?? ""}
    placeholder={`Default: ${MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT}`}
    onChange={(event) => {
      // With type="number" the browser reports "" for cleared or invalid
      // input, so Number() below can no longer yield NaN from free text.
      // An empty value maps to null ("use the default").
      const rawValue = event.target.value;
      const value = rawValue === "" ? null : Number(rawValue);
      handleChange("maxScreenshotScrollingTimes", value);
    }}
  />
</div>
</div>
</AccordionContent>
</AccordionItem>

View File

@@ -9,6 +9,7 @@ export type WorkflowStartNodeData = {
proxyLocation: ProxyLocation;
persistBrowserSession: boolean;
model: WorkflowModel | null;
maxScreenshotScrollingTimes: number | null;
editable: boolean;
};

View File

@@ -2,6 +2,7 @@ import { Node } from "@xyflow/react";
import { NodeBaseData } from "../types";
// Default step limit; used as the initial `maxSteps` of `taskv2NodeDefaultData`.
export const MAX_STEPS_DEFAULT = 25;
// Default advertised in the "Max Scrolling Screenshots" help text and input
// placeholders across the task/workflow forms.
// NOTE(review): the UI only displays this number; the effective default is
// presumably applied by the backend — confirm the two stay in sync.
export const MAX_SCREENSHOT_SCROLLING_TIMES_DEFAULT = 3;
export type Taskv2NodeData = NodeBaseData & {
prompt: string;
@@ -9,6 +10,7 @@ export type Taskv2NodeData = NodeBaseData & {
totpVerificationUrl: string | null;
totpIdentifier: string | null;
maxSteps: number | null;
maxScreenshotScrollingTimes: number | null;
};
// Flow-graph node type that pairs `Taskv2NodeData` with the "taskv2" type tag.
export type Taskv2Node = Node<Taskv2NodeData, "taskv2">;
@@ -23,6 +25,7 @@ export const taskv2NodeDefaultData: Taskv2NodeData = {
totpVerificationUrl: null,
maxSteps: MAX_STEPS_DEFAULT,
model: null,
maxScreenshotScrollingTimes: null,
};
export function isTaskV2Node(node: Node): node is Taskv2Node {

View File

@@ -251,6 +251,7 @@ function convertToNode(
maxSteps: block.max_steps,
totpIdentifier: block.totp_identifier,
totpVerificationUrl: block.totp_verification_url,
maxScreenshotScrollingTimes: null,
},
};
}
@@ -662,6 +663,7 @@ function getElements(
proxyLocation: settings.proxyLocation ?? ProxyLocation.Residential,
webhookCallbackUrl: settings.webhookCallbackUrl ?? "",
model: settings.model,
maxScreenshotScrollingTimes: settings.maxScreenshotScrollingTimes,
editable,
}),
);
@@ -1322,6 +1324,7 @@ function getWorkflowSettings(nodes: Array<AppNode>): WorkflowSettings {
proxyLocation: ProxyLocation.Residential,
webhookCallbackUrl: null,
model: null,
maxScreenshotScrollingTimes: null,
};
const startNodes = nodes.filter(isStartNode);
const startNodeWithWorkflowSettings = startNodes.find(
@@ -1337,6 +1340,7 @@ function getWorkflowSettings(nodes: Array<AppNode>): WorkflowSettings {
proxyLocation: data.proxyLocation,
webhookCallbackUrl: data.webhookCallbackUrl,
model: data.model,
maxScreenshotScrollingTimes: data.maxScreenshotScrollingTimes,
};
}
return defaultSettings;
@@ -1992,6 +1996,7 @@ function convert(workflow: WorkflowApiResponse): WorkflowCreateYAMLRequest {
persist_browser_session: workflow.persist_browser_session,
model: workflow.model,
totp_verification_url: workflow.totp_verification_url,
max_screenshot_scrolling_times: workflow.max_screenshot_scrolling_times,
workflow_definition: {
parameters: convertParametersToParameterYAML(userParameters),
blocks: convertBlocksToBlockYAML(workflow.workflow_definition.blocks),

View File

@@ -470,6 +470,7 @@ export type WorkflowApiResponse = {
model: WorkflowModel | null;
totp_verification_url: string | null;
totp_identifier: string | null;
max_screenshot_scrolling_times: number | null;
created_at: string;
modified_at: string;
deleted_at: string | null;
@@ -480,6 +481,7 @@ export type WorkflowSettings = {
webhookCallbackUrl: string | null;
persistBrowserSession: boolean;
model: WorkflowModel | null;
maxScreenshotScrollingTimes: number | null;
};
// Model selection for a workflow: an object carrying a string `model_name`.
// NOTE(review): `JsonObjectExtendable` is declared elsewhere — presumably it
// permits additional JSON keys alongside `model_name`; confirm.
export type WorkflowModel = JsonObjectExtendable<{ model_name: string }>;

View File

@@ -12,6 +12,7 @@ export type WorkflowCreateYAMLRequest = {
totp_verification_url?: string | null;
workflow_definition: WorkflowDefinitionYAML;
is_saved_task?: boolean;
max_screenshot_scrolling_times?: number | null;
};
export type WorkflowDefinitionYAML = {