From 58bd43171ec51a085cefa4e3c24c383f73a955cc Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Sun, 10 Aug 2025 13:16:46 -0700 Subject: [PATCH] Script generation (#3157) --- skyvern/__init__.py | 24 +- .../code_generations/workflow_wrappers.py | 59 ----- .../__init__.py | 0 .../generate_script.py} | 209 ++++++++++++++++-- .../run_initializer.py | 5 +- .../script_run_context_manager.py} | 4 +- .../skyvern_page.py | 121 +++++++--- .../transform_workflow_run.py | 4 +- .../script_generations/workflow_wrappers.py | 195 ++++++++++++++++ skyvern/forge/sdk/db/client.py | 86 +++++-- skyvern/forge/sdk/routes/scripts.py | 42 +--- skyvern/forge/sdk/workflow/service.py | 6 +- skyvern/services/script_service.py | 50 ++++- skyvern/webeye/actions/action_types.py | 1 - skyvern/webeye/actions/handler.py | 65 +----- skyvern/webeye/actions/handler_utils.py | 81 ++++++- 16 files changed, 708 insertions(+), 244 deletions(-) delete mode 100644 skyvern/core/code_generations/workflow_wrappers.py rename skyvern/core/{code_generations => script_generations}/__init__.py (100%) rename skyvern/core/{code_generations/generate_code.py => script_generations/generate_script.py} (61%) rename skyvern/core/{code_generations => script_generations}/run_initializer.py (71%) rename skyvern/core/{code_generations/code_run_context_manager.py => script_generations/script_run_context_manager.py} (84%) rename skyvern/core/{code_generations => script_generations}/skyvern_page.py (60%) rename skyvern/core/{code_generations => script_generations}/transform_workflow_run.py (95%) create mode 100644 skyvern/core/script_generations/workflow_wrappers.py diff --git a/skyvern/__init__.py b/skyvern/__init__.py index 51dacac6..9415f588 100644 --- a/skyvern/__init__.py +++ b/skyvern/__init__.py @@ -23,14 +23,16 @@ setup_logger() from skyvern.forge import app # noqa: E402, F401 from skyvern.library import Skyvern # noqa: E402 -from skyvern.core.code_generations.skyvern_page import RunContext, SkyvernPage # noqa: E402 -from skyvern.core.code_generations.run_initializer import setup # noqa: E402 -from skyvern.core.code_generations.workflow_wrappers import ( # noqa: E402 - workflow, # noqa: E402 - task_block, # noqa: E402 - file_download_block, # noqa: E402 +from skyvern.core.script_generations.skyvern_page import RunContext, SkyvernPage # noqa: E402 +from skyvern.core.script_generations.run_initializer import setup # noqa: E402 +from skyvern.core.script_generations.workflow_wrappers import ( # noqa: E402 email_block, # noqa: E402 + file_download_block, # noqa: E402 + navigation_block, # noqa: E402 + task_block, # noqa: E402 + url_block, # noqa: E402 wait_block, # noqa: E402 + workflow, # noqa: E402 ) # noqa: E402 @@ -38,10 +40,12 @@ __all__ = [ "Skyvern", "SkyvernPage", "RunContext", - "setup", - "workflow", - "task_block", - "file_download_block", "email_block", + "file_download_block", + "navigation_block", + "setup", + "task_block", + "url_block", "wait_block", + "workflow", ] diff --git a/skyvern/core/code_generations/workflow_wrappers.py b/skyvern/core/code_generations/workflow_wrappers.py deleted file mode 100644 index c366c284..00000000 --- a/skyvern/core/code_generations/workflow_wrappers.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import Any, Callable - - -# Build a dummy workflow decorator -def workflow( - title: str | None = None, - totp_url: str | None = None, - totp_identifier: str | None = None, - webhook_url: str | None = None, - max_steps: int | None = None, -) -> Callable: - def wrapper(func: Callable) -> Callable: - return func - - return wrapper - - -def task_block( - prompt: str | None = None, - title: str | None = None, - url: str | None = None, - engine: str | None = None, - model: dict[str, Any] | None = None, - totp_url: str | None = None, - totp_identifier: str | None = None, - max_steps: int | None = None, - navigation_payload: str | None = None, - webhook_url: str | None = None, -) -> Callable: - def decorator(func: Callable) -> Callable: - return func - - return decorator - - -def file_download_block( - prompt: str | None = None, - title: str | None = None, - url: str | None = None, - max_steps: int | None = None, -) -> Callable: - def decorator(func: Callable) -> Callable: - return func - - return decorator - - -def email_block(prompt: str | None = None, title: str | None = None, url: str | None = None) -> Callable: - def decorator(func: Callable) -> Callable: - return func - - return decorator - - -def wait_block(seconds: int) -> Callable: - def decorator(func: Callable) -> Callable: - return func - - return decorator diff --git a/skyvern/core/code_generations/__init__.py b/skyvern/core/script_generations/__init__.py similarity index 100% rename from skyvern/core/code_generations/__init__.py rename to skyvern/core/script_generations/__init__.py diff --git a/skyvern/core/code_generations/generate_code.py b/skyvern/core/script_generations/generate_script.py similarity index 61% rename from skyvern/core/code_generations/generate_code.py rename to skyvern/core/script_generations/generate_script.py index 20dda983..6e0e272a 100644 --- a/skyvern/core/code_generations/generate_code.py +++ b/skyvern/core/script_generations/generate_script.py @@ -16,15 +16,21 @@ Path("workflow.py").write_text(src) from __future__ import annotations +import hashlib import keyword from enum import StrEnum from typing import Any import libcst as cst +import structlog from libcst import Attribute, Call, Dict, DictElement, FunctionDef, Name, Param +from skyvern.forge import app from skyvern.webeye.actions.action_types import ActionType +LOG = structlog.get_logger(__name__) + + # --------------------------------------------------------------------- # # 1. helpers # # --------------------------------------------------------------------- # @@ -45,6 +51,12 @@ ACTION_MAP = { "wait": "wait", "extract": "extract", } +ACTIONS_WITH_XPATH = [ + "click", + "input_text", + "upload_file", + "select_option", +] INDENT = " " * 4 @@ -130,6 +142,12 @@ def _make_decorator(block: dict[str, Any]) -> cst.Decorator: "send_email": "email_block", "wait": "wait_block", "navigation": "navigation_block", + "for_loop": "for_loop_block", + "action": "action_block", + "extraction": "extraction_block", + "login": "login_block", + "text_prompt": "text_prompt_block", + "goto_url": "url_block", }[bt] kwargs = [] @@ -177,17 +195,28 @@ def _action_to_stmt(act: dict[str, Any]) -> cst.BaseStatement: """ method = ACTION_MAP[act["action_type"]] - args = [ - cst.Arg(keyword=cst.Name("xpath"), value=_value(act["xpath"])), - cst.Arg( - keyword=cst.Name("intention"), - value=_value(act.get("intention") or act.get("reasoning") or ""), - ), - cst.Arg( - keyword=cst.Name("data"), - value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")), - ), - ] + args: list[cst.Arg] = [] + if method == "input_text": + args.append(cst.Arg(keyword=cst.Name("text"), value=_value(act["text"]))) + elif method == "select_option": + args.append(cst.Arg(keyword=cst.Name("option"), value=_value(act["option"]["value"]))) + elif method == "wait": + args.append(cst.Arg(keyword=cst.Name("seconds"), value=_value(act["seconds"]))) + + args.extend( + [ + cst.Arg( + keyword=cst.Name("intention"), + value=_value(act.get("intention") or act.get("reasoning") or ""), + ), + cst.Arg( + keyword=cst.Name("data"), + value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")), + ), + ] + ) + if method in ACTIONS_WITH_XPATH: + args.append(cst.Arg(keyword=cst.Name("xpath"), value=_value(act["xpath"]))) call = cst.Call( func=cst.Attribute(value=cst.Name("page"), attr=cst.Name(method)), @@ -209,7 +238,7 @@ def _build_block_fn(block: dict[str, Any], actions: list[dict[str, Any]]) -> Fun body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})")) for act in actions: - if act["action_type"] in [ActionType.COMPLETE]: + if act["action_type"] in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION]: continue body_stmts.append(_action_to_stmt(act)) @@ -329,19 +358,22 @@ def _build_run_fn(task_titles: list[str], wf_req: dict[str, Any]) -> FunctionDef # --------------------------------------------------------------------- # -def generate_workflow_script( +async def generate_workflow_script( *, file_name: str, workflow_run_request: dict[str, Any], workflow: dict[str, Any], tasks: list[dict[str, Any]], actions_by_task: dict[str, list[dict[str, Any]]], + organization_id: str | None = None, + run_id: str | None = None, ) -> str: """ Build a LibCST Module and emit .code (PEP-8-formatted source). """ # --- imports -------------------------------------------------------- imports: list[cst.BaseStatement] = [ + cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("asyncio"))])]), cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("pydantic"))])]), cst.SimpleStatementLine( [ @@ -372,8 +404,43 @@ def generate_workflow_script( # --- blocks --------------------------------------------------------- block_fns = [] length_of_tasks = len(tasks) + + # Create script first if organization_id is provided + script_id = None + script_revision_id = None + if organization_id: + try: + script = await app.DATABASE.create_script( + organization_id=organization_id, + run_id=run_id, + ) + script_id = script.script_id + script_revision_id = script.script_revision_id + except Exception as e: + LOG.error("Failed to create script", error=str(e), exc_info=True) + # Continue without script creation if it fails + for idx, task in enumerate(tasks): - block_fns.append(_build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))) + block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), [])) + + # Create script block if we have script context + if script_id and script_revision_id and organization_id: + try: + block_name = task.get("title") or task.get("label") or task.get("task_id") or f"task_{idx}" + block_description = f"Generated block for task: {block_name}" + await create_script_block( + block_fn_def=block_fn_def, + script_revision_id=script_revision_id, + script_id=script_id, + organization_id=organization_id, + block_name=block_name, + block_description=block_description, + ) + except Exception as e: + LOG.error("Failed to create script block", error=str(e), exc_info=True) + # Continue without script block creation if it fails + + block_fns.append(block_fn_def) if idx < length_of_tasks - 1: block_fns.append(cst.EmptyLine()) block_fns.append(cst.EmptyLine()) @@ -400,8 +467,122 @@ def generate_workflow_script( cst.EmptyLine(), cst.EmptyLine(), run_fn, + cst.EmptyLine(), + cst.EmptyLine(), + cst.parse_statement("if __name__ == '__main__':\n asyncio.run(run_workflow())"), ] ) + + # Create main script file if we have script context + if script_id and script_revision_id and organization_id: + try: + main_script_code = module.code + main_file_name = "main.py" + main_file_path = main_file_name + + # Create artifact and upload to S3 + artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact( + organization_id=organization_id, + script_id=script_id, + script_version=1, # Assuming version 1 for now + file_path=main_file_path, + data=main_script_code.encode("utf-8"), + ) + + # Create script file record for main file + await app.DATABASE.create_script_file( + script_revision_id=script_revision_id, + script_id=script_id, + organization_id=organization_id, + file_path=main_file_path, + file_name=main_file_name, + file_type="file", + content_hash=f"sha256:{hashlib.sha256(main_script_code.encode('utf-8')).hexdigest()}", + file_size=len(main_script_code.encode("utf-8")), + mime_type="text/x-python", + artifact_id=artifact_id, + ) + except Exception as e: + LOG.error("Failed to create main script file", error=str(e), exc_info=True) + # Continue without main script file creation if it fails + with open(file_name, "w") as f: f.write(module.code) return module.code + + +async def create_script_block( + block_fn_def: FunctionDef, + script_revision_id: str, + script_id: str, + organization_id: str, + block_name: str, + block_description: str | None = None, +) -> None: + """ + Create a script block in the database and save the block code to a script file. + + Args: + block_fn_def: The LibCST function definition to save + script_revision_id: The script revision ID + script_id: The script ID + organization_id: The organization ID + block_name: Optional custom name for the block (defaults to function name) + block_description: Optional description for the block + """ + try: + # Step 1: Transform the block function definition to a string + block_code = block_fn_def.code + + # Step 2: Use the function name as block name if not provided + if not block_name: + block_name = block_fn_def.name.value + + # Step 3: Create script block in database + script_block = await app.DATABASE.create_script_block( + script_revision_id=script_revision_id, + script_id=script_id, + organization_id=organization_id, + script_block_label=block_name, + ) + + # Step 4: Create script file for the block + # Generate a unique filename for the block + file_name = f"{block_name}.skyvern" + file_path = f"blocks/{script_block.script_block_id}/{file_name}" + + # Create artifact and upload to S3 + artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact( + organization_id=organization_id, + script_id=script_id, + script_version=1, # Assuming version 1 for now + file_path=file_path, + data=block_code.encode("utf-8"), + ) + + # Create script file record + script_file = await app.DATABASE.create_script_file( + script_revision_id=script_revision_id, + script_id=script_id, + organization_id=organization_id, + file_path=file_path, + file_name=file_name, + file_type="file", + content_hash=f"sha256:{hashlib.sha256(block_code.encode('utf-8')).hexdigest()}", + file_size=len(block_code.encode("utf-8")), + mime_type="text/x-python", + artifact_id=artifact_id, + ) + + # update script block with script file id + await app.DATABASE.update_script_block( + script_block_id=script_block.script_block_id, + organization_id=organization_id, + script_file_id=script_file.script_file_id, + ) + + except Exception as e: + # Log error but don't fail the entire generation process + LOG.error("Failed to create script block", error=str(e), exc_info=True) + # For now, just log the error and continue + # In production, you might want to handle this differently diff --git a/skyvern/core/code_generations/run_initializer.py b/skyvern/core/script_generations/run_initializer.py similarity index 71% rename from skyvern/core/code_generations/run_initializer.py rename to skyvern/core/script_generations/run_initializer.py index bfdc040e..b6830e2e 100644 --- a/skyvern/core/code_generations/run_initializer.py +++ b/skyvern/core/script_generations/run_initializer.py @@ -2,13 +2,13 @@ from typing import Any from playwright.async_api import async_playwright -from skyvern.core.code_generations.skyvern_page import RunContext, SkyvernPage +from skyvern.core.script_generations.skyvern_page import RunContext, SkyvernPage from skyvern.forge.sdk.core import skyvern_context from skyvern.webeye.browser_factory import BrowserContextFactory # TODO: find a better name for this function -async def setup(parameters: dict[str, Any]) -> tuple[SkyvernPage, RunContext]: +async def setup(parameters: dict[str, Any], generate_response: bool = False) -> tuple[SkyvernPage, RunContext]: # set up skyvern context skyvern_context.set(skyvern_context.SkyvernContext()) # start playwright @@ -19,5 +19,6 @@ async def setup(parameters: dict[str, Any]) -> tuple[SkyvernPage, RunContext]: _, ) = await BrowserContextFactory.create_browser_context(playwright=pw) new_page = await browser_context.new_page() + # skyvern_page = SkyvernPage(page=new_page, generate_response=generate_response) skyvern_page = SkyvernPage(page=new_page) return skyvern_page, RunContext(parameters=parameters, page=skyvern_page) diff --git a/skyvern/core/code_generations/code_run_context_manager.py b/skyvern/core/script_generations/script_run_context_manager.py similarity index 84% rename from skyvern/core/code_generations/code_run_context_manager.py rename to skyvern/core/script_generations/script_run_context_manager.py index 12087978..c1b79cfa 100644 --- a/skyvern/core/code_generations/code_run_context_manager.py +++ b/skyvern/core/script_generations/script_run_context_manager.py @@ -1,7 +1,7 @@ -from skyvern.core.code_generations.skyvern_page import RunContext +from skyvern.core.script_generations.skyvern_page import RunContext -class CodeRunContextManager: +class ScriptRunContextManager: """ Manages the run context for code runs. """ diff --git a/skyvern/core/code_generations/skyvern_page.py b/skyvern/core/script_generations/skyvern_page.py similarity index 60% rename from skyvern/core/code_generations/skyvern_page.py rename to skyvern/core/script_generations/skyvern_page.py index 74ee7abc..b81467b6 100644 --- a/skyvern/core/code_generations/skyvern_page.py +++ b/skyvern/core/script_generations/skyvern_page.py @@ -1,8 +1,9 @@ from __future__ import annotations +import asyncio from dataclasses import dataclass from enum import StrEnum -from typing import Any, Callable +from typing import Any, Callable, Literal from playwright.async_api import Page @@ -48,6 +49,7 @@ class SkyvernPage: driver: Driver = Driver.PLAYWRIGHT, *, recorder: Callable[[ActionCall], None] | None = None, + # generate_response: bool = False, ): self.driver = driver self.page = page # e.g. Playwright's Page @@ -95,7 +97,32 @@ class SkyvernPage: ######### Public Interfaces ######### @action_wrap(ActionType.CLICK) async def click(self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: - locator = self.page.locator(xpath) + # if self.generate_response: + # # TODO: get element tree + # # generate click action based on the current html + # single_click_prompt = prompt_engine.load_prompt( + # template="single-click-action", + # navigation_goal=intention, + # navigation_payload_str=data, + # current_url=self.page.url, + # elements=element_tree, + # local_datetime=datetime.now(context.tz_info).isoformat(), + # user_context=context.prompt, + # ) + # json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER( + # prompt=single_click_prompt, + # prompt_name="single-click-action", + # step=step, + # ) + # click_actions = parse_actions(new_task, step.step_id, step.order, scraped_page, json_response["actions"]) + # if not click_actions: + # raise CachedActionPlanError("No click actions to execute") + # for click_action in click_actions: + # await _handle_action( + # click_action, step, new_task, scraped_page, current_page, detailed_output, browser_state, engine + # ) + + locator = self.page.locator(f"xpath={xpath}") await locator.click(timeout=5000) @action_wrap(ActionType.INPUT_TEXT) @@ -107,53 +134,83 @@ class SkyvernPage: data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, ) -> None: - locator = self.page.locator(xpath) + # if self.generate_response: + # # TODO: regenerate text + # pass + locator = self.page.locator(f"xpath={xpath}") await handler_utils.input_sequentially(locator, text, timeout=timeout) @action_wrap(ActionType.UPLOAD_FILE) async def upload_file( self, xpath: str, file_path: str, intention: str | None = None, data: str | dict[str, Any] | None = None ) -> None: + # if self.generate_response: + # # TODO: regenerate file_path and xpath + # pass file = await download_file(file_path) await self.page.set_input_files(xpath, file) @action_wrap(ActionType.SELECT_OPTION) async def select_option( - self, xpath: str, option: str, intention: str | None = None, data: str | dict[str, Any] | None = None + self, + xpath: str, + option: str, + intention: str | None = None, + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, ) -> None: - locator = self.page.locator(xpath) - await locator.select_option(option, timeout=5000) + # if self.generate_response: + # # TODO: regenerate option + # pass + locator = self.page.locator(f"xpath={xpath}") + try: + await locator.click(timeout=timeout) + except Exception: + print("Failed to click before select action") + return + await locator.select_option(option, timeout=timeout) @action_wrap(ActionType.WAIT) async def wait( self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + ) -> None: + await asyncio.sleep(seconds) @action_wrap(ActionType.NULL_ACTION) - async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ... + async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + return @action_wrap(ActionType.SOLVE_CAPTCHA) async def solve_captcha( self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + ) -> None: + await asyncio.sleep(30) @action_wrap(ActionType.TERMINATE) async def terminate( self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + ) -> None: + # TODO: update the workflow run status to terminated + return @action_wrap(ActionType.COMPLETE) async def complete( self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + ) -> None: + # TODO: update the workflow run status to completed + return @action_wrap(ActionType.RELOAD_PAGE) - async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ... + async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + await self.page.reload() + return @action_wrap(ActionType.EXTRACT) async def extract( self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + ) -> None: + # TODO: extract the data + return @action_wrap(ActionType.VERIFICATION_CODE) async def verification_code( @@ -162,37 +219,48 @@ class SkyvernPage: @action_wrap(ActionType.SCROLL) async def scroll( - self, amount: int, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + self, scroll_x: int, scroll_y: int, intention: str | None = None, data: str | dict[str, Any] | None = None + ) -> None: + await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") @action_wrap(ActionType.KEYPRESS) async def keypress( - self, key: str, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... - - @action_wrap(ActionType.TYPE) - async def type(self, text: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ... + self, + keys: list[str], + hold: bool = False, + duration: float = 0, + intention: str | None = None, + data: str | dict[str, Any] | None = None, + ) -> None: + await handler_utils.keypress(self.page, keys, hold=hold, duration=duration) @action_wrap(ActionType.MOVE) async def move( self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + ) -> None: + await self.page.mouse.move(x, y) @action_wrap(ActionType.DRAG) async def drag( self, start_x: int, start_y: int, - end_x: int, - end_y: int, + path: list[tuple[int, int]], intention: str | None = None, data: str | dict[str, Any] | None = None, - ) -> None: ... + ) -> None: + await handler_utils.drag(self.page, start_x, start_y, path) @action_wrap(ActionType.LEFT_MOUSE) async def left_mouse( - self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None - ) -> None: ... + self, + x: int, + y: int, + direction: Literal["down", "up"], + intention: str | None = None, + data: str | dict[str, Any] | None = None, + ) -> None: + await handler_utils.left_mouse(self.page, x, y, direction) class RunContext: @@ -204,3 +272,4 @@ class RunContext: self.parameters = parameters self.page = page self.trace: list[ActionCall] = [] + self.prompt: str | None = None diff --git a/skyvern/core/code_generations/transform_workflow_run.py b/skyvern/core/script_generations/transform_workflow_run.py similarity index 95% rename from skyvern/core/code_generations/transform_workflow_run.py rename to skyvern/core/script_generations/transform_workflow_run.py index 95ee41b0..710e5ca5 100644 --- a/skyvern/core/code_generations/transform_workflow_run.py +++ b/skyvern/core/script_generations/transform_workflow_run.py @@ -63,7 +63,9 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz LOG.warning(f"Task {block.task_id} not found") continue block_dump.update(task.model_dump()) - actions = await app.DATABASE.get_task_actions(task_id=block.task_id, organization_id=organization_id) + actions = await app.DATABASE.get_task_actions_hydrated( + task_id=block.task_id, organization_id=organization_id + ) action_dumps = [] for action in actions: action_dump = action.model_dump() diff --git a/skyvern/core/script_generations/workflow_wrappers.py b/skyvern/core/script_generations/workflow_wrappers.py new file mode 100644 index 00000000..6f5ced3e --- /dev/null +++ b/skyvern/core/script_generations/workflow_wrappers.py @@ -0,0 +1,195 @@ +from typing import Any, Callable + +from skyvern import RunContext, SkyvernPage + + +# Build a dummy workflow decorator +def workflow( + title: str | None = None, + totp_url: str | None = None, + totp_identifier: str | None = None, + webhook_url: str | None = None, + max_steps: int | None = None, +) -> Callable: + def wrapper(func: Callable) -> Callable: + return func + + return wrapper + + +def task_block( + prompt: str | None = None, + title: str | None = None, + url: str | None = None, + engine: str | None = None, + model: dict[str, Any] | None = None, + totp_url: str | None = None, + totp_identifier: str | None = None, + max_steps: int | None = None, + navigation_payload: str | None = None, + webhook_url: str | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def login_block( + prompt: str | None = None, + title: str | None = None, + url: str | None = None, + engine: str | None = None, + model: dict[str, Any] | None = None, + totp_url: str | None = None, + totp_identifier: str | None = None, + max_steps: int | None = None, + navigation_payload: str | None = None, + webhook_url: str | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def navigation_block( + prompt: str | None = None, + title: str | None = None, + url: str | None = None, + engine: str | None = None, + model: dict[str, Any] | None = None, + totp_url: str | None = None, + totp_identifier: str | None = None, + max_steps: int | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def action_block( + prompt: str | None = None, + title: str | None = None, + url: str | None = None, + engine: str | None = None, + model: dict[str, Any] | None = None, + totp_url: str | None = None, + totp_identifier: str | None = None, + max_steps: int | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def extraction_block( + title: str | None = None, + data_extraction_goal: str | None = None, + data_extraction_schema: dict[str, Any] | list | str | None = None, + model: dict[str, Any] | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the data_extraction_goal as prompt in the context + context.prompt = data_extraction_goal + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def url_block( + title: str | None = None, + url: str | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # No prompt to store for url_block + context.prompt = None + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def file_download_block( + prompt: str | None = None, + title: str | None = None, + url: str | None = None, + max_steps: int | None = None, + engine: str | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def email_block(prompt: str | None = None, title: str | None = None, url: str | None = None) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def wait_block(seconds: int, title: str | None = None) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # No prompt to store for wait_block + context.prompt = None + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator + + +def text_prompt_block( + prompt: str | None = None, + title: str | None = None, + json_schema: dict[str, Any] | list | str | None = None, +) -> Callable: + def decorator(func: Callable) -> Callable: + async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any: + # Store the prompt in the context + context.prompt = prompt + return await func(page, context, *args, **kwargs) + + return wrapper + + return decorator diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index e99d9e34..d90acff4 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -3742,31 +3742,71 @@ class AgentDB: mime_type: str | None = None, encoding: str = "utf-8", artifact_id: str | None = None, - ) -> None: - """Create a script file record.""" - try: - async with self.Session() as session: - script_file = ScriptFileModel( - script_revision_id=script_revision_id, - script_id=script_id, - organization_id=organization_id, - file_path=file_path, - file_name=file_name, - file_type=file_type, - content_hash=content_hash, - file_size=file_size, - mime_type=mime_type, - encoding=encoding, - artifact_id=artifact_id, + ) -> ScriptFile: + """Create a script file.""" + async with self.Session() as session: + script_file = ScriptFileModel( + script_revision_id=script_revision_id, + script_id=script_id, + organization_id=organization_id, + file_path=file_path, + file_name=file_name, + file_type=file_type, + content_hash=content_hash, + file_size=file_size, + mime_type=mime_type, + encoding=encoding, + artifact_id=artifact_id, + ) + session.add(script_file) + await session.commit() + await session.refresh(script_file) + return convert_to_script_file(script_file) + + async def create_script_block( + self, + script_revision_id: str, + script_id: str, + organization_id: str, + script_block_label: str, + script_file_id: str | None = None, + ) -> ScriptBlock: + """Create a script block.""" + async with self.Session() as session: + script_block = ScriptBlockModel( + script_revision_id=script_revision_id, + script_id=script_id, + organization_id=organization_id, + script_block_label=script_block_label, + script_file_id=script_file_id, + ) + session.add(script_block) + await session.commit() + await session.refresh(script_block) + return convert_to_script_block(script_block) + + async def update_script_block( + self, + script_block_id: str, + organization_id: str, + script_file_id: str | None = None, + ) -> ScriptBlock: + async with self.Session() as session: + script_block = ( + await session.scalars( + select(ScriptBlockModel) + .filter_by(script_block_id=script_block_id) + .filter_by(organization_id=organization_id) ) - session.add(script_file) + ).first() + if script_block: + if script_file_id: + script_block.script_file_id = script_file_id await session.commit() - except SQLAlchemyError: - LOG.error("SQLAlchemyError", exc_info=True) - raise - except Exception: - LOG.error("UnexpectedError", exc_info=True) - raise + await session.refresh(script_block) + return convert_to_script_block(script_block) + else: + raise NotFoundError("Script block not found") async def get_script_files(self, script_revision_id: str, organization_id: str) -> list[ScriptFile]: async with self.Session() as session: diff --git a/skyvern/forge/sdk/routes/scripts.py b/skyvern/forge/sdk/routes/scripts.py index 12c0236a..99a25258 100644 --- a/skyvern/forge/sdk/routes/scripts.py +++ b/skyvern/forge/sdk/routes/scripts.py @@ -35,44 +35,12 @@ async def create_script( current_org: Organization = Depends(org_auth_service.get_current_org), ) -> CreateScriptResponse: """Create a new script with optional files and metadata.""" - organization_id = current_org.organization_id - LOG.info( - "Creating script", - organization_id=organization_id, - file_count=len(data.files) if data.files else 0, + return await script_service.create_script( + organization_id=current_org.organization_id, + workflow_id=data.workflow_id, + run_id=data.run_id, + files=data.files, ) - if data.run_id: - if not await app.DATABASE.get_run(run_id=data.run_id, organization_id=organization_id): - raise HTTPException(status_code=404, detail=f"Run_id {data.run_id} not found") - try: - # Create the script in the database - script = await app.DATABASE.create_script( - organization_id=organization_id, - run_id=data.run_id, - ) - # Process files if provided - file_tree = {} - file_count = 0 - if data.files: - file_tree = await script_service.build_file_tree( - data.files, - organization_id=organization_id, - script_id=script.script_id, - script_version=script.version, - script_revision_id=script.script_revision_id, - ) - file_count = len(data.files) - return CreateScriptResponse( - script_id=script.script_id, - version=script.version, - run_id=script.run_id, - file_count=file_count, - created_at=script.created_at, - file_tree=file_tree, - ) - except Exception as e: - LOG.error("Failed to create script", error=str(e), exc_info=True) - raise HTTPException(status_code=500, detail="Failed to create script") @base_router.get( diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index ac7d828f..8378d435 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -11,8 +11,8 @@ from jinja2.sandbox import SandboxedEnvironment from skyvern import analytics from skyvern.config import settings from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT -from skyvern.core.code_generations.generate_code import generate_workflow_script as generate_python_workflow_script -from skyvern.core.code_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input +from skyvern.core.script_generations.generate_script import generate_workflow_script as generate_python_workflow_script +from skyvern.core.script_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input from skyvern.exceptions import ( BlockNotFound, BrowserSessionNotFound, @@ -2287,7 +2287,7 @@ class WorkflowService: workflow_run_id=workflow_run.workflow_run_id, organization_id=workflow.organization_id, ) - python_src = generate_python_workflow_script( + python_src = await generate_python_workflow_script( file_name=codegen_input.file_name, workflow_run_request=codegen_input.workflow_run, workflow=codegen_input.workflow, diff --git a/skyvern/services/script_service.py b/skyvern/services/script_service.py index 322fb54e..dfc767bc 100644 --- a/skyvern/services/script_service.py +++ b/skyvern/services/script_service.py @@ -5,11 +5,11 @@ import subprocess from datetime import datetime import structlog -from fastapi import BackgroundTasks +from fastapi import BackgroundTasks, HTTPException from skyvern.exceptions import ScriptNotFound from skyvern.forge import app -from skyvern.schemas.scripts import FileNode, ScriptFileCreate +from skyvern.schemas.scripts import CreateScriptResponse, FileNode, ScriptFileCreate LOG = structlog.get_logger(__name__) @@ -96,6 +96,52 @@ async def build_file_tree( return file_tree +async def create_script( + organization_id: str, + workflow_id: str | None = None, + run_id: str | None = None, + files: list[ScriptFileCreate] | None = None, +) -> CreateScriptResponse: + LOG.info( + "Creating script", + organization_id=organization_id, + file_count=len(files) if files else 0, + ) + + try: + if run_id and not await app.DATABASE.get_run(run_id=run_id, organization_id=organization_id): + raise HTTPException(status_code=404, detail=f"Run_id {run_id} not found") + + script = await app.DATABASE.create_script( + organization_id=organization_id, + run_id=run_id, + ) + + file_tree: dict[str, FileNode] = {} + file_count = 0 + if files: + file_tree = await build_file_tree( + files, + organization_id=organization_id, + script_id=script.script_id, + script_version=script.version, + script_revision_id=script.script_revision_id, + ) + file_count = len(files) + + return CreateScriptResponse( + script_id=script.script_id, + version=script.version, + run_id=script.run_id, + file_count=file_count, + created_at=script.created_at, + file_tree=file_tree, + ) + except Exception as e: + LOG.error("Failed to create script", error=str(e), exc_info=True) + raise HTTPException(status_code=500, detail="Failed to create script") + + async def execute_script( script_id: str, organization_id: str, diff --git a/skyvern/webeye/actions/action_types.py b/skyvern/webeye/actions/action_types.py index bd91cd57..4ab32121 100644 --- a/skyvern/webeye/actions/action_types.py +++ b/skyvern/webeye/actions/action_types.py @@ -23,7 +23,6 @@ class ActionType(StrEnum): SCROLL = "scroll" KEYPRESS = "keypress" - TYPE = "type" MOVE = "move" DRAG = "drag" LEFT_MOUSE = "left_mouse" diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index b1853050..f842dc42 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -1804,55 +1804,7 @@ async def handle_keypress_action( task: Task, step: Step, ) -> list[ActionResult]: - updated_keys = [] - for key in action.keys: - key_lower_case = key.lower() - if key_lower_case in ("enter", "return"): - updated_keys.append("Enter") - elif key_lower_case == "space": - updated_keys.append(" ") - elif key_lower_case == "ctrl": - updated_keys.append("Control") - elif key_lower_case == "backspace": - updated_keys.append("Backspace") - elif key_lower_case == "pagedown": - updated_keys.append("PageDown") - elif key_lower_case == "pageup": - updated_keys.append("PageUp") - elif key_lower_case == "tab": - updated_keys.append("Tab") - elif key_lower_case == "shift": - updated_keys.append("Shift") - elif key_lower_case in ("arrowleft", "left"): - updated_keys.append("ArrowLeft") - elif key_lower_case in ("arrowright", "right"): - updated_keys.append("ArrowRight") - elif key_lower_case in ("arrowup", "up"): - updated_keys.append("ArrowUp") - elif key_lower_case in ("arrowdown", "down"): - updated_keys.append("ArrowDown") - elif key_lower_case == "home": - updated_keys.append("Home") - elif key_lower_case == "end": - updated_keys.append("End") - elif key_lower_case == "delete": - updated_keys.append("Delete") - elif key_lower_case == "ecs": - updated_keys.append("Escape") - elif key_lower_case == "alt": - updated_keys.append("Alt") - elif key_lower_case.startswith("f") and key_lower_case[1:].isdigit(): - # Handle function keys: f1 -> F1, f5 -> F5, etc. - updated_keys.append(key_lower_case.upper()) - else: - updated_keys.append(key) - keypress_str = "+".join(updated_keys) - if action.hold: - await page.keyboard.down(keypress_str) - await asyncio.sleep(action.duration) - await page.keyboard.up(keypress_str) - else: - await page.keyboard.press(keypress_str) + await handler_utils.keypress(page, action.keys, hold=action.hold, duration=action.duration) return [ActionSuccess()] @@ -1876,13 +1828,7 @@ async def handle_drag_action( task: Task, step: Step, ) -> list[ActionResult]: - if action.start_x and action.start_y: - await page.mouse.move(action.start_x, action.start_y) - await page.mouse.down() - for point in action.path: - x, y = point[0], point[1] - await page.mouse.move(x, y) - await page.mouse.up() + await handler_utils.drag(page, action.start_x, action.start_y, action.path) return [ActionSuccess()] @@ -1913,12 +1859,7 @@ async def handle_left_mouse_action( task: Task, step: Step, ) -> list[ActionResult]: - if action.x and action.y: - await page.mouse.move(action.x, action.y) - if action.direction == "down": - await page.mouse.down() - elif action.direction == "up": - await page.mouse.up() + await handler_utils.left_mouse(page, action.x, action.y, action.direction) return [ActionSuccess()] diff --git a/skyvern/webeye/actions/handler_utils.py b/skyvern/webeye/actions/handler_utils.py index fb0d2884..7d2501b7 100644 --- a/skyvern/webeye/actions/handler_utils.py +++ b/skyvern/webeye/actions/handler_utils.py @@ -1,7 +1,8 @@ -from typing import Any +import asyncio +from typing import Any, Literal import structlog -from playwright.async_api import Locator +from playwright.async_api import Locator, Page from skyvern.config import settings from skyvern.constants import TEXT_INPUT_DELAY, TEXT_PRESS_MAX_LENGTH @@ -31,3 +32,79 @@ async def input_sequentially(locator: Locator, text: str, timeout: float = setti text = text[length - TEXT_PRESS_MAX_LENGTH :] await locator.press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=timeout) + + +async def keypress(page: Page, keys: list[str], hold: bool = False, duration: float = 0) -> None: + updated_keys = [] + for key in keys: + key_lower_case = key.lower() + if key_lower_case in ("enter", "return"): + updated_keys.append("Enter") + elif key_lower_case == "space": + updated_keys.append(" ") + elif key_lower_case == "ctrl": + updated_keys.append("Control") + elif key_lower_case == "backspace": + updated_keys.append("Backspace") + elif key_lower_case == "pagedown": + updated_keys.append("PageDown") + elif key_lower_case == "pageup": + updated_keys.append("PageUp") + elif key_lower_case == "tab": + updated_keys.append("Tab") + elif key_lower_case == "shift": + updated_keys.append("Shift") + elif key_lower_case in ("arrowleft", "left"): + updated_keys.append("ArrowLeft") + elif key_lower_case in ("arrowright", "right"): + updated_keys.append("ArrowRight") + elif key_lower_case in ("arrowup", "up"): + updated_keys.append("ArrowUp") + elif key_lower_case in ("arrowdown", "down"): + updated_keys.append("ArrowDown") + elif key_lower_case == "home": + updated_keys.append("Home") + elif key_lower_case == "end": + updated_keys.append("End") + elif key_lower_case == "delete": + updated_keys.append("Delete") + elif key_lower_case == "esc": + updated_keys.append("Escape") + elif key_lower_case == "alt": + updated_keys.append("Alt") + elif key_lower_case.startswith("f") and key_lower_case[1:].isdigit(): + # Handle function keys: f1 -> F1, f5 -> F5, etc. + updated_keys.append(key_lower_case.upper()) + else: + updated_keys.append(key) + keypress_str = "+".join(updated_keys) + if hold: + await page.keyboard.down(keypress_str) + await asyncio.sleep(duration) + await page.keyboard.up(keypress_str) + else: + await page.keyboard.press(keypress_str) + + +async def drag( + page: Page, start_x: int | None = None, start_y: int | None = None, path: list[tuple[int, int]] | None = None +) -> None: + if start_x and start_y: + await page.mouse.move(start_x, start_y) + await page.mouse.down() + path = path or [] + for point in path: + x, y = point[0], point[1] + await page.mouse.move(x, y) + await page.mouse.up() + + +async def left_mouse(page: Page, x: int | None, y: int | None, direction: Literal["down", "up"]) -> None: + if x and y: + await page.mouse.move(x, y) + if direction == "down": + await page.mouse.down() + elif direction == "up": + await page.mouse.up() + else: + LOG.info("Invalid direction for left mouse action", direction=direction)