add new workflow block (#1228)

2024-11-21 15:12:26 +08:00
parent 4271ca9ecf
commit 3f209404f7
16 changed files with 483 additions and 13 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -39,11 +39,12 @@ from skyvern.forge.sdk.artifact.models import ArtifactType
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.core.security import generate_skyvern_signature
 from skyvern.forge.sdk.core.validators import validate_url
+from skyvern.forge.sdk.db.enums import TaskPromptTemplate
 from skyvern.forge.sdk.models import Organization, Step, StepStatus
 from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskStatus
 from skyvern.forge.sdk.settings_manager import SettingsManager
 from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext
-from skyvern.forge.sdk.workflow.models.block import TaskBlock
+from skyvern.forge.sdk.workflow.models.block import ActionBlock, BaseTaskBlock
 from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, WorkflowRunStatus
 from skyvern.webeye.actions.actions import (
    Action,
@@ -98,7 +99,7 @@ class ForgeAgent:

    async def create_task_and_step_from_block(
        self,
-        task_block: TaskBlock,
+        task_block: BaseTaskBlock,
        workflow: Workflow,
        workflow_run: WorkflowRun,
        workflow_run_context: WorkflowRunContext,
@@ -132,6 +133,9 @@ class ForgeAgent:
        task_url = validate_url(task_url)
        task = await app.DATABASE.create_task(
            url=task_url,
+            prompt_template=task_block.prompt_template,
+            complete_criterion=task_block.complete_criterion,
+            terminate_criterion=task_block.terminate_criterion,
            title=task_block.title or task_block.label,
            webhook_callback_url=None,
            totp_verification_url=task_block.totp_verification_url,
@@ -195,6 +199,8 @@ class ForgeAgent:
            totp_verification_url=totp_verification_url,
            totp_identifier=task_request.totp_identifier,
            navigation_goal=task_request.navigation_goal,
+            complete_criterion=task_request.complete_criterion,
+            terminate_criterion=task_request.terminate_criterion,
            data_extraction_goal=task_request.data_extraction_goal,
            navigation_payload=task_request.navigation_payload,
            organization_id=organization_id,
@@ -222,7 +228,7 @@ class ForgeAgent:
        step: Step,
        api_key: str | None = None,
        close_browser_on_completion: bool = True,
-        task_block: TaskBlock | None = None,
+        task_block: BaseTaskBlock | None = None,
    ) -> Tuple[Step, DetailedAgentStepOutput | None, Step | None]:
        workflow_run: WorkflowRun | None = None
        if task.workflow_run_id:
@@ -362,7 +368,13 @@ class ForgeAgent:
                    is_task_completed,
                    maybe_last_step,
                    maybe_next_step,
-                ) = await self.handle_completed_step(organization, task, step, await browser_state.get_working_page())
+                ) = await self.handle_completed_step(
+                    organization=organization,
+                    task=task,
+                    step=step,
+                    page=await browser_state.get_working_page(),
+                    task_block=task_block,
+                )
                if is_task_completed is not None and maybe_last_step:
                    last_step = maybe_last_step
                    await self.clean_up_task(
@@ -582,7 +594,7 @@ class ForgeAgent:
        step: Step,
        browser_state: BrowserState,
        organization: Organization | None = None,
-        task_block: TaskBlock | None = None,
+        task_block: BaseTaskBlock | None = None,
    ) -> tuple[Step, DetailedAgentStepOutput]:
        detailed_agent_step_output = DetailedAgentStepOutput(
            scraped_page=None,
@@ -620,7 +632,7 @@ class ForgeAgent:
            actions: list[Action]

            using_cached_action_plan = False
-            if not task.navigation_goal:
+            if not task.workflow_run_id and not task.navigation_goal:
                actions = [
                    CompleteAction(
                        reasoning="Task has no navigation goal.",
@@ -870,7 +882,7 @@ class ForgeAgent:
                        break

            task_completes_on_download = task_block and task_block.complete_on_download and task.workflow_run_id
-            if not has_decisive_action and not task_completes_on_download:
+            if not has_decisive_action and not task_completes_on_download and not isinstance(task_block, ActionBlock):
                disable_user_goal_check = app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
                    "DISABLE_USER_GOAL_CHECK",
                    task.task_id,
@@ -1225,8 +1237,10 @@ class ForgeAgent:
        final_navigation_payload = self._build_navigation_payload(
            task, expire_verification_code=expire_verification_code
        )
+
+        template = task.prompt_template if task.prompt_template else TaskPromptTemplate.ExtractAction
        return prompt_engine.load_prompt(
-            "extract-action",
+            template=template,
            navigation_goal=navigation_goal,
            navigation_payload_str=json.dumps(final_navigation_payload),
            starting_url=starting_url,
@@ -1237,6 +1251,8 @@ class ForgeAgent:
            error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
            utc_datetime=datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
            verification_code_check=verification_code_check,
+            complete_criterion=task.complete_criterion,
+            terminate_criterion=task.terminate_criterion,
        )

    def _build_navigation_payload(
@@ -1770,6 +1786,7 @@ class ForgeAgent:
        task: Task,
        step: Step,
        page: Page | None,
+        task_block: BaseTaskBlock | None = None,
    ) -> tuple[bool | None, Step | None, Step | None]:
        if step.is_goal_achieved():
            LOG.info(
@@ -1810,6 +1827,24 @@ class ForgeAgent:
            or organization.max_steps_per_run
            or SettingsManager.get_settings().MAX_STEPS_PER_RUN
        )
+
+        # HACK: an action block executes only one step and never emits a complete action, so we consider the task completed as soon as that step succeeds
+        if isinstance(task_block, ActionBlock) and step.is_success():
+            LOG.info(
+                "Step completed for the action block, marking task as completed",
+                task_id=task.task_id,
+                step_id=step.step_id,
+                step_order=step.order,
+                step_retry=step.retry_index,
+                output=step.output,
+            )
+            last_step = await self.update_step(step, is_last=True)
+            await self.update_task(
+                task,
+                status=TaskStatus.completed,
+            )
+            return True, last_step, None
+
        if step.order + 1 >= max_steps_per_run:
            LOG.info(
                "Step completed but max steps reached, marking task as failed",
--- a/skyvern/forge/prompts/skyvern/decisive-criterion-validate.j2
+++ b/skyvern/forge/prompts/skyvern/decisive-criterion-validate.j2
@@ -0,0 +1,55 @@
+You are here to help the user determine if the current page has met the complete/terminate criterion. Use the complete/terminate criteria, the content of the elements parsed from the page, the screenshots of the page, and user details to determine whether the criteria have been met.
+
+
+Reply in JSON format with the following keys:
+{
+    "page_info": str, // Think step by step. Describe all the useful information in the page related to the complete/terminate criterion.
+    "thoughts": str, // Think step by step. What information makes you believe whether and which criterion has been met. Use information you see on the site to explain.
+    "actions": array // You are supposed to give only one action("COMPLETE" or "TERMINATE") in the action list. Here's the format of the action:
+    [{
+        "reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
+        "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+        "action_type": str, // It's a string enum: "COMPLETE", "TERMINATE". "COMPLETE" is used when the current page info has met the complete criterion. If there is no complete criterion, use "COMPLETE" as long as the page info hasn't met the terminate criterion. "TERMINATE" is used to terminate with a failure when the current page info has met the terminate criterion. If there is no terminate criterion, use "TERMINATE" as long as the page info hasn't met the complete criterion.
+{% if error_code_mapping_str %}
+        "errors": array // A list of errors. This is used to surface any errors that matches the current situation for COMPLETE and TERMINATE actions. For other actions or if no error description suits the current situation on the screenshots, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
+        [{
+            "error_code": str, // The error code from the user's error code list
+            "reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
+            "confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+        }]
+{% endif %}
+    }]
+}
+
+HTML elements from `{{ current_url }}`:
+```
+{{ elements }}
+```
+
+The URL of the page you're on right now is `{{ current_url }}`.
+
+{% if complete_criterion %}
+Complete Criterion:
+```
+{{ complete_criterion }}
+```{% endif %}
+{% if terminate_criterion %}
+Terminate Criterion:
+```
+{{ terminate_criterion }}
+```{% endif %}
+{% if error_code_mapping_str %}
+Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
+```
+{{ error_code_mapping_str }}
+```{% endif %}
+
+User details:
+```
+{{ navigation_payload_str }}
+```
+
+Current datetime in UTC, YYYY-MM-DD HH:MM format:
+```
+{{ utc_datetime }}
+```
--- a/skyvern/forge/prompts/skyvern/single-click-action.j2
+++ b/skyvern/forge/prompts/skyvern/single-click-action.j2
@@ -0,0 +1,39 @@
+You are here to help the user to perform a CLICK action on the web page. Use the user instruction, the content of the elements parsed from the page, the screenshots of the page, and user details to determine which element to click.
+Each actionable element is tagged with an ID. Only take the action on the elements provided in the HTML elements, do not imagine any new element.
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
+
+Reply in JSON format with the following keys:
+{
+    "page_info": str, // Think step by step. Describe all the useful information in the page related to the user instruction.
+    "thoughts": str, // Think step by step. What information makes you believe which element to click. Use information you see on the site to explain.
+    "actions": array // You are supposed to give only one action("CLICK") in the action list. Here's the format of the action:
+    [{
+        "reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
+        "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+        "action_type": str, // It's a string enum: "CLICK". "CLICK" is an element you'd like to click.
+        "id": str, // The id of the element to take action on. The id has to be one from the elements list.
+        "download": bool, // If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
+    }]
+}
+
+The URL of the page you're on right now is `{{ current_url }}`.
+
+HTML elements from `{{ current_url }}`:
+```
+{{ elements }}
+```
+
+User instruction:
+```
+{{ navigation_goal }}
+```
+
+User details:
+```
+{{ navigation_payload_str }}
+```
+
+Current datetime in UTC, YYYY-MM-DD HH:MM format:
+```
+{{ utc_datetime }}
+```
--- a/skyvern/forge/prompts/skyvern/single-input-action.j2
+++ b/skyvern/forge/prompts/skyvern/single-input-action.j2
@@ -0,0 +1,41 @@
+You are here to help the user to perform an INPUT_TEXT action on the web page. Use the user instruction, the content of the elements parsed from the page, the screenshots of the page, and user details to determine which element to input.
+Each actionable element is tagged with an ID. Only take the action on the elements provided in the HTML elements, do not imagine any new element.
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
+
+Reply in JSON format with the following keys:
+{
+    "page_info": str, // Think step by step. Describe all the useful information in the page related to the user instruction.
+    "thoughts": str, // Think step by step. What information makes you believe which element to input the value. Use information you see on the site to explain.
+    "actions": array // You are supposed to give only one action("INPUT_TEXT") in the action list. Here's the format of the action:
+    [{
+        "reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
+        "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+        "action_type": str, // It's a string enum: "INPUT_TEXT". "INPUT_TEXT" is an element you'd like to input text into.
+        "id": str, // The id of the element to take action on. The id has to be one from the elements list.
+        "text": str, // The text to input.
+    }]{% if verification_code_check %}
+    "verification_code_reasoning": str, // Let's think step by step. Describe what you see and think if a verification code is needed for login or any verification step. Explain why you believe a verification code is needed or not. Has the code been sent and is code available somewhere (email, phone or 2FA device)?
+    "need_verification_code": bool, // Whether a verification code is needed to proceed. True only if the code is available to user. If the code is not sent, return false {% endif %}
+}
+
+The URL of the page you're on right now is `{{ current_url }}`.
+
+HTML elements from `{{ current_url }}`:
+```
+{{ elements }}
+```
+
+User instruction:
+```
+{{ navigation_goal }}
+```
+
+User details:
+```
+{{ navigation_payload_str }}
+```
+
+Current datetime in UTC, YYYY-MM-DD HH:MM format:
+```
+{{ utc_datetime }}
+```
--- a/skyvern/forge/prompts/skyvern/single-select-action.j2
+++ b/skyvern/forge/prompts/skyvern/single-select-action.j2
@@ -0,0 +1,43 @@
+You are here to help the user to perform a SELECT_OPTION action on the web page. Use the user instruction, the content of the elements parsed from the page, the screenshots of the page, and user details to determine which element to select.
+Each actionable element is tagged with an ID. Only take the action on the elements provided in the HTML elements, do not imagine any new element.
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
+
+Reply in JSON format with the following keys:
+{
+    "page_info": str, // Think step by step. Describe all the useful information in the page related to the user instruction.
+    "thoughts": str, // Think step by step. What information makes you believe which element to select. Use information you see on the site to explain.
+    "actions": array // You are supposed to give only one action("SELECT_OPTION") in the action list. Here's the format of the action:
+    [{
+        "reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
+        "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+        "action_type": str, // It's a string enum: "SELECT_OPTION". "SELECT_OPTION" is an element you'd like to select an option from.
+        "id": str, // The id of the element to take action on. The id has to be one from the elements list.
+        "option": {  // The option to select.
+            "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
+            "index": int, // the index corresponding to the option index under the select element.
+            "value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
+        },
+    }]
+}
+
+The URL of the page you're on right now is `{{ current_url }}`.
+
+HTML elements from `{{ current_url }}`:
+```
+{{ elements }}
+```
+
+User instruction:
+```
+{{ navigation_goal }}
+```
+
+User details:
+```
+{{ navigation_payload_str }}
+```
+
+Current datetime in UTC, YYYY-MM-DD HH:MM format:
+```
+{{ utc_datetime }}
+```
--- a/skyvern/forge/prompts/skyvern/single-upload-action.j2
+++ b/skyvern/forge/prompts/skyvern/single-upload-action.j2
@@ -0,0 +1,39 @@
+You are here to help the user to perform an UPLOAD_FILE action on the web page. Use the user instruction, the content of the elements parsed from the page, the screenshots of the page, and user details to determine which element to upload.
+Each actionable element is tagged with an ID. Only take the action on the elements provided in the HTML elements, do not imagine any new element.
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
+
+Reply in JSON format with the following keys:
+{
+    "page_info": str, // Think step by step. Describe all the useful information in the page related to the user instruction.
+    "thoughts": str, // Think step by step. What information makes you believe which element to upload the file. Use information you see on the site to explain.
+    "actions": array // You are supposed to give only one action("UPLOAD_FILE") in the action list. Here's the format of the action:
+    [{
+        "reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
+        "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+        "action_type": str, // It's a string enum: "UPLOAD_FILE". "UPLOAD_FILE" is an element you'd like to upload a file into.
+        "id": str, // The id of the element to take action on. The id has to be one from the elements list.
+        "file_url": str, // The url of the file to upload.
+    }]
+}
+
+The URL of the page you're on right now is `{{ current_url }}`.
+
+HTML elements from `{{ current_url }}`:
+```
+{{ elements }}
+```
+
+User instruction:
+```
+{{ navigation_goal }}
+```
+
+User details:
+```
+{{ navigation_payload_str }}
+```
+
+Current datetime in UTC, YYYY-MM-DD HH:MM format:
+```
+{{ utc_datetime }}
+```
--- a/skyvern/forge/sdk/db/client.py
+++ b/skyvern/forge/sdk/db/client.py
@@ -10,7 +10,7 @@ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from skyvern.config import settings
 from skyvern.exceptions import WorkflowParameterNotFound
 from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
-from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
+from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskPromptTemplate
 from skyvern.forge.sdk.db.exceptions import NotFoundError
 from skyvern.forge.sdk.db.models import (
    ActionModel,
@@ -97,6 +97,8 @@ class AgentDB:
        self,
        url: str,
        title: str | None,
+        complete_criterion: str | None,
+        terminate_criterion: str | None,
        navigation_goal: str | None,
        data_extraction_goal: str | None,
        navigation_payload: dict[str, Any] | list | str | None,
@@ -111,17 +113,21 @@ class AgentDB:
        retry: int | None = None,
        max_steps_per_run: int | None = None,
        error_code_mapping: dict[str, str] | None = None,
+        prompt_template: str = TaskPromptTemplate.ExtractAction,
    ) -> Task:
        try:
            async with self.Session() as session:
                new_task = TaskModel(
                    status="created",
+                    prompt_template=prompt_template,
                    url=url,
                    title=title,
                    webhook_callback_url=webhook_callback_url,
                    totp_verification_url=totp_verification_url,
                    totp_identifier=totp_identifier,
                    navigation_goal=navigation_goal,
+                    complete_criterion=complete_criterion,
+                    terminate_criterion=terminate_criterion,
                    data_extraction_goal=data_extraction_goal,
                    navigation_payload=navigation_payload,
                    organization_id=organization_id,
--- a/skyvern/forge/sdk/db/enums.py
+++ b/skyvern/forge/sdk/db/enums.py
@@ -3,3 +3,19 @@ from enum import StrEnum

 class OrganizationAuthTokenType(StrEnum):
    api = "api"
+
+
+class TaskPromptTemplate(StrEnum):  # names of the prompt templates a task can be rendered with
+    ExtractAction = "extract-action"  # default; used when a task has no template set
+    DecisiveCriterionValidate = "decisive-criterion-validate"  # assigned to validation blocks
+    SingleClickAction = "single-click-action"  # action block with ActionType.Click
+    SingleInputAction = "single-input-action"  # action block with ActionType.InputText
+    SingleUploadAction = "single-upload-action"  # action block with ActionType.UploadFile
+    SingleSelectAction = "single-select-action"  # action block with ActionType.SelectOption
+
+
+class ActionType(StrEnum):  # kinds of single action an action block can be configured with
+    Click = "CLICK"  # mapped to TaskPromptTemplate.SingleClickAction
+    InputText = "INPUT_TEXT"  # mapped to TaskPromptTemplate.SingleInputAction
+    UploadFile = "UPLOAD_FILE"  # mapped to TaskPromptTemplate.SingleUploadAction
+    SelectOption = "SELECT_OPTION"  # mapped to TaskPromptTemplate.SingleSelectAction
--- a/skyvern/forge/sdk/db/models.py
+++ b/skyvern/forge/sdk/db/models.py
@@ -17,7 +17,7 @@ from sqlalchemy import (
 from sqlalchemy.ext.asyncio import AsyncAttrs
 from sqlalchemy.orm import DeclarativeBase

-from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
+from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskPromptTemplate
 from skyvern.forge.sdk.db.id import (
    generate_action_id,
    generate_artifact_id,
@@ -54,9 +54,12 @@ class TaskModel(Base):
    totp_verification_url = Column(String)
    totp_identifier = Column(String)
    title = Column(String)
+    prompt_template = Column(String, default=TaskPromptTemplate.ExtractAction)
    url = Column(String)
    navigation_goal = Column(String)
    data_extraction_goal = Column(String)
+    complete_criterion = Column(String)
+    terminate_criterion = Column(String)
    navigation_payload = Column(JSON)
    extracted_information = Column(JSON)
    failure_reason = Column(String)
--- a/skyvern/forge/sdk/db/utils.py
+++ b/skyvern/forge/sdk/db/utils.py
@@ -60,8 +60,11 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False) -> Task:
        status=TaskStatus(task_obj.status),
        created_at=task_obj.created_at,
        modified_at=task_obj.modified_at,
+        prompt_template=task_obj.prompt_template,
        title=task_obj.title,
        url=task_obj.url,
+        complete_criterion=task_obj.complete_criterion,
+        terminate_criterion=task_obj.terminate_criterion,
        webhook_callback_url=task_obj.webhook_callback_url,
        totp_verification_url=task_obj.totp_verification_url,
        totp_identifier=task_obj.totp_identifier,
--- a/skyvern/forge/sdk/models.py
+++ b/skyvern/forge/sdk/models.py
@@ -104,6 +104,14 @@ class Step(BaseModel):

        return False

+    def is_success(self) -> bool:
+        if self.status != StepStatus.completed:
+            return False
+        # TODO (kerem): Remove this check once we have backfilled all the steps
+        if self.output is None or self.output.actions_and_results is None:
+            return False
+        return True
+
    def is_terminated(self) -> bool:
        if self.status != StepStatus.completed:
            return False
--- a/skyvern/forge/sdk/schemas/tasks.py
+++ b/skyvern/forge/sdk/schemas/tasks.py
@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field, HttpUrl, field_validator

 from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, TaskAlreadyCanceled
 from skyvern.forge.sdk.core.validators import is_blocked_host
+from skyvern.forge.sdk.db.enums import TaskPromptTemplate


 class ProxyLocation(StrEnum):
@@ -77,6 +78,19 @@ class TaskBase(BaseModel):
        default=None,
        description="The requested schema of the extracted information.",
    )
+    complete_criterion: str | None = Field(
+        default=None, description="Criterion to complete", examples=["Complete if 'hello world' shows up on the page"]
+    )
+    terminate_criterion: str | None = Field(
+        default=None,
+        description="Criterion to terminate",
+        examples=["Terminate if 'existing account' shows up on the page"],
+    )
+    prompt_template: str | None = Field(
+        default=TaskPromptTemplate.ExtractAction,
+        description="The prompt template used for task",
+        examples=[TaskPromptTemplate.ExtractAction, TaskPromptTemplate.DecisiveCriterionValidate],
+    )


 class TaskRequest(TaskBase):
--- a/skyvern/forge/sdk/workflow/models/block.py
+++ b/skyvern/forge/sdk/workflow/models/block.py
@@ -40,6 +40,7 @@ from skyvern.forge.sdk.api.files import (
    get_path_for_workflow_download_directory,
 )
 from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
+from skyvern.forge.sdk.db.enums import TaskPromptTemplate
 from skyvern.forge.sdk.schemas.tasks import Task, TaskOutput, TaskStatus
 from skyvern.forge.sdk.settings_manager import SettingsManager
 from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext
@@ -69,6 +70,8 @@ class BlockType(StrEnum):
    UPLOAD_TO_S3 = "upload_to_s3"
    SEND_EMAIL = "send_email"
    FILE_URL_PARSER = "file_url_parser"
+    VALIDATION = "validation"
+    ACTION = "action"


 class BlockStatus(StrEnum):
@@ -174,11 +177,12 @@ class Block(BaseModel, abc.ABC):
        pass


-class TaskBlock(Block):
-    block_type: Literal[BlockType.TASK] = BlockType.TASK
-
+class BaseTaskBlock(Block):
+    prompt_template: str = TaskPromptTemplate.ExtractAction
    url: str | None = None
    title: str = ""
+    complete_criterion: str | None = None
+    terminate_criterion: str | None = None
    navigation_goal: str | None = None
    data_extraction_goal: str | None = None
    data_schema: dict[str, Any] | list | None = None
@@ -464,6 +468,10 @@ class TaskBlock(Block):
        )


+class TaskBlock(BaseTaskBlock):  # concrete task block: BaseTaskBlock plus the TASK block_type discriminator
+    block_type: Literal[BlockType.TASK] = BlockType.TASK
+
+
 class ForLoopBlock(Block):
    block_type: Literal[BlockType.FOR_LOOP] = BlockType.FOR_LOOP

@@ -1264,6 +1272,36 @@ class FileParserBlock(Block):
        )


+class ValidationBlock(BaseTaskBlock):
+    block_type: Literal[BlockType.VALIDATION] = BlockType.VALIDATION
+
+    def get_all_parameters(
+        self,
+        workflow_run_id: str,
+    ) -> list[PARAMETER_TYPE]:
+        return self.parameters
+
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
+        task_order, _ = await self.get_task_order(workflow_run_id, 0)
+        is_first_task = task_order == 0
+        if is_first_task:
+            return self.build_block_result(
+                success=False,
+                failure_reason="Validation block should not be the first block",
+                output_parameter_value=None,
+                status=BlockStatus.terminated,
+            )
+
+        return await super().execute(workflow_run_id=workflow_run_id, kwargs=kwargs)
+
+
+class ActionBlock(BaseTaskBlock):
+    block_type: Literal[BlockType.ACTION] = BlockType.ACTION
+
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
+        return await super().execute(workflow_run_id=workflow_run_id, kwargs=kwargs)
+
+
 BlockSubclasses = Union[
    ForLoopBlock,
    TaskBlock,
@@ -1273,5 +1311,7 @@ BlockSubclasses = Union[
    UploadToS3Block,
    SendEmailBlock,
    FileParserBlock,
+    ValidationBlock,
+    ActionBlock,
 ]
 BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")]
--- a/skyvern/forge/sdk/workflow/models/yaml.py
+++ b/skyvern/forge/sdk/workflow/models/yaml.py
@@ -3,6 +3,7 @@ from typing import Annotated, Any, Literal

 from pydantic import BaseModel, Field

+from skyvern.forge.sdk.db.enums import ActionType
 from skyvern.forge.sdk.schemas.tasks import ProxyLocation
 from skyvern.forge.sdk.workflow.models.block import BlockType, FileType
 from skyvern.forge.sdk.workflow.models.parameter import ParameterType, WorkflowParameterType
@@ -208,6 +209,32 @@ class FileParserBlockYAML(BlockYAML):
    file_type: FileType


+class ValidationBlockYAML(BlockYAML):  # YAML definition of a validation block
+    block_type: Literal[BlockType.VALIDATION] = BlockType.VALIDATION  # type: ignore
+
+    complete_criterion: str | None = None  # at least one of the two criteria must be set, or building the block raises
+    terminate_criterion: str | None = None
+    error_code_mapping: dict[str, str] | None = None
+    parameter_keys: list[str] | None = None  # keys looked up in the workflow parameters when the block is built
+
+
+class ActionBlockYAML(BlockYAML):  # YAML definition of an action block
+    action_type: ActionType  # required; selects which single-action prompt template the block uses
+    block_type: Literal[BlockType.ACTION] = BlockType.ACTION  # type: ignore
+
+    url: str | None = None
+    title: str = ""
+    navigation_goal: str | None = None
+    error_code_mapping: dict[str, str] | None = None
+    max_retries: int = 0
+    parameter_keys: list[str] | None = None  # keys looked up in the workflow parameters when the block is built
+    complete_on_download: bool = False
+    download_suffix: str | None = None
+    totp_verification_url: str | None = None
+    totp_identifier: str | None = None
+    cache_actions: bool = False
+
+
 PARAMETER_YAML_SUBCLASSES = (
    AWSSecretParameterYAML
    | BitwardenLoginCredentialParameterYAML
@@ -228,6 +255,8 @@ BLOCK_YAML_SUBCLASSES = (
    | UploadToS3BlockYAML
    | SendEmailBlockYAML
    | FileParserBlockYAML
+    | ValidationBlockYAML
+    | ActionBlockYAML
 )
 BLOCK_YAML_TYPES = Annotated[BLOCK_YAML_SUBCLASSES, Field(discriminator="block_type")]

--- a/skyvern/forge/sdk/workflow/service.py
+++ b/skyvern/forge/sdk/workflow/service.py
@@ -18,6 +18,7 @@ from skyvern.forge.sdk.artifact.models import ArtifactType
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.core.security import generate_skyvern_signature
 from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
+from skyvern.forge.sdk.db.enums import ActionType, TaskPromptTemplate
 from skyvern.forge.sdk.models import Organization, Step
 from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
 from skyvern.forge.sdk.workflow.exceptions import (
@@ -28,6 +29,7 @@ from skyvern.forge.sdk.workflow.exceptions import (
    WorkflowParameterMissingRequiredValue,
 )
 from skyvern.forge.sdk.workflow.models.block import (
+    ActionBlock,
    BlockStatus,
    BlockType,
    BlockTypeVar,
@@ -39,6 +41,7 @@ from skyvern.forge.sdk.workflow.models.block import (
    TaskBlock,
    TextPromptBlock,
    UploadToS3Block,
+    ValidationBlock,
 )
 from skyvern.forge.sdk.workflow.models.parameter import (
    PARAMETER_TYPE,
@@ -1333,4 +1336,65 @@ class WorkflowService:
                file_type=block_yaml.file_type,
                continue_on_failure=block_yaml.continue_on_failure,
            )
+        elif block_yaml.block_type == BlockType.VALIDATION:
+            validation_block_parameters = (
+                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
+                if block_yaml.parameter_keys
+                else []
+            )
+
+            if not block_yaml.complete_criterion and not block_yaml.terminate_criterion:
+                raise Exception("Both complete criterion and terminate criterion are empty")
+
+            return ValidationBlock(
+                label=block_yaml.label,
+                prompt_template=TaskPromptTemplate.DecisiveCriterionValidate,
+                parameters=validation_block_parameters,
+                output_parameter=output_parameter,
+                complete_criterion=block_yaml.complete_criterion,
+                terminate_criterion=block_yaml.terminate_criterion,
+                error_code_mapping=block_yaml.error_code_mapping,
+                continue_on_failure=block_yaml.continue_on_failure,
+                # only need one step for validation block
+                max_steps_per_run=1,
+            )
+
+        elif block_yaml.block_type == BlockType.ACTION:
+            action_block_parameters = (
+                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
+                if block_yaml.parameter_keys
+                else []
+            )
+            prompt_template = ""
+            if block_yaml.action_type == ActionType.Click:
+                prompt_template = TaskPromptTemplate.SingleClickAction
+            elif block_yaml.action_type == ActionType.InputText:
+                prompt_template = TaskPromptTemplate.SingleInputAction
+            elif block_yaml.action_type == ActionType.UploadFile:
+                prompt_template = TaskPromptTemplate.SingleUploadAction
+            elif block_yaml.action_type == ActionType.SelectOption:
+                prompt_template = TaskPromptTemplate.SingleSelectAction
+
+            if not prompt_template:
+                raise Exception("not supported action type for action block")
+
+            return ActionBlock(
+                prompt_template=prompt_template,
+                label=block_yaml.label,
+                url=block_yaml.url,
+                title=block_yaml.title,
+                parameters=action_block_parameters,
+                output_parameter=output_parameter,
+                navigation_goal=block_yaml.navigation_goal,
+                error_code_mapping=block_yaml.error_code_mapping,
+                max_retries=block_yaml.max_retries,
+                complete_on_download=block_yaml.complete_on_download,
+                download_suffix=block_yaml.download_suffix,
+                continue_on_failure=block_yaml.continue_on_failure,
+                totp_verification_url=block_yaml.totp_verification_url,
+                totp_identifier=block_yaml.totp_identifier,
+                cache_actions=block_yaml.cache_actions,
+                max_steps_per_run=1,
+            )
+
        raise ValueError(f"Invalid block type {block_yaml.block_type}")