Add task and workflow docs (#486)
This commit is contained in:
2
docs/getting-started/quickstart-open-source.mdx
Normal file
2
docs/getting-started/quickstart-open-source.mdx
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
1
docs/getting-started/quickstart-skyvern-cloud.mdx
Normal file
1
docs/getting-started/quickstart-skyvern-cloud.mdx
Normal file
@@ -0,0 +1 @@
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
1
docs/getting-started/supported-functionality.mdx
Normal file
1
docs/getting-started/supported-functionality.mdx
Normal file
@@ -0,0 +1 @@
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
BIN
docs/images/api_key_image.png
Normal file
BIN
docs/images/api_key_image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 71 KiB |
2
docs/integrations/api.mdx
Normal file
2
docs/integrations/api.mdx
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
1
docs/integrations/make.com.mdx
Normal file
1
docs/integrations/make.com.mdx
Normal file
@@ -0,0 +1 @@
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
1
docs/integrations/zapier.mdx
Normal file
1
docs/integrations/zapier.mdx
Normal file
@@ -0,0 +1 @@
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
@@ -63,19 +63,18 @@
|
||||
"getting-started/skyvern-in-action",
|
||||
"getting-started/quickstart-skyvern-cloud",
|
||||
"getting-started/quickstart-open-source",
|
||||
"getting-started/supported-features"
|
||||
"getting-started/supported-functionality"
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Tasks",
|
||||
"pages": [
|
||||
"running-tasks/introduction",
|
||||
"running-tasks/prompting-tips",
|
||||
"running-tasks/advanced-features",
|
||||
"running-tasks/api-spec",
|
||||
"running-tasks/webhooks",
|
||||
"running-tasks/webhooks-faq",
|
||||
"running-tasks/visualizing-results",
|
||||
"running-tasks/visualizing-results-advanced"
|
||||
"running-tasks/prompting-tips",
|
||||
"running-tasks/advanced-features"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
2
docs/running-tasks/advanced-features.mdx
Normal file
2
docs/running-tasks/advanced-features.mdx
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
99
docs/running-tasks/api-spec.mdx
Normal file
99
docs/running-tasks/api-spec.mdx
Normal file
@@ -0,0 +1,99 @@
|
||||
---
|
||||
title: API Spec
|
||||
description: 'The core building block in Skyvern'
|
||||
---
|
||||
|
||||
Tasks are the building block of Skyvern. They represent a single instruction to the browser to go do something using language models. Ex. “Go to alibaba and extract this information”
|
||||
|
||||
## Request - Initiate a task (Webhook)
|
||||
|
||||
### Body
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| url | String | yes | https://jobs.lever.co/leverdemo-8/45d39614-464a-4b62-a5cd-8683ce4fb80a/apply | The website that you would like to browse / scrape. This should be the ideal starting point for the agent |
|
||||
| webhook_callback_url | String | no | … | The callback URL once our system is finished processing this async task |
|
||||
| navigation_goal | String | no | Apply for a job | The prompt that tells the agent what the user-facing goal is. This is the guiding light for the LLM as it navigates a particular website / sitemap to achieve this specified goal |
|
||||
| data_extraction_goal | String | no | Was the job application successful? | The prompt that instructs the agent to extract information once the agent has achieved its navigation_goal |
|
||||
| navigation_payload | Object | no | "name": "Chris P. Bacon",<br/>"email": "chris@pbacon.com" | JSON-formatted payload with any “facts” or information that would help the agent perform its job. In the case of navigating an insurance quote, this payload would include any user information to help fill out the insurance flow such as date of birth, or age they got their license, and so on<br/><br/>This can include nested information, and the formatting isn’t validated |
|
||||
| proxy_location | String | yes | RESIDENTIAL | Proxy location for the web-browsing request. Please pass RESIDENTIAL as a value |
|
||||
| extracted_information_schema | JSON | no | | Used to enforce a JSON schema on the information extracted for the data_extraction_goal. Similar to the https://json-schema.org/ definition. |
|
||||
|
||||
## Example Request (Apply for a job)
|
||||
|
||||
```json
|
||||
{
|
||||
"url": "https://jobs.lever.co/leverdemo-8/45d39614-464a-4b62-a5cd-8683ce4fb80a/apply",
|
||||
"navigation_goal": "Apply for a job",
|
||||
"data_extraction_goal": "Was the job application successful?",
|
||||
"proxy_location": "RESIDENTIAL",
|
||||
"navigation_payload": {
|
||||
"name": "Chris P. Bacon",
|
||||
"email": "chris@pbacon.com"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Response
|
||||
|
||||
| Parameter | Type | Always returned? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| task_id | String | yes | t_123456 | The task id associated with this specific task |
|
||||
|
||||
## Response Webhook - Task conclusion (POST)
|
||||
|
||||
### Headers
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| x-skyvern-signature | String | yes | v0=a2114d57b48eac39b9ad189<br/>dd8316235a7b4a8d21a10bd275<br/>19666489c69b503 | Authentication token that allows our service to communicate with your backend service via callback / webhook <br/>We’ll be using the same strategy slack uses, as defined here: https://api.slack.com/authentication/verifying-requests-from-slack#making__validating-a-request |
|
||||
| x-skyvern-timestamp | String | yes | 1531420618 | Timestamp used to decode and validate the incoming webhook call<br/><br/>We’ll be using the same strategy slack uses, as defined here: https://api.slack.com/authentication/verifying-requests-from-slack#making__validating-a-request |
|
||||
|
||||
### Body
|
||||
|
||||
| Parameter | Type | Always returned? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| task_id | String | yes | t_123456 | The task id associated with this specific task |
|
||||
| extracted_information | Object | Yes | "price": "$100.0" | Unstructured JSON payload containing the extracted information as specified by the user’s input prompt |
|
||||
| screenshot_url | String | Yes | … url to screenshot … | Screenshot of the final page, where the data extraction occurs |
|
||||
| recording_url | String | Yes | … url to recording … | Recording of the entire browsing session to help debug any issues |
|
||||
| failure_reason | String | No | “Failed to pass this page - missing information: date of birth” | A failure reason that caused the job to fail. This can range from internal errors (Skyvern side) to external errors (insufficient information provided) |
|
||||
|
||||
## Request - Task Details (GET)
|
||||
|
||||
You can use this API to poll for task status updates if you don’t want to wait for webhook callbacks.
|
||||
|
||||
Request type: `GET`
|
||||
|
||||
Production: `api.skyvern.com/api/v1/tasks/{task_id}`
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| task_id | String | yes | t_123 | The id of the task you want to check the status of |
|
||||
|
||||
## Request - List Task Details (GET)
|
||||
|
||||
Request type: `GET`
|
||||
|
||||
Production: `api.skyvern.com/api/v1/tasks/`
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| page | Integer | no | 1 | default=1<br/>has to be ≥1 |
|
||||
| page_size | Integer | no | 10 | default=10<br/>has to be ≥1 |
|
||||
|
||||
## Response - Task Details (GET)
|
||||
|
||||
| Parameter | Type | Sample Value | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| request | JSON | | Includes the initial request sent to create the task. Fields included: url,webhook_callback_url,navigation_goal,data_extraction_goal,navigation_payload,proxy_location,extracted_information_schema |
|
||||
| task_id | String | t_123 | The id of the task you want to check the status of |
|
||||
| status | String | SUCCESS / FAILURE / IN_PROGRESS | String indicating the status of this task |
|
||||
| created_at | Timestamp | 2022-11-22T22:55:55 | Timestamp indicating when the task was created. |
|
||||
| modified_at | Timestamp | 2022-11-22T22:55:55 | Timestamp indicating when the task was last updated. Used to detect long-running tasks. |
|
||||
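| extracted_information | Object | "price": "$100.0" | Unstructured JSON payload containing the extracted information as specified by the user’s input prompt. Always returned. |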
|
||||
| screenshot_url | String | … url to screenshot … | Screenshot of the final page, where the data extraction occurs. Always returned. |
|
||||
| recording_url | String | … url to recording … | Recording of the entire browsing session to help debug any issues. Always returned. |
|
||||
| failure_reason | String | “Failed to pass this page - missing information: invalid password” | A failure reason that caused the job to fail. Not always returned. |
|
||||
16
docs/running-tasks/introduction.mdx
Normal file
16
docs/running-tasks/introduction.mdx
Normal file
@@ -0,0 +1,16 @@
|
||||
---
|
||||
title: Introduction
|
||||
description: 'Tasks are the building block of Skyvern'
|
||||
---
|
||||
|
||||
# Getting your API Key
|
||||
|
||||
1. Navigate to [app.skyvern.com](https://app.skyvern.com) and log into your account
|
||||
1. Validate that you're in the correct account (personal vs organization)
|
||||
1. Click on "Settings"
|
||||
1. Click Reveal to see your API key. This is the unique key associated with your account that allows you to interact with Skyvern
|
||||
|
||||
|
||||
<img src="/images/api_key_image.png" alt="Skyvern settings page showing the API key" />
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
2
docs/running-tasks/prompting-tips.mdx
Normal file
2
docs/running-tasks/prompting-tips.mdx
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
2
docs/running-tasks/visualizing-results.mdx
Normal file
2
docs/running-tasks/visualizing-results.mdx
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
36
docs/running-tasks/webhooks-faq.mdx
Normal file
36
docs/running-tasks/webhooks-faq.mdx
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
title: Webhooks FAQ
|
||||
description: "How Skyvern notifies you when it's done"
|
||||
---
|
||||
|
||||
# FAQ
|
||||
## Webhooks vs HTTP requests?
|
||||
|
||||
We use Webhooks for executing tasks as the expected runtime of these jobs can exceed default HTTP timeouts (1 minute)
|
||||
|
||||
## How do we handle webhook authentication? (ie how can we handle callbacks?)
|
||||
|
||||
- a python example for how to generate the signature:
|
||||
|
||||
```python
|
||||
def validate_skyvern_request_headers(request: Request) -> bool:
|
||||
header_skyvern_signature = request.headers["x-skyvern-signature"]
|
||||
payload = request.body() # this is a bytes
|
||||
hash_obj = hmac.new(SKYVERN_API_KEY.encode("utf-8"), msg=payload, digestmod=hashlib.sha256)
|
||||
client_generated_signature = hash_obj.hexdigest()
|
||||
return header_skyvern_signature == client_generated_signature
|
||||
```
|
||||
|
||||
SKYVERN_API_KEY: this is the [api key](/running-tasks/introduction) specific to your organization
|
||||
|
||||
# Webhook common parameters
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| webhook_callback_url | String | yes | … | |
|
||||
|
||||
# Required Headers
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| x-api-key | String | yes | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e<br/>yJleHAiOjQ4MjU0MjI5NzUsInN1YiI6<br/>Im9fMTA2MTUxNzEyNjQ5ODUxMzQ2In0 | Bearer token that gives your backend access to the Skyvern API. This will be manually provided by us |
|
||||
347
docs/workflows/creating-workflows.mdx
Normal file
347
docs/workflows/creating-workflows.mdx
Normal file
@@ -0,0 +1,347 @@
|
||||
---
|
||||
title: Creating Workflows
|
||||
description: 'Creating complex multi-step workflows'
|
||||
---
|
||||
|
||||
## Building workflows
|
||||
|
||||
Workflows represent chaining multiple blocks together. Imagine calling multiple tasks in a row, doing conditional logic, extracting data to a CSV, etc. All of these ideas will be supported within our workflows feature
|
||||
|
||||
All of our workflows are defined in **YAML** format, and allow chaining multiple components together to generate some defined output
|
||||
|
||||
Today, we’re building the workflows for most of our customers as we iterate on the specification.
|
||||
This is a cumbersome experience -- rest assured that we are improving our web application that will offer a significantly enhanced user experience for this process.
|
||||
|
||||
## Request - Create Workflow (YAML)
|
||||
`POST api.skyvern.com/api/v1/workflows`
|
||||
|
||||
Use this API to create a workflow. The response of this API is a `workflow_permanent_id`, which can be used to run workflows below
|
||||
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| title | String | yes | Calculate a product price diff % | A title for a workflow |
|
||||
| description | String | yes | Compare two products' price diff % on alibaba vs newlabelwholesale | A description for a workflow |
|
||||
|
||||
## Workflow Parameters
|
||||
|
||||
Workflow parameters are specific parameters you’re going to be passing into the workflows to allow execution
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| key | String | yes | alibaba_url | unique key corresponding to a specific parameter |
|
||||
| parameter_type | Enum | yes | workflow | The type of parameter for the workflow. Meant to indicate whether this parameter is being passed in via the run workflow endpoint (workflow), or whether a parameter is the output of a different workflow step (output). Can be workflow, context, aws_secret, or output |
|
||||
| workflow_parameter_type | Enum | no? | string | The actual type of the parameter, meant to be used for type-safety reasons.<br/>Supported types:<br/>STRING = "string"<br/>INTEGER = "integer"<br/>FLOAT = "float"<br/>BOOLEAN = "boolean"<br/>JSON = "json" |
|
||||
| description | string | yes | Alibaba product URL for checking the price of the product | Description of the parameter |
|
||||
|
||||
## Blocks
|
||||
|
||||
Blocks are the building block (pun intended) of Skyvern’s workflows. Each block is one discrete task you want to occur. Multiple blocks may be chained together, with outputs from one block being fed as inputs to the next block.
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| block_type | Enum | yes | Task | Signifying the type of block this is in the workflow |
|
||||
| label | String | yes | get_alibaba_price | The unique identifier for this block within this workflow |
|
||||
| parameter_keys | Array | yes | parameter_keys:<br/>- alibaba_price<br/>- NLW_price | The list of parameters this block depends on for execution |
|
||||
| output_parameter_key | string | yes | output_parameter_key: price_diff_percentage | The optional output of the block, so that it may be used by other blocks |
|
||||
| \{\{ block specific parameters }} | ?? | yes | | Other parameters, specific to the block_type specified above. These are covered below |
|
||||
|
||||
|
||||
### TaskBlock
|
||||
|
||||
The magic block. Skyvern navigates through the websites to take actions and/or extract information.
|
||||
|
||||
Example block:
|
||||
```
|
||||
- block_type: task
|
||||
label: login
|
||||
parameter_keys:
|
||||
- credentials
|
||||
url: website_url
|
||||
navigation_goal: >-
|
||||
If you're not on the login page, navigate to login page and login using
|
||||
the credentials given. First, take actions on promotional popups or cookie prompts that could prevent taking other action on the web page. If you fail to login to find the login page or can't login after several trials, terminate. If login is
|
||||
completed, you're successful.
|
||||
data_extraction_goal: >-
|
||||
Extract anything for the sake of this demo
|
||||
error_code_mapping:
|
||||
stuck_with_popups: terminate and return this error if you can't close popups after several tries and can't take the necessary actions on the website because there is a blocking popup on the page
|
||||
failed_to_login: terminate and return this error if you fail logging in to the page
|
||||
```
|
||||
|
||||
### ForLoopBlock
|
||||
Iterate over something such as a CSV or the output of a previous block. The blocks nested under `loop_blocks` are the blocks that will be repeated for each entry in the value referenced by `loop_over_parameter_key`.
|
||||
|
||||
```
|
||||
- block_type: for_loop
|
||||
label: iterate_over_order_ids
|
||||
loop_over_parameter_key: order_ids
|
||||
continue_on_failure: true
|
||||
loop_blocks:
|
||||
- block_type: task
|
||||
label: download_invoice_for_order
|
||||
complete_on_download: true
|
||||
continue_on_failure: true
|
||||
parameter_keys:
|
||||
- order_id
|
||||
url: order_history_url
|
||||
navigation_goal: >-
|
||||
Download the invoice of the order with the given order ID.
|
||||
Make sure to download the invoice for the given order id.
|
||||
If the element tree doesn't have a matching order id, check the screenshots.
|
||||
Complete if you have successfully downloaded the invoice according to action history, if you were able to download it, you'll see download_triggered=True for the last step.
|
||||
If you don't see a way to download an invoice, navigate to the order page if possible.
|
||||
If there's no way to download an invoice terminate.
|
||||
If the text suggests printing, you can assume you can download it.
|
||||
Return click action with download=True if you want to trigger a download.
|
||||
error_code_mapping:
|
||||
not_possible_to_download_invoice: return this error if the website doesn't allow downloading/viewing invoices
|
||||
cant_solve_captcha: return this error if captcha isn't solved after multiple retries
|
||||
```
|
||||
|
||||
### CodeBlock
|
||||
This block executes user-defined code within our execution environment. It’s able to take parameters as input and transform them based on a certain specification
|
||||
|
||||
**Example Block**
|
||||
|
||||
```yaml
|
||||
- block_type: code
|
||||
label: calculate_percentage_diff
|
||||
parameter_keys:
|
||||
- alibaba_price
|
||||
- amazon_price
|
||||
code: |
|
||||
if amazon_price["unitPrice"] and alibaba_price["unitPrice"]:
|
||||
result = 1.0 * (alibaba_price["unitPrice"] - amazon_price["unitPrice"]) / amazon_price["unitPrice"]
|
||||
else:
|
||||
result = None
|
||||
output_parameter_key: price_diff_percentage
|
||||
```
|
||||
|
||||
### TextPromptBlock
|
||||
|
||||
Do a custom OpenAI query as a part of your workflow
|
||||
|
||||
```
|
||||
- block_type: text_prompt
|
||||
label: generate_new_title
|
||||
parameter_keys:
|
||||
- alibaba_title
|
||||
- amazon_title
|
||||
llm_key: OPENAI_GPT4O
|
||||
prompt: >
|
||||
You're given two e-commerce product titles. Use both and generate a
|
||||
better one.
|
||||
|
||||
Title 1: {{ alibaba_title }}
|
||||
Title 2: {{ amazon_title }}
|
||||
output_parameter_key: new_title
|
||||
```
|
||||
|
||||
### DownloadToS3Block
|
||||
### UploadToS3Block
|
||||
|
||||
Persists files inside S3
|
||||
|
||||
```
|
||||
- block_type: upload_to_s3
|
||||
label: upload_downloaded_files_to_s3
|
||||
path: SKYVERN_DOWNLOAD_DIRECTORY
|
||||
```
|
||||
|
||||
### SendEmailBlock
|
||||
|
||||
Sends an email with some data
|
||||
|
||||
```
|
||||
- block_type: send_email
|
||||
label: send_email
|
||||
smtp_host_secret_parameter_key: smtp_host
|
||||
smtp_port_secret_parameter_key: smtp_port
|
||||
smtp_username_secret_parameter_key: smtp_username
|
||||
smtp_password_secret_parameter_key: smtp_password
|
||||
sender: hello@skyvern.com
|
||||
recipients:
|
||||
- founders@skyvern.com
|
||||
subject: Skyvern - Downloaded Invoices Demo
|
||||
body: website_url
|
||||
file_attachments:
|
||||
- SKYVERN_DOWNLOAD_DIRECTORY
|
||||
```
|
||||
|
||||
|
||||
## Managing Credentials
|
||||
This is something the Skyvern team will need to set you up with today. If you're interested, please [book a call with Suchintan](https://meetings.hubspot.com/suchintan)
|
||||
|
||||
|
||||
## Common concepts
|
||||
### `continue_on_failure`
|
||||
|
||||
`continue_on_failure` flag indicates whether a failed block execution should block subsequent blocks or not
|
||||
|
||||
### `error_code_mapping`
|
||||
|
||||
Maps errors to specific error codes so you can have deterministic outputs
|
||||
|
||||
### `output_parameter_key` (autogenerated)
|
||||
|
||||
Specifies the output parameter of a specific block so it can be re-used in a subsequent block
|
||||
|
||||
Its format is always: `{label}_output`
|
||||
|
||||
i.e., the output parameter for a block like this (which can be referenced in subsequent blocks) would be: `login_output`
|
||||
```
|
||||
- block_type: task
|
||||
label: login
|
||||
parameter_keys:
|
||||
- credentials
|
||||
url: website_url
|
||||
navigation_goal: >-
|
||||
If you're not on the login page, navigate to login page and login using
|
||||
the credentials given. First, take actions on promotional popups or cookie prompts that could prevent taking other action on the web page. If you fail to login to find the login page or can't login after several trials, terminate. If login is
|
||||
completed, you're successful.
|
||||
data_extraction_goal: >-
|
||||
Extract anything for the sake of this demo
|
||||
error_code_mapping:
|
||||
stuck_with_popups: terminate and return this error if you can't close popups after several tries and can't take the necessary actions on the website because there is a blocking popup on the page
|
||||
failed_to_login: terminate and return this error if you fail logging in to the page
|
||||
```
|
||||
|
||||
|
||||
## Example workflow
|
||||
```
|
||||
title: Invoice Downloading Demo (Jun 13)
|
||||
description: >-
|
||||
Login to the website, download all the invoices after a date, email the
|
||||
invoices
|
||||
workflow_definition:
|
||||
parameters:
|
||||
- key: website_url
|
||||
parameter_type: workflow
|
||||
workflow_parameter_type: string
|
||||
- key: credentials
|
||||
parameter_type: bitwarden_login_credential
|
||||
bitwarden_client_id_aws_secret_key: SECRET
|
||||
bitwarden_client_secret_aws_secret_key: SECRET
|
||||
bitwarden_master_password_aws_secret_key: SECRET
|
||||
bitwarden_collection_id: SECRET
|
||||
url_parameter_key: website_url
|
||||
- key: invoice_retrieval_start_date
|
||||
parameter_type: workflow
|
||||
workflow_parameter_type: string
|
||||
- key: smtp_host
|
||||
parameter_type: aws_secret
|
||||
aws_key: SKYVERN_SMTP_HOST_AWS_SES
|
||||
- key: smtp_port
|
||||
parameter_type: aws_secret
|
||||
aws_key: SKYVERN_SMTP_PORT_AWS_SES
|
||||
- key: smtp_username
|
||||
parameter_type: aws_secret
|
||||
aws_key: SKYVERN_SMTP_USERNAME_SES
|
||||
- key: smtp_password
|
||||
parameter_type: aws_secret
|
||||
aws_key: SKYVERN_SMTP_PASSWORD_SES
|
||||
- parameter_type: context
|
||||
key: order_history_url
|
||||
source_parameter_key: get_order_history_page_url_and_qualifying_order_ids_output
|
||||
- parameter_type: context
|
||||
key: order_ids
|
||||
source_parameter_key: get_order_history_page_url_and_qualifying_order_ids_output
|
||||
- parameter_type: context
|
||||
key: order_id
|
||||
source_parameter_key: order_ids
|
||||
blocks:
|
||||
- block_type: task
|
||||
label: login
|
||||
parameter_keys:
|
||||
- credentials
|
||||
url: website_url
|
||||
navigation_goal: >-
|
||||
If you're not on the login page, navigate to login page and login using the credentials given, and then navigate to the personal account page. First, take actions on promotional popups or cookie prompts that could prevent taking other action on the web page. Then, try to login and navigate to the personal account page. If you fail to login to find the login page or can't login after several trials, terminate. If you're on the personal account page, consider the goal is completed.
|
||||
error_code_mapping:
|
||||
stuck_with_popups: terminate and return this error if you can't close popups after several tries and can't take the necessary actions on the website because there is a blocking popup on the page
|
||||
failed_to_login: terminate and return this error if you fail logging in to the page
|
||||
- block_type: task
|
||||
label: get_order_history_page_url_and_qualifying_order_ids
|
||||
parameter_keys:
|
||||
- invoice_retrieval_start_date
|
||||
navigation_goal: Find the order history page. If there is no orders after given start date, terminate.
|
||||
data_extraction_goal: >-
|
||||
You need to extract the order history page url by looking at the current
|
||||
page you're on. You need to extract contact emails you see on the page. You also need to extract the order ids for orders that
|
||||
happened on or after invoice_retrieval_start_date. Make sure to filter
|
||||
only the orders that happened on or after invoice_retrieval_start_date. You need to compare each order's date with the invoice_download_start_date. You can only include an order in the output if the order's date is after or the same as the invoice_download_start_date.
|
||||
While comparing dates, first compare year, then month, then day. invoice_retrieval_start_date
|
||||
is in YYYY-MM-DD format. The dates on the websites may be in different formats, compare accordingly and compare year, date, and month.
|
||||
error_code_mapping:
|
||||
failed_to_find_order_history_page: return this error if you can't find the order history page on the website
|
||||
no_orders_found_after_start_date: return this error if there are no orders after the specified invoice_download_start_date
|
||||
data_schema:
|
||||
type: object
|
||||
properties:
|
||||
order_history_url:
|
||||
type: url
|
||||
description: >-
|
||||
The exact URL of the order history page. Do not make any
|
||||
assumptions. Return the URL that's passed along in this context.
|
||||
contact_emails:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: Contact email for the ecommerce website you're on. If you can't find any return null
|
||||
date_comparison_scratchpad:
|
||||
type: string
|
||||
description: >-
|
||||
You are supposed to filter the orders that happened on or after the invoice_download_start_date. Think through how you will approach this task step-by-step here. Consider these before starting the comparison:
|
||||
- What format is the order date in? How can you parse it into a structured format?
|
||||
- What is the correct way to compare two dates?
|
||||
- How will you compare the order dates to the invoice_download_start_date?
|
||||
|
||||
Write out your thought process before filling out the order_ids field below. Remember, the original date may be in any format, so parse it carefully! The invoice_download_start_date will be an exact date you can directly compare against in the format YYYY-MM-DD.
|
||||
order_ids:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
order_date:
|
||||
type: iso-8601-date-string
|
||||
order_id:
|
||||
type: string
|
||||
description: >-
|
||||
Return a list of order id strings. Do not return order ids of
|
||||
orders that happened before the specified
|
||||
invoice_retrieval_start_date
|
||||
- block_type: for_loop
|
||||
label: iterate_over_order_ids
|
||||
loop_over_parameter_key: order_ids
|
||||
continue_on_failure: true
|
||||
loop_blocks:
|
||||
- block_type: task
|
||||
label: download_invoice_for_order
|
||||
complete_on_download: true
|
||||
continue_on_failure: true
|
||||
parameter_keys:
|
||||
- order_id
|
||||
url: order_history_url
|
||||
navigation_goal: Download the invoice of the order with the given order ID. Make sure to download the invoice for the given order id. If the element tree doesn't have a matching order id, check the screenshots. Complete if you have successfully downloaded the invoice according to action history, if you were able to download it, you'll see download_triggered=True for the last step. If you don't see a way to download an invoice, navigate to the order page if possible. If there's no way to download an invoice terminate. If the text suggests printing, you can assume you can download it. Return click action with download=True if you want to trigger a download.
|
||||
error_code_mapping:
|
||||
not_possible_to_download_invoice: return this error if the website doesn't allow downloading/viewing invoices
|
||||
cant_solve_captcha: return this error if captcha isn't solved after multiple retries
|
||||
- block_type: upload_to_s3
|
||||
label: upload_downloaded_files_to_s3
|
||||
path: SKYVERN_DOWNLOAD_DIRECTORY
|
||||
- block_type: send_email
|
||||
label: send_email
|
||||
smtp_host_secret_parameter_key: smtp_host
|
||||
smtp_port_secret_parameter_key: smtp_port
|
||||
smtp_username_secret_parameter_key: smtp_username
|
||||
smtp_password_secret_parameter_key: smtp_password
|
||||
sender: hello@skyvern.com
|
||||
recipients:
|
||||
- founders@skyvern.com
|
||||
subject: Skyvern - Downloaded Invoices Demo
|
||||
body: website_url
|
||||
file_attachments:
|
||||
- SKYVERN_DOWNLOAD_DIRECTORY
|
||||
|
||||
```
|
||||
16
docs/workflows/introduction.mdx
Normal file
16
docs/workflows/introduction.mdx
Normal file
@@ -0,0 +1,16 @@
|
||||
---
|
||||
title: Introduction
|
||||
description: 'Workflows -- chaining multiple tasks together'
|
||||
---
|
||||
|
||||
Workflows represent chaining multiple blocks together. Imagine calling multiple tasks in a row, doing conditional logic, extracting data to a CSV, etc. All of these ideas will be supported within our workflows feature.
|
||||
|
||||
Building blocks supported today:
|
||||
|
||||
1. TaskBlock: The **magic** block. Skyvern navigates through the websites to take actions and/or extract information.
|
||||
2. ForLoopBlock
|
||||
3. CodeBlock
|
||||
4. TextPromptBlock
|
||||
5. DownloadToS3Block
|
||||
6. UploadToS3Block
|
||||
7. SendEmailBlock
|
||||
209
docs/workflows/running-workflows.mdx
Normal file
209
docs/workflows/running-workflows.mdx
Normal file
@@ -0,0 +1,209 @@
|
||||
---
|
||||
title: Running Workflows
|
||||
description: 'Executing complex multi-step workflows'
|
||||
---
|
||||
|
||||
## Running workflows
|
||||
|
||||
### `POST /workflows/{workflow_permanent_id}/run`
|
||||
|
||||
You can see the generic endpoint definition below. We’ll go into the specifics of the invoice retrieval workflow in the next section.
|
||||
|
||||
### Body
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| data | JSON | no | \{ <br/>"website_url": "YOUR_URL",<br/>"invoice_retrieval_start_date": "2024-04-15"<br/> \} | The data field is used to pass in the required and optional parameters that a workflow accepts. For the invoice retrieval workflow, the required fields are website_url and invoice_retrieval_start_date. |
|
||||
| webhook_callback_url | String | no | … | Our system will send the webhook once it is finished executing the workflow run. |
|
||||
| proxy_location | String | no | RESIDENTIAL | Proxy location for the web browser. Please pass RESIDENTIAL. <br /> If we use residential proxies, Skyvern’s requests to the websites will be less suspicious. |
|
||||
|
||||
### Response
|
||||
|
||||
| Parameter | Type | Always returned? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| workflow_permanent_id | String | yes | wpid_123456 | The workflow id |
|
||||
| workflow_run_id | String | yes | wr_123456 | The workflow run id that represents this specific workflow run. <br/> You can use this id to match the webhook response to the initial request. |
|
||||
|
||||
### Sample Request & Response - Invoice retrieval
|
||||
|
||||
```bash
|
||||
-- Sample Request
|
||||
curl --location 'https://api.skyvern.com/api/v1/workflows/wpid_123456/run' \
|
||||
--header 'x-api-key: <USE_YOUR_API_KEY>' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"data": {
|
||||
"website_url": "your_website",
|
||||
"invoice_retrieval_start_date": "2024-04-15"
|
||||
},
|
||||
"proxy_location": "RESIDENTIAL",
|
||||
"webhook_callback_url": "<your-endpoint>"
|
||||
}'
|
||||
|
||||
-- Sample Response
|
||||
{
|
||||
"workflow_id": "wpid_123456",
|
||||
"workflow_run_id": "wr_123456"
|
||||
}
|
||||
```
|
||||
|
||||
## Retrieving workflow runs
|
||||
|
||||
### `GET /workflows/{workflow_id}/runs/{workflow_run_id}`
|
||||
|
||||
### Response
|
||||
|
||||
| Parameter | Type | Sample value | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| workflow_id | String | wpid_123456 | |
|
||||
| workflow_run_id | String | wr_123456 | |
|
||||
| status | String | completed | Status of the workflow run. Possible values: created, running, failed, terminated, completed |
|
||||
| proxy_location | String | RESIDENTIAL | |
|
||||
| webhook_callback_url | String | 127.0.0.1:8000/api/v1/webhook | |
|
||||
| created_at | Timestamp | 2024-05-16T08:35:24.920793 | Timestamp for when the workflow run is created |
|
||||
| modified_at | Timestamp | 2024-05-16T08:42:32.568908 | Last modified timestamp for the workflow run |
|
||||
| parameters | JSON | see sample response below | The parameters that the workflow run was triggered with. For the invoice retrieval workflow, this field will have the website_url and invoice_retrieval_start_date values you sent. |
|
||||
| screenshot_urls | list[String] | see sample response below | Final screenshots for the last 3 tasks in the workflow. |
|
||||
| recording_url | String | see sample response below | The full browser recording. |
|
||||
| outputs | JSON | see sample response below | The output of every block in the workflow. See the "Explaining `outputs`" section below. |
|
||||
|
||||
### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"workflow_id": "wpid_123456",
|
||||
"workflow_run_id": "wr_123456",
|
||||
"status": "completed",
|
||||
"proxy_location": "RESIDENTIAL",
|
||||
"webhook_callback_url": "127.0.0.1:8000/api/v1/webhook",
|
||||
"created_at": "2024-05-16T08:35:24.920793",
|
||||
"modified_at": "2024-05-16T08:42:32.568908",
|
||||
"parameters": {
|
||||
"website_url": "YOUR_WEBSITE_URL",
|
||||
"invoice_retrieval_start_date": "2024-04-15"
|
||||
},
|
||||
"screenshot_urls": [
|
||||
"https://skyvern-artifacts.s3.amazonaws.com/...",
|
||||
"https://skyvern-artifacts.s3.amazonaws.com/...",
|
||||
"https://skyvern-artifacts.s3.amazonaws.com/..."
|
||||
],
|
||||
"recording_url": "https://skyvern-artifacts.s3.amazonaws.com/...",
|
||||
"outputs": {
|
||||
"login_output": {
|
||||
"task_id": "tsk_1234",
|
||||
"status": "completed",
|
||||
"extracted_information": null,
|
||||
"failure_reason": null,
|
||||
"errors": []
|
||||
},
|
||||
"get_order_history_page_url_and_qualifying_order_ids_output": {
|
||||
"task_id": "tsk_258409009008559418",
|
||||
"status": "completed",
|
||||
"extracted_information": {
|
||||
...
|
||||
},
|
||||
"failure_reason": null,
|
||||
"errors": []
|
||||
},
|
||||
"iterate_over_order_ids_output": [
|
||||
[
|
||||
{
|
||||
...
|
||||
}
|
||||
]
|
||||
],
|
||||
"download_invoice_for_order_output": {
|
||||
"task_id": "tsk_258409361195877732",
|
||||
"status": "completed",
|
||||
"extracted_information": null,
|
||||
"failure_reason": null,
|
||||
"errors": []
|
||||
},
|
||||
"upload_downloaded_files_to_s3_output": [
|
||||
"s3://skyvern-uploads/..."
|
||||
],
|
||||
"send_email_output": {
|
||||
"success": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Webhooks
|
||||
|
||||
Skyvern always sends webhooks when a workflow run is executed. The status for an executed workflow run can be: `completed`, `failed`, or `terminated`.
|
||||
|
||||
The webhook body is the same as the response body of the get workflow run endpoint.
|
||||
|
||||
### Webhook Headers
|
||||
|
||||
| Parameter | Type | Required? | Sample Value | Description |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| x-skyvern-signature | String | yes | v0=a2114d57b48eac39b9ad189dd8316235a7b4a8d21a10bd27519666489c69b503 | Authentication token that allows our service to communicate with your backend service via callback / webhook <br/> <br/> We’ll be using the same strategy Slack uses, as defined here: https://api.slack.com/authentication/verifying-requests-from-slack#making__validating-a-request |
|
||||
| x-skyvern-timestamp | String | yes | 1531420618 | Timestamp used to decode and validate the incoming webhook call <br/> <br/> We’ll be using the same strategy Slack uses, as defined here: https://api.slack.com/authentication/verifying-requests-from-slack#making__validating-a-request |
|
||||
|
||||
## Explaining `outputs`
|
||||
|
||||
If you checked out the sample response, you probably thought “What the heck is this field right here?”.
|
||||
|
||||
As covered earlier, workflows are essentially a list of building blocks. The `outputs` field contains the output of every single block in a workflow. Before we start analyzing the `outputs` from the sample above, let’s go over the building blocks for the invoice retrieval workflow.
|
||||
|
||||
### Building blocks of invoice retrieval workflow:
|
||||
|
||||
| # | Block type | Block label | Purpose |
|
||||
| --- | --- | --- | --- |
|
||||
| 1 | TaskBlock | login | Find login page, login to the website |
|
||||
| 2 | TaskBlock | get_order_history_page_url_and_qualifying_order_ids | Find the order history page, extract order history page url, contact emails, and order details for orders after the start date |
|
||||
| 3 | ForLoopBlock | iterate_over_order_ids | The contents of the ForLoop are executed for each order id that’s extracted from the previous step. |
|
||||
| 4 | TaskBlock [within ForLoopBlock] | download_invoice_for_order | For a given order id, find a way to download the invoice and download it. |
|
||||
| 5 | UploadToS3Block | upload_downloaded_files_to_s3 | Upload all downloaded invoices to S3 |
|
||||
| 6 | SendEmailBlock | send_email | Send an email attaching all the downloaded invoices |
|
||||
|
||||
### ⚠️ Still in development, not a blocker
|
||||
|
||||
1. The blocks within the ForLoop show up twice: within the ForLoop output and as a root block.
|
||||
2. UploadToS3Block output is S3 URIs at the moment. They’ll be updated with signed urls instead.
|
||||
3. Add block type to each object in `outputs`, define the output structure for each block for easier integration.
|
||||
|
||||
|
||||
```json
|
||||
...
|
||||
"outputs": {
|
||||
"login_output": {
|
||||
"task_id": "tsk_1234",
|
||||
"status": "completed",
|
||||
"extracted_information": null,
|
||||
"failure_reason": null,
|
||||
"errors": []
|
||||
},
|
||||
"get_order_history_page_url_and_qualifying_order_ids_output": {
|
||||
"task_id": "tsk_1234",
|
||||
"status": "completed",
|
||||
"extracted_information": {
|
||||
...
|
||||
},
|
||||
"failure_reason": null,
|
||||
"errors": []
|
||||
},
|
||||
"iterate_over_order_ids_output": [
|
||||
...
|
||||
],
|
||||
"download_invoice_for_order_output": {
|
||||
"task_id": "tsk_1234",
|
||||
"status": "completed",
|
||||
"extracted_information": null,
|
||||
"failure_reason": null,
|
||||
"errors": []
|
||||
},
|
||||
"upload_downloaded_files_to_s3_output": [
|
||||
"s3://skyvern-uploads/...",
|
||||
"s3://skyvern-uploads/..."
|
||||
],
|
||||
"send_email_output": {
|
||||
"success": true
|
||||
}
|
||||
}
|
||||
...
|
||||
```
|
||||
2
docs/workflows/workflow-blocks.mdx
Normal file
2
docs/workflows/workflow-blocks.mdx
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
<img src="https://thumbs.dreamstime.com/b/pug-dog-holding-pliers-screwdriver-behind-old-wooden-sign-text-under-construction-white-background-constructor-92836854.jpg" />
|
||||
Reference in New Issue
Block a user