2024-03-01 10:09:30 -08:00
from __future__ import annotations
from datetime import datetime
from enum import StrEnum
from typing import Any
2025-04-15 12:34:54 -07:00
from fastapi import status
from pydantic import BaseModel , Field , field_validator , model_validator
from typing_extensions import Self
2024-03-01 10:09:30 -08:00
2025-05-30 20:07:12 -07:00
from skyvern . config import settings
2025-04-15 12:34:54 -07:00
from skyvern . exceptions import (
InvalidTaskStatusTransition ,
SkyvernHTTPException ,
TaskAlreadyCanceled ,
TaskAlreadyTimeout ,
)
2024-11-26 11:29:33 +08:00
from skyvern . forge . sdk . db . enums import TaskType
2025-02-26 17:19:05 -08:00
from skyvern . forge . sdk . schemas . files import FileInfo
2025-05-24 23:46:31 -07:00
from skyvern . schemas . docs . doc_strings import PROXY_LOCATION_DOC_STRING
2025-03-24 15:15:21 -07:00
from skyvern . schemas . runs import ProxyLocation
2025-03-24 23:33:20 -07:00
from skyvern . utils . url_validators import validate_url
2024-12-16 11:22:51 +08:00
2024-10-17 23:47:59 -07:00
class TaskBase(BaseModel):
    # Fields common to TaskRequest and Task: where the task starts, what it
    # should achieve/extract, and the options that tune a run.

    # --- Identification and entry point ---
    title: str | None = Field(
        default=None,
        description="The title of the task.",
        examples=["Get a quote for car insurance"],
    )
    url: str = Field(
        ...,
        description="Starting URL for the task.",
        examples=["https://www.geico.com"],
    )

    # --- Callbacks and TOTP ---
    webhook_callback_url: str | None = Field(
        default=None,
        description="The URL to call when the task is completed.",
        examples=["https://my-webhook.com"],
    )
    totp_verification_url: str | None = None
    totp_identifier: str | None = None

    # --- Goals and their input data ---
    navigation_goal: str | None = Field(
        default=None,
        description="The user's goal for the task.",
        examples=["Get a quote for car insurance"],
    )
    data_extraction_goal: str | None = Field(
        default=None,
        description="The user's goal for data extraction.",
        examples=["Extract the quote price"],
    )
    navigation_payload: dict[str, Any] | list | str | None = Field(
        default=None,
        description="The user's details needed to achieve the task.",
        examples=[{"name": "John Doe", "email": "john@doe.com"}],
    )
    error_code_mapping: dict[str, str] | None = Field(
        default=None,
        description="The mapping of error codes and their descriptions.",
        examples=[
            {
                "out_of_stock": "Return this error when the product is out of stock",
                "not_found": "Return this error when the product is not found",
            }
        ],
    )

    # --- Browser / network options ---
    proxy_location: ProxyLocation | None = Field(
        default=None,
        description=PROXY_LOCATION_DOC_STRING,
    )
    extracted_information_schema: dict[str, Any] | list | str | None = Field(
        default=None,
        description="The requested schema of the extracted information.",
    )
    extra_http_headers: dict[str, str] | None = Field(
        default=None,
        description="The extra HTTP headers for the requests in browser.",
    )

    # --- Completion / termination rules ---
    complete_criterion: str | None = Field(
        default=None,
        description="Criterion to complete",
        examples=["Complete if 'hello world' shows up on the page"],
    )
    terminate_criterion: str | None = Field(
        default=None,
        description="Criterion to terminate",
        examples=["Terminate if 'existing account' shows up on the page"],
    )

    # --- Run behaviour ---
    # task_type defaults to TaskType.general (not None) even though None is accepted.
    task_type: TaskType | None = Field(
        default=TaskType.general,
        description="The type of the task",
        examples=[TaskType.general, TaskType.validation],
    )
    application: str | None = Field(
        default=None,
        description="The application for which the task is running",
        examples=["forms"],
    )
    include_action_history_in_verification: bool | None = Field(
        default=False,
        description="Whether to include the action history when verifying the task is complete",
        examples=[True, False],
    )
    max_screenshot_scrolling_times: int | None = Field(
        default=None,
        description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's None or 0, it takes the current viewpoint screenshot.",
        examples=[10],
    )
2024-03-01 10:09:30 -08:00
2024-10-17 23:47:59 -07:00
class TaskRequest(TaskBase):
    # Task payload extended with browser_session_id / model, plus URL validation
    # applied to `url`, `webhook_callback_url` and `totp_verification_url`.

    url: str = Field(
        ...,
        description="Starting URL for the task.",
        examples=["https://www.geico.com"],
    )
    webhook_callback_url: str | None = Field(
        default=None,
        description="The URL to call when the task is completed.",
        examples=["https://my-webhook.com"],
    )
    totp_verification_url: str | None = None
    browser_session_id: str | None = None
    model: dict[str, Any] | None = None

    @model_validator(mode="after")
    def validate_url(self) -> Self:
        """Validate `url` and replace it with the value returned by the URL validator.

        An empty `url` is accepted unchanged when a `browser_session_id` is set.

        Raises:
            SkyvernHTTPException: (HTTP 400) when the module-level `validate_url`
                helper returns None for the given URL.
        """
        raw_url = self.url
        # Empty URL + a browser session id: nothing to validate, keep as-is.
        if not raw_url and self.browser_session_id is not None:
            return self

        # Inside the method body this name resolves to the imported helper,
        # not to this method (class scope is skipped in function lookups).
        checked_url = validate_url(raw_url)
        if checked_url is None:
            raise SkyvernHTTPException(message=f"Invalid URL: {raw_url}", status_code=status.HTTP_400_BAD_REQUEST)

        self.url = checked_url
        return self

    @field_validator("webhook_callback_url", "totp_verification_url")
    @classmethod
    def validate_optional_urls(cls, url: str | None) -> str | None:
        """Pass None through untouched; otherwise return the URL validator's result."""
        return None if url is None else validate_url(url)
2024-10-20 18:33:05 -07:00
2024-10-17 23:47:59 -07:00
2024-03-01 10:09:30 -08:00
class TaskStatus ( StrEnum ) :
created = " created "
2024-04-08 22:56:49 -07:00
queued = " queued "
2024-03-01 10:09:30 -08:00
running = " running "
2024-04-08 22:56:49 -07:00
timed_out = " timed_out "
2024-03-01 10:09:30 -08:00
failed = " failed "
terminated = " terminated "
completed = " completed "
2024-06-26 15:25:15 -07:00
canceled = " canceled "
2024-03-01 10:09:30 -08:00
def is_final ( self ) - > bool :
2024-05-16 18:20:11 -07:00
return self in {
TaskStatus . failed ,
TaskStatus . terminated ,
TaskStatus . completed ,
TaskStatus . timed_out ,
2024-06-26 15:25:15 -07:00
TaskStatus . canceled ,
2024-05-16 18:20:11 -07:00
}
2024-03-01 10:09:30 -08:00
def can_update_to ( self , new_status : TaskStatus ) - > bool :
allowed_transitions : dict [ TaskStatus , set [ TaskStatus ] ] = {
2024-05-16 18:20:11 -07:00
TaskStatus . created : {
TaskStatus . queued ,
TaskStatus . running ,
TaskStatus . timed_out ,
2024-06-13 21:53:46 +08:00
TaskStatus . failed ,
2024-06-26 15:25:15 -07:00
TaskStatus . canceled ,
2024-06-13 21:53:46 +08:00
} ,
TaskStatus . queued : {
TaskStatus . running ,
TaskStatus . timed_out ,
TaskStatus . failed ,
2024-06-26 15:25:15 -07:00
TaskStatus . canceled ,
2024-05-16 18:20:11 -07:00
} ,
TaskStatus . running : {
TaskStatus . completed ,
TaskStatus . failed ,
TaskStatus . terminated ,
TaskStatus . timed_out ,
2024-06-26 15:25:15 -07:00
TaskStatus . canceled ,
2024-05-16 18:20:11 -07:00
} ,
2024-03-01 10:09:30 -08:00
TaskStatus . failed : set ( ) ,
2024-04-08 22:56:49 -07:00
TaskStatus . terminated : set ( ) ,
2024-03-01 10:09:30 -08:00
TaskStatus . completed : set ( ) ,
2024-05-25 18:52:03 -07:00
TaskStatus . timed_out : set ( ) ,
2024-06-26 15:25:15 -07:00
TaskStatus . canceled : { TaskStatus . completed } ,
2024-03-01 10:09:30 -08:00
}
return new_status in allowed_transitions [ self ]
def requires_extracted_info ( self ) - > bool :
status_requires_extracted_information = { TaskStatus . completed }
return self in status_requires_extracted_information
def cant_have_extracted_info ( self ) - > bool :
status_cant_have_extracted_information = {
TaskStatus . created ,
2024-04-08 22:56:49 -07:00
TaskStatus . queued ,
2024-03-01 10:09:30 -08:00
TaskStatus . running ,
TaskStatus . failed ,
TaskStatus . terminated ,
}
return self in status_cant_have_extracted_information
def requires_failure_reason ( self ) - > bool :
status_requires_failure_reason = { TaskStatus . failed , TaskStatus . terminated }
return self in status_requires_failure_reason
2024-10-17 23:47:59 -07:00
class Task(TaskBase):
    # A stored task: the TaskBase fields plus identifiers, status, results,
    # timestamps and bookkeeping used while the task runs.

    created_at: datetime = Field(
        ...,
        description="The creation datetime of the task.",
        examples=["2023-01-01T00:00:00Z"],
    )
    modified_at: datetime = Field(
        ...,
        description="The modification datetime of the task.",
        examples=["2023-01-01T00:00:00Z"],
    )
    task_id: str = Field(
        ...,
        description="The ID of the task.",
        examples=["50da533e-3904-4401-8a07-c49adf88b5eb"],
    )
    status: TaskStatus = Field(..., description="The status of the task.", examples=["created"])
    extracted_information: dict[str, Any] | list | str | None = Field(
        None,
        description="The extracted information from the task.",
    )
    failure_reason: str | None = Field(
        None,
        description="The reason for the task failure.",
    )
    organization_id: str
    workflow_run_id: str | None = None
    workflow_permanent_id: str | None = None
    order: int | None = None
    retry: int | None = None
    max_steps_per_run: int | None = None
    errors: list[dict[str, Any]] = []
    model: dict[str, Any] | None = None
    queued_at: datetime | None = None
    started_at: datetime | None = None
    finished_at: datetime | None = None

    @property
    def llm_key(self) -> str | None:
        """
        If the `Task` has a `model` defined, then return the mapped llm_key for it.

        Otherwise return `None`.
        """
        if self.model:
            model_name = self.model.get("model_name")
            if model_name:
                mapping = settings.get_model_name_to_llm_key()
                # Unknown model names fall back to an empty dict, yielding None.
                return mapping.get(model_name, {}).get("llm_key")

        return None

    def validate_update(
        self,
        status: TaskStatus,
        extracted_information: dict[str, Any] | list | str | None,
        failure_reason: str | None = None,
    ) -> None:
        """Check that this task may be updated to `status` with the given data.

        Args:
            status: The status the task is about to move to.
            extracted_information: Extracted data that would be stored with the update.
            failure_reason: Failure reason that would be stored with the update.

        Raises:
            TaskAlreadyCanceled: The transition is not allowed and the task is canceled.
            TaskAlreadyTimeout: The transition is not allowed and the task is timed out.
            InvalidTaskStatusTransition: Any other disallowed transition.
            ValueError: The data does not match what `status` requires or permits,
                or an existing failure reason would be overridden.
        """
        old_status = self.status

        if not old_status.can_update_to(status):
            # Report the two most specific causes with dedicated exceptions.
            if old_status == TaskStatus.canceled:
                raise TaskAlreadyCanceled(new_status=status, task_id=self.task_id)
            if old_status == TaskStatus.timed_out:
                raise TaskAlreadyTimeout(task_id=self.task_id)
            raise InvalidTaskStatusTransition(old_status=old_status, new_status=status, task_id=self.task_id)

        if status.requires_failure_reason() and failure_reason is None:
            raise ValueError(f"status_requires_failure_reason({status}, {self.task_id})")

        # Extracted information is only mandatory when the task asked for extraction.
        if status.requires_extracted_info() and self.data_extraction_goal and extracted_information is None:
            raise ValueError(f"status_requires_extracted_information({status}, {self.task_id})")

        if status.cant_have_extracted_info() and extracted_information is not None:
            raise ValueError(f"status_cant_have_extracted_information({self.task_id})")

        if self.failure_reason is not None and failure_reason is not None:
            raise ValueError(f"cant_override_failure_reason({self.task_id})")

    def to_task_response(
        self,
        action_screenshot_urls: list[str] | None = None,
        screenshot_url: str | None = None,
        recording_url: str | None = None,
        browser_console_log_url: str | None = None,
        downloaded_files: list[FileInfo] | None = None,
        failure_reason: str | None = None,
    ) -> TaskResponse:
        """Build a `TaskResponse` from this task plus the supplied artifact links.

        A truthy `failure_reason` argument takes precedence over the task's own
        `failure_reason`. `downloaded_file_urls` is derived from the `url` of
        each entry in `downloaded_files`, or is None when that list is empty/None.
        """
        return TaskResponse(
            request=self,
            task_id=self.task_id,
            status=self.status,
            created_at=self.created_at,
            modified_at=self.modified_at,
            queued_at=self.queued_at,
            started_at=self.started_at,
            finished_at=self.finished_at,
            extracted_information=self.extracted_information,
            failure_reason=failure_reason or self.failure_reason,
            action_screenshot_urls=action_screenshot_urls,
            screenshot_url=screenshot_url,
            recording_url=recording_url,
            browser_console_log_url=browser_console_log_url,
            downloaded_files=downloaded_files,
            downloaded_file_urls=[file.url for file in downloaded_files] if downloaded_files else None,
            errors=self.errors,
            max_steps_per_run=self.max_steps_per_run,
            workflow_run_id=self.workflow_run_id,
            max_screenshot_scrolling_times=self.max_screenshot_scrolling_times,
        )
class TaskResponse(BaseModel):
    # Response payload for a task: the originating request, current state,
    # results, artifact links and timing information.

    # --- Identity and state ---
    request: TaskBase
    task_id: str
    status: TaskStatus
    created_at: datetime
    modified_at: datetime

    # --- Results and artifacts ---
    extracted_information: list | dict[str, Any] | str | None = None
    action_screenshot_urls: list[str] | None = None
    screenshot_url: str | None = None
    recording_url: str | None = None
    browser_console_log_url: str | None = None
    downloaded_files: list[FileInfo] | None = None
    downloaded_file_urls: list[str] | None = None

    # --- Failure details ---
    failure_reason: str | None = None
    errors: list[dict[str, Any]] = []

    # --- Run metadata ---
    max_steps_per_run: int | None = None
    workflow_run_id: str | None = None
    queued_at: datetime | None = None
    started_at: datetime | None = None
    finished_at: datetime | None = None
    max_screenshot_scrolling_times: int | None = None
2024-03-01 10:09:30 -08:00
2024-05-14 00:35:37 -07:00
class TaskOutput(BaseModel):
    # Outcome of a task: status, extracted data, failure details and downloads.
    task_id: str
    status: TaskStatus
    extracted_information: list | dict[str, Any] | str | None = None
    failure_reason: str | None = None
    errors: list[dict[str, Any]] = []
    downloaded_files: list[FileInfo] | None = None
    downloaded_file_urls: list[str] | None = None  # For backward compatibility

    @staticmethod
    def from_task(task: Task, downloaded_files: list[FileInfo] | None = None) -> TaskOutput:
        """Create a `TaskOutput` from `task` and an optional list of downloaded files.

        `downloaded_file_urls` holds the `url` of every entry in
        `downloaded_files`; it is None when that list is empty or None.
        """
        # For backward compatibility, expose the bare URLs next to the FileInfo objects.
        if downloaded_files:
            file_urls = [info.url for info in downloaded_files]
        else:
            file_urls = None

        return TaskOutput(
            task_id=task.task_id,
            status=task.status,
            extracted_information=task.extracted_information,
            failure_reason=task.failure_reason,
            errors=task.errors,
            downloaded_files=downloaded_files,
            downloaded_file_urls=file_urls,
        )
2024-03-01 10:09:30 -08:00
class CreateTaskResponse(BaseModel):
    # Response model carrying only the task's ID.
    task_id: str
2024-10-21 10:34:42 -07:00
class OrderBy ( StrEnum ) :
created_at = " created_at "
modified_at = " modified_at "
class SortDirection ( StrEnum ) :
asc = " asc "
desc = " desc "
2025-05-29 06:15:04 -07:00
class ModelsResponse(BaseModel):
    # Response model wrapping a string-to-string mapping of models.
    models: dict[str, str]