2024-12-19 17:26:08 -08:00
import os
import random
import string
2025-02-02 03:10:38 +08:00
from datetime import UTC , datetime
2024-12-19 17:26:08 -08:00
from typing import Any
2025-01-14 08:59:53 -08:00
import httpx
2024-12-19 17:26:08 -08:00
import structlog
2025-03-06 18:27:19 -08:00
from playwright . async_api import Page
2024-12-31 15:03:36 -08:00
from sqlalchemy . exc import OperationalError
2024-12-19 17:26:08 -08:00
2025-03-04 01:07:07 -05:00
from skyvern . config import settings
2025-02-23 16:03:49 -08:00
from skyvern . exceptions import FailedToSendWebhook , TaskTerminationError , TaskV2NotFound , UrlGenerationFailure
2024-12-19 17:26:08 -08:00
from skyvern . forge import app
from skyvern . forge . prompts import prompt_engine
2024-12-27 09:04:09 -08:00
from skyvern . forge . sdk . artifact . models import ArtifactType
2024-12-19 17:26:08 -08:00
from skyvern . forge . sdk . core import skyvern_context
2025-02-09 20:30:19 +08:00
from skyvern . forge . sdk . core . hashing import generate_url_hash
2025-01-14 08:59:53 -08:00
from skyvern . forge . sdk . core . security import generate_skyvern_webhook_headers
2024-12-19 17:26:08 -08:00
from skyvern . forge . sdk . core . skyvern_context import SkyvernContext
2025-01-14 08:59:53 -08:00
from skyvern . forge . sdk . db . enums import OrganizationAuthTokenType
2025-02-22 03:21:19 -08:00
from skyvern . forge . sdk . schemas . organizations import Organization
2025-02-27 20:19:02 -08:00
from skyvern . forge . sdk . schemas . task_v2 import TaskV2 , TaskV2Metadata , TaskV2Status , ThoughtScenario , ThoughtType
2024-12-22 20:54:53 -08:00
from skyvern . forge . sdk . schemas . workflow_runs import WorkflowRunTimeline , WorkflowRunTimelineType
2024-12-19 17:26:08 -08:00
from skyvern . forge . sdk . workflow . models . block import (
BlockResult ,
BlockStatus ,
BlockTypeVar ,
ExtractionBlock ,
ForLoopBlock ,
NavigationBlock ,
TaskBlock ,
2025-02-06 00:50:32 +08:00
UrlBlock ,
2024-12-19 17:26:08 -08:00
)
from skyvern . forge . sdk . workflow . models . parameter import PARAMETER_TYPE , ContextParameter
2025-01-25 04:08:51 +08:00
from skyvern . forge . sdk . workflow . models . workflow import (
Workflow ,
WorkflowRequestBody ,
WorkflowRun ,
WorkflowRunStatus ,
WorkflowStatus ,
)
2024-12-19 17:26:08 -08:00
from skyvern . forge . sdk . workflow . models . yaml import (
BLOCK_YAML_TYPES ,
PARAMETER_YAML_TYPES ,
ContextParameterYAML ,
ExtractionBlockYAML ,
ForLoopBlockYAML ,
NavigationBlockYAML ,
TaskBlockYAML ,
2025-02-06 00:50:32 +08:00
UrlBlockYAML ,
2024-12-19 17:26:08 -08:00
WorkflowCreateYAMLRequest ,
WorkflowDefinitionYAML ,
)
2025-03-30 18:34:48 -07:00
from skyvern . schemas . runs import ProxyLocation , RunType
2024-12-19 17:26:08 -08:00
from skyvern . webeye . browser_factory import BrowserState
from skyvern . webeye . scraper . scraper import ElementTreeFormat , ScrapedPage , scrape_website
from skyvern . webeye . utils . page import SkyvernFrame
LOG = structlog.get_logger()

# Fallback workflow title when LLM metadata generation doesn't supply one.
DEFAULT_WORKFLOW_TITLE = "New Workflow"
# Alphanumeric character pool for generating random identifiers.
RANDOM_STRING_POOL = string.ascii_letters + string.digits
# Upper bound on planning iterations in a task v2 run (see the iteration loop below).
DEFAULT_MAX_ITERATIONS = 10

# Prompt fragment combining a single-iteration mini goal with the user's
# overall goal, used when generating navigation tasks.
MINI_GOAL_TEMPLATE = """Achieve the following mini goal and once it's achieved, complete: {mini_goal}
This mini goal is part of the big goal the user wants to achieve and use the big goal as context to achieve the mini goal: {main_goal}"""
2024-12-19 17:26:08 -08:00
2025-01-08 23:53:21 -08:00
def _generate_data_extraction_schema_for_loop ( loop_values_key : str ) - > dict :
return {
" type " : " object " ,
" properties " : {
loop_values_key : {
" type " : " array " ,
" description " : ' User will later iterate through this array of values to achieve their " big goal " in the web. In each iteration, the user will try to take the same actions in the web but with a different value of its own. If the value is a url link, make sure it is a full url with http/https protocol, domain and path if any, based on the current url. For examples: \n 1. When the goal is " Open up to 10 links from an ecomm search result page, and extract information like the price of each product. " , user will iterate through an array of product links or URLs. In each iteration, the user will go to the linked page and extrat price information of the product. As a result, the array consists of 10 product urls scraped from the search result page. \n 2. When the goal is " download 10 documents found on a page " , user will iterate through an array of document names. In each iteration, the user will use a different value variant to start from the same page (the existing page) and take actions based on the variant. As a result, the array consists of up to 10 document names scraped from the page that the user wants to download. ' ,
" items " : { " type " : " string " , " description " : " The relevant value " } ,
} ,
" is_loop_value_link " : {
" type " : " boolean " ,
" description " : " true if the loop_values is an array of urls to be visited for each task. false if the loop_values is an array of non-link values to be used in each task (for each task they start from the same page / link). " ,
} ,
} ,
}
2024-12-19 17:26:08 -08:00
2025-02-23 22:17:28 -08:00
async def initialize_task_v2(
    organization: Organization,
    user_prompt: str,
    user_url: str | None = None,
    proxy_location: ProxyLocation | None = None,
    totp_identifier: str | None = None,
    totp_verification_url: str | None = None,
    webhook_callback_url: str | None = None,
    publish_workflow: bool = False,
    parent_workflow_run_id: str | None = None,
    extracted_information_schema: dict | list | str | None = None,
    error_code_mapping: dict | None = None,
    create_task_run: bool = False,
) -> TaskV2:
    """Create and wire up a new task v2 record.

    Steps, in order:
      1. Persist the task v2 record.
      2. Generate metadata (url, workflow title) for it via an LLM call,
         recorded as a "metadata" thought.
      3. Create an empty workflow and a queued workflow run for the task.
      4. Link the workflow/run ids (and url) back onto the task v2 record,
         optionally also creating a task-run record.

    Args:
        organization: The owning organization.
        user_prompt: The user's goal for this task.
        user_url: Optional starting URL; when absent the LLM-generated url is used.
        proxy_location: Optional proxy location for browsing.
        totp_identifier: Optional identifier for TOTP (2FA) retrieval.
        totp_verification_url: Optional endpoint for TOTP verification.
        webhook_callback_url: Optional webhook to notify on completion.
        publish_workflow: When True the generated workflow is published
            instead of marked auto-generated.
        parent_workflow_run_id: Set when this task runs nested inside another
            workflow run.
        extracted_information_schema: Optional schema for the extraction output.
        error_code_mapping: Optional custom error-code mapping.
        create_task_run: When True, also create a task-run record pointing at
            this task v2.

    Returns:
        The created (and updated) TaskV2.

    Raises:
        UrlGenerationFailure: If neither the user nor the LLM produced a URL.
        Exception: Workflow setup / final update failures are re-raised after
            the task v2 is marked failed.
    """
    task_v2 = await app.DATABASE.create_task_v2(
        prompt=user_prompt,
        organization_id=organization.organization_id,
        totp_verification_url=totp_verification_url,
        totp_identifier=totp_identifier,
        webhook_callback_url=webhook_callback_url,
        proxy_location=proxy_location,
        extracted_information_schema=extracted_information_schema,
        error_code_mapping=error_code_mapping,
    )
    # set task_v2_id in context so downstream logging/handlers can see it
    # NOTE: observer_cruise_id is the task v2's id (legacy field name — confirm)
    context = skyvern_context.current()
    if context:
        context.task_v2_id = task_v2.observer_cruise_id

    # Record a "metadata" thought that the LLM call below will be attached to.
    thought = await app.DATABASE.create_thought(
        task_v2_id=task_v2.observer_cruise_id,
        organization_id=organization.organization_id,
        thought_type=ThoughtType.metadata,
        thought_scenario=ThoughtScenario.generate_metadata,
    )
    metadata_prompt = prompt_engine.load_prompt("task_v2_generate_metadata", user_goal=user_prompt, user_url=user_url)
    metadata_response = await app.LLM_API_HANDLER(
        prompt=metadata_prompt,
        thought=thought,
        prompt_name="task_v2_generate_metadata",
    )
    # validate
    LOG.info(f"Initialized task v2 initial response: {metadata_response}")
    # A user-supplied URL always wins over the LLM-generated one.
    url: str = user_url or metadata_response.get("url", "")
    if not url:
        raise UrlGenerationFailure()
    title: str = metadata_response.get("title", DEFAULT_WORKFLOW_TITLE)
    metadata = TaskV2Metadata(
        url=url,
        workflow_title=title,
    )
    # Re-read the url from the validated metadata model (it may normalize it).
    url = metadata.url
    if not url:
        raise UrlGenerationFailure()
    # create workflow and workflow run
    max_steps_override = 10
    try:
        workflow_status = WorkflowStatus.published if publish_workflow else WorkflowStatus.auto_generated
        new_workflow = await app.WORKFLOW_SERVICE.create_empty_workflow(
            organization,
            metadata.workflow_title,
            proxy_location=proxy_location,
            status=workflow_status,
        )
        workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run(
            request_id=None,
            workflow_request=WorkflowRequestBody(),
            workflow_permanent_id=new_workflow.workflow_permanent_id,
            organization_id=organization.organization_id,
            version=None,
            max_steps_override=max_steps_override,
            parent_workflow_run_id=parent_workflow_run_id,
        )
    except Exception:
        LOG.error("Failed to setup cruise workflow run", exc_info=True)
        # fail the workflow run, then propagate to the caller
        await mark_task_v2_as_failed(
            task_v2_id=task_v2.observer_cruise_id,
            workflow_run_id=task_v2.workflow_run_id,
            failure_reason="Skyvern failed to setup the workflow run",
            organization_id=organization.organization_id,
        )
        raise
    try:
        # Attach workflow/run ids and the LLM output to the metadata thought.
        await app.DATABASE.update_thought(
            thought_id=thought.observer_thought_id,
            organization_id=organization.organization_id,
            workflow_run_id=workflow_run.workflow_run_id,
            workflow_id=new_workflow.workflow_id,
            workflow_permanent_id=new_workflow.workflow_permanent_id,
            thought=metadata_response.get("thoughts", ""),
            output=metadata.model_dump(),
        )
    except Exception:
        # Best-effort: a failed thought update doesn't fail the task.
        LOG.warning("Failed to update thought", exc_info=True)

    # update the task v2 record with the workflow/run linkage
    try:
        task_v2 = await app.DATABASE.update_task_v2(
            task_v2_id=task_v2.observer_cruise_id,
            workflow_run_id=workflow_run.workflow_run_id,
            workflow_id=new_workflow.workflow_id,
            workflow_permanent_id=new_workflow.workflow_permanent_id,
            url=url,
            organization_id=organization.organization_id,
        )
        if create_task_run:
            await app.DATABASE.create_task_run(
                task_run_type=RunType.task_v2,
                organization_id=organization.organization_id,
                run_id=task_v2.observer_cruise_id,
                title=new_workflow.title,
                url=url,
                url_hash=generate_url_hash(url),
            )
    except Exception:
        LOG.warning("Failed to update task 2.0", exc_info=True)
        # fail the workflow run, then propagate to the caller
        await mark_task_v2_as_failed(
            task_v2_id=task_v2.observer_cruise_id,
            workflow_run_id=workflow_run.workflow_run_id,
            failure_reason="Skyvern failed to update the task 2.0 after initializing the workflow run",
            organization_id=organization.organization_id,
        )
        raise
    return task_v2
2024-12-19 17:26:08 -08:00
2025-02-23 22:17:28 -08:00
async def run_task_v2(
    organization: Organization,
    task_v2_id: str,
    request_id: str | None = None,
    max_steps_override: str | int | None = None,
    browser_session_id: str | None = None,
) -> TaskV2:
    """Fetch a task v2 by id and execute it, mapping failures to terminal states.

    Delegates the actual execution to ``run_task_v2_helper`` and translates
    exceptions into task v2 terminal statuses:
      - ``TaskTerminationError``  -> terminated
      - ``OperationalError``      -> failed (database error)
      - any other ``Exception``   -> failed (with the exception message)

    Regardless of outcome, the workflow is cleaned up (unless this is a child
    run, whose parent owns cleanup) and the skyvern context is reset.

    Args:
        organization: The owning organization.
        task_v2_id: Id of the task v2 to run.
        request_id: Optional request id to record in the skyvern context.
        max_steps_override: Optional max-steps override (string or int).
        browser_session_id: Optional persistent browser session to reuse.

    Returns:
        The final TaskV2 record.

    Raises:
        TaskV2NotFound: If no task v2 exists for the given id.
    """
    organization_id = organization.organization_id
    try:
        task_v2 = await app.DATABASE.get_task_v2(task_v2_id, organization_id=organization_id)
    except Exception:
        LOG.error(
            "Failed to get task v2",
            task_v2_id=task_v2_id,
            organization_id=organization_id,
            exc_info=True,
        )
        return await mark_task_v2_as_failed(
            task_v2_id,
            organization_id=organization_id,
            failure_reason="Failed to get task v2",
        )
    if not task_v2:
        LOG.error("Task v2 not found", task_v2_id=task_v2_id, organization_id=organization_id)
        raise TaskV2NotFound(task_v2_id=task_v2_id)

    workflow, workflow_run = None, None
    try:
        workflow, workflow_run, task_v2 = await run_task_v2_helper(
            organization=organization,
            task_v2=task_v2,
            request_id=request_id,
            max_steps_override=max_steps_override,
            browser_session_id=browser_session_id,
        )
    except TaskTerminationError as e:
        task_v2 = await mark_task_v2_as_terminated(
            task_v2_id=task_v2_id,
            workflow_run_id=task_v2.workflow_run_id,
            organization_id=organization_id,
            failure_reason=e.message,
        )
        LOG.info("Task v2 is terminated", task_v2_id=task_v2_id, failure_reason=e.message)
        return task_v2
    except OperationalError:
        LOG.error("Database error when running task v2", exc_info=True)
        task_v2 = await mark_task_v2_as_failed(
            task_v2_id,
            workflow_run_id=task_v2.workflow_run_id,
            failure_reason="Database error when running task 2.0",
            organization_id=organization_id,
        )
    except Exception as e:
        LOG.error("Failed to run task v2", exc_info=True)
        failure_reason = f"Failed to run task 2.0: {str(e)}"
        task_v2 = await mark_task_v2_as_failed(
            task_v2_id,
            workflow_run_id=task_v2.workflow_run_id,
            failure_reason=failure_reason,
            organization_id=organization_id,
        )
    finally:
        # BUGFIX: previously a single compound condition sent child runs
        # (parent_workflow_run_id set) into the "not found" warning branch,
        # logging a false "Workflow or workflow run not found". Now the
        # warning fires only when workflow/workflow_run are truly missing.
        if workflow and workflow_run:
            if workflow_run.parent_workflow_run_id is None:
                # Only top-level runs clean up; child runs are cleaned up by
                # their parent workflow run.
                await app.WORKFLOW_SERVICE.clean_up_workflow(
                    workflow=workflow,
                    workflow_run=workflow_run,
                    browser_session_id=browser_session_id,
                    # Keep the browser open when a persistent session is in use.
                    close_browser_on_completion=browser_session_id is None,
                )
        else:
            LOG.warning("Workflow or workflow run not found")

        skyvern_context.reset()

    return task_v2
2025-01-27 12:53:23 +08:00
2024-12-31 15:03:36 -08:00
2025-02-23 22:17:28 -08:00
async def run_task_v2_helper (
2024-12-31 15:03:36 -08:00
organization : Organization ,
2025-02-27 20:19:02 -08:00
task_v2 : TaskV2 ,
2024-12-31 15:03:36 -08:00
request_id : str | None = None ,
2025-03-04 01:07:07 -05:00
max_steps_override : str | int | None = None ,
2025-01-09 22:04:53 +01:00
browser_session_id : str | None = None ,
2025-02-27 20:19:02 -08:00
) - > tuple [ Workflow , WorkflowRun , TaskV2 ] | tuple [ None , None , TaskV2 ] :
2024-12-31 15:03:36 -08:00
organization_id = organization . organization_id
2025-02-23 22:17:28 -08:00
task_v2_id = task_v2 . observer_cruise_id
2025-02-27 20:19:02 -08:00
if task_v2 . status != TaskV2Status . queued :
2024-12-19 17:26:08 -08:00
LOG . error (
2025-02-23 16:03:49 -08:00
" Task v2 is not queued. Duplicate task v2 " ,
task_v2_id = task_v2_id ,
2025-02-23 22:17:28 -08:00
status = task_v2 . status ,
2024-12-19 17:26:08 -08:00
organization_id = organization_id ,
)
2025-02-23 22:17:28 -08:00
return None , None , task_v2
if not task_v2 . url or not task_v2 . prompt :
2024-12-19 17:26:08 -08:00
LOG . error (
2025-02-23 16:03:49 -08:00
" Task v2 url or prompt not found " ,
task_v2_id = task_v2_id ,
2024-12-19 17:26:08 -08:00
organization_id = organization_id ,
)
2025-02-23 22:17:28 -08:00
return None , None , task_v2
if not task_v2 . workflow_run_id :
2024-12-19 17:26:08 -08:00
LOG . error (
2025-02-23 16:03:49 -08:00
" Workflow run id not found in task v2 " ,
task_v2_id = task_v2_id ,
2024-12-19 17:26:08 -08:00
organization_id = organization_id ,
)
2025-02-23 22:17:28 -08:00
return None , None , task_v2
2024-12-19 17:26:08 -08:00
2025-03-04 01:07:07 -05:00
int_max_steps_override = None
if max_steps_override :
2024-12-19 17:26:08 -08:00
try :
2025-03-04 01:07:07 -05:00
int_max_steps_override = int ( max_steps_override )
LOG . info ( " max_steps_override is set " , max_steps = int_max_steps_override )
2024-12-19 17:26:08 -08:00
except ValueError :
LOG . info (
2025-03-04 01:07:07 -05:00
" max_steps_override isn ' t an integer, won ' t override " ,
max_steps_override = max_steps_override ,
2024-12-19 17:26:08 -08:00
)
2025-02-23 22:17:28 -08:00
workflow_run_id = task_v2 . workflow_run_id
2024-12-19 17:26:08 -08:00
2024-12-22 17:49:33 -08:00
workflow_run = await app . WORKFLOW_SERVICE . get_workflow_run ( workflow_run_id , organization_id = organization_id )
2024-12-19 17:26:08 -08:00
if not workflow_run :
LOG . error ( " Workflow run not found " , workflow_run_id = workflow_run_id )
2025-02-23 22:17:28 -08:00
return None , None , task_v2
2024-12-19 17:26:08 -08:00
else :
LOG . info ( " Workflow run found " , workflow_run_id = workflow_run_id )
if workflow_run . status != WorkflowRunStatus . queued :
LOG . warning ( " Duplicate workflow run execution " , workflow_run_id = workflow_run_id , status = workflow_run . status )
2025-02-23 22:17:28 -08:00
return None , None , task_v2
2024-12-19 17:26:08 -08:00
workflow_id = workflow_run . workflow_id
workflow = await app . WORKFLOW_SERVICE . get_workflow ( workflow_id , organization_id = organization_id )
if not workflow :
LOG . error ( " Workflow not found " , workflow_id = workflow_id )
2025-02-23 22:17:28 -08:00
return None , None , task_v2
2024-12-19 17:26:08 -08:00
2025-02-27 20:19:02 -08:00
###################### run task v2 ######################
2024-12-19 17:26:08 -08:00
skyvern_context . set (
SkyvernContext (
organization_id = organization_id ,
workflow_id = workflow_id ,
workflow_run_id = workflow_run_id ,
request_id = request_id ,
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-03-02 23:39:18 -05:00
browser_session_id = browser_session_id ,
2024-12-19 17:26:08 -08:00
)
)
2025-02-23 22:17:28 -08:00
task_v2 = await app . DATABASE . update_task_v2 (
2025-02-27 20:19:02 -08:00
task_v2_id = task_v2_id , organization_id = organization_id , status = TaskV2Status . running
2024-12-19 17:26:08 -08:00
)
await app . WORKFLOW_SERVICE . mark_workflow_run_as_running ( workflow_run_id = workflow_run . workflow_run_id )
2025-02-03 23:18:41 +08:00
await _set_up_workflow_context ( workflow_id , workflow_run_id , organization )
2024-12-19 17:26:08 -08:00
2025-02-23 22:17:28 -08:00
url = str ( task_v2 . url )
user_prompt = task_v2 . prompt
2024-12-19 17:26:08 -08:00
task_history : list [ dict ] = [ ]
yaml_blocks : list [ BLOCK_YAML_TYPES ] = [ ]
yaml_parameters : list [ PARAMETER_YAML_TYPES ] = [ ]
2025-03-04 01:07:07 -05:00
max_steps = int_max_steps_override or settings . MAX_STEPS_PER_TASK_V2
for i in range ( DEFAULT_MAX_ITERATIONS ) :
2025-02-22 00:44:12 -08:00
# validate the task execution
await app . AGENT_FUNCTION . validate_task_execution (
organization_id = organization_id ,
2025-02-23 16:03:49 -08:00
task_id = task_v2_id ,
2025-02-22 00:44:12 -08:00
task_version = " v2 " ,
)
2025-02-18 23:21:17 +08:00
# check the status of the workflow run
workflow_run = await app . WORKFLOW_SERVICE . get_workflow_run ( workflow_run_id , organization_id = organization_id )
if not workflow_run :
LOG . error ( " Workflow run not found " , workflow_run_id = workflow_run_id )
break
if workflow_run . status == WorkflowRunStatus . canceled :
LOG . info (
" Task v2 is canceled. Stopping task v2 " ,
workflow_run_id = workflow_run_id ,
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-02-18 23:21:17 +08:00
)
2025-02-23 22:17:28 -08:00
await mark_task_v2_as_canceled (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-02-18 23:21:17 +08:00
workflow_run_id = workflow_run_id ,
organization_id = organization_id ,
)
2025-02-23 22:17:28 -08:00
return workflow , workflow_run , task_v2
2025-02-18 23:21:17 +08:00
2025-02-27 20:19:02 -08:00
LOG . info ( f " Task v2 iteration i= { i } " , workflow_run_id = workflow_run_id , url = url )
2025-02-06 00:50:32 +08:00
task_type = " "
plan = " "
block : BlockTypeVar | None = None
task_history_record : dict [ str , Any ] = { }
context = skyvern_context . ensure_context ( )
2025-03-06 18:27:19 -08:00
current_url : str | None = None
page : Page | None = None
browser_state = app . BROWSER_MANAGER . get_for_workflow_run ( workflow_run_id , workflow_run . parent_workflow_run_id )
if browser_state :
page = await browser_state . get_working_page ( )
if page :
current_url = await SkyvernFrame . get_url ( page )
if i == 0 and current_url != url :
2025-02-06 00:50:32 +08:00
# The first iteration is always a GOTO_URL task
task_type = " goto_url "
plan = f " Go to this website: { url } "
task_history_record = { " type " : task_type , " task " : plan }
block , block_yaml_list , parameter_yaml_list = await _generate_goto_url_task (
workflow_id = workflow_id ,
2025-01-08 21:27:11 -08:00
url = url ,
)
2025-02-06 00:50:32 +08:00
else :
try :
2025-03-06 18:27:19 -08:00
if browser_state is None :
browser_state = await app . BROWSER_MANAGER . get_or_create_for_workflow_run (
workflow_run = workflow_run ,
url = url ,
browser_session_id = browser_session_id ,
)
2025-02-06 00:50:32 +08:00
scraped_page = await scrape_website (
browser_state ,
url ,
app . AGENT_FUNCTION . cleanup_element_tree_factory ( ) ,
scrape_exclude = app . scrape_exclude ,
)
element_tree_in_prompt : str = scraped_page . build_element_tree ( ElementTreeFormat . HTML )
2025-03-06 18:27:19 -08:00
if page is None :
page = await browser_state . get_working_page ( )
2025-02-06 00:50:32 +08:00
except Exception :
LOG . exception (
2025-02-27 20:19:02 -08:00
" Failed to get browser state or scrape website in task v2 iteration " , iteration = i , url = url
2025-02-06 00:50:32 +08:00
)
continue
2025-03-06 18:27:19 -08:00
current_url = current_url if current_url else str ( await SkyvernFrame . get_url ( frame = page ) if page else url )
2024-12-19 17:26:08 -08:00
2025-02-27 20:19:02 -08:00
task_v2_prompt = prompt_engine . load_prompt (
2025-02-23 01:26:54 -08:00
" task_v2 " ,
2025-02-06 00:50:32 +08:00
current_url = current_url ,
elements = element_tree_in_prompt ,
user_goal = user_prompt ,
task_history = task_history ,
local_datetime = datetime . now ( context . tz_info ) . isoformat ( ) ,
)
2025-02-27 20:19:02 -08:00
thought = await app . DATABASE . create_thought (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-02-06 00:50:32 +08:00
organization_id = organization_id ,
workflow_run_id = workflow_run . workflow_run_id ,
workflow_id = workflow . workflow_id ,
workflow_permanent_id = workflow . workflow_permanent_id ,
2025-02-27 20:19:02 -08:00
thought_type = ThoughtType . plan ,
thought_scenario = ThoughtScenario . generate_plan ,
2025-02-06 00:50:32 +08:00
)
2025-02-27 20:19:02 -08:00
task_v2_response = await app . LLM_API_HANDLER (
prompt = task_v2_prompt ,
2025-02-06 00:50:32 +08:00
screenshots = scraped_page . screenshots ,
2025-02-27 20:19:02 -08:00
thought = thought ,
2025-02-23 01:26:54 -08:00
prompt_name = " task_v2 " ,
2025-02-06 00:50:32 +08:00
)
2024-12-19 17:26:08 -08:00
LOG . info (
2025-02-27 20:19:02 -08:00
" Task v2 response " ,
task_v2_response = task_v2_response ,
2024-12-19 17:26:08 -08:00
iteration = i ,
2025-02-06 00:50:32 +08:00
current_url = current_url ,
2024-12-19 17:26:08 -08:00
workflow_run_id = workflow_run_id ,
)
2025-02-06 00:50:32 +08:00
# see if the user goal has achieved or not
2025-02-27 20:19:02 -08:00
user_goal_achieved = task_v2_response . get ( " user_goal_achieved " , False )
observation = task_v2_response . get ( " page_info " , " " )
thoughts : str = task_v2_response . get ( " thoughts " , " " )
plan = task_v2_response . get ( " plan " , " " )
task_type = task_v2_response . get ( " task_type " , " " )
# Create and save task thought
await app . DATABASE . update_thought (
thought_id = thought . observer_thought_id ,
2025-02-06 00:50:32 +08:00
organization_id = organization_id ,
thought = thoughts ,
observation = observation ,
answer = plan ,
output = { " task_type " : task_type , " user_goal_achieved " : user_goal_achieved } ,
2025-01-10 14:59:53 -08:00
)
2024-12-19 17:26:08 -08:00
2025-02-06 00:50:32 +08:00
if user_goal_achieved is True :
LOG . info (
2025-02-27 20:19:02 -08:00
" User goal achieved. Workflow run will complete. Task v2 is stopping " ,
2025-02-06 00:50:32 +08:00
iteration = i ,
workflow_run_id = workflow_run_id ,
)
2025-02-23 22:17:28 -08:00
task_v2 = await _summarize_task_v2 (
task_v2 = task_v2 ,
2025-02-06 00:50:32 +08:00
task_history = task_history ,
context = context ,
screenshots = scraped_page . screenshots ,
)
break
if not plan :
2025-02-27 20:19:02 -08:00
LOG . warning ( " No plan found in task v2 response " , task_v2_response = task_v2_response )
2025-02-06 00:50:32 +08:00
continue
2024-12-19 17:26:08 -08:00
2025-02-27 20:19:02 -08:00
# parse task v2 response and run the next task
2025-02-06 00:50:32 +08:00
if not task_type :
2025-02-27 20:19:02 -08:00
LOG . error ( " No task type found in task v2 response " , task_v2_response = task_v2_response )
2025-02-23 22:17:28 -08:00
await mark_task_v2_as_failed (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-02-06 00:50:32 +08:00
workflow_run_id = workflow_run_id ,
failure_reason = " Skyvern failed to generate a task. Please try again later. " ,
)
break
2025-02-01 04:13:00 +08:00
2025-02-06 00:50:32 +08:00
if task_type == " extract " :
block , block_yaml_list , parameter_yaml_list = await _generate_extraction_task (
2025-02-23 22:17:28 -08:00
task_v2 = task_v2 ,
2024-12-19 17:26:08 -08:00
workflow_id = workflow_id ,
2024-12-27 09:04:09 -08:00
workflow_permanent_id = workflow . workflow_permanent_id ,
2024-12-19 17:26:08 -08:00
workflow_run_id = workflow_run_id ,
2025-02-06 00:50:32 +08:00
current_url = current_url ,
element_tree_in_prompt = element_tree_in_prompt ,
data_extraction_goal = plan ,
task_history = task_history ,
2025-02-01 04:13:00 +08:00
)
2025-02-06 00:50:32 +08:00
task_history_record = { " type " : task_type , " task " : plan }
elif task_type == " navigate " :
original_url = url if i == 0 else None
navigation_goal = MINI_GOAL_TEMPLATE . format ( main_goal = user_prompt , mini_goal = plan )
block , block_yaml_list , parameter_yaml_list = await _generate_navigation_task (
workflow_id = workflow_id ,
workflow_permanent_id = workflow . workflow_permanent_id ,
workflow_run_id = workflow_run_id ,
original_url = original_url ,
navigation_goal = navigation_goal ,
2025-02-23 22:17:28 -08:00
totp_verification_url = task_v2 . totp_verification_url ,
totp_identifier = task_v2 . totp_identifier ,
2025-02-06 00:50:32 +08:00
)
task_history_record = { " type " : task_type , " task " : plan }
elif task_type == " loop " :
try :
block , block_yaml_list , parameter_yaml_list , extraction_obj , inner_task = await _generate_loop_task (
2025-02-23 22:17:28 -08:00
task_v2 = task_v2 ,
2025-02-06 00:50:32 +08:00
workflow_id = workflow_id ,
workflow_permanent_id = workflow . workflow_permanent_id ,
workflow_run_id = workflow_run_id ,
plan = plan ,
browser_state = browser_state ,
original_url = url ,
scraped_page = scraped_page ,
)
task_history_record = {
" type " : task_type ,
" task " : plan ,
" loop_over_values " : extraction_obj . get ( " loop_values " ) ,
" task_inside_the_loop " : inner_task ,
}
except Exception :
LOG . exception ( " Failed to generate loop task " )
2025-02-23 22:17:28 -08:00
await mark_task_v2_as_failed (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-02-06 00:50:32 +08:00
workflow_run_id = workflow_run_id ,
failure_reason = " Failed to generate the loop. " ,
)
break
else :
LOG . info ( " Unsupported task type " , task_type = task_type )
2025-02-23 22:17:28 -08:00
await mark_task_v2_as_failed (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2024-12-19 17:26:08 -08:00
workflow_run_id = workflow_run_id ,
2025-02-06 00:50:32 +08:00
failure_reason = f " Unsupported task block type gets generated: { task_type } " ,
2024-12-19 17:26:08 -08:00
)
break
# generate the extraction task
2025-01-28 16:59:54 +08:00
block_result = await block . execute_safe (
workflow_run_id = workflow_run_id ,
organization_id = organization_id ,
)
2025-01-08 20:06:14 -08:00
task_history_record [ " status " ] = str ( block_result . status )
if block_result . failure_reason :
task_history_record [ " reason " ] = block_result . failure_reason
2024-12-19 17:26:08 -08:00
2025-01-05 09:06:20 -08:00
extracted_data = _get_extracted_data_from_block_result (
block_result ,
task_type ,
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-01-05 09:06:20 -08:00
workflow_run_id = workflow_run_id ,
)
if extracted_data is not None :
task_history_record [ " extracted_data " ] = extracted_data
task_history . append ( task_history_record )
2024-12-19 17:26:08 -08:00
# refresh workflow
yaml_blocks . extend ( block_yaml_list )
yaml_parameters . extend ( parameter_yaml_list )
# Update workflow definition
workflow_definition_yaml = WorkflowDefinitionYAML (
parameters = yaml_parameters ,
blocks = yaml_blocks ,
)
workflow_create_request = WorkflowCreateYAMLRequest (
title = workflow . title ,
description = workflow . description ,
2025-02-23 22:17:28 -08:00
proxy_location = task_v2 . proxy_location or ProxyLocation . RESIDENTIAL ,
2024-12-19 17:26:08 -08:00
workflow_definition = workflow_definition_yaml ,
2025-01-25 04:08:51 +08:00
status = workflow . status ,
2024-12-19 17:26:08 -08:00
)
LOG . info ( " Creating workflow from request " , workflow_create_request = workflow_create_request )
workflow = await app . WORKFLOW_SERVICE . create_workflow_from_request (
organization = organization ,
request = workflow_create_request ,
workflow_permanent_id = workflow . workflow_permanent_id ,
)
LOG . info ( " Workflow created " , workflow_id = workflow . workflow_id )
# execute the extraction task
2025-01-09 22:04:53 +01:00
workflow_run = await handle_block_result (
2025-02-23 16:03:49 -08:00
task_v2_id ,
2025-01-09 22:04:53 +01:00
block ,
block_result ,
workflow ,
workflow_run ,
browser_session_id = browser_session_id ,
)
2024-12-19 17:26:08 -08:00
if workflow_run . status != WorkflowRunStatus . running :
LOG . info (
2025-02-27 20:19:02 -08:00
" Workflow run is not running anymore, stopping the task v2 " ,
2024-12-19 17:26:08 -08:00
workflow_run_id = workflow_run_id ,
status = workflow_run . status ,
)
break
2025-01-10 00:39:13 -08:00
if block_result . success is True :
2025-01-10 10:32:08 -08:00
completion_screenshots = [ ]
2025-01-10 00:39:13 -08:00
try :
2025-02-06 00:50:32 +08:00
browser_state = await app . BROWSER_MANAGER . get_or_create_for_workflow_run (
workflow_run = workflow_run ,
url = url ,
browser_session_id = browser_session_id ,
)
2025-01-10 00:39:13 -08:00
scraped_page = await scrape_website (
browser_state ,
url ,
app . AGENT_FUNCTION . cleanup_element_tree_factory ( ) ,
scrape_exclude = app . scrape_exclude ,
)
2025-01-10 10:32:08 -08:00
completion_screenshots = scraped_page . screenshots
2025-01-10 00:39:13 -08:00
except Exception :
2025-02-27 20:19:02 -08:00
LOG . warning ( " Failed to scrape the website for task v2 completion check " )
2025-01-10 00:39:13 -08:00
2025-01-08 20:06:14 -08:00
# validate completion only happens at the last iteration
2025-02-27 20:19:02 -08:00
task_v2_completion_prompt = prompt_engine . load_prompt (
2025-02-23 01:26:54 -08:00
" task_v2_check_completion " ,
2024-12-19 17:26:08 -08:00
user_goal = user_prompt ,
task_history = task_history ,
local_datetime = datetime . now ( context . tz_info ) . isoformat ( ) ,
)
2025-02-27 20:19:02 -08:00
thought = await app . DATABASE . create_thought (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2024-12-27 09:04:09 -08:00
organization_id = organization_id ,
workflow_run_id = workflow_run_id ,
workflow_id = workflow_id ,
workflow_permanent_id = workflow . workflow_permanent_id ,
2025-02-27 20:19:02 -08:00
thought_type = ThoughtType . user_goal_check ,
thought_scenario = ThoughtScenario . user_goal_check ,
2024-12-27 09:04:09 -08:00
)
2024-12-19 17:26:08 -08:00
completion_resp = await app . LLM_API_HANDLER (
2025-02-27 20:19:02 -08:00
prompt = task_v2_completion_prompt ,
2025-01-10 10:32:08 -08:00
screenshots = completion_screenshots ,
2025-02-27 20:19:02 -08:00
thought = thought ,
prompt_name = " task_v2_check_completion " ,
2024-12-19 17:26:08 -08:00
)
LOG . info (
2025-02-27 20:19:02 -08:00
" Task v2 completion check response " ,
2024-12-19 17:26:08 -08:00
completion_resp = completion_resp ,
iteration = i ,
workflow_run_id = workflow_run_id ,
task_history = task_history ,
)
2024-12-27 09:04:09 -08:00
user_goal_achieved = completion_resp . get ( " user_goal_achieved " , False )
2025-02-27 20:19:02 -08:00
thought_content = completion_resp . get ( " thoughts " , " " )
await app . DATABASE . update_thought (
thought_id = thought . observer_thought_id ,
2024-12-27 09:04:09 -08:00
organization_id = organization_id ,
2025-02-27 20:19:02 -08:00
thought = thought_content ,
2024-12-27 09:04:09 -08:00
output = { " user_goal_achieved " : user_goal_achieved } ,
)
if user_goal_achieved :
2024-12-19 17:26:08 -08:00
LOG . info (
2025-02-27 20:19:02 -08:00
" User goal achieved according to the task v2 completion check " ,
2024-12-19 17:26:08 -08:00
iteration = i ,
workflow_run_id = workflow_run_id ,
completion_resp = completion_resp ,
)
2025-02-23 22:17:28 -08:00
task_v2 = await _summarize_task_v2 (
task_v2 = task_v2 ,
2025-01-10 14:59:53 -08:00
task_history = task_history ,
context = context ,
screenshots = completion_screenshots ,
2025-01-06 16:42:42 -08:00
)
2024-12-19 17:26:08 -08:00
break
2025-03-04 01:07:07 -05:00
# total step number validation
workflow_run_tasks = await app . DATABASE . get_tasks_by_workflow_run_id ( workflow_run_id = workflow_run_id )
2025-03-04 02:04:18 -05:00
total_step_count = await app . DATABASE . get_total_unique_step_order_count_by_task_ids (
2025-03-04 01:07:07 -05:00
task_ids = [ task . task_id for task in workflow_run_tasks ] ,
organization_id = organization_id ,
)
if total_step_count > = max_steps :
LOG . info ( " Task v2 failed - run out of steps " , max_steps = max_steps , workflow_run_id = workflow_run_id )
await mark_task_v2_as_failed (
task_v2_id = task_v2_id ,
workflow_run_id = workflow_run_id ,
failure_reason = f ' Reached the max number of { max_steps } steps. If you need more steps, update the " Max Steps Override " configuration when running the task. Or add/update the " x-max-steps-override " header with your desired number of steps in the API request. ' ,
organization_id = organization_id ,
)
return workflow , workflow_run , task_v2
2025-01-06 16:42:42 -08:00
else :
LOG . info (
2025-02-27 20:19:02 -08:00
" Task v2 failed - run out of iterations " ,
2025-03-04 01:07:07 -05:00
max_iterations = DEFAULT_MAX_ITERATIONS ,
max_steps = max_steps ,
2025-01-06 16:42:42 -08:00
workflow_run_id = workflow_run_id ,
)
2025-02-23 22:17:28 -08:00
task_v2 = await mark_task_v2_as_failed (
2025-02-23 16:03:49 -08:00
task_v2_id = task_v2_id ,
2025-01-06 16:42:42 -08:00
workflow_run_id = workflow_run_id ,
# TODO: add a better failure reason with LLM
2025-01-17 09:04:43 -08:00
failure_reason = " Max iterations reached " ,
2025-01-06 16:42:42 -08:00
organization_id = organization_id ,
)
2025-02-23 22:17:28 -08:00
return workflow , workflow_run , task_v2
2024-12-19 17:26:08 -08:00
async def handle_block_result(
    task_v2_id: str,
    block: BlockTypeVar,
    block_result: BlockResult,
    workflow: Workflow,
    workflow_run: WorkflowRun,
    is_last_block: bool = True,
    browser_session_id: str | None = None,
) -> WorkflowRun:
    """React to a finished block and return a refreshed workflow run.

    Behavior by block result status:
      - canceled: mark the owning task v2 (and its workflow run) as canceled.
      - failed / terminated: log it; the run only keeps going when the block
        has ``continue_on_failure`` set and it is not the last block.

    Args:
        task_v2_id: ID of the task v2 that owns this workflow run.
        block: The block that just finished executing.
        block_result: Execution result of ``block``.
        workflow: The workflow the block belongs to (not used here; kept for
            interface compatibility).
        workflow_run: The workflow run the block executed in.
        is_last_block: Whether ``block`` is the final block of the run.
        browser_session_id: Accepted for interface compatibility; not used.

    Returns:
        The workflow run re-fetched from the database so the caller sees the
        latest status.
    """
    workflow_run_id = workflow_run.workflow_run_id
    if block_result.status == BlockStatus.canceled:
        # Fix: this message was a plain (non-f) string, so the placeholders
        # were logged literally instead of being interpolated.
        LOG.info(
            f"Block with type {block.block_type} was canceled for workflow run {workflow_run_id}, cancelling workflow run",
            block_type=block.block_type,
            workflow_run_id=workflow_run.workflow_run_id,
            block_result=block_result,
            block_type_var=block.block_type,
            block_label=block.label,
        )
        await mark_task_v2_as_canceled(
            task_v2_id=task_v2_id,
            workflow_run_id=workflow_run_id,
            organization_id=workflow_run.organization_id,
        )
    elif block_result.status == BlockStatus.failed:
        LOG.error(
            f"Block with type {block.block_type} failed for workflow run {workflow_run_id}",
            block_type=block.block_type,
            workflow_run_id=workflow_run.workflow_run_id,
            block_result=block_result,
            block_type_var=block.block_type,
            block_label=block.label,
        )
        if block.continue_on_failure and not is_last_block:
            LOG.warning(
                f"Block with type {block.block_type} failed but will continue executing the workflow run {workflow_run_id}",
                block_type=block.block_type,
                workflow_run_id=workflow_run.workflow_run_id,
                block_result=block_result,
                continue_on_failure=block.continue_on_failure,
                block_type_var=block.block_type,
                block_label=block.label,
            )
            # task v2 will continue running the workflow
    elif block_result.status == BlockStatus.terminated:
        LOG.info(
            f"Block with type {block.block_type} was terminated for workflow run {workflow_run_id}",
            block_type=block.block_type,
            workflow_run_id=workflow_run.workflow_run_id,
            block_result=block_result,
            block_type_var=block.block_type,
            block_label=block.label,
        )
        if block.continue_on_failure and not is_last_block:
            LOG.warning(
                f"Block with type {block.block_type} was terminated for workflow run {workflow_run_id}, but will continue executing the workflow run",
                block_type=block.block_type,
                workflow_run_id=workflow_run.workflow_run_id,
                block_result=block_result,
                continue_on_failure=block.continue_on_failure,
                block_type_var=block.block_type,
                block_label=block.label,
            )
    # refresh workflow run model
    return await app.WORKFLOW_SERVICE.get_workflow_run(
        workflow_run_id=workflow_run_id,
        organization_id=workflow_run.organization_id,
    )
2024-12-19 17:26:08 -08:00
2025-02-03 23:18:41 +08:00
async def _set_up_workflow_context(workflow_id: str, workflow_run_id: str, organization: Organization) -> None:
    """Initialize the workflow run context for a task v2 run.

    TODO: see if we could remove this function as we can just set an empty workflow context
    """
    # Gather all <workflow parameter, workflow run parameter> tuples plus the
    # workflow's output parameters, then hand everything to the context manager.
    parameter_tuples = await app.WORKFLOW_SERVICE.get_workflow_run_parameter_tuples(workflow_run_id=workflow_run_id)
    output_parameters = await app.WORKFLOW_SERVICE.get_workflow_output_parameters(workflow_id=workflow_id)
    await app.WORKFLOW_CONTEXT_MANAGER.initialize_workflow_run_context(
        organization,
        workflow_run_id,
        parameter_tuples,
        output_parameters,
        [],
        [],
    )
async def _generate_loop_task(
    task_v2: TaskV2,
    workflow_id: str,
    workflow_permanent_id: str,
    workflow_run_id: str,
    plan: str,
    browser_state: BrowserState,
    original_url: str,
    scraped_page: ScrapedPage,
) -> tuple[ForLoopBlock, list[BLOCK_YAML_TYPES], list[PARAMETER_YAML_TYPES], dict[str, Any], dict[str, Any]]:
    """Build and wire up a ForLoopBlock that executes the given plan once per loop value.

    Flow:
      1. Run an extraction block against the current page to pull the list of
         loop values (and whether those values are links).
      2. Register context parameters so each loop iteration can reference its
         current value.
      3. Ask the LLM to generate the task block that runs inside the loop.

    Returns:
        A 5-tuple of:
          - the ForLoopBlock ready to execute
          - YAML definitions of the blocks created (extraction + for-loop)
          - YAML definitions of the context parameters created
          - the raw extraction output (loop values and ``is_loop_value_link``)
          - metadata about the generated inner task block (label and goals)

    Raises:
        Exception: when the value-extraction block fails or its output is
            missing the expected keys.
    """
    for_loop_parameter_yaml_list: list[PARAMETER_YAML_TYPES] = []
    loop_value_extraction_goal = prompt_engine.load_prompt(
        "task_v2_loop_task_extraction_goal",
        plan=plan,
    )
    data_extraction_thought = f"Going to generate a list of values to go through based on the plan: {plan}."
    thought = await app.DATABASE.create_thought(
        task_v2_id=task_v2.observer_cruise_id,
        organization_id=task_v2.organization_id,
        workflow_run_id=workflow_run_id,
        workflow_id=workflow_id,
        workflow_permanent_id=workflow_permanent_id,
        thought_type=ThoughtType.plan,
        thought_scenario=ThoughtScenario.extract_loop_values,
        thought=data_extraction_thought,
    )
    # generate screenshot artifacts for the thought
    if scraped_page.screenshots:
        for screenshot in scraped_page.screenshots:
            await app.ARTIFACT_MANAGER.create_thought_artifact(
                thought=thought,
                artifact_type=ArtifactType.SCREENSHOT_LLM,
                data=screenshot,
            )
    # random suffix keeps the generated labels/keys unique within the workflow
    loop_random_string = _generate_random_string()
    label = f"extraction_task_for_loop_{loop_random_string}"
    loop_values_key = f"loop_values_{loop_random_string}"
    extraction_block_yaml = ExtractionBlockYAML(
        label=label,
        data_extraction_goal=loop_value_extraction_goal,
        data_schema=_generate_data_extraction_schema_for_loop(loop_values_key),
    )
    loop_value_extraction_output_parameter = await app.WORKFLOW_SERVICE.create_output_parameter_for_block(
        workflow_id=workflow_id,
        block_yaml=extraction_block_yaml,
    )
    extraction_block_for_loop = ExtractionBlock(
        label=label,
        data_extraction_goal=loop_value_extraction_goal,
        data_schema=_generate_data_extraction_schema_for_loop(loop_values_key),
        output_parameter=loop_value_extraction_output_parameter,
    )
    # execute the extraction block
    extraction_block_result = await extraction_block_for_loop.execute_safe(
        workflow_run_id=workflow_run_id,
        organization_id=task_v2.organization_id,
    )
    LOG.info("Extraction block result", extraction_block_result=extraction_block_result)
    if extraction_block_result.success is False:
        LOG.error(
            "Failed to execute the extraction block for the loop task",
            extraction_block_result=extraction_block_result,
        )
        # workflow run and task v2 status update is handled in the upper caller layer
        raise Exception("extraction_block failed")
    # validate output parameter
    try:
        output_value_obj: dict[str, Any] = extraction_block_result.output_parameter_value.get("extracted_information")  # type: ignore
        if not output_value_obj or not isinstance(output_value_obj, dict):
            raise Exception("Invalid output parameter of the extraction block for the loop task")
        if loop_values_key not in output_value_obj:
            raise Exception("loop_values_key not found in the output parameter of the extraction block")
        if "is_loop_value_link" not in output_value_obj:
            raise Exception("is_loop_value_link not found in the output parameter of the extraction block")
        loop_values = output_value_obj.get(loop_values_key, [])
        is_loop_value_link = output_value_obj.get("is_loop_value_link")
    except Exception:
        LOG.error(
            "Failed to validate the output parameter of the extraction block for the loop task",
            extraction_block_result=extraction_block_result,
        )
        raise
    # update the thought with the extracted loop values
    await app.DATABASE.update_thought(
        thought_id=thought.observer_thought_id,
        organization_id=task_v2.organization_id,
        output=output_value_obj,
    )
    # create ContextParameter for the loop over pointer that ForLoopBlock needs.
    loop_for_context_parameter = ContextParameter(
        key=loop_values_key,
        source=loop_value_extraction_output_parameter,
    )
    for_loop_parameter_yaml_list.append(
        ContextParameterYAML(
            key=loop_for_context_parameter.key,
            description=loop_for_context_parameter.description,
            source_parameter_key=loop_value_extraction_output_parameter.key,
        )
    )
    app.WORKFLOW_CONTEXT_MANAGER.add_context_parameter(workflow_run_id, loop_for_context_parameter)
    await app.WORKFLOW_CONTEXT_MANAGER.set_parameter_values_for_output_parameter_dependent_blocks(
        workflow_run_id=workflow_run_id,
        output_parameter=loop_value_extraction_output_parameter,
        value=extraction_block_result.output_parameter_value,
    )
    task_parameters: list[PARAMETER_TYPE] = []
    if is_loop_value_link is True:
        LOG.info("Loop values are links", loop_values=loop_values)
        # NOTE(review): the inner task url is set to the context-parameter key
        # itself — presumably substituted with the per-iteration loop value
        # downstream; confirm against the ForLoopBlock implementation.
        context_parameter_key = url = f"task_in_loop_url_{loop_random_string}"
    else:
        LOG.info("Loop values are not links", loop_values=loop_values)
        # loop values are not urls: keep the current page's location, falling
        # back to the original url when no working page is available
        page = await browser_state.get_working_page()
        url = str(
            await SkyvernFrame.evaluate(frame=page, expression="() => document.location.href") if page else original_url
        )
        context_parameter_key = "target"
    # create ContextParameter for the value
    url_value_context_parameter = ContextParameter(
        key=context_parameter_key,
        source=loop_for_context_parameter,
    )
    task_parameters.append(url_value_context_parameter)
    for_loop_parameter_yaml_list.append(
        ContextParameterYAML(
            key=url_value_context_parameter.key,
            description=url_value_context_parameter.description,
            source_parameter_key=loop_for_context_parameter.key,
        )
    )
    app.WORKFLOW_CONTEXT_MANAGER.add_context_parameter(workflow_run_id, url_value_context_parameter)
    task_in_loop_label = f"task_in_loop_{_generate_random_string()}"
    context = skyvern_context.ensure_context()
    task_in_loop_metadata_prompt = prompt_engine.load_prompt(
        "task_v2_generate_task_block",
        plan=plan,
        local_datetime=datetime.now(context.tz_info).isoformat(),
        is_link=is_loop_value_link,
        loop_values=loop_values,
    )
    thought_task_in_loop = await app.DATABASE.create_thought(
        task_v2_id=task_v2.observer_cruise_id,
        organization_id=task_v2.organization_id,
        workflow_run_id=workflow_run_id,
        workflow_id=workflow_id,
        workflow_permanent_id=workflow_permanent_id,
        thought_type=ThoughtType.internal_plan,
        thought_scenario=ThoughtScenario.generate_task_in_loop,
    )
    task_in_loop_metadata_response = await app.LLM_API_HANDLER(
        task_in_loop_metadata_prompt,
        screenshots=scraped_page.screenshots,
        thought=thought_task_in_loop,
        prompt_name="task_v2_generate_task_block",
    )
    LOG.info("Task in loop metadata response", task_in_loop_metadata_response=task_in_loop_metadata_response)
    navigation_goal = task_in_loop_metadata_response.get("navigation_goal")
    data_extraction_goal = task_in_loop_metadata_response.get("data_extraction_goal")
    data_extraction_schema = task_in_loop_metadata_response.get("data_schema")
    thought_content = task_in_loop_metadata_response.get("thoughts")
    await app.DATABASE.update_thought(
        thought_id=thought_task_in_loop.observer_thought_id,
        organization_id=task_v2.organization_id,
        thought=thought_content,
        output=task_in_loop_metadata_response,
    )
    if data_extraction_goal and navigation_goal:
        # bias the navigation goal toward completing with partial data rather
        # than getting stuck chasing every last field
        navigation_goal = (
            navigation_goal
            + " Optimize for extracting as much data as possible. Complete when most data is seen even if some data is partially missing."
        )
    block_yaml = TaskBlockYAML(
        label=task_in_loop_label,
        url=url,
        title=task_in_loop_label,
        navigation_goal=navigation_goal,
        data_extraction_goal=data_extraction_goal,
        data_schema=data_extraction_schema,
        parameter_keys=[param.key for param in task_parameters],
        continue_on_failure=True,
    )
    block_yaml_output_parameter = await app.WORKFLOW_SERVICE.create_output_parameter_for_block(
        workflow_id=workflow_id,
        block_yaml=block_yaml,
    )
    task_in_loop_block = TaskBlock(
        label=task_in_loop_label,
        url=url,
        title=task_in_loop_label,
        navigation_goal=navigation_goal,
        data_extraction_goal=data_extraction_goal,
        data_schema=data_extraction_schema,
        output_parameter=block_yaml_output_parameter,
        parameters=task_parameters,
        continue_on_failure=True,
    )
    # use the output parameter of the extraction block to create the for loop block
    for_loop_yaml = ForLoopBlockYAML(
        label=f"loop_{_generate_random_string()}",
        loop_over_parameter_key=loop_for_context_parameter.key,
        loop_blocks=[block_yaml],
    )
    output_parameter = await app.WORKFLOW_SERVICE.create_output_parameter_for_block(
        workflow_id=workflow_id,
        block_yaml=for_loop_yaml,
    )
    return (
        ForLoopBlock(
            label=for_loop_yaml.label,
            # TODO: this loop over parameter needs to be a context parameter
            loop_over=loop_for_context_parameter,
            loop_blocks=[task_in_loop_block],
            output_parameter=output_parameter,
        ),
        [extraction_block_yaml, for_loop_yaml],
        for_loop_parameter_yaml_list,
        output_value_obj,
        {
            "inner_task_label": task_in_loop_block.label,
            "inner_task_navigation_goal": navigation_goal,
            "inner_task_data_extraction_goal": data_extraction_goal,
        },
    )
async def _generate_extraction_task(
    task_v2: TaskV2,
    workflow_id: str,
    workflow_permanent_id: str,
    workflow_run_id: str,
    current_url: str,
    element_tree_in_prompt: str,
    data_extraction_goal: str,
    task_history: list[dict] | None = None,
) -> tuple[ExtractionBlock, list[BLOCK_YAML_TYPES], list[PARAMETER_YAML_TYPES]]:
    """Ask the LLM for a data schema and build the matching ExtractionBlock.

    ``workflow_permanent_id`` and ``workflow_run_id`` are accepted for
    interface parity with the sibling generators but are not used here.
    """
    LOG.info("Generating extraction task", data_extraction_goal=data_extraction_goal, current_url=current_url)
    # extract the data
    ctx = skyvern_context.ensure_context()
    extraction_prompt = prompt_engine.load_prompt(
        "task_v2_generate_extraction_task",
        current_url=current_url,
        elements=element_tree_in_prompt,
        data_extraction_goal=data_extraction_goal,
        local_datetime=datetime.now(ctx.tz_info).isoformat(),
    )
    llm_response = await app.LLM_API_HANDLER(
        extraction_prompt,
        task_v2=task_v2,
        prompt_name="task_v2_generate_extraction_task",
    )
    LOG.info("Data extraction response", data_extraction_response=llm_response)
    # create OutputParameter for the data_extraction block
    schema: dict[str, Any] | list | None = llm_response.get("schema")
    block_label = f"data_extraction_{_generate_random_string()}"
    # only pin the url when data extraction is the very first block
    block_url: str | None = current_url if not task_history else None
    extraction_yaml = ExtractionBlockYAML(
        label=block_label,
        data_extraction_goal=data_extraction_goal,
        data_schema=schema,
        url=block_url,
    )
    extraction_output_parameter = await app.WORKFLOW_SERVICE.create_output_parameter_for_block(
        workflow_id=workflow_id,
        block_yaml=extraction_yaml,
    )
    # create ExtractionBlock
    extraction_block = ExtractionBlock(
        label=block_label,
        url=block_url,
        data_extraction_goal=data_extraction_goal,
        data_schema=schema,
        output_parameter=extraction_output_parameter,
    )
    return extraction_block, [extraction_yaml], []
async def _generate_navigation_task(
    workflow_id: str,
    workflow_permanent_id: str,
    workflow_run_id: str,
    navigation_goal: str,
    original_url: str | None = None,
    totp_verification_url: str | None = None,
    totp_identifier: str | None = None,
) -> tuple[NavigationBlock, list[BLOCK_YAML_TYPES], list[PARAMETER_YAML_TYPES]]:
    """Build a NavigationBlock for the given goal.

    ``workflow_permanent_id`` and ``workflow_run_id`` are accepted for
    interface parity with the sibling generators but are not used here.
    """
    LOG.info("Generating navigation task", navigation_goal=navigation_goal, original_url=original_url)
    block_label = f"navigation_{_generate_random_string()}"
    nav_yaml = NavigationBlockYAML(
        label=block_label,
        url=original_url,
        navigation_goal=navigation_goal,
        totp_verification_url=totp_verification_url,
        totp_identifier=totp_identifier,
        complete_verification=False,
    )
    # register the block's output parameter on the workflow
    nav_output_parameter = await app.WORKFLOW_SERVICE.create_output_parameter_for_block(
        workflow_id=workflow_id,
        block_yaml=nav_yaml,
    )
    nav_block = NavigationBlock(
        label=block_label,
        url=original_url,
        navigation_goal=navigation_goal,
        totp_verification_url=totp_verification_url,
        totp_identifier=totp_identifier,
        output_parameter=nav_output_parameter,
        complete_verification=False,
    )
    return nav_block, [nav_yaml], []
2025-02-06 00:50:32 +08:00
async def _generate_goto_url_task(
    workflow_id: str,
    url: str,
) -> tuple[UrlBlock, list[BLOCK_YAML_TYPES], list[PARAMETER_YAML_TYPES]]:
    """Build a UrlBlock that simply navigates the browser to ``url``."""
    LOG.info("Generating goto url task", url=url)
    block_label = f"goto_url_{_generate_random_string()}"
    goto_yaml = UrlBlockYAML(
        label=block_label,
        url=url,
    )
    # register the block's output parameter on the workflow
    goto_output_parameter = await app.WORKFLOW_SERVICE.create_output_parameter_for_block(
        workflow_id=workflow_id,
        block_yaml=goto_yaml,
    )
    goto_block = UrlBlock(
        label=block_label,
        url=url,
        output_parameter=goto_output_parameter,
    )
    return goto_block, [goto_yaml], []
2024-12-19 17:26:08 -08:00
def _generate_random_string(length: int = 5) -> str:
    """Return a random suffix of ``length`` characters from RANDOM_STRING_POOL.

    Uses a locally seeded RNG (seeded from ``os.urandom``) so we do not reseed
    the shared module-global ``random`` generator — the previous implementation
    called ``random.seed`` on the global state as a side effect, and its
    comment wrongly claimed the seed was the current timestamp.
    """
    rng = random.Random(os.urandom(16))
    return "".join(rng.choices(RANDOM_STRING_POOL, k=length))
2024-12-22 20:54:53 -08:00
2025-02-27 20:19:02 -08:00
async def get_thought_timelines(
    task_v2_id: str,
    organization_id: str | None = None,
) -> list[WorkflowRunTimeline]:
    """Return the task v2's plan and user-goal-check thoughts as timeline entries."""
    relevant_thought_types = [
        ThoughtType.plan,
        ThoughtType.user_goal_check,
    ]
    thoughts = await app.DATABASE.get_thoughts(
        task_v2_id,
        organization_id=organization_id,
        thought_types=relevant_thought_types,
    )
    timeline_entries: list[WorkflowRunTimeline] = []
    for thought in thoughts:
        timeline_entries.append(
            WorkflowRunTimeline(
                type=WorkflowRunTimelineType.thought,
                thought=thought,
                created_at=thought.created_at,
                modified_at=thought.modified_at,
            )
        )
    return timeline_entries
2024-12-27 19:23:07 -08:00
2025-02-27 20:19:02 -08:00
async def get_task_v2(task_v2_id: str, organization_id: str | None = None) -> TaskV2 | None:
    """Fetch a task v2 record from the database, scoped to the organization when given."""
    record = await app.DATABASE.get_task_v2(task_v2_id, organization_id=organization_id)
    return record
2024-12-31 15:03:36 -08:00
2025-02-23 22:17:28 -08:00
async def mark_task_v2_as_failed(
    task_v2_id: str,
    workflow_run_id: str | None = None,
    failure_reason: str | None = None,
    organization_id: str | None = None,
) -> TaskV2:
    """Set the task v2 status to failed, fail its workflow run, and send the webhook."""
    failed_task = await app.DATABASE.update_task_v2(
        task_v2_id,
        organization_id=organization_id,
        status=TaskV2Status.failed,
    )
    if workflow_run_id:
        # default keeps the workflow run failure message non-empty
        reason = failure_reason or "Skyvern task 2.0 failed"
        await app.WORKFLOW_SERVICE.mark_workflow_run_as_failed(workflow_run_id, failure_reason=reason)
    await send_task_v2_webhook(failed_task)
    return failed_task
2025-01-05 09:06:20 -08:00
2025-02-23 22:17:28 -08:00
async def mark_task_v2_as_completed(
    task_v2_id: str,
    workflow_run_id: str | None = None,
    organization_id: str | None = None,
    summary: str | None = None,
    output: dict[str, Any] | None = None,
) -> TaskV2:
    """Mark the task v2 completed, complete its workflow run, log duration, and send the webhook."""
    completed_task = await app.DATABASE.update_task_v2(
        task_v2_id,
        organization_id=organization_id,
        status=TaskV2Status.completed,
        summary=summary,
        output=output,
    )
    if workflow_run_id:
        await app.WORKFLOW_SERVICE.mark_workflow_run_as_completed(workflow_run_id)

    # Track task v2 duration when completed; created_at is forced to UTC via
    # replace() before the subtraction.
    started_at = completed_task.created_at.replace(tzinfo=UTC)
    elapsed_seconds = (datetime.now(UTC) - started_at).total_seconds()
    LOG.info(
        "Task v2 duration metrics",
        task_v2_id=task_v2_id,
        workflow_run_id=workflow_run_id,
        duration_seconds=elapsed_seconds,
        task_v2_status=TaskV2Status.completed,
        organization_id=organization_id,
    )

    await send_task_v2_webhook(completed_task)
    return completed_task
2025-01-14 08:59:53 -08:00
2025-01-06 16:42:42 -08:00
2025-02-23 22:17:28 -08:00
async def mark_task_v2_as_canceled(
    task_v2_id: str,
    workflow_run_id: str | None = None,
    organization_id: str | None = None,
) -> TaskV2:
    """Mark the task v2 canceled, cancel its workflow run, and send the webhook."""
    canceled_task = await app.DATABASE.update_task_v2(
        task_v2_id,
        organization_id=organization_id,
        status=TaskV2Status.canceled,
    )
    if workflow_run_id:
        await app.WORKFLOW_SERVICE.mark_workflow_run_as_canceled(workflow_run_id)
    await send_task_v2_webhook(canceled_task)
    return canceled_task
2025-02-18 23:21:17 +08:00
2025-02-23 22:17:28 -08:00
async def mark_task_v2_as_terminated(
    task_v2_id: str,
    workflow_run_id: str | None = None,
    organization_id: str | None = None,
    failure_reason: str | None = None,
) -> TaskV2:
    """Mark the task v2 terminated, terminate its workflow run, and send the webhook."""
    terminated_task = await app.DATABASE.update_task_v2(
        task_v2_id,
        organization_id=organization_id,
        status=TaskV2Status.terminated,
    )
    if workflow_run_id:
        await app.WORKFLOW_SERVICE.mark_workflow_run_as_terminated(workflow_run_id, failure_reason)
    await send_task_v2_webhook(terminated_task)
    return terminated_task
2025-02-22 00:44:12 -08:00
2025-01-05 09:06:20 -08:00
def _get_extracted_data_from_block_result(
    block_result: BlockResult,
    task_type: str,
    task_v2_id: str | None = None,
    workflow_run_id: str | None = None,
) -> Any | None:
    """Extract data from block result based on task type.

    Args:
        block_result: The result from block execution
        task_type: Type of task ("extract" or "loop")
        task_v2_id: Optional ID for logging
        workflow_run_id: Optional ID for logging

    Returns:
        Extracted data if available, None otherwise
    """
    if task_type == "extract":
        raw_output = block_result.output_parameter_value
        if isinstance(raw_output, dict) and raw_output.get("extracted_information"):
            return raw_output["extracted_information"]
    elif task_type == "loop":
        # If the loop task did data extraction, collect it for the task history.
        # WARNING: the assumption here is that the output_parameter_value is a
        # list (iterations) of lists (inner blocks) of dicts; the structure is
        # not consistent across all blocks, so anything unexpected is logged
        # and skipped rather than raising.
        raw_output = block_result.output_parameter_value
        if raw_output and isinstance(raw_output, list):
            collected = []
            for iteration_output in raw_output:
                if not isinstance(iteration_output, list):
                    LOG.warning(
                        "Inner loop output is not a list",
                        inner_loop_output=iteration_output,
                        task_v2_id=task_v2_id,
                        workflow_run_id=workflow_run_id,
                        workflow_run_block_id=block_result.workflow_run_block_id,
                    )
                    continue
                per_iteration = []
                for entry in iteration_output:
                    if not isinstance(entry, dict):
                        LOG.warning(
                            "inner output is not a dict",
                            inner_output=entry,
                            task_v2_id=task_v2_id,
                            workflow_run_id=workflow_run_id,
                            workflow_run_block_id=block_result.workflow_run_block_id,
                        )
                        continue
                    output_value = entry.get("output_value", {})
                    if not isinstance(output_value, dict):
                        LOG.warning(
                            "output_value is not a dict",
                            output_value=output_value,
                            task_v2_id=task_v2_id,
                            workflow_run_id=workflow_run_id,
                            workflow_run_block_id=block_result.workflow_run_block_id,
                        )
                        continue
                    if output_value.get("extracted_information"):
                        per_iteration.append(output_value["extracted_information"])
                collected.append(per_iteration)
            return collected if collected else None
    return None
2025-01-10 14:59:53 -08:00
2025-02-23 22:17:28 -08:00
async def _summarize_task_v2(
    task_v2: TaskV2,
    task_history: list[dict],
    context: SkyvernContext,
    screenshots: list[bytes] | None = None,
) -> TaskV2:
    """Produce the final summary for a task v2 and mark it completed.

    Records a summarization thought, asks the LLM to condense the task
    history into a description plus formatted output, persists both on the
    thought, and returns the task v2 after marking it completed.
    """
    # Record a thought row up front so the LLM call below can be attributed to it.
    summary_thought = await app.DATABASE.create_thought(
        task_v2_id=task_v2.observer_cruise_id,
        organization_id=task_v2.organization_id,
        workflow_run_id=task_v2.workflow_run_id,
        workflow_id=task_v2.workflow_id,
        workflow_permanent_id=task_v2.workflow_permanent_id,
        thought_type=ThoughtType.user_goal_check,
        thought_scenario=ThoughtScenario.summarization,
    )

    # Build the summarization prompt from the user goal, the accumulated
    # task history, and the local time in the context's timezone.
    prompt = prompt_engine.load_prompt(
        "task_v2_summary",
        user_goal=task_v2.prompt,
        task_history=task_history,
        extracted_information_schema=task_v2.extracted_information_schema,
        local_datetime=datetime.now(context.tz_info).isoformat(),
    )
    llm_response = await app.LLM_API_HANDLER(
        prompt=prompt,
        screenshots=screenshots,
        thought=summary_thought,
        prompt_name="task_v2_summary",
    )
    LOG.info("Task v2 summary response", task_v2_summary_resp=llm_response)

    # Persist the LLM's description and raw response on the thought record.
    description = llm_response.get("description")
    formatted_output = llm_response.get("output")
    await app.DATABASE.update_thought(
        thought_id=summary_thought.observer_thought_id,
        organization_id=task_v2.organization_id,
        thought=description,
        output=llm_response,
    )

    return await mark_task_v2_as_completed(
        task_v2_id=task_v2.observer_cruise_id,
        workflow_run_id=task_v2.workflow_run_id,
        organization_id=task_v2.organization_id,
        summary=description,
        output=formatted_output,
    )
2025-01-14 08:59:53 -08:00
2025-02-27 20:19:02 -08:00
async def send_task_v2_webhook(task_v2: TaskV2) -> None:
    """POST the task v2 result to its configured webhook callback url.

    Silently no-ops when the task has no webhook url, no organization, or the
    organization has no valid API key. A non-200 response is only logged;
    a transport-level failure raises FailedToSendWebhook.

    Raises:
        FailedToSendWebhook: if the HTTP request itself fails.
    """
    if not task_v2.webhook_callback_url:
        return
    organization_id = task_v2.organization_id
    if not organization_id:
        return
    api_key = await app.DATABASE.get_valid_org_auth_token(
        organization_id,
        OrganizationAuthTokenType.api,
    )
    if not api_key:
        LOG.warning(
            "No valid API key found for the organization of task v2",
            task_v2_id=task_v2.observer_cruise_id,
        )
        return
    # build the task v2 response
    payload = task_v2.model_dump_json(by_alias=True)
    # signed headers let the receiver verify the payload came from Skyvern
    headers = generate_skyvern_webhook_headers(payload=payload, api_key=api_key.token)
    LOG.info(
        "Sending task v2 response to webhook callback url",
        task_v2_id=task_v2.observer_cruise_id,
        webhook_callback_url=task_v2.webhook_callback_url,
        payload=payload,
        headers=headers,
    )
    try:
        # Use the client as an async context manager so its connection pool is
        # always closed (the previous code leaked the AsyncClient). `content=`
        # is the httpx parameter for a raw str/bytes body; passing it via
        # `data=` is deprecated.
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                task_v2.webhook_callback_url,
                content=payload,
                headers=headers,
                timeout=httpx.Timeout(30.0),
            )
        if resp.status_code == 200:
            LOG.info(
                "Task v2 webhook sent successfully",
                task_v2_id=task_v2.observer_cruise_id,
                resp_code=resp.status_code,
                resp_text=resp.text,
            )
        else:
            LOG.info(
                "Task v2 webhook failed",
                task_v2_id=task_v2.observer_cruise_id,
                resp=resp,
                resp_code=resp.status_code,
                resp_text=resp.text,
            )
    except Exception as e:
        raise FailedToSendWebhook(task_v2_id=task_v2.observer_cruise_id) from e