From 148693aa255b59da859392e23a0744ffe50a71ff Mon Sep 17 00:00:00 2001
From: LawyZheng
Date: Mon, 24 Feb 2025 13:17:28 +0800
Subject: [PATCH] fix evaluation script (#1819)

---
 evaluation/script/create_webvoyager_evaluation_result.py      | 3 +++
 evaluation/script/create_webvoyager_task_v2.py                | 3 +++
 evaluation/script/create_webvoyager_workflow.py               | 3 +++
 .../{eval_webvoyager_cruise.py => eval_webvoyager_task_v2.py} | 3 +++
 4 files changed, 12 insertions(+)
 rename evaluation/script/{eval_webvoyager_cruise.py => eval_webvoyager_task_v2.py} (98%)

diff --git a/evaluation/script/create_webvoyager_evaluation_result.py b/evaluation/script/create_webvoyager_evaluation_result.py
index b5134386..76078ec7 100644
--- a/evaluation/script/create_webvoyager_evaluation_result.py
+++ b/evaluation/script/create_webvoyager_evaluation_result.py
@@ -3,10 +3,13 @@ import json
 from typing import Any
 
 import typer
+from dotenv import load_dotenv
 
 from evaluation.core import SkyvernClient
 from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus
 
+load_dotenv()
+
 csv_headers = [
     "id",
     "status",
diff --git a/evaluation/script/create_webvoyager_task_v2.py b/evaluation/script/create_webvoyager_task_v2.py
index 53d7345d..955c83e3 100644
--- a/evaluation/script/create_webvoyager_task_v2.py
+++ b/evaluation/script/create_webvoyager_task_v2.py
@@ -4,6 +4,7 @@ from datetime import datetime
 from uuid import uuid4
 
 import typer
+from dotenv import load_dotenv
 
 from evaluation.core import Evaluator, SkyvernClient
 from evaluation.core.utils import load_webvoyager_case_from_json
@@ -11,6 +12,8 @@ from skyvern.forge import app
 from skyvern.forge.prompts import prompt_engine
 from skyvern.forge.sdk.schemas.task_v2 import ObserverTaskRequest
 
+load_dotenv()
+
 
 async def create_task_v2(
     base_url: str,
diff --git a/evaluation/script/create_webvoyager_workflow.py b/evaluation/script/create_webvoyager_workflow.py
index b47f1625..d25130f2 100644
--- a/evaluation/script/create_webvoyager_workflow.py
+++ b/evaluation/script/create_webvoyager_workflow.py
@@ -5,6 +5,7 @@ from typing import Optional
 from uuid import uuid4
 
 import typer
+from dotenv import load_dotenv
 
 from evaluation.core import Evaluator, SkyvernClient
 from evaluation.core.utils import load_webvoyager_case_from_json
@@ -13,6 +14,8 @@ from skyvern.forge.prompts import prompt_engine
 from skyvern.forge.sdk.schemas.tasks import ProxyLocation
 from skyvern.forge.sdk.workflow.models.workflow import WorkflowRequestBody
 
+load_dotenv()
+
 
 async def create_workflow_run(
     base_url: str,
diff --git a/evaluation/script/eval_webvoyager_cruise.py b/evaluation/script/eval_webvoyager_task_v2.py
similarity index 98%
rename from evaluation/script/eval_webvoyager_cruise.py
rename to evaluation/script/eval_webvoyager_task_v2.py
index f5084125..00bbd7fb 100644
--- a/evaluation/script/eval_webvoyager_cruise.py
+++ b/evaluation/script/eval_webvoyager_task_v2.py
@@ -4,10 +4,13 @@ import json
 from typing import Any
 
 import typer
+from dotenv import load_dotenv
 
 from evaluation.core import Evaluator, SkyvernClient
 from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus
 
+load_dotenv()
+
 csv_headers = [
     "id",
     "status",