Use persistent browser session in runnables (#1510)

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
Co-authored-by: Shuchang Zheng <shu@skyvern.com>
This commit is contained in:
Maksim Ivanov
2025-01-09 22:04:53 +01:00
committed by GitHub
parent 5ed7e5ad8e
commit a4744ed9f5
12 changed files with 506 additions and 59 deletions

View File

@@ -239,6 +239,7 @@ class ForgeAgent:
api_key: str | None = None,
close_browser_on_completion: bool = True,
task_block: BaseTaskBlock | None = None,
browser_session_id: str | None = None,
) -> Tuple[Step, DetailedAgentStepOutput | None, Step | None]:
workflow_run: WorkflowRun | None = None
if task.workflow_run_id:
@@ -284,6 +285,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=True,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, None, None
@@ -316,7 +319,7 @@ class ForgeAgent:
step,
browser_state,
detailed_output,
) = await self._initialize_execution_state(task, step, workflow_run)
) = await self._initialize_execution_state(task, step, workflow_run, browser_session_id)
if page := await browser_state.get_working_page():
await self.register_async_operations(organization, task, page)
@@ -366,6 +369,7 @@ class ForgeAgent:
last_step=last_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return last_step, detailed_output, None
@@ -382,6 +386,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
elif step.status == StepStatus.completed:
@@ -404,6 +409,7 @@ class ForgeAgent:
last_step=last_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return last_step, detailed_output, None
elif maybe_next_step:
@@ -433,6 +439,7 @@ class ForgeAgent:
next_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
task_block=task_block,
)
elif settings.execute_all_steps() and next_step:
@@ -442,6 +449,7 @@ class ForgeAgent:
next_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
task_block=task_block,
)
else:
@@ -477,6 +485,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@@ -512,6 +521,7 @@ class ForgeAgent:
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
need_final_screenshot=False,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@@ -530,6 +540,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
except InvalidTaskStatusTransition:
@@ -544,6 +555,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, detailed_output, None
except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction) as e:
@@ -560,6 +573,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, detailed_output, None
@@ -581,6 +596,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@@ -1110,14 +1126,23 @@ class ForgeAgent:
)
async def _initialize_execution_state(
self, task: Task, step: Step, workflow_run: WorkflowRun | None = None
self,
task: Task,
step: Step,
workflow_run: WorkflowRun | None = None,
browser_session_id: str | None = None,
) -> tuple[Step, BrowserState, DetailedAgentStepOutput]:
if workflow_run:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=task.url
workflow_run=workflow_run,
url=task.url,
browser_session_id=browser_session_id,
)
else:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(task)
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(
task=task,
browser_session_id=browser_session_id,
)
# Initialize video artifact for the task here, afterwards it'll only get updated
if browser_state and browser_state.browser_artifacts:
video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(
@@ -1465,6 +1490,7 @@ class ForgeAgent:
need_call_webhook: bool = True,
close_browser_on_completion: bool = True,
need_final_screenshot: bool = True,
browser_session_id: str | None = None,
) -> None:
"""
send the task response to the webhook callback url
@@ -1544,7 +1570,9 @@ class ForgeAgent:
)
await self.async_operation_pool.remove_task(task.task_id)
await self.cleanup_browser_and_create_artifacts(close_browser_on_completion, last_step, task)
await self.cleanup_browser_and_create_artifacts(
close_browser_on_completion, last_step, task, browser_session_id=browser_session_id
)
# Wait for all tasks to complete before generating the links for the artifacts
await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([task.task_id])
@@ -1713,7 +1741,11 @@ class ForgeAgent:
)
async def cleanup_browser_and_create_artifacts(
self, close_browser_on_completion: bool, last_step: Step, task: Task
self,
close_browser_on_completion: bool,
last_step: Step,
task: Task,
browser_session_id: str | None = None,
) -> None:
"""
Developer notes: we should not expect any exception to be raised here.
@@ -1721,7 +1753,12 @@ class ForgeAgent:
If errors are raised and not caught inside this function, please catch and handle them.
"""
# We need to close the browser even if there is no webhook callback url or api key
browser_state = await app.BROWSER_MANAGER.cleanup_for_task(task.task_id, close_browser_on_completion)
browser_state = await app.BROWSER_MANAGER.cleanup_for_task(
task.task_id,
close_browser_on_completion,
browser_session_id,
task.organization_id,
)
if browser_state:
# Update recording artifact after closing the browser, so we can get an accurate recording
video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(

View File

@@ -25,6 +25,7 @@ class AsyncExecutor(abc.ABC):
organization_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
pass
@@ -39,6 +40,7 @@ class AsyncExecutor(abc.ABC):
workflow_run_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
pass
@@ -51,6 +53,7 @@ class AsyncExecutor(abc.ABC):
organization_id: str,
observer_cruise_id: str,
max_iterations_override: int | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
pass
@@ -65,10 +68,13 @@ class BackgroundTaskExecutor(AsyncExecutor):
organization_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
LOG.info("Executing task using background task executor", task_id=task_id)
close_browser_on_completion = browser_session_id is None
organization = await app.DATABASE.get_organization(organization_id)
if organization is None:
raise OrganizationNotFound(organization_id)
@@ -98,6 +104,8 @@ class BackgroundTaskExecutor(AsyncExecutor):
task,
step,
api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
async def execute_workflow(
@@ -109,6 +117,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
workflow_run_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
LOG.info(
@@ -126,6 +135,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
workflow_run_id=workflow_run_id,
api_key=api_key,
organization=organization,
browser_session_id=browser_session_id,
)
async def execute_cruise(
@@ -135,6 +145,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
organization_id: str,
observer_cruise_id: str,
max_iterations_override: int | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
LOG.info(
@@ -169,4 +180,5 @@ class BackgroundTaskExecutor(AsyncExecutor):
organization=organization,
observer_cruise_id=observer_cruise_id,
max_iterations_override=max_iterations_override,
browser_session_id=browser_session_id,
)

View File

@@ -156,6 +156,7 @@ async def create_agent_task(
task_id=created_task.task_id,
organization_id=current_org.organization_id,
max_steps_override=x_max_steps_override,
browser_session_id=task.browser_session_id,
api_key=x_api_key,
)
return CreateTaskResponse(task_id=created_task.task_id)
@@ -653,6 +654,7 @@ async def execute_workflow(
workflow_id=workflow_run.workflow_id,
workflow_run_id=workflow_run.workflow_run_id,
max_steps_override=x_max_steps_override,
browser_session_id=workflow_request.browser_session_id,
api_key=x_api_key,
)
return RunWorkflowResponse(
@@ -1143,6 +1145,7 @@ async def observer_cruise(
organization_id=organization.organization_id,
observer_cruise_id=observer_cruise.observer_cruise_id,
max_iterations_override=x_max_iterations_override,
browser_session_id=data.browser_session_id,
)
return observer_cruise

View File

@@ -89,3 +89,4 @@ class ObserverMetadata(BaseModel):
class CruiseRequest(BaseModel):
    """Payload describing a single cruise request.

    Only ``user_prompt`` is required; every other field defaults to ``None``.
    """

    # Free-text prompt supplied by the caller (required).
    user_prompt: str
    # Optional URL, typed as ``HttpUrl``; ``None`` when the caller gives none.
    url: HttpUrl | None = None
    # Optional browser session identifier; ``None`` when not supplied.
    # NOTE(review): presumably names an existing persistent browser session
    # to reuse instead of launching a new one - confirm against the consumer.
    browser_session_id: str | None = None

View File

@@ -155,6 +155,7 @@ class TaskRequest(TaskBase):
examples=["https://my-webhook.com"],
)
totp_verification_url: str | None = None
browser_session_id: str | None = None
@field_validator("url", "webhook_callback_url", "totp_verification_url")
@classmethod

View File

@@ -173,6 +173,7 @@ async def run_observer_cruise(
observer_cruise_id: str,
request_id: str | None = None,
max_iterations_override: str | int | None = None,
browser_session_id: str | None = None,
) -> None:
organization_id = organization.organization_id
try:
@@ -197,6 +198,7 @@ async def run_observer_cruise(
observer_cruise=observer_cruise,
request_id=request_id,
max_iterations_override=max_iterations_override,
browser_session_id=browser_session_id,
)
except OperationalError:
LOG.error("Database error when running observer cruise", exc_info=True)
@@ -219,7 +221,12 @@ async def run_observer_cruise(
return
finally:
if workflow and workflow_run:
await app.WORKFLOW_SERVICE.clean_up_workflow(workflow=workflow, workflow_run=workflow_run)
await app.WORKFLOW_SERVICE.clean_up_workflow(
workflow=workflow,
workflow_run=workflow_run,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
else:
LOG.warning("Workflow or workflow run not found")
@@ -231,6 +238,7 @@ async def run_observer_cruise_helper(
observer_cruise: ObserverCruise,
request_id: str | None = None,
max_iterations_override: str | int | None = None,
browser_session_id: str | None = None,
) -> tuple[Workflow, WorkflowRun] | tuple[None, None]:
organization_id = organization.organization_id
observer_cruise_id = observer_cruise.observer_cruise_id
@@ -318,6 +326,7 @@ async def run_observer_cruise_helper(
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run,
url=url,
browser_session_id=browser_session_id,
)
scraped_page = await scrape_website(
browser_state,
@@ -494,7 +503,13 @@ async def run_observer_cruise_helper(
LOG.info("Workflow created", workflow_id=workflow.workflow_id)
# execute the extraction task
workflow_run = await handle_block_result(block, block_result, workflow, workflow_run)
workflow_run = await handle_block_result(
block,
block_result,
workflow,
workflow_run,
browser_session_id=browser_session_id,
)
if workflow_run.status != WorkflowRunStatus.running:
LOG.info(
"Workflow run is not running anymore, stopping the observer",
@@ -575,6 +590,7 @@ async def handle_block_result(
workflow: Workflow,
workflow_run: WorkflowRun,
is_last_block: bool = True,
browser_session_id: str | None = None,
) -> WorkflowRun:
workflow_run_id = workflow_run.workflow_run_id
if block_result.status == BlockStatus.canceled:
@@ -593,6 +609,8 @@ async def handle_block_result(
workflow=workflow,
workflow_run=workflow_run,
need_call_webhook=False,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
elif block_result.status == BlockStatus.failed:
LOG.error(

View File

@@ -187,7 +187,12 @@ class Block(BaseModel, abc.ABC):
@abc.abstractmethod
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
pass
@@ -196,6 +201,7 @@ class Block(BaseModel, abc.ABC):
workflow_run_id: str,
parent_workflow_run_block_id: str | None = None,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_block_id = None
@@ -267,7 +273,13 @@ class Block(BaseModel, abc.ABC):
LOG.info(
"Executing block", workflow_run_id=workflow_run_id, block_label=self.label, block_type=self.block_type
)
return await self.execute(workflow_run_id, workflow_run_block_id, organization_id=organization_id, **kwargs)
return await self.execute(
workflow_run_id,
workflow_run_block_id,
organization_id=organization_id,
browser_session_id=browser_session_id,
**kwargs,
)
except Exception as e:
LOG.exception(
"Block execution failed",
@@ -409,7 +421,12 @@ class BaseTaskBlock(Block):
return order, retry + 1
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
current_retry = 0
@@ -503,7 +520,7 @@ class BaseTaskBlock(Block):
# the first task block will create the browser state and do the navigation
try:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=self.url
workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id
)
# add screenshot artifact for the first task
screenshot = await browser_state.take_screenshot(full_page=True)
@@ -568,6 +585,8 @@ class BaseTaskBlock(Block):
task=task,
step=step,
task_block=self,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
except Exception as e:
# Make sure the task is marked as failed in the database before raising the exception
@@ -918,7 +937,12 @@ class ForLoopBlock(Block):
)
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
try:
@@ -1025,7 +1049,12 @@ class CodeBlock(Block):
self.code = self.format_block_parameter_template_from_workflow_run_context(self.code, workflow_run_context)
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
raise DisabledBlockExecutionError("CodeBlock is disabled")
# get workflow run context
@@ -1145,7 +1174,12 @@ class TextPromptBlock(Block):
return response
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
@@ -1215,7 +1249,12 @@ class DownloadToS3Block(Block):
os.unlink(file_path)
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
@@ -1296,7 +1335,12 @@ class UploadToS3Block(Block):
return f"s3://{s3_bucket}/{s3_key}"
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
@@ -1619,7 +1663,12 @@ class SendEmailBlock(Block):
return msg
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
await app.DATABASE.update_workflow_run_block(
@@ -1716,7 +1765,12 @@ class FileParserBlock(Block):
raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
if (
@@ -1784,7 +1838,12 @@ class WaitBlock(Block):
return self.parameters
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# TODO: we need to support to interrupt the sleep when the workflow run failed/cancelled/terminated
await app.DATABASE.update_workflow_run_block(
@@ -1821,7 +1880,12 @@ class ValidationBlock(BaseTaskBlock):
return self.parameters
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
task_order, _ = await self.get_task_order(workflow_run_id, 0)
is_first_task = task_order == 0

View File

@@ -18,6 +18,7 @@ class WorkflowRequestBody(BaseModel):
webhook_callback_url: str | None = None
totp_verification_url: str | None = None
totp_identifier: str | None = None
browser_session_id: str | None = None
@field_validator("webhook_callback_url", "totp_verification_url")
@classmethod

View File

@@ -189,9 +189,16 @@ class WorkflowService:
workflow_run_id: str,
api_key: str,
organization: Organization,
browser_session_id: str | None = None,
) -> WorkflowRun:
"""Execute a workflow."""
organization_id = organization.organization_id
LOG.info(
"Executing workflow",
workflow_run_id=workflow_run_id,
organization_id=organization_id,
browser_session_id=browser_session_id,
)
workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id)
workflow = await self.get_workflow(workflow_id=workflow_run.workflow_id, organization_id=organization_id)
@@ -236,6 +243,8 @@ class WorkflowService:
workflow_run=workflow_run,
api_key=api_key,
need_call_webhook=True,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
parameters = block.get_all_parameters(workflow_run_id)
@@ -253,6 +262,7 @@ class WorkflowService:
block_result = await block.execute_safe(
workflow_run_id=workflow_run_id,
organization_id=organization_id,
browser_session_id=browser_session_id,
)
if block_result.status == BlockStatus.canceled:
LOG.info(
@@ -271,6 +281,8 @@ class WorkflowService:
workflow_run=workflow_run,
api_key=api_key,
need_call_webhook=False,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
elif block_result.status == BlockStatus.failed:
@@ -292,6 +304,8 @@ class WorkflowService:
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
@@ -326,6 +340,8 @@ class WorkflowService:
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
@@ -357,7 +373,13 @@ class WorkflowService:
await self.mark_workflow_run_as_failed(
workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
)
await self.clean_up_workflow(workflow=workflow, workflow_run=workflow_run, api_key=api_key)
await self.clean_up_workflow(
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
return workflow_run
refreshed_workflow_run = await app.DATABASE.get_workflow_run(
@@ -376,7 +398,13 @@ class WorkflowService:
workflow_run_id=workflow_run.workflow_run_id,
workflow_run_status=refreshed_workflow_run.status if refreshed_workflow_run else None,
)
await self.clean_up_workflow(workflow=workflow, workflow_run=workflow_run, api_key=api_key)
await self.clean_up_workflow(
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
return workflow_run
async def create_workflow(
@@ -865,6 +893,7 @@ class WorkflowService:
api_key: str | None = None,
close_browser_on_completion: bool = True,
need_call_webhook: bool = True,
browser_session_id: str | None = None,
) -> None:
analytics.capture("skyvern-oss-agent-workflow-status", {"status": workflow_run.status})
tasks = await self.get_tasks_by_workflow_run_id(workflow_run.workflow_run_id)
@@ -873,6 +902,8 @@ class WorkflowService:
workflow_run.workflow_run_id,
all_workflow_task_ids,
close_browser_on_completion,
browser_session_id,
organization_id=workflow_run.organization_id,
)
if browser_state:
await self.persist_video_data(browser_state, workflow, workflow_run)