Use persistent browser session in runnables (#1510)

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
Co-authored-by: Shuchang Zheng <shu@skyvern.com>
This commit is contained in:
Maksim Ivanov
2025-01-09 22:04:53 +01:00
committed by GitHub
parent 5ed7e5ad8e
commit a4744ed9f5
12 changed files with 506 additions and 59 deletions

View File

@@ -239,6 +239,7 @@ class ForgeAgent:
api_key: str | None = None,
close_browser_on_completion: bool = True,
task_block: BaseTaskBlock | None = None,
browser_session_id: str | None = None,
) -> Tuple[Step, DetailedAgentStepOutput | None, Step | None]:
workflow_run: WorkflowRun | None = None
if task.workflow_run_id:
@@ -284,6 +285,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=True,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, None, None
@@ -316,7 +319,7 @@ class ForgeAgent:
step,
browser_state,
detailed_output,
) = await self._initialize_execution_state(task, step, workflow_run)
) = await self._initialize_execution_state(task, step, workflow_run, browser_session_id)
if page := await browser_state.get_working_page():
await self.register_async_operations(organization, task, page)
@@ -366,6 +369,7 @@ class ForgeAgent:
last_step=last_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return last_step, detailed_output, None
@@ -382,6 +386,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
elif step.status == StepStatus.completed:
@@ -404,6 +409,7 @@ class ForgeAgent:
last_step=last_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return last_step, detailed_output, None
elif maybe_next_step:
@@ -433,6 +439,7 @@ class ForgeAgent:
next_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
task_block=task_block,
)
elif settings.execute_all_steps() and next_step:
@@ -442,6 +449,7 @@ class ForgeAgent:
next_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
task_block=task_block,
)
else:
@@ -477,6 +485,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@@ -512,6 +521,7 @@ class ForgeAgent:
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
need_final_screenshot=False,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@@ -530,6 +540,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
except InvalidTaskStatusTransition:
@@ -544,6 +555,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, detailed_output, None
except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction) as e:
@@ -560,6 +573,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, detailed_output, None
@@ -581,6 +596,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@@ -1110,14 +1126,23 @@ class ForgeAgent:
)
async def _initialize_execution_state(
self, task: Task, step: Step, workflow_run: WorkflowRun | None = None
self,
task: Task,
step: Step,
workflow_run: WorkflowRun | None = None,
browser_session_id: str | None = None,
) -> tuple[Step, BrowserState, DetailedAgentStepOutput]:
if workflow_run:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=task.url
workflow_run=workflow_run,
url=task.url,
browser_session_id=browser_session_id,
)
else:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(task)
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(
task=task,
browser_session_id=browser_session_id,
)
# Initialize video artifact for the task here, afterwards it'll only get updated
if browser_state and browser_state.browser_artifacts:
video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(
@@ -1465,6 +1490,7 @@ class ForgeAgent:
need_call_webhook: bool = True,
close_browser_on_completion: bool = True,
need_final_screenshot: bool = True,
browser_session_id: str | None = None,
) -> None:
"""
send the task response to the webhook callback url
@@ -1544,7 +1570,9 @@ class ForgeAgent:
)
await self.async_operation_pool.remove_task(task.task_id)
await self.cleanup_browser_and_create_artifacts(close_browser_on_completion, last_step, task)
await self.cleanup_browser_and_create_artifacts(
close_browser_on_completion, last_step, task, browser_session_id=browser_session_id
)
# Wait for all tasks to complete before generating the links for the artifacts
await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([task.task_id])
@@ -1713,7 +1741,11 @@ class ForgeAgent:
)
async def cleanup_browser_and_create_artifacts(
self, close_browser_on_completion: bool, last_step: Step, task: Task
self,
close_browser_on_completion: bool,
last_step: Step,
task: Task,
browser_session_id: str | None = None,
) -> None:
"""
Developer notes: we should not expect any exception to be raised here.
@@ -1721,7 +1753,12 @@ class ForgeAgent:
If errors are raised and not caught inside this function, please catch and handle them.
"""
# We need to close the browser even if there is no webhook callback url or api key
browser_state = await app.BROWSER_MANAGER.cleanup_for_task(task.task_id, close_browser_on_completion)
browser_state = await app.BROWSER_MANAGER.cleanup_for_task(
task.task_id,
close_browser_on_completion,
browser_session_id,
task.organization_id,
)
if browser_state:
# Update recording artifact after closing the browser, so we can get an accurate recording
video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(