From df5f40bdb954ecca23b795ed6fe5762c660cda97 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 19 Jun 2025 00:42:34 -0700 Subject: [PATCH] user defined browser header (#2752) Co-authored-by: lawyzheng --- ..._19_0740-a9fd7a08469c_add_extra_headers.py | 37 ++++++++++++++++++ skyvern/forge/agent.py | 2 + skyvern/forge/sdk/db/client.py | 8 ++++ skyvern/forge/sdk/db/models.py | 4 ++ skyvern/forge/sdk/db/utils.py | 3 ++ skyvern/forge/sdk/routes/agent_protocol.py | 6 ++- skyvern/forge/sdk/schemas/task_v2.py | 2 + skyvern/forge/sdk/schemas/tasks.py | 3 ++ skyvern/forge/sdk/workflow/models/workflow.py | 3 ++ skyvern/forge/sdk/workflow/models/yaml.py | 1 + skyvern/forge/sdk/workflow/service.py | 9 +++++ skyvern/schemas/runs.py | 9 ++++- skyvern/services/task_v2_service.py | 5 +++ skyvern/webeye/browser_factory.py | 39 +++++++++++++++---- skyvern/webeye/browser_manager.py | 11 +++++- 15 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 alembic/versions/2025_06_19_0740-a9fd7a08469c_add_extra_headers.py diff --git a/alembic/versions/2025_06_19_0740-a9fd7a08469c_add_extra_headers.py b/alembic/versions/2025_06_19_0740-a9fd7a08469c_add_extra_headers.py new file mode 100644 index 00000000..83aa01b2 --- /dev/null +++ b/alembic/versions/2025_06_19_0740-a9fd7a08469c_add_extra_headers.py @@ -0,0 +1,37 @@ +"""add_extra_headers + +Revision ID: a9fd7a08469c +Revises: 2be3e0ba85ff +Create Date: 2025-06-19 07:40:10.580993+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "a9fd7a08469c" +down_revision: Union[str, None] = "2be3e0ba85ff" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("observer_cruises", sa.Column("extra_http_headers", sa.JSON(), nullable=True)) + op.add_column("tasks", sa.Column("extra_http_headers", sa.JSON(), nullable=True)) + op.add_column("workflow_runs", sa.Column("extra_http_headers", sa.JSON(), nullable=True)) + op.add_column("workflows", sa.Column("extra_http_headers", sa.JSON(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("workflows", "extra_http_headers") + op.drop_column("workflow_runs", "extra_http_headers") + op.drop_column("tasks", "extra_http_headers") + op.drop_column("observer_cruises", "extra_http_headers") + # ### end Alembic commands ### diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 0344cd05..5d230234 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -184,6 +184,7 @@ class ForgeAgent: include_action_history_in_verification=task_block.include_action_history_in_verification, model=task_block.model, max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times, + extra_http_headers=workflow_run.extra_http_headers, ) LOG.info( "Created a new task for workflow run", @@ -241,6 +242,7 @@ class ForgeAgent: include_action_history_in_verification=task_request.include_action_history_in_verification, model=task_request.model, max_screenshot_scrolling_times=task_request.max_screenshot_scrolling_times, + extra_http_headers=task_request.extra_http_headers, ) LOG.info( "Created new task", diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index ccf17839..53e2a8b2 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -150,6 +150,7 @@ class AgentDB: include_action_history_in_verification: bool | None = None, model: dict[str, Any] | None = None, max_screenshot_scrolling_times: int | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> Task: try: async with self.Session() as session: @@ -178,6 +179,7 @@ class AgentDB: include_action_history_in_verification=include_action_history_in_verification, model=model, max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, ) session.add(new_task) await session.commit() @@ -1300,6 +1302,7 @@ class AgentDB: proxy_location: ProxyLocation | None = None, webhook_callback_url: str | None = None, max_screenshot_scrolling_times: int | None = None, + extra_http_headers: dict[str, str] | None = None, totp_verification_url: str | None = None, totp_identifier: str | None = None, persist_browser_session: bool = False, @@ -1320,6 +1323,7 @@ class AgentDB: totp_verification_url=totp_verification_url, totp_identifier=totp_identifier, max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, persist_browser_session=persist_browser_session, model=model, is_saved_task=is_saved_task, @@ -1564,6 +1568,7 @@ class AgentDB: totp_identifier: str | None = None, parent_workflow_run_id: str | None = None, max_screenshot_scrolling_times: int | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> WorkflowRun: try: async with self.Session() as session: @@ -1578,6 +1583,7 @@ class AgentDB: totp_identifier=totp_identifier, parent_workflow_run_id=parent_workflow_run_id, max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, ) session.add(workflow_run) await session.commit() @@ -2523,6 +2529,7 @@ class AgentDB: error_code_mapping: dict | None = None, model: dict[str, Any] | None = None, max_screenshot_scrolling_times: int | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> TaskV2: async with self.Session() as session: new_task_v2 = TaskV2Model( @@ -2540,6 +2547,7 @@ class AgentDB: organization_id=organization_id, model=model, max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, ) session.add(new_task_v2) await session.commit() diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 67dc5bc2..459e4b68 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -77,6 +77,7 @@ class TaskModel(Base): failure_reason = Column(String) proxy_location = Column(String) extracted_information_schema = Column(JSON) + extra_http_headers = Column(JSON, nullable=True) workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), index=True) order = Column(Integer, nullable=True) retry = Column(Integer, nullable=True) @@ -220,6 +221,7 @@ class WorkflowModel(Base): proxy_location = Column(String) webhook_callback_url = Column(String) max_screenshot_scrolling_times = Column(Integer, nullable=True) + extra_http_headers = Column(JSON, nullable=True) totp_verification_url = Column(String) totp_identifier = Column(String) persist_browser_session = Column(Boolean, default=False, nullable=False) @@ -257,6 +259,7 @@ class WorkflowRunModel(Base): totp_verification_url = Column(String) totp_identifier = Column(String) max_screenshot_scrolling_times = Column(Integer, nullable=True) + extra_http_headers = Column(JSON, nullable=True) queued_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True) @@ -626,6 +629,7 @@ class TaskV2Model(Base): error_code_mapping = Column(JSON, nullable=True) max_steps = Column(Integer, nullable=True) max_screenshot_scrolling_times = Column(Integer, nullable=True) + extra_http_headers = Column(JSON, nullable=True) queued_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True) diff --git a/skyvern/forge/sdk/db/utils.py b/skyvern/forge/sdk/db/utils.py index 1d51d008..a739c5dd 100644 --- a/skyvern/forge/sdk/db/utils.py +++ b/skyvern/forge/sdk/db/utils.py @@ -130,6 +130,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p organization_id=task_obj.organization_id, proxy_location=(ProxyLocation(task_obj.proxy_location) if task_obj.proxy_location else None), extracted_information_schema=task_obj.extracted_information_schema, + extra_http_headers=task_obj.extra_http_headers, workflow_run_id=task_obj.workflow_run_id, workflow_permanent_id=workflow_permanent_id, order=task_obj.order, @@ -248,6 +249,7 @@ def convert_to_workflow(workflow_model: WorkflowModel, debug_enabled: bool = Fal modified_at=workflow_model.modified_at, deleted_at=workflow_model.deleted_at, status=WorkflowStatus(workflow_model.status), + extra_http_headers=workflow_model.extra_http_headers, ) @@ -281,6 +283,7 @@ def convert_to_workflow_run( modified_at=workflow_run_model.modified_at, workflow_title=workflow_title, max_screenshot_scrolling_times=workflow_run_model.max_screenshot_scrolling_times, + extra_http_headers=workflow_run_model.extra_http_headers, ) diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 5ba68624..feb09fe8 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -167,6 +167,7 @@ async def run_task( include_action_history_in_verification=run_request.include_action_history_in_verification, model=run_request.model, max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times, + extra_http_headers=run_request.extra_http_headers, ) task_v1_response = await task_v1_service.run_task( task=task_v1_request, @@ -224,6 +225,7 @@ async def run_task( create_task_run=True, model=run_request.model, max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times, + extra_http_headers=run_request.extra_http_headers, ) except MissingBrowserAddressError as e: raise HTTPException(status_code=400, detail=str(e)) from e @@ -320,9 +322,10 @@ async def run_workflow( proxy_location=workflow_run_request.proxy_location, webhook_callback_url=workflow_run_request.webhook_url, totp_identifier=workflow_run_request.totp_identifier, - totp_url=workflow_run_request.totp_url, + totp_verification_url=workflow_run_request.totp_url, browser_session_id=workflow_run_request.browser_session_id, max_screenshot_scrolling_times=workflow_run_request.max_screenshot_scrolling_times, + extra_http_headers=workflow_run_request.extra_http_headers, ) try: @@ -1822,6 +1825,7 @@ async def run_task_v2( error_code_mapping=data.error_code_mapping, max_screenshot_scrolling_times=data.max_screenshot_scrolling_times, browser_session_id=data.browser_session_id, + extra_http_headers=data.extra_http_headers, ) except MissingBrowserAddressError as e: raise HTTPException(status_code=400, detail=str(e)) from e diff --git a/skyvern/forge/sdk/schemas/task_v2.py b/skyvern/forge/sdk/schemas/task_v2.py index 8cbf418a..fcf21ac5 100644 --- a/skyvern/forge/sdk/schemas/task_v2.py +++ b/skyvern/forge/sdk/schemas/task_v2.py @@ -49,6 +49,7 @@ class TaskV2(BaseModel): started_at: datetime | None = None finished_at: datetime | None = None max_screenshot_scrolling_times: int | None = None + extra_http_headers: dict[str, str] | None = None created_at: datetime modified_at: datetime @@ -150,6 +151,7 @@ class TaskV2Request(BaseModel): extracted_information_schema: dict | list | str | None = None error_code_mapping: dict[str, str] | None = None max_screenshot_scrolling_times: int | None = None + extra_http_headers: dict[str, str] | None = None @field_validator("url", "webhook_callback_url", "totp_verification_url") @classmethod diff --git a/skyvern/forge/sdk/schemas/tasks.py b/skyvern/forge/sdk/schemas/tasks.py index 31b0a7e5..7d09d0cd 100644 --- a/skyvern/forge/sdk/schemas/tasks.py +++ b/skyvern/forge/sdk/schemas/tasks.py @@ -73,6 +73,9 @@ class TaskBase(BaseModel): default=None, description="The requested schema of the extracted information.", ) + extra_http_headers: dict[str, str] | None = Field( + None, description="The extra HTTP headers for the requests in browser." + ) complete_criterion: str | None = Field( default=None, description="Criterion to complete", examples=["Complete if 'hello world' shows up on the page"] ) diff --git a/skyvern/forge/sdk/workflow/models/workflow.py b/skyvern/forge/sdk/workflow/models/workflow.py index 0c9002dc..d344d6d8 100644 --- a/skyvern/forge/sdk/workflow/models/workflow.py +++ b/skyvern/forge/sdk/workflow/models/workflow.py @@ -23,6 +23,7 @@ class WorkflowRequestBody(BaseModel): totp_identifier: str | None = None browser_session_id: str | None = None max_screenshot_scrolling_times: int | None = None + extra_http_headers: dict[str, str] | None = None @field_validator("webhook_callback_url", "totp_verification_url") @classmethod @@ -78,6 +79,7 @@ class Workflow(BaseModel): model: dict[str, Any] | None = None status: WorkflowStatus = WorkflowStatus.published max_screenshot_scrolling_times: int | None = None + extra_http_headers: dict[str, str] | None = None created_at: datetime modified_at: datetime @@ -110,6 +112,7 @@ class WorkflowRun(BaseModel): workflow_permanent_id: str organization_id: str status: WorkflowRunStatus + extra_http_headers: dict[str, str] | None = None proxy_location: ProxyLocation | None = None webhook_callback_url: str | None = None totp_verification_url: str | None = None diff --git a/skyvern/forge/sdk/workflow/models/yaml.py b/skyvern/forge/sdk/workflow/models/yaml.py index d2ce6ae4..87a37edc 100644 --- a/skyvern/forge/sdk/workflow/models/yaml.py +++ b/skyvern/forge/sdk/workflow/models/yaml.py @@ -425,4 +425,5 @@ class WorkflowCreateYAMLRequest(BaseModel): workflow_definition: WorkflowDefinitionYAML is_saved_task: bool = False max_screenshot_scrolling_times: int | None = None + extra_http_headers: dict[str, str] | None = None status: WorkflowStatus = WorkflowStatus.published diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index 5509a8c2..c74326c7 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -599,6 +599,7 @@ class WorkflowService: version: int | None = None, is_saved_task: bool = False, status: WorkflowStatus = WorkflowStatus.published, + extra_http_headers: dict[str, str] | None = None, ) -> Workflow: return await app.DATABASE.create_workflow( title=title, @@ -616,6 +617,7 @@ class WorkflowService: version=version, is_saved_task=is_saved_task, status=status, + extra_http_headers=extra_http_headers, ) async def get_workflow(self, workflow_id: str, organization_id: str | None = None) -> Workflow: @@ -782,6 +784,7 @@ class WorkflowService: totp_identifier=workflow_request.totp_identifier, parent_workflow_run_id=parent_workflow_run_id, max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times, + extra_http_headers=workflow_request.extra_http_headers, ) async def mark_workflow_run_as_completed(self, workflow_run_id: str) -> WorkflowRun: @@ -1470,6 +1473,7 @@ class WorkflowService: persist_browser_session=request.persist_browser_session, model=request.model, max_screenshot_scrolling_times=request.max_screenshot_scrolling_times, + extra_http_headers=request.extra_http_headers, workflow_permanent_id=workflow_permanent_id, version=existing_version + 1, is_saved_task=request.is_saved_task, @@ -1488,6 +1492,7 @@ class WorkflowService: persist_browser_session=request.persist_browser_session, model=request.model, max_screenshot_scrolling_times=request.max_screenshot_scrolling_times, + extra_http_headers=request.extra_http_headers, is_saved_task=request.is_saved_task, status=request.status, ) @@ -2069,6 +2074,8 @@ class WorkflowService: organization: Organization, title: str, proxy_location: ProxyLocation | None = None, + max_screenshot_scrolling_times: int | None = None, + extra_http_headers: dict[str, str] | None = None, status: WorkflowStatus = WorkflowStatus.published, ) -> Workflow: """ @@ -2083,6 +2090,8 @@ class WorkflowService: ), proxy_location=proxy_location, status=status, + max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, ) return await app.WORKFLOW_SERVICE.create_workflow_from_request( organization=organization, diff --git a/skyvern/schemas/runs.py b/skyvern/schemas/runs.py index dd809eb7..4aaec6ee 100644 --- a/skyvern/schemas/runs.py +++ b/skyvern/schemas/runs.py @@ -271,7 +271,10 @@ class TaskRunRequest(BaseModel): description=MODEL_CONFIG, examples=None, ) - + extra_http_headers: dict[str, str] | None = Field( + default=None, + description="The extra HTTP headers for the requests in browser.", + ) publish_workflow: bool = Field( default=False, description="Whether to publish this task as a reusable workflow. Only available for skyvern-2.0.", @@ -334,6 +337,10 @@ class WorkflowRunRequest(BaseModel): default=None, description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's None or 0, it takes the current viewpoint screenshot.", ) + extra_http_headers: dict[str, str] | None = Field( + default=None, + description="The extra HTTP headers for the requests in browser.", + ) @field_validator("webhook_url", "totp_url") @classmethod diff --git a/skyvern/services/task_v2_service.py b/skyvern/services/task_v2_service.py index 2609785a..73e01c0b 100644 --- a/skyvern/services/task_v2_service.py +++ b/skyvern/services/task_v2_service.py @@ -167,6 +167,7 @@ async def initialize_task_v2( model: dict[str, Any] | None = None, max_screenshot_scrolling_times: int | None = None, browser_session_id: str | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> TaskV2: task_v2 = await app.DATABASE.create_task_v2( prompt=user_prompt, @@ -179,6 +180,7 @@ async def initialize_task_v2( error_code_mapping=error_code_mapping, model=model, max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, ) # set task_v2_id in context context = skyvern_context.current() @@ -222,12 +224,15 @@ async def initialize_task_v2( metadata.workflow_title, proxy_location=proxy_location, status=workflow_status, + max_screenshot_scrolling_times=max_screenshot_scrolling_times, + extra_http_headers=extra_http_headers, ) workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run( request_id=None, workflow_request=WorkflowRequestBody( max_screenshot_scrolling_times=max_screenshot_scrolling_times, browser_session_id=browser_session_id, + extra_http_headers=extra_http_headers, ), workflow_permanent_id=new_workflow.workflow_permanent_id, organization=organization, diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 76c0cc4b..5b3bf0d5 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -170,7 +170,11 @@ class BrowserContextFactory: f.write(preference_file_content) @staticmethod - def build_browser_args(proxy_location: ProxyLocation | None = None, cdp_port: int | None = None) -> dict[str, Any]: + def build_browser_args( + proxy_location: ProxyLocation | None = None, + cdp_port: int | None = None, + extra_http_headers: dict[str, str] | None = None, + ) -> dict[str, Any]: video_dir = f"{settings.VIDEO_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}" har_dir = ( f"{settings.HAR_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{BrowserContextFactory.get_subdir()}.har" @@ -214,6 +218,7 @@ class BrowserContextFactory: "width": settings.BROWSER_WIDTH, "height": settings.BROWSER_HEIGHT, }, + "extra_http_headers": extra_http_headers, } if settings.ENABLE_PROXY: @@ -405,7 +410,10 @@ def _is_chrome_running() -> bool: async def _create_headless_chromium( - playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict + playwright: Playwright, + proxy_location: ProxyLocation | None = None, + extra_http_headers: dict[str, str] | None = None, + **kwargs: dict, ) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]: user_data_dir = make_temp_directory(prefix="skyvern_browser_") download_dir = initialize_download_dir() @@ -414,7 +422,9 @@ async def _create_headless_chromium( download_dir=download_dir, ) cdp_port: int | None = _get_cdp_port(kwargs) - browser_args = BrowserContextFactory.build_browser_args(proxy_location=proxy_location, cdp_port=cdp_port) + browser_args = BrowserContextFactory.build_browser_args( + proxy_location=proxy_location, cdp_port=cdp_port, extra_http_headers=extra_http_headers + ) browser_args.update( { "user_data_dir": user_data_dir, @@ -428,7 +438,10 @@ async def _create_headless_chromium( async def _create_headful_chromium( - playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict + playwright: Playwright, + proxy_location: ProxyLocation | None = None, + extra_http_headers: dict[str, str] | None = None, + **kwargs: dict, ) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]: user_data_dir = make_temp_directory(prefix="skyvern_browser_") download_dir = initialize_download_dir() @@ -437,7 +450,9 @@ async def _create_headful_chromium( download_dir=download_dir, ) cdp_port: int | None = _get_cdp_port(kwargs) - browser_args = BrowserContextFactory.build_browser_args(proxy_location=proxy_location, cdp_port=cdp_port) + browser_args = BrowserContextFactory.build_browser_args( + proxy_location=proxy_location, cdp_port=cdp_port, extra_http_headers=extra_http_headers + ) browser_args.update( { "user_data_dir": user_data_dir, @@ -479,7 +494,10 @@ def is_valid_chromium_user_data_dir(directory: str) -> bool: async def _create_cdp_connection_browser( - playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict + playwright: Playwright, + proxy_location: ProxyLocation | None = None, + extra_http_headers: dict[str, str] | None = None, + **kwargs: dict, ) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]: browser_type = settings.BROWSER_TYPE browser_path = settings.CHROME_EXECUTABLE_PATH @@ -528,7 +546,7 @@ async def _create_cdp_connection_browser( else: LOG.info("Port 9222 is in use, using existing browser") - browser_args = BrowserContextFactory.build_browser_args() + browser_args = BrowserContextFactory.build_browser_args(extra_http_headers=extra_http_headers) browser_artifacts = BrowserContextFactory.build_browser_artifacts( har_path=browser_args["record_har_path"], @@ -549,6 +567,7 @@ async def _create_cdp_connection_browser( browser_context = await browser.new_context( record_video_dir=browser_args["record_video_dir"], viewport=browser_args["viewport"], + extra_http_headers=browser_args["extra_http_headers"], ) LOG.info( "Launched browser CDP connection", @@ -608,6 +627,7 @@ class BrowserState: task_id: str | None = None, workflow_run_id: str | None = None, organization_id: str | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> None: if self.browser_context is None: LOG.info("creating browser context") @@ -622,6 +642,7 @@ class BrowserState: task_id=task_id, workflow_run_id=workflow_run_id, organization_id=organization_id, + extra_http_headers=extra_http_headers, ) self.browser_context = browser_context self.browser_artifacts = browser_artifacts @@ -752,6 +773,7 @@ class BrowserState: task_id: str | None = None, workflow_run_id: str | None = None, organization_id: str | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> Page: page = await self.get_working_page() if page is not None: @@ -764,6 +786,7 @@ class BrowserState: task_id=task_id, workflow_run_id=workflow_run_id, organization_id=organization_id, + extra_http_headers=extra_http_headers, ) except Exception as e: error_message = str(e) @@ -778,6 +801,7 @@ class BrowserState: task_id=task_id, workflow_run_id=workflow_run_id, organization_id=organization_id, + extra_http_headers=extra_http_headers, ) page = await self.__assert_page() @@ -791,6 +815,7 @@ class BrowserState: task_id=task_id, workflow_run_id=workflow_run_id, organization_id=organization_id, + extra_http_headers=extra_http_headers, ) page = await self.__assert_page() return page diff --git a/skyvern/webeye/browser_manager.py b/skyvern/webeye/browser_manager.py index 5308a32d..a88dcc06 100644 --- a/skyvern/webeye/browser_manager.py +++ b/skyvern/webeye/browser_manager.py @@ -31,6 +31,7 @@ class BrowserManager: task_id: str | None = None, workflow_run_id: str | None = None, organization_id: str | None = None, + extra_http_headers: dict[str, str] | None = None, ) -> BrowserState: pw = await async_playwright().start() ( @@ -44,6 +45,7 @@ class BrowserManager: task_id=task_id, workflow_run_id=workflow_run_id, organization_id=organization_id, + extra_http_headers=extra_http_headers, ) return BrowserState( pw=pw, @@ -109,6 +111,7 @@ class BrowserManager: url=task.url, task_id=task.task_id, organization_id=task.organization_id, + extra_http_headers=task.extra_http_headers, ) self.pages[task.task_id] = browser_state @@ -118,7 +121,11 @@ class BrowserManager: # The URL here is only used when creating a new page, and not when using an existing page. # This will make sure browser_state.page is not None. await browser_state.get_or_create_page( - url=task.url, proxy_location=task.proxy_location, task_id=task.task_id, organization_id=task.organization_id + url=task.url, + proxy_location=task.proxy_location, + task_id=task.task_id, + organization_id=task.organization_id, + extra_http_headers=task.extra_http_headers, ) return browser_state @@ -173,6 +180,7 @@ class BrowserManager: url=url, workflow_run_id=workflow_run.workflow_run_id, organization_id=workflow_run.organization_id, + extra_http_headers=workflow_run.extra_http_headers, ) self.pages[workflow_run_id] = browser_state @@ -186,6 +194,7 @@ class BrowserManager: proxy_location=workflow_run.proxy_location, workflow_run_id=workflow_run.workflow_run_id, organization_id=workflow_run.organization_id, + extra_http_headers=workflow_run.extra_http_headers, ) return browser_state