From 49fd5f3c0737ffc6b8e75d52c00cea86c230f08c Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Wed, 29 Oct 2025 12:36:44 +0800 Subject: [PATCH] skip msedge download hub page (#3844) --- skyvern/webeye/browser_factory.py | 34 +++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index a697ee55..c914c4dd 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -690,13 +690,9 @@ class BrowserState: page: Page | None = None use_existing_page = False if browser_address and len(self.browser_context.pages) > 0: - pages = [ - http_page - for http_page in self.browser_context.pages - if http_page.url == "about:blank" or urlparse(http_page.url).scheme in ["http", "https"] - ] + pages = await self.list_valid_pages() if len(pages) > 0: - page = pages[0] + page = pages[-1] use_existing_page = True if page is None: page = await self.browser_context.new_page() @@ -749,15 +745,35 @@ class BrowserState: async def get_working_page(self) -> Page | None: # HACK: currently, assuming the last page is always the working page. # Need to refactor this logic when we want to manipulate multi pages together - if self.__page is None or self.browser_context is None or len(self.browser_context.pages) == 0: + # TODO: do not use index of pages, it should be more robust if we want to fully support multi pages manipulation + if self.__page is None or self.browser_context is None: return None - last_page = self.browser_context.pages[-1] + # pick the last and http/https page as the working page + pages = await self.list_valid_pages() + if len(pages) == 0: + LOG.info("No http, https or blank page found in the browser context, return None") + return None + + last_page = pages[-1] if self.__page == last_page: return self.__page - await self.set_working_page(last_page, len(self.browser_context.pages) - 1) + await self.set_working_page(last_page, len(pages) - 1) return last_page + async def list_valid_pages(self) -> list[Page]: + # List all valid pages(blank page, and http/https page) in the browser context + # MSEdge CDP bug(?) + # when using CDP connect to a MSEdge, the download hub will be included in the context.pages + if self.browser_context is None: + return [] + + return [ + http_page + for http_page in self.browser_context.pages + if http_page.url == "about:blank" or urlparse(http_page.url).scheme in ["http", "https"] + ] + async def validate_browser_context(self, page: Page) -> bool: # validate the content try: