From 49fd5f3c0737ffc6b8e75d52c00cea86c230f08c Mon Sep 17 00:00:00 2001
From: LawyZheng <lawyzheng1106@gmail.com>
Date: Wed, 29 Oct 2025 12:36:44 +0800
Subject: [PATCH] skip msedge download hub page (#3844)

---
 skyvern/webeye/browser_factory.py | 34 +++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py
index a697ee55..c914c4dd 100644
--- a/skyvern/webeye/browser_factory.py
+++ b/skyvern/webeye/browser_factory.py
@@ -690,13 +690,9 @@ class BrowserState:
             page: Page | None = None
             use_existing_page = False
             if browser_address and len(self.browser_context.pages) > 0:
-                pages = [
-                    http_page
-                    for http_page in self.browser_context.pages
-                    if http_page.url == "about:blank" or urlparse(http_page.url).scheme in ["http", "https"]
-                ]
+                pages = await self.list_valid_pages()
                 if len(pages) > 0:
-                    page = pages[0]
+                    page = pages[-1]
                     use_existing_page = True
             if page is None:
                 page = await self.browser_context.new_page()
@@ -749,15 +745,35 @@ class BrowserState:
     async def get_working_page(self) -> Page | None:
         # HACK: currently, assuming the last page is always the working page.
         # Need to refactor this logic when we want to manipulate multi pages together
-        if self.__page is None or self.browser_context is None or len(self.browser_context.pages) == 0:
+        # TODO: stop relying on the page index; track pages more robustly before fully supporting multi-page manipulation
+        if self.__page is None or self.browser_context is None:
             return None
 
-        last_page = self.browser_context.pages[-1]
+        # pick the last valid page (about:blank or http/https) as the working page
+        pages = await self.list_valid_pages()
+        if len(pages) == 0:
+            LOG.info("No http, https or blank page found in the browser context, return None")
+            return None
+
+        last_page = pages[-1]
         if self.__page == last_page:
             return self.__page
-        await self.set_working_page(last_page, len(self.browser_context.pages) - 1)
+        await self.set_working_page(last_page, len(pages) - 1)
         return last_page
 
+    async def list_valid_pages(self) -> list[Page]:
+        # List all valid pages (about:blank and http/https pages) in the browser context.
+        # MSEdge CDP bug(?): when connecting to MSEdge over CDP, the download hub page
+        # is included in context.pages, so non-blank, non-http(s) pages are filtered out.
+        if self.browser_context is None:
+            return []
+
+        return [
+            http_page
+            for http_page in self.browser_context.pages
+            if http_page.url == "about:blank" or urlparse(http_page.url).scheme in ["http", "https"]
+        ]
+
     async def validate_browser_context(self, page: Page) -> bool:
         # validate the content
         try: