diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 5a9ceaa2..dc83ac50 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -136,13 +136,11 @@ class BrowserState: browser_context: BrowserContext | None = None, page: Page | None = None, browser_artifacts: BrowserArtifacts = BrowserArtifacts(), - new_context_tree: bool = False, ): self.pw = pw self.browser_context = browser_context self.page = page self.browser_artifacts = browser_artifacts - self.new_context_tree = new_context_tree async def _close_all_other_pages(self) -> None: if not self.browser_context or not self.page: diff --git a/skyvern/webeye/browser_manager.py b/skyvern/webeye/browser_manager.py index 8003194a..92a113f2 100644 --- a/skyvern/webeye/browser_manager.py +++ b/skyvern/webeye/browser_manager.py @@ -25,7 +25,6 @@ class BrowserManager: async def _create_browser_state( proxy_location: ProxyLocation | None = None, url: str | None = None, - new_context_tree: bool = False, task_id: str | None = None, ) -> BrowserState: pw = await async_playwright().start() @@ -40,7 +39,6 @@ class BrowserManager: browser_context=browser_context, page=None, browser_artifacts=browser_artifacts, - new_context_tree=new_context_tree, ) async def get_or_create_for_task(self, task: Task) -> BrowserState: @@ -55,9 +53,8 @@ class BrowserManager: self.pages[task.task_id] = self.pages[task.workflow_run_id] return self.pages[task.task_id] - new_ctx = True - LOG.info("Creating browser state for task", task_id=task.task_id, new_ctx=new_ctx) - browser_state = await self._create_browser_state(task.proxy_location, task.url, new_ctx, task.task_id) + LOG.info("Creating browser state for task", task_id=task.task_id) + browser_state = await self._create_browser_state(task.proxy_location, task.url, task.task_id) # The URL here is only used when creating a new page, and not when using an existing page. # This will make sure browser_state.page is not None. diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 314086e6..c2d8e94e 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -544,7 +544,7 @@ function getListboxOptions(element) { return selectOptions; } -function buildTreeFromBody(new_ctx = false) { +function buildTreeFromBody() { var elements = []; var resultArray = []; @@ -634,7 +634,6 @@ function buildTreeFromBody(new_ctx = false) { } if ( - new_ctx && checkRequiredFromStyle(element) && !attrs["required"] && !attrs["aria-required"] @@ -712,7 +711,7 @@ function buildTreeFromBody(new_ctx = false) { elements[interactableParentId].children.push(elementObj); } // options already added to the select.options, no need to add options anymore - if (new_ctx && elementObj.options && elementObj.options.length > 0) { + if (elementObj.options && elementObj.options.length > 0) { return elementObj; } // Recursively process the children of the element @@ -744,7 +743,7 @@ function buildTreeFromBody(new_ctx = false) { if (parentEle) { if ( targetParentElements.has(parentEle.tagName.toLowerCase()) || - (new_ctx && checkParentClass(parentEle.className.toLowerCase())) + checkParentClass(parentEle.className.toLowerCase()) ) { targetContextualParent = parentEle; } @@ -939,7 +938,7 @@ function buildTreeFromBody(new_ctx = false) { element.context = context; } - if (new_ctx && checkStringIncludeRequire(context)) { + if (checkStringIncludeRequire(context)) { if ( !element.attributes["required"] && !element.attributes["aria-required"] @@ -949,10 +948,6 @@ function buildTreeFromBody(new_ctx = false) { } } - if (!new_ctx) { - return [elements, resultArray]; - } - resultArray = removeOrphanNode(resultArray); resultArray.forEach((root) => { trimDuplicatedText(root); diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index 199cb3f0..9258d927 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -184,7 +184,7 @@ async def scrape_web_unsafe( await remove_bounding_boxes(page) await scroll_to_top(page, drow_boxes=False) - elements, element_tree = await get_interactable_element_tree(page, browser_state.new_context_tree) + elements, element_tree = await get_interactable_element_tree(page) element_tree = cleanup_elements(copy.deepcopy(element_tree)) _build_element_links(elements) @@ -211,15 +211,15 @@ async def scrape_web_unsafe( ) -async def get_interactable_element_tree(page: Page, new_context_tree: bool) -> tuple[list[dict], list[dict]]: +async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[dict]]: """ Get the element tree of the page, including all the elements that are interactable. :param page: Page instance to get the element tree from. :return: Tuple containing the element tree and a map of element IDs to elements. """ await page.evaluate(JS_FUNCTION_DEFS) - js_script = "(new_ctx) => buildTreeFromBody(new_ctx)" - elements, element_tree = await page.evaluate(js_script, new_context_tree) + js_script = "() => buildTreeFromBody()" + elements, element_tree = await page.evaluate(js_script) return elements, element_tree