From 699e64089fc2d599ad662e43f7b697aa667c05ac Mon Sep 17 00:00:00 2001 From: pedrohsdb Date: Tue, 17 Feb 2026 17:28:05 -0800 Subject: [PATCH] Fix cached script element IDs missing after page navigation (#4774) --- .../script_generations/script_skyvern_page.py | 34 +++++ tests/unit/test_script_skyvern_page.py | 133 +++++++++++++++++- 2 files changed, 164 insertions(+), 3 deletions(-) diff --git a/skyvern/core/script_generations/script_skyvern_page.py b/skyvern/core/script_generations/script_skyvern_page.py index ba2da0e6..599365ec 100644 --- a/skyvern/core/script_generations/script_skyvern_page.py +++ b/skyvern/core/script_generations/script_skyvern_page.py @@ -240,6 +240,7 @@ class ScriptSkyvernPage(SkyvernPage): # Wait for page to be ready before executing action # This helps prevent issues where cached actions execute before the page is fully loaded await self._wait_for_page_ready_before_action() + await self._ensure_element_ids_on_page() call.result = await fn(self, *args, **kwargs) @@ -561,6 +562,39 @@ class ScriptSkyvernPage(SkyvernPage): # Don't block action execution if page readiness check fails LOG.debug("Page readiness check failed, proceeding with action", exc_info=True) + async def _ensure_element_ids_on_page(self) -> None: + """ + Ensure unique_id attributes exist on DOM elements for cached selectors. + + After page navigation, the new DOM has no unique_id attributes because + they are only set during scraping (domUtils.js buildTreeFromBody). Cached + actions use [unique_id='XXX'] selectors, so we need to build the element + tree before executing cached actions on a new page. + """ + try: + if not self.page: + return + + # Quick check: do unique_id attributes already exist? + has_unique_ids = await self.page.evaluate("() => document.querySelector('[unique_id]') !== null") + if has_unique_ids: + return + + # Inject domUtils.js and build the element tree to set unique_id attrs. + # Use a short timeout since this is best-effort; we don't want to hang for 60s. + skyvern_frame = await SkyvernFrame.create_instance(frame=self.page) + await skyvern_frame.build_tree_from_body( + frame_name="main.frame", + frame_index=0, + timeout_ms=15000, + ) + LOG.info("Injected element IDs on page for cached script execution") + except Exception: + LOG.debug( + "Failed to ensure element IDs on page, proceeding with action", + exc_info=True, + ) + async def get_actual_value( self, value: str, diff --git a/tests/unit/test_script_skyvern_page.py b/tests/unit/test_script_skyvern_page.py index bcce702e..386f76cc 100644 --- a/tests/unit/test_script_skyvern_page.py +++ b/tests/unit/test_script_skyvern_page.py @@ -1,8 +1,8 @@ """ -Unit tests for ScriptSkyvernPage, specifically testing _wait_for_page_ready_before_action. +Unit tests for ScriptSkyvernPage. -This test file exists to prevent regressions like the AttributeError bug where -self._page was used instead of self.page (see PR #8425, SKY-7676). +Tests _wait_for_page_ready_before_action (regression test for self._page bug, PR #8425) +and _ensure_element_ids_on_page (injects unique_id attrs after page navigation). """ import inspect @@ -222,3 +222,130 @@ async def test_wait_for_page_ready_attribute_access_regression(): f"Found 'self._page' in code at line {i}: {line.strip()}\n" "This is a regression! SkyvernPage uses self.page, not self._page." ) + + +# ============================================================================= +# Tests for _ensure_element_ids_on_page +# ============================================================================= + + +@pytest.mark.asyncio +async def test_ensure_element_ids_skips_when_ids_exist(mock_scraped_page, mock_ai): + """ + When unique_id attributes already exist on the page, build_tree_from_body + should NOT be called (fast path). + """ + mock_page = create_mock_page() + # SkyvernPage.__getattribute__ delegates self.page to mock_page.page + mock_page.page.evaluate = AsyncMock(return_value=True) # unique_ids exist + + with patch( + "skyvern.core.script_generations.skyvern_page.Page.__init__", + return_value=None, + ): + script_page = ScriptSkyvernPage( + scraped_page=mock_scraped_page, + page=mock_page, + ai=mock_ai, + ) + + with patch( + "skyvern.core.script_generations.script_skyvern_page.SkyvernFrame.create_instance", + new_callable=AsyncMock, + ) as mock_create_instance: + await script_page._ensure_element_ids_on_page() + + # Should NOT inject domUtils.js since IDs already exist + mock_create_instance.assert_not_called() + + +@pytest.mark.asyncio +async def test_ensure_element_ids_injects_when_ids_missing(mock_scraped_page, mock_ai): + """ + When no unique_id attributes exist (after page navigation), should inject + domUtils.js and call buildTreeFromBody to set them. + """ + mock_page = create_mock_page() + # SkyvernPage.__getattribute__ delegates self.page to mock_page.page, + # so set evaluate on the delegated object + mock_page.page.evaluate = AsyncMock(return_value=False) # no unique_ids + + with patch( + "skyvern.core.script_generations.skyvern_page.Page.__init__", + return_value=None, + ): + script_page = ScriptSkyvernPage( + scraped_page=mock_scraped_page, + page=mock_page, + ai=mock_ai, + ) + + mock_skyvern_frame = MagicMock() + mock_skyvern_frame.build_tree_from_body = AsyncMock(return_value=([], [])) + + with patch( + "skyvern.core.script_generations.script_skyvern_page.SkyvernFrame.create_instance", + new_callable=AsyncMock, + return_value=mock_skyvern_frame, + ) as mock_create_instance: + await script_page._ensure_element_ids_on_page() + + # Should inject domUtils.js + mock_create_instance.assert_called_once() + + # Should build element tree + mock_skyvern_frame.build_tree_from_body.assert_called_once_with( + frame_name="main.frame", + frame_index=0, + timeout_ms=15000, + ) + + +@pytest.mark.asyncio +async def test_ensure_element_ids_handles_no_page(mock_scraped_page, mock_ai): + """ + When self.page is None, should return early without error. + """ + mock_page = create_mock_page() + + with patch( + "skyvern.core.script_generations.skyvern_page.Page.__init__", + return_value=None, + ): + script_page = ScriptSkyvernPage( + scraped_page=mock_scraped_page, + page=mock_page, + ai=mock_ai, + ) + script_page.page = None + + with patch( + "skyvern.core.script_generations.script_skyvern_page.SkyvernFrame.create_instance", + new_callable=AsyncMock, + ) as mock_create_instance: + await script_page._ensure_element_ids_on_page() + mock_create_instance.assert_not_called() + + +@pytest.mark.asyncio +async def test_ensure_element_ids_catches_exceptions(mock_scraped_page, mock_ai): + """ + Exceptions in _ensure_element_ids_on_page should be caught and not block + action execution. + """ + mock_page = create_mock_page() + # SkyvernPage.__getattribute__ delegates self.page to mock_page.page + mock_page.page.evaluate = AsyncMock(side_effect=Exception("Page crashed")) + + with patch( + "skyvern.core.script_generations.skyvern_page.Page.__init__", + return_value=None, + ): + script_page = ScriptSkyvernPage( + scraped_page=mock_scraped_page, + page=mock_page, + ai=mock_ai, + ) + + # Should NOT raise + await script_page._ensure_element_ids_on_page()