Always use the refreshed scraped page in cache (#4059)

This commit is contained in:
Shuchang Zheng
2025-11-21 00:28:54 -08:00
committed by GitHub
parent 2b1b28e4d7
commit dc34a657e8

View File

@@ -133,6 +133,13 @@ class RealSkyvernPageAi(SkyvernPageAi):
self.page = page self.page = page
self.current_label: str | None = None self.current_label: str | None = None
async def _refresh_scraped_page(
    self, take_screenshots: bool = True, max_retries: int = SKYVERN_PAGE_MAX_SCRAPING_RETRIES
) -> None:
    """Regenerate the scraped page and store it on this instance.

    Awaits ``self.scraped_page.generate_scraped_page`` and rebinds
    ``self.scraped_page`` to whatever it returns, so later reads of
    ``self.scraped_page`` see the regenerated object.

    Args:
        take_screenshots: Forwarded unchanged to ``generate_scraped_page``.
        max_retries: Forwarded unchanged to ``generate_scraped_page``.
    """
    regenerated_page = await self.scraped_page.generate_scraped_page(
        take_screenshots=take_screenshots,
        max_retries=max_retries,
    )
    # Rebind only after the await completes; if it raises, the previous
    # scraped page stays in place.
    self.scraped_page = regenerated_page
async def ai_click( async def ai_click(
self, self,
selector: str | None, selector: str | None,
@@ -145,10 +152,8 @@ class RealSkyvernPageAi(SkyvernPageAi):
# Build the element tree of the current page for the prompt # Build the element tree of the current page for the prompt
context = skyvern_context.ensure_context() context = skyvern_context.ensure_context()
payload_str = _get_context_data(data) payload_str = _get_context_data(data)
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots( await self._refresh_scraped_page(take_screenshots=False)
max_retries=SKYVERN_PAGE_MAX_SCRAPING_RETRIES element_tree = self.scraped_page.build_element_tree()
)
element_tree = refreshed_page.build_element_tree()
organization_id = context.organization_id if context else None organization_id = context.organization_id if context else None
step_id = context.step_id if context else None step_id = context.step_id if context else None
@@ -246,10 +251,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
else: else:
data = {SPECIAL_FIELD_VERIFICATION_CODE: verification_code} data = {SPECIAL_FIELD_VERIFICATION_CODE: verification_code}
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots( await self._refresh_scraped_page(take_screenshots=False)
max_retries=SKYVERN_PAGE_MAX_SCRAPING_RETRIES
)
self.scraped_page = refreshed_page
# Try to get element_id from selector if selector is provided # Try to get element_id from selector if selector is provided
element_id = await _get_element_id_by_selector(selector, self.page) if selector else None element_id = await _get_element_id_by_selector(selector, self.page) if selector else None
@@ -288,7 +290,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
) )
else: else:
# Use a heavier single-input-action when selector is not found # Use a heavier single-input-action when selector is not found
element_tree = refreshed_page.build_element_tree() element_tree = self.scraped_page.build_element_tree()
payload_str = _get_context_data(data) payload_str = _get_context_data(data)
merged_goal = INPUT_GOAL.format(intention=intention, prompt=prompt) merged_goal = INPUT_GOAL.format(intention=intention, prompt=prompt)
@@ -309,7 +311,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
actions_json = json_response.get("actions", []) actions_json = json_response.get("actions", [])
if actions_json and task and step: if actions_json and task and step:
actions = parse_actions(task, step.step_id, step.order, refreshed_page, actions_json) actions = parse_actions(task, step.step_id, step.order, self.scraped_page, actions_json)
if actions and isinstance(actions[0], InputTextAction): if actions and isinstance(actions[0], InputTextAction):
action = cast(InputTextAction, actions[0]) action = cast(InputTextAction, actions[0])
except Exception: except Exception:
@@ -352,10 +354,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
if files and isinstance(data, dict) and "files" not in data: if files and isinstance(data, dict) and "files" not in data:
data["files"] = files data["files"] = files
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots( await self._refresh_scraped_page(take_screenshots=False)
max_retries=SKYVERN_PAGE_MAX_SCRAPING_RETRIES
)
self.scraped_page = refreshed_page
# Try to get element_id from selector if selector is provided # Try to get element_id from selector if selector is provided
element_id = await _get_element_id_by_selector(selector, self.page) if selector else None element_id = await _get_element_id_by_selector(selector, self.page) if selector else None
@@ -390,7 +389,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
) )
else: else:
# Use a heavier single-upload-action when selector is not found # Use a heavier single-upload-action when selector is not found
element_tree = refreshed_page.build_element_tree() element_tree = self.scraped_page.build_element_tree()
payload_str = _get_context_data(data) payload_str = _get_context_data(data)
merged_goal = UPLOAD_GOAL.format(intention=intention, prompt=prompt) merged_goal = UPLOAD_GOAL.format(intention=intention, prompt=prompt)
@@ -411,7 +410,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
actions_json = json_response.get("actions", []) actions_json = json_response.get("actions", [])
if actions_json and task and step: if actions_json and task and step:
actions = parse_actions(task, step.step_id, step.order, refreshed_page, actions_json) actions = parse_actions(task, step.step_id, step.order, self.scraped_page, actions_json)
if actions and isinstance(actions[0], UploadFileAction): if actions and isinstance(actions[0], UploadFileAction):
action = cast(UploadFileAction, actions[0]) action = cast(UploadFileAction, actions[0])
files = action.file_url files = action.file_url
@@ -451,11 +450,8 @@ class RealSkyvernPageAi(SkyvernPageAi):
if value and isinstance(data, dict) and "value" not in data: if value and isinstance(data, dict) and "value" not in data:
data["value"] = value data["value"] = value
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots( await self._refresh_scraped_page(take_screenshots=False)
max_retries=SKYVERN_PAGE_MAX_SCRAPING_RETRIES element_tree = self.scraped_page.build_element_tree()
)
self.scraped_page = refreshed_page
element_tree = refreshed_page.build_element_tree()
merged_goal = SELECT_OPTION_GOAL.format(intention=intention, prompt=prompt) merged_goal = SELECT_OPTION_GOAL.format(intention=intention, prompt=prompt)
single_select_prompt = prompt_engine.load_prompt( single_select_prompt = prompt_engine.load_prompt(
template="single-select-action", template="single-select-action",
@@ -509,21 +505,21 @@ class RealSkyvernPageAi(SkyvernPageAi):
) -> dict[str, Any] | list | str | None: ) -> dict[str, Any] | list | str | None:
"""Extract information from the page using AI.""" """Extract information from the page using AI."""
scraped_page_refreshed = await self.scraped_page.refresh(max_retries=SKYVERN_PAGE_MAX_SCRAPING_RETRIES) await self._refresh_scraped_page(take_screenshots=True)
context = skyvern_context.current() context = skyvern_context.current()
tz_info = datetime.now(tz=timezone.utc).tzinfo tz_info = datetime.now(tz=timezone.utc).tzinfo
if context and context.tz_info: if context and context.tz_info:
tz_info = context.tz_info tz_info = context.tz_info
prompt = _render_template_with_label(prompt, label=self.current_label) prompt = _render_template_with_label(prompt, label=self.current_label)
extract_information_prompt = load_prompt_with_elements( extract_information_prompt = load_prompt_with_elements(
element_tree_builder=scraped_page_refreshed, element_tree_builder=self.scraped_page,
prompt_engine=prompt_engine, prompt_engine=prompt_engine,
template_name="extract-information", template_name="extract-information",
html_need_skyvern_attrs=False, html_need_skyvern_attrs=False,
data_extraction_goal=prompt, data_extraction_goal=prompt,
extracted_information_schema=schema, extracted_information_schema=schema,
current_url=scraped_page_refreshed.url, current_url=self.scraped_page.url,
extracted_text=scraped_page_refreshed.extracted_text, extracted_text=self.scraped_page.extracted_text,
error_code_mapping_str=(json.dumps(error_code_mapping) if error_code_mapping else None), error_code_mapping_str=(json.dumps(error_code_mapping) if error_code_mapping else None),
local_datetime=datetime.now(tz_info).isoformat(), local_datetime=datetime.now(tz_info).isoformat(),
) )
@@ -537,7 +533,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
result = await app.EXTRACTION_LLM_API_HANDLER( result = await app.EXTRACTION_LLM_API_HANDLER(
prompt=extract_information_prompt, prompt=extract_information_prompt,
step=step, step=step,
screenshots=scraped_page_refreshed.screenshots, screenshots=self.scraped_page.screenshots,
prompt_name="extract-information", prompt_name="extract-information",
) )
if context and context.script_mode: if context and context.script_mode:
@@ -606,11 +602,8 @@ class RealSkyvernPageAi(SkyvernPageAi):
reasoning=action_info.get("reasoning"), reasoning=action_info.get("reasoning"),
) )
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots( await self._refresh_scraped_page(take_screenshots=False)
max_retries=SKYVERN_PAGE_MAX_SCRAPING_RETRIES element_tree = self.scraped_page.build_element_tree()
)
self.scraped_page = refreshed_page
element_tree = refreshed_page.build_element_tree()
template: str template: str
llm_handler: Any llm_handler: Any
@@ -652,7 +645,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
LOG.warning("ai_act: no actions generated", prompt=prompt, action_type=action_type) LOG.warning("ai_act: no actions generated", prompt=prompt, action_type=action_type)
return return
actions = parse_actions(task, step.step_id, step.order, refreshed_page, actions_json) actions = parse_actions(task, step.step_id, step.order, self.scraped_page, actions_json)
if not actions: if not actions:
LOG.warning("ai_act: failed to parse actions", prompt=prompt, action_type=action_type) LOG.warning("ai_act: failed to parse actions", prompt=prompt, action_type=action_type)
return return
@@ -660,13 +653,13 @@ class RealSkyvernPageAi(SkyvernPageAi):
action = actions[0] action = actions[0]
if action_type == "CLICK" and isinstance(action, ClickAction): if action_type == "CLICK" and isinstance(action, ClickAction):
result = await handle_click_action(action, self.page, refreshed_page, task, step) result = await handle_click_action(action, self.page, self.scraped_page, task, step)
elif action_type == "INPUT_TEXT" and isinstance(action, InputTextAction): elif action_type == "INPUT_TEXT" and isinstance(action, InputTextAction):
result = await handle_input_text_action(action, self.page, refreshed_page, task, step) result = await handle_input_text_action(action, self.page, self.scraped_page, task, step)
elif action_type == "UPLOAD_FILE" and isinstance(action, UploadFileAction): elif action_type == "UPLOAD_FILE" and isinstance(action, UploadFileAction):
result = await handle_upload_file_action(action, self.page, refreshed_page, task, step) result = await handle_upload_file_action(action, self.page, self.scraped_page, task, step)
elif action_type == "SELECT_OPTION" and isinstance(action, SelectOptionAction): elif action_type == "SELECT_OPTION" and isinstance(action, SelectOptionAction):
result = await handle_select_option_action(action, self.page, refreshed_page, task, step) result = await handle_select_option_action(action, self.page, self.scraped_page, task, step)
else: else:
LOG.warning( LOG.warning(
"ai_act: action type mismatch", "ai_act: action type mismatch",