diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index 3818bfab..935f75ea 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -463,6 +463,11 @@ class FailToSelectByIndex(SkyvernException): super().__init__(f"Failed to select by index. element_id={element_id}") +class EmptyDomOrHtmlTree(SkyvernException): + def __init__(self) -> None: + super().__init__("Empty dom or html tree") + + class OptionIndexOutOfBound(SkyvernException): def __init__(self, element_id: str): super().__init__(f"Option index is out of bound. element_id={element_id}") diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 07e1a8a5..8abbc224 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -1373,7 +1373,7 @@ class ForgeAgent: reasoning = reasonings[0].summary[0].text if reasonings and reasonings[0].summary else None assistant_message = assistant_messages[0].content[0].text if assistant_messages else None skyvern_repsonse_prompt = load_prompt_with_elements( - scraped_page=scraped_page, + element_tree_builder=scraped_page, prompt_engine=prompt_engine, template_name="cua-answer-question", navigation_goal=task.navigation_goal, @@ -1597,7 +1597,7 @@ class ForgeAgent: actions_and_results_str = await self._get_action_results(task, current_step=step) verification_prompt = load_prompt_with_elements( - scraped_page=scraped_page_refreshed, + element_tree_builder=scraped_page_refreshed, prompt_engine=prompt_engine, template_name="check-user-goal", navigation_goal=task.navigation_goal, @@ -1974,7 +1974,7 @@ class ForgeAgent: context = skyvern_context.ensure_context() return load_prompt_with_elements( - scraped_page=scraped_page, + element_tree_builder=scraped_page, prompt_engine=prompt_engine, template_name=template, navigation_goal=navigation_goal, diff --git a/skyvern/services/task_v2_service.py b/skyvern/services/task_v2_service.py index 62141b25..18646dfe 100644 --- a/skyvern/services/task_v2_service.py +++ b/skyvern/services/task_v2_service.py @@ -1251,7 +1251,7 @@ async def _generate_extraction_task( # extract the data context = skyvern_context.ensure_context() generate_extraction_task_prompt = load_prompt_with_elements( - scraped_page=scraped_page, + element_tree_builder=scraped_page, prompt_engine=prompt_engine, template_name="task_v2_generate_extraction_task", current_url=current_url, diff --git a/skyvern/utils/prompt_engine.py b/skyvern/utils/prompt_engine.py index b2c68459..48b1cee7 100644 --- a/skyvern/utils/prompt_engine.py +++ b/skyvern/utils/prompt_engine.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from skyvern.constants import DEFAULT_MAX_TOKENS from skyvern.forge.sdk.prompting import PromptEngine from skyvern.utils.token_counter import count_tokens -from skyvern.webeye.scraper.scraper import ScrapedPage +from skyvern.webeye.scraper.scraper import ElementTreeBuilder LOG = structlog.get_logger() @@ -20,22 +20,26 @@ class CheckPhoneNumberFormatResponse(BaseModel): recommended_phone_number: str | None +HTMLTreeStr = str + + def load_prompt_with_elements( - scraped_page: ScrapedPage, + element_tree_builder: ElementTreeBuilder, prompt_engine: PromptEngine, template_name: str, html_need_skyvern_attrs: bool = True, **kwargs: Any, ) -> str: + elements = element_tree_builder.build_element_tree(html_need_skyvern_attrs=html_need_skyvern_attrs) prompt = prompt_engine.load_prompt( template_name, - elements=scraped_page.build_element_tree(html_need_skyvern_attrs=html_need_skyvern_attrs), + elements=elements, **kwargs, ) token_count = count_tokens(prompt) - if token_count > DEFAULT_MAX_TOKENS: + if token_count > DEFAULT_MAX_TOKENS and element_tree_builder.support_economy_elements_tree(): # get rid of all the secondary elements like SVG, etc - economy_elements_tree = scraped_page.build_economy_elements_tree( + economy_elements_tree = element_tree_builder.build_economy_elements_tree( html_need_skyvern_attrs=html_need_skyvern_attrs ) prompt = prompt_engine.load_prompt(template_name, elements=economy_elements_tree, **kwargs) @@ -50,7 +54,7 @@ def load_prompt_with_elements( if economy_token_count > DEFAULT_MAX_TOKENS: # !!! HACK alert # dump the last 1/3 of the html context and keep the first 2/3 of the html context - economy_elements_tree_dumped = scraped_page.build_economy_elements_tree( + economy_elements_tree_dumped = element_tree_builder.build_economy_elements_tree( html_need_skyvern_attrs=html_need_skyvern_attrs, percent_to_keep=2 / 3, ) diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 9a29b7a2..4213a099 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -91,6 +91,7 @@ from skyvern.webeye.actions.actions import ( from skyvern.webeye.actions.responses import ActionAbort, ActionFailure, ActionResult, ActionSuccess from skyvern.webeye.scraper.scraper import ( CleanupElementTreeFunc, + ElementTreeBuilder, IncrementalScrapePage, ScrapedPage, hash_element, @@ -1352,10 +1353,14 @@ async def handle_select_option_action( step_id=step.step_id, exc_info=True, ) - return await normal_select(action=action, skyvern_element=skyvern_element, dom=dom, task=task, step=step) + return await normal_select( + action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step + ) if not exist: - return await normal_select(action=action, skyvern_element=skyvern_element, dom=dom, task=task, step=step) + return await normal_select( + action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step + ) if blocking_element is None: LOG.info( @@ -1373,11 +1378,13 @@ async def handle_select_option_action( exc_info=True, ) return await normal_select( - action=action, skyvern_element=skyvern_element, dom=dom, task=task, step=step + action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step ) if not exist or blocking_element is None: - return await normal_select(action=action, skyvern_element=skyvern_element, dom=dom, task=task, step=step) + return await normal_select( + action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step + ) LOG.info( " element, we only handle the