Only take up to 1 screenshot if the HTML is too big (#2108)

This commit is contained in:
Shuchang Zheng
2025-04-05 23:33:34 -04:00
committed by GitHub
parent 3c612968ce
commit a72fcadd9a
5 changed files with 64 additions and 38 deletions

View File

@@ -2,11 +2,11 @@ from typing import Any
import structlog
from skyvern.constants import DEFAULT_MAX_TOKENS
from skyvern.forge.sdk.prompting import PromptEngine
from skyvern.utils.token_counter import count_tokens
from skyvern.webeye.scraper.scraper import ScrapedPage
DEFAULT_MAX_TOKENS = 100000
LOG = structlog.get_logger()
@@ -14,13 +14,20 @@ def load_prompt_with_elements(
scraped_page: ScrapedPage,
prompt_engine: PromptEngine,
template_name: str,
html_need_skyvern_attrs: bool = True,
**kwargs: Any,
) -> str:
prompt = prompt_engine.load_prompt(template_name, elements=scraped_page.build_element_tree(), **kwargs)
prompt = prompt_engine.load_prompt(
template_name,
elements=scraped_page.build_element_tree(html_need_skyvern_attrs=html_need_skyvern_attrs),
**kwargs,
)
token_count = count_tokens(prompt)
if token_count > DEFAULT_MAX_TOKENS:
# get rid of all the secondary elements like SVG, etc
economy_elements_tree = scraped_page.build_economy_elements_tree()
economy_elements_tree = scraped_page.build_economy_elements_tree(
html_need_skyvern_attrs=html_need_skyvern_attrs
)
prompt = prompt_engine.load_prompt(template_name, elements=economy_elements_tree, **kwargs)
economy_token_count = count_tokens(prompt)
LOG.warning(
@@ -33,7 +40,10 @@ def load_prompt_with_elements(
if economy_token_count > DEFAULT_MAX_TOKENS:
# !!! HACK alert
# dump the last 1/3 of the html context and keep the first 2/3 of the html context
economy_elements_tree_dumped = scraped_page.build_economy_elements_tree(percent_to_keep=2 / 3)
economy_elements_tree_dumped = scraped_page.build_economy_elements_tree(
html_need_skyvern_attrs=html_need_skyvern_attrs,
percent_to_keep=2 / 3,
)
prompt = prompt_engine.load_prompt(template_name, elements=economy_elements_tree_dumped, **kwargs)
token_count_after_dump = count_tokens(prompt)
LOG.warning(