Decrease the token count of the parse-input prompt (#3210)

This commit is contained in:
LawyZheng
2025-08-16 10:05:38 +08:00
committed by GitHub
parent 618fba5371
commit 9a359ebfde
3 changed files with 68 additions and 30 deletions

View File

@@ -741,7 +741,8 @@ async def handle_sequential_click_for_dropdown(
reasoning=action.reasoning, intention=action.intention, element_id=action.element_id
),
step=step,
scraped_page=scraped_page,
element_tree_builder=scraped_page,
skyvern_element=anchor_element,
)
if dropdown_select_context.is_date_related:
@@ -934,7 +935,8 @@ async def handle_input_text_action(
input_or_select_context = await _get_input_or_select_context(
action=action,
scraped_page=scraped_page,
element_tree_builder=scraped_page,
skyvern_element=skyvern_element,
step=step,
)
@@ -1538,7 +1540,7 @@ async def handle_select_option_action(
)
input_or_select_context = await _get_input_or_select_context(
action=action, scraped_page=scraped_page, step=step
action=action, element_tree_builder=scraped_page, step=step, skyvern_element=skyvern_element
)
if len(incremental_element) == 0:
@@ -3332,26 +3334,9 @@ async def normal_select(
action_result: List[ActionResult] = []
is_success = False
locator = skyvern_element.get_locator()
prompt = load_prompt_with_elements(
element_tree_builder=builder,
prompt_engine=prompt_engine,
template_name="parse-input-or-select-context",
action_reasoning=action.reasoning,
element_id=action.element_id,
input_or_select_context = await _get_input_or_select_context(
action=action, element_tree_builder=builder, step=step, skyvern_element=skyvern_element
)
json_response = await app.SECONDARY_LLM_API_HANDLER(
prompt=prompt, step=step, prompt_name="parse-input-or-select-context"
)
json_response["intention"] = action.intention
input_or_select_context = InputOrSelectContext.model_validate(json_response)
LOG.info(
"Parsed input/select context",
context=input_or_select_context,
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.refresh_select_options()
options_html = skyvern_element.build_HTML()
field_information = (
@@ -3694,10 +3679,46 @@ class AbstractActionForContextParse(BaseModel):
async def _get_input_or_select_context(
action: InputTextAction | SelectOptionAction | AbstractActionForContextParse, scraped_page: ScrapedPage, step: Step
action: InputTextAction | SelectOptionAction | AbstractActionForContextParse,
skyvern_element: SkyvernElement,
element_tree_builder: ElementTreeBuilder,
step: Step,
ancestor_depth: int = 5,
) -> InputOrSelectContext:
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
try:
depth = await skyvern_frame.get_element_dom_depth(await skyvern_element.get_element_handler())
except Exception:
LOG.warning("Failed to get element depth, using the original element tree", exc_info=True)
depth = 0
if depth > ancestor_depth:
# use ancestor to build the context
path = "/".join([".."] * ancestor_depth)
locator = skyvern_element.get_locator().locator(path)
try:
element_handle = await locator.element_handle(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
if element_handle is not None:
elements, element_tree = await skyvern_frame.build_tree_from_element(
starter=element_handle,
frame=skyvern_element.get_frame_id(),
)
clean_up_func = app.AGENT_FUNCTION.cleanup_element_tree_factory()
element_tree = await clean_up_func(skyvern_element.get_frame(), "", copy.deepcopy(element_tree))
element_tree_trimmed = trim_element_tree(copy.deepcopy(element_tree))
element_tree_builder = ScrapedPage(
elements=elements,
element_tree=element_tree,
element_tree_trimmed=element_tree_trimmed,
_browser_state=None,
_clean_up_func=None,
_scrape_exclude=None,
)
except Exception:
LOG.warning("Failed to get sub element tree, using the original element tree", exc_info=True, path=path)
prompt = load_prompt_with_elements(
element_tree_builder=scraped_page,
element_tree_builder=element_tree_builder,
prompt_engine=prompt_engine,
template_name="parse-input-or-select-context",
action_reasoning=action.reasoning,