decrease parse input prompt token (#3210)
This commit is contained in:
@@ -741,7 +741,8 @@ async def handle_sequential_click_for_dropdown(
|
|||||||
reasoning=action.reasoning, intention=action.intention, element_id=action.element_id
|
reasoning=action.reasoning, intention=action.intention, element_id=action.element_id
|
||||||
),
|
),
|
||||||
step=step,
|
step=step,
|
||||||
scraped_page=scraped_page,
|
element_tree_builder=scraped_page,
|
||||||
|
skyvern_element=anchor_element,
|
||||||
)
|
)
|
||||||
|
|
||||||
if dropdown_select_context.is_date_related:
|
if dropdown_select_context.is_date_related:
|
||||||
@@ -934,7 +935,8 @@ async def handle_input_text_action(
|
|||||||
|
|
||||||
input_or_select_context = await _get_input_or_select_context(
|
input_or_select_context = await _get_input_or_select_context(
|
||||||
action=action,
|
action=action,
|
||||||
scraped_page=scraped_page,
|
element_tree_builder=scraped_page,
|
||||||
|
skyvern_element=skyvern_element,
|
||||||
step=step,
|
step=step,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1538,7 +1540,7 @@ async def handle_select_option_action(
|
|||||||
)
|
)
|
||||||
|
|
||||||
input_or_select_context = await _get_input_or_select_context(
|
input_or_select_context = await _get_input_or_select_context(
|
||||||
action=action, scraped_page=scraped_page, step=step
|
action=action, element_tree_builder=scraped_page, step=step, skyvern_element=skyvern_element
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(incremental_element) == 0:
|
if len(incremental_element) == 0:
|
||||||
@@ -3332,26 +3334,9 @@ async def normal_select(
|
|||||||
action_result: List[ActionResult] = []
|
action_result: List[ActionResult] = []
|
||||||
is_success = False
|
is_success = False
|
||||||
locator = skyvern_element.get_locator()
|
locator = skyvern_element.get_locator()
|
||||||
|
input_or_select_context = await _get_input_or_select_context(
|
||||||
prompt = load_prompt_with_elements(
|
action=action, element_tree_builder=builder, step=step, skyvern_element=skyvern_element
|
||||||
element_tree_builder=builder,
|
|
||||||
prompt_engine=prompt_engine,
|
|
||||||
template_name="parse-input-or-select-context",
|
|
||||||
action_reasoning=action.reasoning,
|
|
||||||
element_id=action.element_id,
|
|
||||||
)
|
)
|
||||||
json_response = await app.SECONDARY_LLM_API_HANDLER(
|
|
||||||
prompt=prompt, step=step, prompt_name="parse-input-or-select-context"
|
|
||||||
)
|
|
||||||
json_response["intention"] = action.intention
|
|
||||||
input_or_select_context = InputOrSelectContext.model_validate(json_response)
|
|
||||||
LOG.info(
|
|
||||||
"Parsed input/select context",
|
|
||||||
context=input_or_select_context,
|
|
||||||
task_id=task.task_id,
|
|
||||||
step_id=step.step_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
await skyvern_element.refresh_select_options()
|
await skyvern_element.refresh_select_options()
|
||||||
options_html = skyvern_element.build_HTML()
|
options_html = skyvern_element.build_HTML()
|
||||||
field_information = (
|
field_information = (
|
||||||
@@ -3694,10 +3679,46 @@ class AbstractActionForContextParse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
async def _get_input_or_select_context(
|
async def _get_input_or_select_context(
|
||||||
action: InputTextAction | SelectOptionAction | AbstractActionForContextParse, scraped_page: ScrapedPage, step: Step
|
action: InputTextAction | SelectOptionAction | AbstractActionForContextParse,
|
||||||
|
skyvern_element: SkyvernElement,
|
||||||
|
element_tree_builder: ElementTreeBuilder,
|
||||||
|
step: Step,
|
||||||
|
ancestor_depth: int = 5,
|
||||||
) -> InputOrSelectContext:
|
) -> InputOrSelectContext:
|
||||||
|
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
|
||||||
|
try:
|
||||||
|
depth = await skyvern_frame.get_element_dom_depth(await skyvern_element.get_element_handler())
|
||||||
|
except Exception:
|
||||||
|
LOG.warning("Failed to get element depth, using the original element tree", exc_info=True)
|
||||||
|
depth = 0
|
||||||
|
|
||||||
|
if depth > ancestor_depth:
|
||||||
|
# use ancestor to build the context
|
||||||
|
path = "/".join([".."] * ancestor_depth)
|
||||||
|
locator = skyvern_element.get_locator().locator(path)
|
||||||
|
try:
|
||||||
|
element_handle = await locator.element_handle(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
|
||||||
|
if element_handle is not None:
|
||||||
|
elements, element_tree = await skyvern_frame.build_tree_from_element(
|
||||||
|
starter=element_handle,
|
||||||
|
frame=skyvern_element.get_frame_id(),
|
||||||
|
)
|
||||||
|
clean_up_func = app.AGENT_FUNCTION.cleanup_element_tree_factory()
|
||||||
|
element_tree = await clean_up_func(skyvern_element.get_frame(), "", copy.deepcopy(element_tree))
|
||||||
|
element_tree_trimmed = trim_element_tree(copy.deepcopy(element_tree))
|
||||||
|
element_tree_builder = ScrapedPage(
|
||||||
|
elements=elements,
|
||||||
|
element_tree=element_tree,
|
||||||
|
element_tree_trimmed=element_tree_trimmed,
|
||||||
|
_browser_state=None,
|
||||||
|
_clean_up_func=None,
|
||||||
|
_scrape_exclude=None,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
LOG.warning("Failed to get sub element tree, using the original element tree", exc_info=True, path=path)
|
||||||
|
|
||||||
prompt = load_prompt_with_elements(
|
prompt = load_prompt_with_elements(
|
||||||
element_tree_builder=scraped_page,
|
element_tree_builder=element_tree_builder,
|
||||||
prompt_engine=prompt_engine,
|
prompt_engine=prompt_engine,
|
||||||
template_name="parse-input-or-select-context",
|
template_name="parse-input-or-select-context",
|
||||||
action_reasoning=action.reasoning,
|
action_reasoning=action.reasoning,
|
||||||
|
|||||||
@@ -257,16 +257,16 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
|
|||||||
elements: list[dict]
|
elements: list[dict]
|
||||||
id_to_element_dict: dict[str, dict] = {}
|
id_to_element_dict: dict[str, dict] = {}
|
||||||
id_to_frame_dict: dict[str, str] = {}
|
id_to_frame_dict: dict[str, str] = {}
|
||||||
id_to_css_dict: dict[str, str]
|
id_to_css_dict: dict[str, str] = {}
|
||||||
id_to_element_hash: dict[str, str]
|
id_to_element_hash: dict[str, str] = {}
|
||||||
hash_to_element_ids: dict[str, list[str]]
|
hash_to_element_ids: dict[str, list[str]] = {}
|
||||||
element_tree: list[dict]
|
element_tree: list[dict]
|
||||||
element_tree_trimmed: list[dict]
|
element_tree_trimmed: list[dict]
|
||||||
economy_element_tree: list[dict] | None = None
|
economy_element_tree: list[dict] | None = None
|
||||||
last_used_element_tree: list[dict] | None = None
|
last_used_element_tree: list[dict] | None = None
|
||||||
screenshots: list[bytes]
|
screenshots: list[bytes] = []
|
||||||
url: str
|
url: str = ""
|
||||||
html: str
|
html: str = ""
|
||||||
extracted_text: str | None = None
|
extracted_text: str | None = None
|
||||||
window_dimension: dict[str, int] | None = None
|
window_dimension: dict[str, int] | None = None
|
||||||
_browser_state: BrowserState = PrivateAttr()
|
_browser_state: BrowserState = PrivateAttr()
|
||||||
|
|||||||
@@ -477,6 +477,10 @@ class SkyvernFrame:
|
|||||||
js_script = "([element]) => getSelectOptions(element)"
|
js_script = "([element]) => getSelectOptions(element)"
|
||||||
return await self.evaluate(frame=self.frame, expression=js_script, arg=[element])
|
return await self.evaluate(frame=self.frame, expression=js_script, arg=[element])
|
||||||
|
|
||||||
|
async def get_element_dom_depth(self, element: ElementHandle) -> int:
|
||||||
|
js_script = "([element]) => getElementDomDepth(element)"
|
||||||
|
return await self.evaluate(frame=self.frame, expression=js_script, arg=[element])
|
||||||
|
|
||||||
@TraceManager.traced_async()
|
@TraceManager.traced_async()
|
||||||
async def build_tree_from_body(
|
async def build_tree_from_body(
|
||||||
self,
|
self,
|
||||||
@@ -500,6 +504,19 @@ class SkyvernFrame:
|
|||||||
frame=self.frame, expression=js_script, timeout_ms=timeout_ms, arg=[wait_until_finished]
|
frame=self.frame, expression=js_script, timeout_ms=timeout_ms, arg=[wait_until_finished]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@TraceManager.traced_async()
|
||||||
|
async def build_tree_from_element(
|
||||||
|
self,
|
||||||
|
starter: ElementHandle,
|
||||||
|
frame: str,
|
||||||
|
full_tree: bool = False,
|
||||||
|
timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
|
) -> tuple[list[dict], list[dict]]:
|
||||||
|
js_script = "async ([starter, frame, full_tree]) => await buildElementTree(starter, frame, full_tree)"
|
||||||
|
return await self.evaluate(
|
||||||
|
frame=self.frame, expression=js_script, timeout_ms=timeout_ms, arg=[starter, frame, full_tree]
|
||||||
|
)
|
||||||
|
|
||||||
async def safe_wait_for_animation_end(self, timeout_ms: float = 3000) -> None:
|
async def safe_wait_for_animation_end(self, timeout_ms: float = 3000) -> None:
|
||||||
try:
|
try:
|
||||||
async with asyncio.timeout(timeout_ms / 1000):
|
async with asyncio.timeout(timeout_ms / 1000):
|
||||||
|
|||||||
Reference in New Issue
Block a user