speed up extraction (#1617)
This commit is contained in:
@@ -2684,7 +2684,7 @@ async def extract_information_for_navigation_goal(
|
||||
|
||||
# TODO: only the HTML element-tree format is used for now; add a way to switch formats in the future
|
||||
element_tree_format = ElementTreeFormat.HTML
|
||||
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
|
||||
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format, html_need_skyvern_attrs=False)
|
||||
|
||||
scraped_page_refreshed = await scraped_page.refresh()
|
||||
|
||||
|
||||
@@ -703,9 +703,19 @@ function isInteractable(element, hoverStylesMap) {
|
||||
// Check if element has hover styles that change cursor to pointer
|
||||
// This is to handle the case where an element's cursor is "auto", but resolves to "pointer" on hover
|
||||
if (elementCursor === "auto") {
|
||||
// TODO: find a faster way to match the element against hover selectors; this loop scans every entry in hoverStylesMap
|
||||
for (const [selector, styles] of hoverStylesMap) {
|
||||
if (element.matches(selector) && styles.cursor === "pointer") {
|
||||
return true;
|
||||
let shouldMatch = false;
|
||||
for (const className of element.classList) {
|
||||
if (selector.includes(className)) {
|
||||
shouldMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (shouldMatch || selector.includes(tagName)) {
|
||||
if (element.matches(selector) && styles.cursor === "pointer") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -247,12 +247,17 @@ class ScrapedPage(BaseModel):
|
||||
self._clean_up_func = clean_up_func
|
||||
self._scrape_exclude = scrape_exclude
|
||||
|
||||
def build_element_tree(self, fmt: ElementTreeFormat = ElementTreeFormat.HTML) -> str:
|
||||
def build_element_tree(
|
||||
self, fmt: ElementTreeFormat = ElementTreeFormat.HTML, html_need_skyvern_attrs: bool = True
|
||||
) -> str:
|
||||
if fmt == ElementTreeFormat.JSON:
|
||||
return json.dumps(self.element_tree_trimmed)
|
||||
|
||||
if fmt == ElementTreeFormat.HTML:
|
||||
return "".join(json_to_html(element) for element in self.element_tree_trimmed)
|
||||
return "".join(
|
||||
json_to_html(element, need_skyvern_attrs=html_need_skyvern_attrs)
|
||||
for element in self.element_tree_trimmed
|
||||
)
|
||||
|
||||
raise UnknownElementTreeFormat(fmt=fmt)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user