speed up extraction (#1617)
This commit is contained in:
@@ -2684,7 +2684,7 @@ async def extract_information_for_navigation_goal(
|
|||||||
|
|
||||||
# TODO: we only use HTML element for now, introduce a way to switch in the future
|
# TODO: we only use HTML element for now, introduce a way to switch in the future
|
||||||
element_tree_format = ElementTreeFormat.HTML
|
element_tree_format = ElementTreeFormat.HTML
|
||||||
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
|
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format, html_need_skyvern_attrs=False)
|
||||||
|
|
||||||
scraped_page_refreshed = await scraped_page.refresh()
|
scraped_page_refreshed = await scraped_page.refresh()
|
||||||
|
|
||||||
|
|||||||
@@ -703,9 +703,19 @@ function isInteractable(element, hoverStylesMap) {
|
|||||||
// Check if element has hover styles that change cursor to pointer
|
// Check if element has hover styles that change cursor to pointer
|
||||||
// This is to handle the case where an element's cursor is "auto", but resolves to "pointer" on hover
|
// This is to handle the case where an element's cursor is "auto", but resolves to "pointer" on hover
|
||||||
if (elementCursor === "auto") {
|
if (elementCursor === "auto") {
|
||||||
|
// TODO: we need a better algorithm to match the selector with better performance
|
||||||
for (const [selector, styles] of hoverStylesMap) {
|
for (const [selector, styles] of hoverStylesMap) {
|
||||||
if (element.matches(selector) && styles.cursor === "pointer") {
|
let shouldMatch = false;
|
||||||
return true;
|
for (const className of element.classList) {
|
||||||
|
if (selector.includes(className)) {
|
||||||
|
shouldMatch = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (shouldMatch || selector.includes(tagName)) {
|
||||||
|
if (element.matches(selector) && styles.cursor === "pointer") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -247,12 +247,17 @@ class ScrapedPage(BaseModel):
|
|||||||
self._clean_up_func = clean_up_func
|
self._clean_up_func = clean_up_func
|
||||||
self._scrape_exclude = scrape_exclude
|
self._scrape_exclude = scrape_exclude
|
||||||
|
|
||||||
def build_element_tree(self, fmt: ElementTreeFormat = ElementTreeFormat.HTML) -> str:
|
def build_element_tree(
|
||||||
|
self, fmt: ElementTreeFormat = ElementTreeFormat.HTML, html_need_skyvern_attrs: bool = True
|
||||||
|
) -> str:
|
||||||
if fmt == ElementTreeFormat.JSON:
|
if fmt == ElementTreeFormat.JSON:
|
||||||
return json.dumps(self.element_tree_trimmed)
|
return json.dumps(self.element_tree_trimmed)
|
||||||
|
|
||||||
if fmt == ElementTreeFormat.HTML:
|
if fmt == ElementTreeFormat.HTML:
|
||||||
return "".join(json_to_html(element) for element in self.element_tree_trimmed)
|
return "".join(
|
||||||
|
json_to_html(element, need_skyvern_attrs=html_need_skyvern_attrs)
|
||||||
|
for element in self.element_tree_trimmed
|
||||||
|
)
|
||||||
|
|
||||||
raise UnknownElementTreeFormat(fmt=fmt)
|
raise UnknownElementTreeFormat(fmt=fmt)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user