Make cleanup_element_tree_factory compatible with calls that omit task/step (#1252)

This commit is contained in:
Shuchang Zheng
2024-11-24 20:11:59 -08:00
committed by GitHub
parent 284fba0aba
commit 4103b7594c
2 changed files with 37 additions and 32 deletions

View File

@@ -624,7 +624,6 @@ class ForgeAgent:
task, task,
step, step,
browser_state, browser_state,
organization,
) )
detailed_agent_step_output.scraped_page = scraped_page detailed_agent_step_output.scraped_page = scraped_page
detailed_agent_step_output.extract_action_prompt = extract_action_prompt detailed_agent_step_output.extract_action_prompt = extract_action_prompt
@@ -1098,7 +1097,6 @@ class ForgeAgent:
step: Step, step: Step,
browser_state: BrowserState, browser_state: BrowserState,
scrape_type: ScrapeType, scrape_type: ScrapeType,
organization: Organization | None = None,
) -> ScrapedPage: ) -> ScrapedPage:
if scrape_type == ScrapeType.NORMAL: if scrape_type == ScrapeType.NORMAL:
pass pass
@@ -1121,7 +1119,7 @@ class ForgeAgent:
return await scrape_website( return await scrape_website(
browser_state, browser_state,
task.url, task.url,
app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step, organization=organization), app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step),
scrape_exclude=app.scrape_exclude, scrape_exclude=app.scrape_exclude,
) )
@@ -1130,7 +1128,6 @@ class ForgeAgent:
task: Task, task: Task,
step: Step, step: Step,
browser_state: BrowserState, browser_state: BrowserState,
organization: Organization | None = None,
) -> tuple[ScrapedPage, str]: ) -> tuple[ScrapedPage, str]:
# start the async tasks while running scrape_website # start the async tasks while running scrape_website
self.async_operation_pool.run_operation(task.task_id, AgentPhase.scrape) self.async_operation_pool.run_operation(task.task_id, AgentPhase.scrape)
@@ -1148,7 +1145,6 @@ class ForgeAgent:
step=step, step=step,
browser_state=browser_state, browser_state=browser_state,
scrape_type=scrape_type, scrape_type=scrape_type,
organization=organization,
) )
break break
except FailedToTakeScreenshot as e: except FailedToTakeScreenshot as e:

View File

@@ -96,13 +96,19 @@ def _remove_skyvern_attributes(element: Dict) -> Dict:
return element_copied return element_copied
async def _convert_svg_to_string(task: Task, step: Step, organization: Organization | None, element: Dict) -> None: async def _convert_svg_to_string(
element: Dict,
task: Task | None = None,
step: Step | None = None,
) -> None:
if element.get("tagName") != "svg": if element.get("tagName") != "svg":
return return
if element.get("isDropped", False): if element.get("isDropped", False):
return return
task_id = task.task_id if task else None
step_id = step.step_id if step else None
element_id = element.get("id", "") element_id = element.get("id", "")
svg_element = _remove_skyvern_attributes(element) svg_element = _remove_skyvern_attributes(element)
svg_html = json_to_html(svg_element) svg_html = json_to_html(svg_element)
@@ -117,8 +123,8 @@ async def _convert_svg_to_string(task: Task, step: Step, organization: Organizat
except Exception: except Exception:
LOG.warning( LOG.warning(
"Failed to loaded SVG cache", "Failed to loaded SVG cache",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
exc_info=True, exc_info=True,
key=svg_key, key=svg_key,
) )
@@ -131,8 +137,8 @@ async def _convert_svg_to_string(task: Task, step: Step, organization: Organizat
LOG.warning( LOG.warning(
"SVG element is too large to convert, going to drop the svg element.", "SVG element is too large to convert, going to drop the svg element.",
element_id=element_id, element_id=element_id,
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
length=len(svg_html), length=len(svg_html),
) )
del element["children"] del element["children"]
@@ -154,8 +160,8 @@ async def _convert_svg_to_string(task: Task, step: Step, organization: Organizat
except Exception: except Exception:
LOG.exception( LOG.exception(
"Failed to convert SVG to string shape by secondary llm. Will retry if haven't met the max try attempt after 3s.", "Failed to convert SVG to string shape by secondary llm. Will retry if haven't met the max try attempt after 3s.",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
element_id=element_id, element_id=element_id,
retry=retry, retry=retry,
) )
@@ -170,10 +176,15 @@ async def _convert_svg_to_string(task: Task, step: Step, organization: Organizat
async def _convert_css_shape_to_string( async def _convert_css_shape_to_string(
task: Task, step: Step, organization: Organization | None, frame: Page | Frame, element: Dict frame: Page | Frame,
element: Dict,
task: Task | None = None,
step: Step | None = None,
) -> None: ) -> None:
element_id: str = element.get("id", "") element_id: str = element.get("id", "")
task_id = task.task_id if task else None
step_id = step.step_id if step else None
shape_element = _remove_skyvern_attributes(element) shape_element = _remove_skyvern_attributes(element)
svg_html = json_to_html(shape_element) svg_html = json_to_html(shape_element)
hash_object = hashlib.sha256() hash_object = hashlib.sha256()
@@ -187,8 +198,8 @@ async def _convert_css_shape_to_string(
except Exception: except Exception:
LOG.warning( LOG.warning(
"Failed to loaded CSS shape cache", "Failed to loaded CSS shape cache",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
exc_info=True, exc_info=True,
key=shape_key, key=shape_key,
) )
@@ -201,8 +212,8 @@ async def _convert_css_shape_to_string(
if await locater.count() == 0: if await locater.count() == 0:
LOG.info( LOG.info(
"No locater found to convert css shape", "No locater found to convert css shape",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
element_id=element_id, element_id=element_id,
) )
return None return None
@@ -210,8 +221,8 @@ async def _convert_css_shape_to_string(
if await locater.count() > 1: if await locater.count() > 1:
LOG.info( LOG.info(
"multiple locaters found to convert css shape", "multiple locaters found to convert css shape",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
element_id=element_id, element_id=element_id,
) )
return None return None
@@ -235,8 +246,8 @@ async def _convert_css_shape_to_string(
except Exception: except Exception:
LOG.exception( LOG.exception(
"Failed to convert css shape to string shape by secondary llm. Will retry if haven't met the max try attempt after 3s.", "Failed to convert css shape to string shape by secondary llm. Will retry if haven't met the max try attempt after 3s.",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
element_id=element_id, element_id=element_id,
retry=retry, retry=retry,
) )
@@ -244,16 +255,16 @@ async def _convert_css_shape_to_string(
else: else:
LOG.info( LOG.info(
"Max css shape convertion retry, going to abort the convertion.", "Max css shape convertion retry, going to abort the convertion.",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
element_id=element_id, element_id=element_id,
) )
return None return None
except Exception: except Exception:
LOG.warning( LOG.warning(
"Failed to convert css shape to string shape by LLM", "Failed to convert css shape to string shape by LLM",
task_id=task.task_id, task_id=task_id,
step_id=step.step_id, step_id=step_id,
element_id=element_id, element_id=element_id,
exc_info=True, exc_info=True,
) )
@@ -316,9 +327,8 @@ class AgentFunction:
def cleanup_element_tree_factory( def cleanup_element_tree_factory(
self, self,
task: Task, task: Task | None = None,
step: Step, step: Step | None = None,
organization: Organization | None = None,
) -> CleanupElementTreeFunc: ) -> CleanupElementTreeFunc:
async def cleanup_element_tree_func(frame: Page | Frame, url: str, element_tree: list[dict]) -> list[dict]: async def cleanup_element_tree_func(frame: Page | Frame, url: str, element_tree: list[dict]) -> list[dict]:
""" """
@@ -335,15 +345,14 @@ class AgentFunction:
while queue: while queue:
queue_ele = queue.pop(0) queue_ele = queue.pop(0)
_remove_rect(queue_ele) _remove_rect(queue_ele)
await _convert_svg_to_string(task, step, organization, queue_ele) await _convert_svg_to_string(queue_ele, task, step)
if _should_css_shape_convert(element=queue_ele): if _should_css_shape_convert(element=queue_ele):
await _convert_css_shape_to_string( await _convert_css_shape_to_string(
task=task,
step=step,
organization=organization,
frame=frame, frame=frame,
element=queue_ele, element=queue_ele,
task=task,
step=step,
) )
# TODO: we can come back to test removing the unique_id # TODO: we can come back to test removing the unique_id