make sure we do a web scrape before data extraction at the end of task to ensure the freshness of the scraped data (#1152)

This commit is contained in:
Shuchang Zheng
2024-11-06 22:55:21 -08:00
committed by GitHub
parent f7c9dc2279
commit c80597e7a5
4 changed files with 12 additions and 17 deletions

View File

@@ -907,17 +907,19 @@ class ForgeAgent:
step_id=step.step_id,
workflow_run_id=task.workflow_run_id,
)
scraped_page_without_screenshots = await scraped_page.refresh(with_screenshot=False)
scraped_page_refreshed = await scraped_page.refresh()
verification_prompt = prompt_engine.load_prompt(
"check-user-goal",
navigation_goal=task.navigation_goal,
navigation_payload=task.navigation_payload,
elements=scraped_page_without_screenshots.build_element_tree(ElementTreeFormat.HTML),
elements=scraped_page_refreshed.build_element_tree(ElementTreeFormat.HTML),
)
# this prompt is critical to our agent so let's use the primary LLM API handler
verification_response = await app.LLM_API_HANDLER(prompt=verification_prompt, step=step, screenshots=None)
verification_response = await app.LLM_API_HANDLER(
prompt=verification_prompt, step=step, screenshots=scraped_page_refreshed.screenshots
)
if "user_goal_achieved" not in verification_response or "thoughts" not in verification_response:
LOG.error(
"Invalid LLM response for user goal success verification, skipping verification",

View File

@@ -1,4 +1,4 @@
Your are here to help the user determine if the user has completed their goal on the web. Use the content of the elements parsed from the page, the user goal and user details to determine whether the user goal has been completed or not.
Your are here to help the user determine if the user has completed their goal on the web. Use the content of the elements parsed from the page, the screenshots of the page, the user goal and user details to determine whether the user goal has been completed or not.
Make sure to ONLY return the JSON object in this format with no additional text before or after it:
```json

View File

@@ -2237,6 +2237,7 @@ async def extract_information_for_navigation_goal(
element_tree_format = ElementTreeFormat.HTML
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
scraped_page_refreshed = await scraped_page.refresh()
extract_information_prompt = prompt_engine.load_prompt(
prompt_template,
navigation_goal=task.navigation_goal,
@@ -2244,8 +2245,8 @@ async def extract_information_for_navigation_goal(
elements=element_tree_in_prompt,
data_extraction_goal=task.data_extraction_goal,
extracted_information_schema=task.extracted_information_schema,
current_url=scraped_page.url,
extracted_text=scraped_page.extracted_text,
current_url=scraped_page_refreshed.url,
extracted_text=scraped_page_refreshed.extracted_text,
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
utc_datetime=datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
)

View File

@@ -241,13 +241,12 @@ class ScrapedPage(BaseModel):
raise UnknownElementTreeFormat(fmt=fmt)
async def refresh(self, with_screenshot: bool = True) -> Self:
async def refresh(self) -> Self:
refreshed_page = await scrape_website(
browser_state=self._browser_state,
url=self.url,
cleanup_element_tree=self._clean_up_func,
scrape_exclude=self._scrape_exclude,
with_screenshot=with_screenshot,
)
self.elements = refreshed_page.elements
self.id_to_css_dict = refreshed_page.id_to_css_dict
@@ -260,6 +259,7 @@ class ScrapedPage(BaseModel):
self.screenshots = refreshed_page.screenshots or self.screenshots
self.html = refreshed_page.html
self.extracted_text = refreshed_page.extracted_text
self.url = refreshed_page.url
return self
@@ -269,7 +269,6 @@ async def scrape_website(
cleanup_element_tree: CleanupElementTreeFunc,
num_retry: int = 0,
scrape_exclude: ScrapeExcludeFunc | None = None,
with_screenshot: bool = True,
) -> ScrapedPage:
"""
************************************************************************************************
@@ -299,7 +298,6 @@ async def scrape_website(
url=url,
cleanup_element_tree=cleanup_element_tree,
scrape_exclude=scrape_exclude,
with_screenshot=with_screenshot,
)
except Exception as e:
# NOTE: MAX_SCRAPING_RETRIES is set to 0 in both staging and production
@@ -321,7 +319,6 @@ async def scrape_website(
cleanup_element_tree,
num_retry=num_retry,
scrape_exclude=scrape_exclude,
with_screenshot=with_screenshot,
)
@@ -369,7 +366,6 @@ async def scrape_web_unsafe(
url: str,
cleanup_element_tree: CleanupElementTreeFunc,
scrape_exclude: ScrapeExcludeFunc | None = None,
with_screenshot: bool = True,
) -> ScrapedPage:
"""
Asynchronous function that performs web scraping without any built-in error handling. This function is intended
@@ -394,11 +390,7 @@ async def scrape_web_unsafe(
LOG.info("Waiting for 5 seconds before scraping the website.")
await asyncio.sleep(5)
screenshots: list[bytes] = []
# TODO: do we need to scroll to the button when we scrape without screenshots?
if with_screenshot:
screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True)
screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True)
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
element_tree = await cleanup_element_tree(page, url, copy.deepcopy(element_tree))