Extract BrowserState.scrape_website (#4184)
This commit is contained in:
committed by
GitHub
parent
ce01f2cb35
commit
f754272f9c
@@ -20,6 +20,8 @@ from skyvern.schemas.runs import ProxyLocationInput
|
||||
from skyvern.webeye.browser_artifacts import BrowserArtifacts, VideoArtifact
|
||||
from skyvern.webeye.browser_factory import BrowserCleanupFunc, BrowserContextFactory
|
||||
from skyvern.webeye.browser_state import BrowserState
|
||||
from skyvern.webeye.scraper import scraper
|
||||
from skyvern.webeye.scraper.scraped_page import CleanupElementTreeFunc, ScrapedPage, ScrapeExcludeFunc
|
||||
from skyvern.webeye.utils.page import ScreenshotMode, SkyvernFrame
|
||||
|
||||
# structlog logger shared by the LOG.* calls in this file.
LOG = structlog.get_logger()
|
||||
@@ -378,6 +380,35 @@ class RealBrowserState(BrowserState):
|
||||
LOG.exception(f"Error while reload url: {repr(e)}")
|
||||
raise FailedToReloadPage(url=page.url, error_message=repr(e))
|
||||
|
||||
async def scrape_website(
    self,
    url: str,
    cleanup_element_tree: CleanupElementTreeFunc,
    num_retry: int = 0,
    max_retries: int = settings.MAX_SCRAPING_RETRIES,
    scrape_exclude: ScrapeExcludeFunc | None = None,
    take_screenshots: bool = True,
    draw_boxes: bool = True,
    max_screenshot_number: int = settings.MAX_NUM_SCREENSHOTS,
    scroll: bool = True,
    support_empty_page: bool = False,
    wait_seconds: float = 0,
) -> ScrapedPage:
    """Scrape ``url`` using this browser state.

    Thin wrapper around ``scraper.scrape_website``: this instance is passed
    as ``browser_state`` and every other argument is forwarded unchanged by
    keyword. The meaning of each option is defined by the scraper module,
    not here.

    Returns:
        The ``ScrapedPage`` produced by ``scraper.scrape_website``.
    """
    # Pure delegation: no state is read or modified here beyond handing
    # ``self`` to the scraper.
    scraped_page = await scraper.scrape_website(
        browser_state=self,
        url=url,
        cleanup_element_tree=cleanup_element_tree,
        num_retry=num_retry,
        max_retries=max_retries,
        scrape_exclude=scrape_exclude,
        take_screenshots=take_screenshots,
        draw_boxes=draw_boxes,
        max_screenshot_number=max_screenshot_number,
        scroll=scroll,
        support_empty_page=support_empty_page,
        wait_seconds=wait_seconds,
    )
    return scraped_page
|
||||
|
||||
async def close(self, close_browser_on_completion: bool = True) -> None:
|
||||
LOG.info("Closing browser state")
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user