Extract BrowserState.scrape_website (#4184)

This commit is contained in:
Stanislav Novosad
2025-12-03 15:08:32 -07:00
committed by GitHub
parent ce01f2cb35
commit f754272f9c
16 changed files with 375 additions and 313 deletions

View File

@@ -61,7 +61,7 @@ from skyvern.schemas.scripts import (
ScriptStatus,
)
from skyvern.schemas.workflows import BlockStatus, BlockType, FileStorageType, FileType
-from skyvern.webeye.scraper.scraper import ElementTreeFormat
+from skyvern.webeye.scraper.scraped_page import ElementTreeFormat
LOG = structlog.get_logger()
jinja_sandbox_env = SandboxedEnvironment()

View File

@@ -56,7 +56,7 @@ from skyvern.schemas.workflows import (
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.utils.strings import generate_random_string
from skyvern.webeye.browser_state import BrowserState
-from skyvern.webeye.scraper.scraper import ScrapedPage, scrape_website
+from skyvern.webeye.scraper.scraped_page import ScrapedPage
from skyvern.webeye.utils.page import SkyvernFrame
LOG = structlog.get_logger()
@@ -682,10 +682,9 @@ async def run_task_v2_helper(
)
else:
try:
-scraped_page = await scrape_website(
-browser_state,
-url,
-app.AGENT_FUNCTION.cleanup_element_tree_factory(),
+scraped_page = await browser_state.scrape_website(
+url=url,
+cleanup_element_tree=app.AGENT_FUNCTION.cleanup_element_tree_factory(),
scrape_exclude=app.scrape_exclude,
)
if page is None:
@@ -908,10 +907,9 @@ async def run_task_v2_helper(
browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
)
-scraped_page = await scrape_website(
-browser_state,
-url,
-app.AGENT_FUNCTION.cleanup_element_tree_factory(),
+scraped_page = await browser_state.scrape_website(
+url=url,
+cleanup_element_tree=app.AGENT_FUNCTION.cleanup_element_tree_factory(),
scrape_exclude=app.scrape_exclude,
)
completion_screenshots = scraped_page.screenshots