fix speculative artifact persistence (#4211)
This commit is contained in:
@@ -947,6 +947,12 @@ class ForgeAgent:
|
|||||||
reuse_speculative_llm_response = json_response is not None
|
reuse_speculative_llm_response = json_response is not None
|
||||||
speculative_llm_metadata = speculative_plan.llm_metadata
|
speculative_llm_metadata = speculative_plan.llm_metadata
|
||||||
prompt_name = speculative_plan.prompt_name
|
prompt_name = speculative_plan.prompt_name
|
||||||
|
await self._persist_scrape_artifacts(
|
||||||
|
task=task,
|
||||||
|
step=step,
|
||||||
|
scraped_page=scraped_page,
|
||||||
|
context=context,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
(
|
(
|
||||||
scraped_page,
|
scraped_page,
|
||||||
@@ -2371,10 +2377,11 @@ class ForgeAgent:
|
|||||||
use_caching = False
|
use_caching = False
|
||||||
|
|
||||||
if persist_artifacts:
|
if persist_artifacts:
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
await self._persist_scrape_artifacts(
|
||||||
|
task=task,
|
||||||
step=step,
|
step=step,
|
||||||
artifact_type=ArtifactType.HTML_SCRAPE,
|
scraped_page=scraped_page,
|
||||||
data=scraped_page.html.encode(),
|
context=context,
|
||||||
)
|
)
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Scraped website",
|
"Scraped website",
|
||||||
@@ -2383,54 +2390,6 @@ class ForgeAgent:
|
|||||||
num_elements=len(scraped_page.elements),
|
num_elements=len(scraped_page.elements),
|
||||||
url=task.url,
|
url=task.url,
|
||||||
)
|
)
|
||||||
# TODO: we only use HTML element for now, introduce a way to switch in the future
|
|
||||||
enable_speed_optimizations = getattr(context, "enable_speed_optimizations", False)
|
|
||||||
element_tree_format = ElementTreeFormat.HTML
|
|
||||||
|
|
||||||
# OPTIMIZATION: Use economy tree (skip SVGs) when ENABLE_SPEED_OPTIMIZATIONS is enabled
|
|
||||||
# Economy tree removes all SVG elements from the DOM tree sent to LLM
|
|
||||||
# - SVGs are decorative (icons, logos, graphics) - not needed for action planning
|
|
||||||
# - Even for charts/graphs: LLM sees them in screenshots, not SVG code
|
|
||||||
# - Saves ~8s per SVG x ~15 SVGs = ~120s per workflow (30% speedup!)
|
|
||||||
#
|
|
||||||
# RETRY STRATEGY: Use economy tree on first attempt only
|
|
||||||
# - retry_index 0: Use economy tree (fast, no SVGs)
|
|
||||||
# - retry_index 1+: Use regular tree (SVGs loaded from existing 4-week cache)
|
|
||||||
# Note: SVG conversions are already cached globally with 4-week TTL, so retries are fast
|
|
||||||
#
|
|
||||||
# COORDINATION: The enable_speed_optimizations decision is made ONCE before scraping
|
|
||||||
# and stored in context. Both SVG conversion skip (agent_functions.py) and tree
|
|
||||||
# selection (here) use the SAME value, ensuring perfect coordination.
|
|
||||||
element_tree_in_prompt: str = ""
|
|
||||||
|
|
||||||
# Use the speed optimization decision from context (set before scraping)
|
|
||||||
enable_speed_optimizations = context.enable_speed_optimizations if context else False
|
|
||||||
|
|
||||||
if not enable_speed_optimizations:
|
|
||||||
# Optimization disabled - use regular tree always
|
|
||||||
element_tree_in_prompt = scraped_page.build_element_tree(element_tree_format)
|
|
||||||
elif step.retry_index == 0:
|
|
||||||
# First attempt - use economy tree (fast, no SVG conversion)
|
|
||||||
# Note: SVG conversion was already skipped in cleanup_element_tree_func
|
|
||||||
# based on the same context.enable_speed_optimizations value
|
|
||||||
element_tree_in_prompt = scraped_page.build_economy_elements_tree(element_tree_format)
|
|
||||||
LOG.info(
|
|
||||||
"Speed optimization: Using economy element tree (skipping SVGs)",
|
|
||||||
step_order=step.order,
|
|
||||||
step_retry=step.retry_index,
|
|
||||||
task_id=task.task_id,
|
|
||||||
workflow_run_id=task.workflow_run_id,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Retry 1+ - use regular tree (SVGs will be loaded from existing 4-week cache)
|
|
||||||
element_tree_in_prompt = scraped_page.build_element_tree(element_tree_format)
|
|
||||||
LOG.info(
|
|
||||||
"Speed optimization: Using regular tree on retry (SVGs from global cache)",
|
|
||||||
step_order=step.order,
|
|
||||||
step_retry=step.retry_index,
|
|
||||||
task_id=task.task_id,
|
|
||||||
workflow_run_id=task.workflow_run_id,
|
|
||||||
)
|
|
||||||
extract_action_prompt = ""
|
extract_action_prompt = ""
|
||||||
prompt_name = EXTRACT_ACTION_PROMPT_NAME # Default; overwritten below for non-CUA engines
|
prompt_name = EXTRACT_ACTION_PROMPT_NAME # Default; overwritten below for non-CUA engines
|
||||||
if engine not in CUA_ENGINES:
|
if engine not in CUA_ENGINES:
|
||||||
@@ -2443,35 +2402,101 @@ class ForgeAgent:
|
|||||||
expire_verification_code=True,
|
expire_verification_code=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
if persist_artifacts:
|
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
|
||||||
step=step,
|
|
||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_CSS_MAP,
|
|
||||||
data=json.dumps(scraped_page.id_to_css_dict, indent=2).encode(),
|
|
||||||
)
|
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
|
||||||
step=step,
|
|
||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_FRAME_MAP,
|
|
||||||
data=json.dumps(scraped_page.id_to_frame_dict, indent=2).encode(),
|
|
||||||
)
|
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
|
||||||
step=step,
|
|
||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_TREE,
|
|
||||||
data=json.dumps(scraped_page.element_tree, indent=2).encode(),
|
|
||||||
)
|
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
|
||||||
step=step,
|
|
||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED,
|
|
||||||
data=json.dumps(scraped_page.element_tree_trimmed, indent=2).encode(),
|
|
||||||
)
|
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
|
||||||
step=step,
|
|
||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_TREE_IN_PROMPT,
|
|
||||||
data=element_tree_in_prompt.encode(),
|
|
||||||
)
|
|
||||||
|
|
||||||
return scraped_page, extract_action_prompt, use_caching, prompt_name
|
return scraped_page, extract_action_prompt, use_caching, prompt_name
|
||||||
|
|
||||||
|
async def _persist_scrape_artifacts(
    self,
    *,
    task: Task,
    step: Step,
    scraped_page: ScrapedPage,
    context: SkyvernContext | None,
) -> None:
    """
    Store the scrape artifacts of a step through the artifact manager.

    Artifacts are written one after another, in this order: the raw HTML
    scrape, the id->CSS map, the id->frame map, the element tree, the
    trimmed element tree and finally the element tree rendered for the prompt.

    Args:
        task: Task the step belongs to; only forwarded to the tree builder.
        step: Step every artifact is attached to.
        scraped_page: Scrape result that supplies all artifact payloads.
        context: Optional run context; only forwarded to the tree builder.
    """

    async def store(artifact_type, payload: bytes) -> None:
        # Single funnel for every write so all artifacts share the same step.
        await app.ARTIFACT_MANAGER.create_artifact(
            step=step,
            artifact_type=artifact_type,
            data=payload,
        )

    def json_backed_artifacts():
        # Generator on purpose: each attribute is read only right before its
        # own artifact is written, exactly like the sequential calls it replaces.
        yield ArtifactType.VISIBLE_ELEMENTS_ID_CSS_MAP, scraped_page.id_to_css_dict
        yield ArtifactType.VISIBLE_ELEMENTS_ID_FRAME_MAP, scraped_page.id_to_frame_dict
        yield ArtifactType.VISIBLE_ELEMENTS_TREE, scraped_page.element_tree
        yield ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED, scraped_page.element_tree_trimmed

    await store(ArtifactType.HTML_SCRAPE, scraped_page.html.encode())

    # The prompt tree is built after the HTML artifact is stored and before
    # any of the element metadata artifacts.
    prompt_tree = self._build_element_tree_for_prompt(
        scraped_page=scraped_page,
        step=step,
        task=task,
        context=context,
        element_tree_format=ElementTreeFormat.HTML,
    )

    for artifact_type, value in json_backed_artifacts():
        await store(artifact_type, json.dumps(value, indent=2).encode())

    await store(ArtifactType.VISIBLE_ELEMENTS_TREE_IN_PROMPT, prompt_tree.encode())
|
||||||
|
|
||||||
|
def _build_element_tree_for_prompt(
    self,
    *,
    scraped_page: ScrapedPage,
    step: Step,
    task: Task,
    context: SkyvernContext | None,
    element_tree_format: ElementTreeFormat,
) -> str:
    """
    Pick and build the element tree string used for the prompt and its artifact.

    Selection rules:
      * no context, or ``context.enable_speed_optimizations`` is falsy:
        regular tree, nothing is logged;
      * speed optimizations on and ``step.retry_index == 0``: economy tree;
      * speed optimizations on and any other retry index: regular tree.

    Args:
        scraped_page: Scrape result providing the tree builders.
        step: Current step; its ``retry_index`` selects the tree variant.
        task: Owning task; only used for log fields.
        context: Optional run context carrying ``enable_speed_optimizations``.
        element_tree_format: Format handed to the chosen tree builder.

    Returns:
        The tree produced by the selected builder.
    """
    speed_optimized = False if not context else context.enable_speed_optimizations
    if speed_optimized:
        if step.retry_index == 0:
            # First attempt: economy variant (per the log message, it skips SVGs).
            tree = scraped_page.build_economy_elements_tree(element_tree_format)
            message = "Speed optimization: Using economy element tree (skipping SVGs)"
        else:
            # Any retry falls back to the regular tree.
            tree = scraped_page.build_element_tree(element_tree_format)
            message = "Speed optimization: Using regular tree on retry (SVGs from global cache)"

        # The tree is built before logging, so a builder failure emits no log line.
        LOG.info(
            message,
            step_order=step.order,
            step_retry=step.retry_index,
            task_id=task.task_id,
            workflow_run_id=task.workflow_run_id,
        )
        return tree

    # Optimization disabled: always the regular tree.
    return scraped_page.build_element_tree(element_tree_format)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_extract_action_cache_variant(
|
def _build_extract_action_cache_variant(
|
||||||
verification_code_check: bool,
|
verification_code_check: bool,
|
||||||
|
|||||||
Reference in New Issue
Block a user