Current-viewport screenshot and scrolling-n-times screenshot (#2716)
Co-authored-by: lawyzheng <lawyzheng1106@gmail.com>
This commit is contained in:
@@ -23,6 +23,7 @@ class SkyvernContext:
|
||||
hashed_href_map: dict[str, str] = field(default_factory=dict)
|
||||
refresh_working_page: bool = False
|
||||
frame_index_map: dict[Frame, int] = field(default_factory=dict)
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
# Debug representation: renders only the request/organization/task/workflow/workflow-run/task-v2 ids
# and max_steps_override; other context fields (e.g. max_screenshot_scrolling_times) are not included.
def __repr__(self) -> str:
|
||||
return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, task_v2_id={self.task_v2_id}, max_steps_override={self.max_steps_override})"
|
||||
|
||||
@@ -149,6 +149,7 @@ class AgentDB:
|
||||
application: str | None = None,
|
||||
include_action_history_in_verification: bool | None = None,
|
||||
model: dict[str, Any] | None = None,
|
||||
max_screenshot_scrolling_times: int | None = None,
|
||||
) -> Task:
|
||||
try:
|
||||
async with self.Session() as session:
|
||||
@@ -176,6 +177,7 @@ class AgentDB:
|
||||
application=application,
|
||||
include_action_history_in_verification=include_action_history_in_verification,
|
||||
model=model,
|
||||
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
|
||||
)
|
||||
session.add(new_task)
|
||||
await session.commit()
|
||||
@@ -1217,6 +1219,7 @@ class AgentDB:
|
||||
description: str | None = None,
|
||||
proxy_location: ProxyLocation | None = None,
|
||||
webhook_callback_url: str | None = None,
|
||||
max_screenshot_scrolling_times: int | None = None,
|
||||
totp_verification_url: str | None = None,
|
||||
totp_identifier: str | None = None,
|
||||
persist_browser_session: bool = False,
|
||||
@@ -1236,6 +1239,7 @@ class AgentDB:
|
||||
webhook_callback_url=webhook_callback_url,
|
||||
totp_verification_url=totp_verification_url,
|
||||
totp_identifier=totp_identifier,
|
||||
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
|
||||
persist_browser_session=persist_browser_session,
|
||||
model=model,
|
||||
is_saved_task=is_saved_task,
|
||||
@@ -1479,6 +1483,7 @@ class AgentDB:
|
||||
totp_verification_url: str | None = None,
|
||||
totp_identifier: str | None = None,
|
||||
parent_workflow_run_id: str | None = None,
|
||||
max_screenshot_scrolling_times: int | None = None,
|
||||
) -> WorkflowRun:
|
||||
try:
|
||||
async with self.Session() as session:
|
||||
@@ -1492,6 +1497,7 @@ class AgentDB:
|
||||
totp_verification_url=totp_verification_url,
|
||||
totp_identifier=totp_identifier,
|
||||
parent_workflow_run_id=parent_workflow_run_id,
|
||||
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
|
||||
)
|
||||
session.add(workflow_run)
|
||||
await session.commit()
|
||||
@@ -2436,6 +2442,7 @@ class AgentDB:
|
||||
extracted_information_schema: dict | list | str | None = None,
|
||||
error_code_mapping: dict | None = None,
|
||||
model: dict[str, Any] | None = None,
|
||||
max_screenshot_scrolling_times: int | None = None,
|
||||
) -> TaskV2:
|
||||
async with self.Session() as session:
|
||||
new_task_v2 = TaskV2Model(
|
||||
@@ -2452,6 +2459,7 @@ class AgentDB:
|
||||
error_code_mapping=error_code_mapping,
|
||||
organization_id=organization_id,
|
||||
model=model,
|
||||
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
|
||||
)
|
||||
session.add(new_task_v2)
|
||||
await session.commit()
|
||||
|
||||
@@ -88,6 +88,7 @@ class TaskModel(Base):
|
||||
queued_at = Column(DateTime, nullable=True)
|
||||
started_at = Column(DateTime, nullable=True)
|
||||
finished_at = Column(DateTime, nullable=True)
|
||||
max_screenshot_scrolling_times = Column(Integer, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
|
||||
modified_at = Column(
|
||||
DateTime,
|
||||
@@ -218,6 +219,7 @@ class WorkflowModel(Base):
|
||||
workflow_definition = Column(JSON, nullable=False)
|
||||
proxy_location = Column(String)
|
||||
webhook_callback_url = Column(String)
|
||||
max_screenshot_scrolling_times = Column(Integer, nullable=True)
|
||||
totp_verification_url = Column(String)
|
||||
totp_identifier = Column(String)
|
||||
persist_browser_session = Column(Boolean, default=False, nullable=False)
|
||||
@@ -254,6 +256,7 @@ class WorkflowRunModel(Base):
|
||||
webhook_callback_url = Column(String)
|
||||
totp_verification_url = Column(String)
|
||||
totp_identifier = Column(String)
|
||||
max_screenshot_scrolling_times = Column(Integer, nullable=True)
|
||||
|
||||
queued_at = Column(DateTime, nullable=True)
|
||||
started_at = Column(DateTime, nullable=True)
|
||||
@@ -621,6 +624,7 @@ class TaskV2Model(Base):
|
||||
extracted_information_schema = Column(JSON, nullable=True)
|
||||
error_code_mapping = Column(JSON, nullable=True)
|
||||
max_steps = Column(Integer, nullable=True)
|
||||
max_screenshot_scrolling_times = Column(Integer, nullable=True)
|
||||
|
||||
queued_at = Column(DateTime, nullable=True)
|
||||
started_at = Column(DateTime, nullable=True)
|
||||
|
||||
@@ -142,6 +142,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p
|
||||
queued_at=task_obj.queued_at,
|
||||
started_at=task_obj.started_at,
|
||||
finished_at=task_obj.finished_at,
|
||||
max_screenshot_scrolling_times=task_obj.max_screenshot_scrolling_times,
|
||||
)
|
||||
return task
|
||||
|
||||
@@ -238,6 +239,7 @@ def convert_to_workflow(workflow_model: WorkflowModel, debug_enabled: bool = Fal
|
||||
persist_browser_session=workflow_model.persist_browser_session,
|
||||
model=workflow_model.model,
|
||||
proxy_location=(ProxyLocation(workflow_model.proxy_location) if workflow_model.proxy_location else None),
|
||||
max_screenshot_scrolling_times=workflow_model.max_screenshot_scrolling_times,
|
||||
version=workflow_model.version,
|
||||
is_saved_task=workflow_model.is_saved_task,
|
||||
description=workflow_model.description,
|
||||
@@ -278,6 +280,7 @@ def convert_to_workflow_run(
|
||||
created_at=workflow_run_model.created_at,
|
||||
modified_at=workflow_run_model.modified_at,
|
||||
workflow_title=workflow_title,
|
||||
max_screenshot_scrolling_times=workflow_run_model.max_screenshot_scrolling_times,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -107,6 +107,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
|
||||
context.task_id = task.task_id
|
||||
context.organization_id = organization_id
|
||||
context.max_steps_override = max_steps_override
|
||||
context.max_screenshot_scrolling_times = task.max_screenshot_scrolling_times
|
||||
|
||||
if background_tasks:
|
||||
await initialize_skyvern_state_file(task_id=task_id, organization_id=organization_id)
|
||||
|
||||
@@ -166,6 +166,7 @@ async def run_task(
|
||||
totp_identifier=run_request.totp_identifier,
|
||||
include_action_history_in_verification=run_request.include_action_history_in_verification,
|
||||
model=run_request.model,
|
||||
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
|
||||
)
|
||||
task_v1_response = await task_v1_service.run_task(
|
||||
task=task_v1_request,
|
||||
@@ -203,6 +204,7 @@ async def run_task(
|
||||
data_extraction_schema=task_v1_response.extracted_information_schema,
|
||||
error_code_mapping=task_v1_response.error_code_mapping,
|
||||
browser_session_id=run_request.browser_session_id,
|
||||
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
|
||||
),
|
||||
)
|
||||
if run_request.engine == RunEngine.skyvern_v2:
|
||||
@@ -221,6 +223,7 @@ async def run_task(
|
||||
error_code_mapping=run_request.error_code_mapping,
|
||||
create_task_run=True,
|
||||
model=run_request.model,
|
||||
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
|
||||
)
|
||||
except MissingBrowserAddressError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e)) from e
|
||||
@@ -263,6 +266,7 @@ async def run_task(
|
||||
error_code_mapping=task_v2.error_code_mapping,
|
||||
data_extraction_schema=task_v2.extracted_information_schema,
|
||||
publish_workflow=run_request.publish_workflow,
|
||||
max_screenshot_scrolling_times=run_request.max_screenshot_scrolling_times,
|
||||
),
|
||||
)
|
||||
LOG.error("Invalid agent engine", engine=run_request.engine, organization_id=current_org.organization_id)
|
||||
@@ -318,6 +322,7 @@ async def run_workflow(
|
||||
totp_identifier=workflow_run_request.totp_identifier,
|
||||
totp_url=workflow_run_request.totp_url,
|
||||
browser_session_id=workflow_run_request.browser_session_id,
|
||||
max_screenshot_scrolling_times=workflow_run_request.max_screenshot_scrolling_times,
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -1765,6 +1770,7 @@ async def run_task_v2(
|
||||
create_task_run=True,
|
||||
extracted_information_schema=data.extracted_information_schema,
|
||||
error_code_mapping=data.error_code_mapping,
|
||||
max_screenshot_scrolling_times=data.max_screenshot_scrolling_times,
|
||||
)
|
||||
except MissingBrowserAddressError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e)) from e
|
||||
|
||||
@@ -48,6 +48,8 @@ class TaskV2(BaseModel):
|
||||
queued_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
finished_at: datetime | None = None
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
created_at: datetime
|
||||
modified_at: datetime
|
||||
|
||||
@@ -147,6 +149,7 @@ class TaskV2Request(BaseModel):
|
||||
publish_workflow: bool = False
|
||||
extracted_information_schema: dict | list | str | None = None
|
||||
error_code_mapping: dict[str, str] | None = None
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
@field_validator("url", "webhook_callback_url", "totp_verification_url")
|
||||
@classmethod
|
||||
|
||||
@@ -96,6 +96,11 @@ class TaskBase(BaseModel):
|
||||
description="Whether to include the action history when verifying the task is complete",
|
||||
examples=[True, False],
|
||||
)
|
||||
# Optional per-task setting; the description below is the user-facing API text for this field.
# Wording fix: "viewport" (the visible page area) replaces the typo "viewpoint".
max_screenshot_scrolling_times: int | None = Field(
    default=None,
    description="Scroll down n times to get the merged screenshot of the page after taking an action. When it's None or 0, it takes the current viewport screenshot.",
    examples=[10],
)
|
||||
|
||||
|
||||
class TaskRequest(TaskBase):
|
||||
@@ -314,6 +319,7 @@ class Task(TaskBase):
|
||||
errors=self.errors,
|
||||
max_steps_per_run=self.max_steps_per_run,
|
||||
workflow_run_id=self.workflow_run_id,
|
||||
max_screenshot_scrolling_times=self.max_screenshot_scrolling_times,
|
||||
)
|
||||
|
||||
|
||||
@@ -337,6 +343,7 @@ class TaskResponse(BaseModel):
|
||||
queued_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
finished_at: datetime | None = None
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
|
||||
class TaskOutput(BaseModel):
|
||||
|
||||
@@ -307,7 +307,12 @@ class Block(BaseModel, abc.ABC):
|
||||
if not browser_state:
|
||||
LOG.warning("No browser state found when creating workflow_run_block", workflow_run_id=workflow_run_id)
|
||||
else:
|
||||
screenshot = await browser_state.take_screenshot(full_page=True)
|
||||
screenshot = await browser_state.take_fullpage_screenshot(
|
||||
use_playwright_fullpage=app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
|
||||
"ENABLE_PLAYWRIGHT_FULLPAGE",
|
||||
str(organization_id),
|
||||
)
|
||||
)
|
||||
if screenshot:
|
||||
await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
|
||||
workflow_run_block=workflow_run_block,
|
||||
@@ -569,8 +574,15 @@ class BaseTaskBlock(Block):
|
||||
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
||||
workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id
|
||||
)
|
||||
# assert that the browser state is not None, otherwise we can't go through typing
|
||||
assert browser_state is not None
|
||||
# add screenshot artifact for the first task
|
||||
screenshot = await browser_state.take_screenshot(full_page=True)
|
||||
screenshot = await browser_state.take_fullpage_screenshot(
|
||||
use_playwright_fullpage=app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
|
||||
"ENABLE_PLAYWRIGHT_FULLPAGE",
|
||||
str(organization_id),
|
||||
)
|
||||
)
|
||||
if screenshot:
|
||||
await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
|
||||
workflow_run_block=workflow_run_block,
|
||||
@@ -2486,6 +2498,7 @@ class TaskV2Block(Block):
|
||||
proxy_location=workflow_run.proxy_location,
|
||||
totp_identifier=self.totp_identifier,
|
||||
totp_verification_url=self.totp_verification_url,
|
||||
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
|
||||
)
|
||||
await app.DATABASE.update_task_v2(
|
||||
task_v2.observer_cruise_id, status=TaskV2Status.queued, organization_id=organization_id
|
||||
@@ -2517,6 +2530,7 @@ class TaskV2Block(Block):
|
||||
workflow_permanent_id=workflow_run.workflow_permanent_id,
|
||||
workflow_run_id=workflow_run_id,
|
||||
browser_session_id=browser_session_id,
|
||||
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
|
||||
)
|
||||
)
|
||||
result_dict = None
|
||||
|
||||
@@ -22,6 +22,7 @@ class WorkflowRequestBody(BaseModel):
|
||||
totp_verification_url: str | None = None
|
||||
totp_identifier: str | None = None
|
||||
browser_session_id: str | None = None
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
@field_validator("webhook_callback_url", "totp_verification_url")
|
||||
@classmethod
|
||||
@@ -76,6 +77,7 @@ class Workflow(BaseModel):
|
||||
persist_browser_session: bool = False
|
||||
model: dict[str, Any] | None = None
|
||||
status: WorkflowStatus = WorkflowStatus.published
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
created_at: datetime
|
||||
modified_at: datetime
|
||||
@@ -115,6 +117,7 @@ class WorkflowRun(BaseModel):
|
||||
failure_reason: str | None = None
|
||||
parent_workflow_run_id: str | None = None
|
||||
workflow_title: str | None = None
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
queued_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
@@ -162,3 +165,4 @@ class WorkflowRunResponseBase(BaseModel):
|
||||
task_v2: TaskV2 | None = None
|
||||
workflow_title: str | None = None
|
||||
browser_session_id: str | None = None
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
|
||||
@@ -424,4 +424,5 @@ class WorkflowCreateYAMLRequest(BaseModel):
|
||||
model: dict[str, Any] | None = None
|
||||
workflow_definition: WorkflowDefinitionYAML
|
||||
is_saved_task: bool = False
|
||||
max_screenshot_scrolling_times: int | None = None
|
||||
status: WorkflowStatus = WorkflowStatus.published
|
||||
|
||||
@@ -169,6 +169,7 @@ class WorkflowService:
|
||||
organization_id=workflow.organization_id,
|
||||
proxy_location=workflow_request.proxy_location,
|
||||
webhook_callback_url=workflow_request.webhook_callback_url,
|
||||
max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times,
|
||||
)
|
||||
skyvern_context.set(
|
||||
SkyvernContext(
|
||||
@@ -178,6 +179,7 @@ class WorkflowService:
|
||||
workflow_id=workflow_id,
|
||||
workflow_run_id=workflow_run.workflow_run_id,
|
||||
max_steps_override=max_steps_override,
|
||||
max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -577,6 +579,7 @@ class WorkflowService:
|
||||
workflow_definition: WorkflowDefinition,
|
||||
description: str | None = None,
|
||||
proxy_location: ProxyLocation | None = None,
|
||||
max_screenshot_scrolling_times: int | None = None,
|
||||
webhook_callback_url: str | None = None,
|
||||
totp_verification_url: str | None = None,
|
||||
totp_identifier: str | None = None,
|
||||
@@ -594,6 +597,7 @@ class WorkflowService:
|
||||
description=description,
|
||||
proxy_location=proxy_location,
|
||||
webhook_callback_url=webhook_callback_url,
|
||||
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
|
||||
totp_verification_url=totp_verification_url,
|
||||
totp_identifier=totp_identifier,
|
||||
persist_browser_session=persist_browser_session,
|
||||
@@ -767,6 +771,7 @@ class WorkflowService:
|
||||
totp_verification_url=workflow_request.totp_verification_url,
|
||||
totp_identifier=workflow_request.totp_identifier,
|
||||
parent_workflow_run_id=parent_workflow_run_id,
|
||||
max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolling_times,
|
||||
)
|
||||
|
||||
async def mark_workflow_run_as_completed(self, workflow_run_id: str) -> WorkflowRun:
|
||||
@@ -1180,6 +1185,7 @@ class WorkflowService:
|
||||
total_steps=total_steps,
|
||||
total_cost=total_cost,
|
||||
workflow_title=workflow.title,
|
||||
max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolling_times,
|
||||
)
|
||||
|
||||
async def clean_up_workflow(
|
||||
@@ -1453,6 +1459,7 @@ class WorkflowService:
|
||||
totp_identifier=request.totp_identifier,
|
||||
persist_browser_session=request.persist_browser_session,
|
||||
model=request.model,
|
||||
max_screenshot_scrolling_times=request.max_screenshot_scrolling_times,
|
||||
workflow_permanent_id=workflow_permanent_id,
|
||||
version=existing_version + 1,
|
||||
is_saved_task=request.is_saved_task,
|
||||
@@ -1470,6 +1477,7 @@ class WorkflowService:
|
||||
totp_identifier=request.totp_identifier,
|
||||
persist_browser_session=request.persist_browser_session,
|
||||
model=request.model,
|
||||
max_screenshot_scrolling_times=request.max_screenshot_scrolling_times,
|
||||
is_saved_task=request.is_saved_task,
|
||||
status=request.status,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user