Browser recording: events to blocks (#4195)
This commit is contained in:
@@ -32,6 +32,7 @@ from skyvern.forge.sdk.services.credential.custom_credential_vault_service impor
|
|||||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||||
from skyvern.forge.sdk.workflow.context_manager import WorkflowContextManager
|
from skyvern.forge.sdk.workflow.context_manager import WorkflowContextManager
|
||||||
from skyvern.forge.sdk.workflow.service import WorkflowService
|
from skyvern.forge.sdk.workflow.service import WorkflowService
|
||||||
|
from skyvern.services.browser_recording.service import BrowserSessionRecordingService
|
||||||
from skyvern.webeye.browser_manager import BrowserManager
|
from skyvern.webeye.browser_manager import BrowserManager
|
||||||
from skyvern.webeye.persistent_sessions_manager import PersistentSessionsManager
|
from skyvern.webeye.persistent_sessions_manager import PersistentSessionsManager
|
||||||
from skyvern.webeye.real_browser_manager import RealBrowserManager
|
from skyvern.webeye.real_browser_manager import RealBrowserManager
|
||||||
@@ -69,6 +70,7 @@ class ForgeApp:
|
|||||||
WORKFLOW_SERVICE: WorkflowService
|
WORKFLOW_SERVICE: WorkflowService
|
||||||
AGENT_FUNCTION: AgentFunction
|
AGENT_FUNCTION: AgentFunction
|
||||||
PERSISTENT_SESSIONS_MANAGER: PersistentSessionsManager
|
PERSISTENT_SESSIONS_MANAGER: PersistentSessionsManager
|
||||||
|
BROWSER_SESSION_RECORDING_SERVICE: BrowserSessionRecordingService
|
||||||
BITWARDEN_CREDENTIAL_VAULT_SERVICE: BitwardenCredentialVaultService
|
BITWARDEN_CREDENTIAL_VAULT_SERVICE: BitwardenCredentialVaultService
|
||||||
AZURE_CREDENTIAL_VAULT_SERVICE: AzureCredentialVaultService | None
|
AZURE_CREDENTIAL_VAULT_SERVICE: AzureCredentialVaultService | None
|
||||||
CUSTOM_CREDENTIAL_VAULT_SERVICE: CustomCredentialVaultService | None
|
CUSTOM_CREDENTIAL_VAULT_SERVICE: CustomCredentialVaultService | None
|
||||||
@@ -177,6 +179,7 @@ def create_forge_app() -> ForgeApp:
|
|||||||
app.WORKFLOW_SERVICE = WorkflowService()
|
app.WORKFLOW_SERVICE = WorkflowService()
|
||||||
app.AGENT_FUNCTION = AgentFunction()
|
app.AGENT_FUNCTION = AgentFunction()
|
||||||
app.PERSISTENT_SESSIONS_MANAGER = PersistentSessionsManager(database=app.DATABASE)
|
app.PERSISTENT_SESSIONS_MANAGER = PersistentSessionsManager(database=app.DATABASE)
|
||||||
|
app.BROWSER_SESSION_RECORDING_SERVICE = BrowserSessionRecordingService()
|
||||||
|
|
||||||
app.AZURE_CLIENT_FACTORY = RealAzureClientFactory()
|
app.AZURE_CLIENT_FACTORY = RealAzureClientFactory()
|
||||||
app.BITWARDEN_CREDENTIAL_VAULT_SERVICE = BitwardenCredentialVaultService()
|
app.BITWARDEN_CREDENTIAL_VAULT_SERVICE = BitwardenCredentialVaultService()
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
Given a browser action, come up with a templated one-line prompt suitable for a browser agent, a block label, and a title.
|
||||||
|
|
||||||
|
The templated prompt should have one jinja variable in it. Come up with a good name for the variable that is
|
||||||
|
lower case, no spaces, underscores permitted.
|
||||||
|
|
||||||
|
Example: "Enter {{ address }} into the address field."
|
||||||
|
|
||||||
|
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||||
|
|
||||||
|
Reply in JSON format with the following keys:
|
||||||
|
{
|
||||||
|
"prompt": str, // A templated, one-line prompt suitable for a browser agent describing the user action.
|
||||||
|
"title": str, // A descriptive and informative title for the goal. Use no more than 5 words
|
||||||
|
"block_label": str, // A label for the block. Lower case. Based off of the "title". Underscores are permitted.
|
||||||
|
"parameter_name": { "key": str } // The name of the parameter being input. Lower case, no spaces. Underscores are permitted.
|
||||||
|
}
|
||||||
|
|
||||||
|
User action:
|
||||||
|
```
|
||||||
|
{{ action }}
|
||||||
|
```
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
Given a browser action, come up with a one-line prompt suitable for a browser agent, a block label, and a title.
|
||||||
|
|
||||||
|
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||||
|
|
||||||
|
Reply in JSON format with the following keys:
|
||||||
|
{
|
||||||
|
"prompt": str, // A one-line prompt suitable for a browser agent describing the user action.
|
||||||
|
"title": str, // A descriptive and informative title for the goal. Use no more than 5 words
|
||||||
|
"block_label": str // A label for the block. Lower case. Based off of the "title". Underscores are permitted.
|
||||||
|
}
|
||||||
|
|
||||||
|
User action:
|
||||||
|
```
|
||||||
|
{{ action }}
|
||||||
|
```
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
Given a "go to URL" action, come up with a block label suitable for a browser agent. It should include a short version of a url, if the url exists.
|
||||||
|
|
||||||
|
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||||
|
|
||||||
|
Reply in JSON format with the following keys:
|
||||||
|
{
|
||||||
|
"block_label": str // A label for the block. Lower case. Underscores are permitted.
|
||||||
|
}
|
||||||
|
|
||||||
|
go to URL action:
|
||||||
|
```
|
||||||
|
{{ action }}
|
||||||
|
```
|
||||||
15
skyvern/forge/prompts/skyvern/recording-wait-block-prompt.j2
Normal file
15
skyvern/forge/prompts/skyvern/recording-wait-block-prompt.j2
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
Given a wait action, come up with a block label. It should include the duration in seconds, and a short version of a url, if the url exists.
|
||||||
|
|
||||||
|
If the duration for the wait action is in milliseconds, so be sure to convert to seconds.
|
||||||
|
|
||||||
|
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||||
|
|
||||||
|
Reply in JSON format with the following keys:
|
||||||
|
{
|
||||||
|
"block_label": str // A label for the block. Lower case. Underscores are permitted.
|
||||||
|
}
|
||||||
|
|
||||||
|
Wait action:
|
||||||
|
```
|
||||||
|
{{ action }}
|
||||||
|
```
|
||||||
@@ -18,7 +18,11 @@ from skyvern.forge.sdk.routes.code_samples import (
|
|||||||
from skyvern.forge.sdk.routes.routers import base_router
|
from skyvern.forge.sdk.routes.routers import base_router
|
||||||
from skyvern.forge.sdk.schemas.organizations import Organization
|
from skyvern.forge.sdk.schemas.organizations import Organization
|
||||||
from skyvern.forge.sdk.services import org_auth_service
|
from skyvern.forge.sdk.services import org_auth_service
|
||||||
from skyvern.schemas.browser_sessions import CreateBrowserSessionRequest
|
from skyvern.schemas.browser_sessions import (
|
||||||
|
CreateBrowserSessionRequest,
|
||||||
|
ProcessBrowserSessionRecordingRequest,
|
||||||
|
ProcessBrowserSessionRecordingResponse,
|
||||||
|
)
|
||||||
from skyvern.webeye.schemas import BrowserSessionResponse
|
from skyvern.webeye.schemas import BrowserSessionResponse
|
||||||
|
|
||||||
|
|
||||||
@@ -217,3 +221,29 @@ async def get_browser_sessions(
|
|||||||
for browser_session in browser_sessions
|
for browser_session in browser_sessions
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@base_router.post(
|
||||||
|
"/browser_sessions/{browser_session_id}/process_recording",
|
||||||
|
include_in_schema=False,
|
||||||
|
)
|
||||||
|
async def process_recording(
|
||||||
|
browser_session_id: str = Path(..., description="The ID of the browser session.", examples=["pbs_123456"]),
|
||||||
|
recording_request: ProcessBrowserSessionRecordingRequest = ProcessBrowserSessionRecordingRequest(),
|
||||||
|
current_org: Organization = Depends(org_auth_service.get_current_org),
|
||||||
|
) -> ProcessBrowserSessionRecordingResponse:
|
||||||
|
browser_session = await app.PERSISTENT_SESSIONS_MANAGER.get_session(
|
||||||
|
browser_session_id,
|
||||||
|
current_org.organization_id,
|
||||||
|
)
|
||||||
|
if not browser_session:
|
||||||
|
raise HTTPException(status_code=404, detail=f"Browser session {browser_session_id} not found")
|
||||||
|
|
||||||
|
blocks, parameters = await app.BROWSER_SESSION_RECORDING_SERVICE.process_recording(
|
||||||
|
organization_id=current_org.organization_id,
|
||||||
|
browser_session_id=browser_session_id,
|
||||||
|
compressed_chunks=recording_request.compressed_chunks,
|
||||||
|
workflow_permanent_id=recording_request.workflow_permanent_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
return ProcessBrowserSessionRecordingResponse(blocks=blocks, parameters=parameters)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import asyncio
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
import enum
|
import enum
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
import typing as t
|
import typing as t
|
||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
@@ -39,6 +40,7 @@ class ExfiltratedEvent:
|
|||||||
# TODO(jdo): improve typing for params
|
# TODO(jdo): improve typing for params
|
||||||
params: dict = dataclasses.field(default_factory=dict)
|
params: dict = dataclasses.field(default_factory=dict)
|
||||||
source: ExfiltratedEventSource = ExfiltratedEventSource.NOT_SPECIFIED
|
source: ExfiltratedEventSource = ExfiltratedEventSource.NOT_SPECIFIED
|
||||||
|
timestamp: float = dataclasses.field(default_factory=lambda: time.time()) # seconds since epoch
|
||||||
|
|
||||||
|
|
||||||
OnExfiltrationEvent = t.Callable[[list[ExfiltratedEvent]], None]
|
OnExfiltrationEvent = t.Callable[[list[ExfiltratedEvent]], None]
|
||||||
@@ -68,6 +70,7 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
event_name="user_interaction",
|
event_name="user_interaction",
|
||||||
params=event_data,
|
params=event_data,
|
||||||
source=ExfiltratedEventSource.CONSOLE,
|
source=ExfiltratedEventSource.CONSOLE,
|
||||||
|
timestamp=time.time(),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -84,20 +87,25 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
event_name=event_name,
|
event_name=event_name,
|
||||||
params=params,
|
params=params,
|
||||||
source=ExfiltratedEventSource.CDP,
|
source=ExfiltratedEventSource.CDP,
|
||||||
|
timestamp=time.time(),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
self.on_event(messages)
|
self.on_event(messages)
|
||||||
|
|
||||||
if event_name in ("frame_navigated", "navigated_within_document"):
|
async def adorn(self, page: Page) -> t.Self:
|
||||||
# optimistically re-apply exfiltration and decoration on navigation
|
"""Add a mouse-following follower to the page."""
|
||||||
# (these operations should be idempotent)
|
if page.url.startswith("devtools:"):
|
||||||
pages = self.browser_context.pages if self.browser_context else []
|
return self
|
||||||
LOG.info(f"{self.class_name} re-applying exfiltration and decoration on navigation.", event_name=event_name)
|
|
||||||
|
|
||||||
for page in pages:
|
LOG.info(f"{self.class_name} adorning page.", url=page.url)
|
||||||
asyncio.create_task(self.exfiltrate(page))
|
|
||||||
asyncio.create_task(self.decorate(page))
|
(await page.evaluate(self.js("adorn")),)
|
||||||
|
(await page.add_init_script(self.js("adorn")),)
|
||||||
|
|
||||||
|
LOG.info(f"{self.class_name} adornment complete on page.", url=page.url)
|
||||||
|
|
||||||
|
return self
|
||||||
|
|
||||||
async def connect(self, cdp_url: str | None = None) -> t.Self:
|
async def connect(self, cdp_url: str | None = None) -> t.Self:
|
||||||
if self.browser and self.browser.is_connected() and self.cdp_session:
|
if self.browser and self.browser.is_connected() and self.cdp_session:
|
||||||
@@ -121,12 +129,18 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
async def exfiltrate(self, page: Page) -> t.Self:
|
async def exfiltrate(self, page: Page) -> t.Self:
|
||||||
"""
|
"""
|
||||||
Track user interactions and send to console for CDP to capture.
|
Track user interactions and send to console for CDP to capture.
|
||||||
|
|
||||||
|
Uses add_init_script to ensure the exfiltration script is re-injected
|
||||||
|
on every navigation (including address bar navigations).
|
||||||
"""
|
"""
|
||||||
|
if page.url.startswith("devtools:"):
|
||||||
|
return self
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} setting up exfiltration on new page.", url=page.url)
|
LOG.info(f"{self.class_name} setting up exfiltration on new page.", url=page.url)
|
||||||
|
|
||||||
page.on("console", self._handle_console_event)
|
page.on("console", self._handle_console_event)
|
||||||
|
|
||||||
|
await page.add_init_script(self.js("exfiltrate"))
|
||||||
await page.evaluate(self.js("exfiltrate"))
|
await page.evaluate(self.js("exfiltrate"))
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} setup complete on page.", url=page.url)
|
LOG.info(f"{self.class_name} setup complete on page.", url=page.url)
|
||||||
@@ -135,8 +149,12 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
|
|
||||||
async def decorate(self, page: Page) -> t.Self:
|
async def decorate(self, page: Page) -> t.Self:
|
||||||
"""Add a mouse-following follower to the page."""
|
"""Add a mouse-following follower to the page."""
|
||||||
|
if page.url.startswith("devtools:"):
|
||||||
|
return self
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} adding decoration to page.", url=page.url)
|
LOG.info(f"{self.class_name} adding decoration to page.", url=page.url)
|
||||||
|
|
||||||
|
await page.add_init_script(self.js("decorate"))
|
||||||
await page.evaluate(self.js("decorate"))
|
await page.evaluate(self.js("decorate"))
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} decoration setup complete on page.", url=page.url)
|
LOG.info(f"{self.class_name} decoration setup complete on page.", url=page.url)
|
||||||
@@ -145,8 +163,12 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
|
|
||||||
async def undecorate(self, page: Page) -> t.Self:
|
async def undecorate(self, page: Page) -> t.Self:
|
||||||
"""Remove the mouse-following follower from the page."""
|
"""Remove the mouse-following follower from the page."""
|
||||||
|
if page.url.startswith("devtools:"):
|
||||||
|
return self
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} removing decoration from page.", url=page.url)
|
LOG.info(f"{self.class_name} removing decoration from page.", url=page.url)
|
||||||
|
|
||||||
|
await page.add_init_script(self.js("undecorate"))
|
||||||
await page.evaluate(self.js("undecorate"))
|
await page.evaluate(self.js("undecorate"))
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} decoration removed from page.", url=page.url)
|
LOG.info(f"{self.class_name} decoration removed from page.", url=page.url)
|
||||||
@@ -174,10 +196,35 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
cdp_session.on("Target.targetCreated", lambda params: self._handle_cdp_event("target_created", params))
|
cdp_session.on("Target.targetCreated", lambda params: self._handle_cdp_event("target_created", params))
|
||||||
cdp_session.on("Target.targetDestroyed", lambda params: self._handle_cdp_event("target_destroyed", params))
|
cdp_session.on("Target.targetDestroyed", lambda params: self._handle_cdp_event("target_destroyed", params))
|
||||||
cdp_session.on("Target.targetInfoChanged", lambda params: self._handle_cdp_event("target_info_changed", params))
|
cdp_session.on("Target.targetInfoChanged", lambda params: self._handle_cdp_event("target_info_changed", params))
|
||||||
cdp_session.on("Page.frameNavigated", lambda params: self._handle_cdp_event("frame_navigated", params))
|
|
||||||
cdp_session.on(
|
cdp_session.on(
|
||||||
"Page.navigatedWithinDocument", lambda params: self._handle_cdp_event("navigated_within_document", params)
|
"Page.frameRequestedNavigation",
|
||||||
|
lambda params: self._handle_cdp_event("nav:frame_requested_navigation", params),
|
||||||
)
|
)
|
||||||
|
cdp_session.on(
|
||||||
|
"Page.frameStartedNavigating", lambda params: self._handle_cdp_event("nav:frame_started_navigating", params)
|
||||||
|
)
|
||||||
|
cdp_session.on("Page.frameNavigated", lambda params: self._handle_cdp_event("nav:frame_navigated", params))
|
||||||
|
cdp_session.on(
|
||||||
|
"Page.navigatedWithinDocument",
|
||||||
|
lambda params: self._handle_cdp_event("nav:navigated_within_document", params),
|
||||||
|
)
|
||||||
|
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def enable_adornment(self) -> t.Self:
|
||||||
|
browser_context = self.browser_context
|
||||||
|
|
||||||
|
if not browser_context:
|
||||||
|
LOG.error(f"{self.class_name} no browser context to enable adornment.")
|
||||||
|
return self
|
||||||
|
|
||||||
|
tasks: list[asyncio.Task] = []
|
||||||
|
for page in browser_context.pages:
|
||||||
|
tasks.append(asyncio.create_task(self.adorn(page)))
|
||||||
|
|
||||||
|
await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
browser_context.on("page", lambda page: asyncio.create_task(self.adorn(page)))
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@@ -214,6 +261,8 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
|
|
||||||
await self.enable_cdp_events()
|
await self.enable_cdp_events()
|
||||||
|
|
||||||
|
await self.enable_adornment()
|
||||||
|
|
||||||
self.enable_console_events()
|
self.enable_console_events()
|
||||||
|
|
||||||
self.enable_decoration()
|
self.enable_decoration()
|
||||||
@@ -236,7 +285,10 @@ class ExfiltrationChannel(CdpChannel):
|
|||||||
pages = self.browser_context.pages if self.browser_context else []
|
pages = self.browser_context.pages if self.browser_context else []
|
||||||
|
|
||||||
for page in pages:
|
for page in pages:
|
||||||
page.remove_listener("console", self._handle_console_event)
|
try:
|
||||||
|
page.remove_listener("console", self._handle_console_event)
|
||||||
|
except KeyError:
|
||||||
|
pass # listener not found
|
||||||
await self.undecorate(page)
|
await self.undecorate(page)
|
||||||
|
|
||||||
LOG.info(f"{self.class_name} stopped.")
|
LOG.info(f"{self.class_name} stopped.")
|
||||||
|
|||||||
97
skyvern/forge/sdk/routes/streaming/channels/js/adorn.js
Normal file
97
skyvern/forge/sdk/routes/streaming/channels/js/adorn.js
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
/**
|
||||||
|
* DOM-adornment: assign stable identifiers to all DOM elements.
|
||||||
|
*/
|
||||||
|
|
||||||
|
(function () {
|
||||||
|
console.log("[SYS] adornment evaluated");
|
||||||
|
|
||||||
|
window.__skyvern_assignedEls = window.__skyvern_assignedEls ?? 0;
|
||||||
|
|
||||||
|
const visited = (window.__skyvern_visited =
|
||||||
|
window.__skyvern_visited ?? new Set());
|
||||||
|
|
||||||
|
function __skyvern_generateUniqueId() {
|
||||||
|
const timestamp = Date.now().toString(36);
|
||||||
|
const randomPart = Math.random().toString(36).substring(2);
|
||||||
|
|
||||||
|
return `sky-${timestamp}-${randomPart}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
window.__skyvern_generateUniqueId = __skyvern_generateUniqueId;
|
||||||
|
|
||||||
|
function __skyvern_assignSkyIds(node) {
|
||||||
|
if (!node) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node.nodeType === 1) {
|
||||||
|
if (!node.dataset.skyId) {
|
||||||
|
window.__skyvern_assignedEls += 1;
|
||||||
|
node.dataset.skyId = __skyvern_generateUniqueId();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (visited.has(node)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
visited.add(node);
|
||||||
|
|
||||||
|
const children = node.querySelectorAll("*");
|
||||||
|
|
||||||
|
children.forEach((child) => {
|
||||||
|
__skyvern_assignSkyIds(child);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (document.body) {
|
||||||
|
__skyvern_assignSkyIds(document.body);
|
||||||
|
console.log(
|
||||||
|
"[SYS] adornment: initially assigned skyIds to elements:",
|
||||||
|
window.__skyvern_assignedEls,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.addEventListener("DOMContentLoaded", () => {
|
||||||
|
__skyvern_assignSkyIds(document.body);
|
||||||
|
console.log(
|
||||||
|
"[SYS] adornment: assigned skyIds to elements on DOMContentLoaded:",
|
||||||
|
window.__skyvern_assignedEls,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
const observerConfig = {
|
||||||
|
childList: true,
|
||||||
|
subtree: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
const observer = new MutationObserver(function (mutationsList) {
|
||||||
|
for (const mutation of mutationsList) {
|
||||||
|
if (mutation.type === "childList") {
|
||||||
|
mutation.addedNodes.forEach((node) => {
|
||||||
|
__skyvern_assignSkyIds(node);
|
||||||
|
console.log(
|
||||||
|
"[SYS] adornment: assigned skyIds to new elements:",
|
||||||
|
window.__skyvern_assignedEls,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
function observeWhenReady() {
|
||||||
|
if (document.body) {
|
||||||
|
observer.observe(document.body, observerConfig);
|
||||||
|
} else {
|
||||||
|
document.addEventListener("DOMContentLoaded", () => {
|
||||||
|
if (document.body) {
|
||||||
|
observer.observe(document.body, observerConfig);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
observeWhenReady();
|
||||||
|
|
||||||
|
window.__skyvern_adornment_observer = observer;
|
||||||
|
})();
|
||||||
@@ -1,109 +1,118 @@
|
|||||||
(function () {
|
(function () {
|
||||||
if (!window.__skyvern_decoration_initialized) {
|
console.log("[SYS] decorate: evaluated");
|
||||||
window.__skyvern_decoration_initialized = true;
|
|
||||||
|
|
||||||
window.__skyvern_create_mouse_follower = function () {
|
function initiate() {
|
||||||
// create the circle element
|
if (!window.__skyvern_decoration_initialized) {
|
||||||
const existingCircle = document.getElementById(
|
console.log("[SYS] decorate: initializing");
|
||||||
"__skyvern_mouse_follower",
|
|
||||||
);
|
|
||||||
|
|
||||||
if (existingCircle) {
|
window.__skyvern_decoration_initialized = true;
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const circle = document.createElement("div");
|
window.__skyvern_create_mouse_follower = function () {
|
||||||
window.__skyvern_decoration_mouse_follower = circle;
|
const preexistingCircles = document.querySelectorAll(
|
||||||
circle.id = "__skyvern_mouse_follower";
|
"#__skyvern_mouse_follower",
|
||||||
circle.style.position = "fixed";
|
);
|
||||||
circle.style.left = "0";
|
|
||||||
circle.style.top = "0";
|
|
||||||
circle.style.width = "30px";
|
|
||||||
circle.style.height = "30px";
|
|
||||||
circle.style.borderRadius = "50%";
|
|
||||||
circle.style.backgroundColor = "rgba(255, 0, 0, 0.2)";
|
|
||||||
circle.style.pointerEvents = "none";
|
|
||||||
circle.style.zIndex = "999999";
|
|
||||||
circle.style.willChange = "transform";
|
|
||||||
document.body.appendChild(circle);
|
|
||||||
|
|
||||||
return true;
|
if (preexistingCircles.length > 0) {
|
||||||
};
|
for (const circle of preexistingCircles) {
|
||||||
|
circle.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const wasCreated = window.__skyvern_create_mouse_follower();
|
const circle = document.createElement("div");
|
||||||
|
window.__skyvern_decoration_mouse_follower = circle;
|
||||||
|
circle.id = "__skyvern_mouse_follower";
|
||||||
|
circle.style.position = "fixed";
|
||||||
|
circle.style.left = "0";
|
||||||
|
circle.style.top = "0";
|
||||||
|
circle.style.width = "30px";
|
||||||
|
circle.style.height = "30px";
|
||||||
|
circle.style.borderRadius = "50%";
|
||||||
|
circle.style.backgroundColor = "rgba(255, 0, 0, 0.2)";
|
||||||
|
circle.style.pointerEvents = "none";
|
||||||
|
circle.style.zIndex = "999999";
|
||||||
|
circle.style.willChange = "transform";
|
||||||
|
document.body.appendChild(circle);
|
||||||
|
};
|
||||||
|
|
||||||
if (!wasCreated) {
|
window.__skyvern_create_mouse_follower();
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let scale = 1;
|
let scale = 1;
|
||||||
let targetScale = 1;
|
let targetScale = 1;
|
||||||
let mouseX = 0;
|
let mouseX = 0;
|
||||||
let mouseY = 0;
|
let mouseY = 0;
|
||||||
|
|
||||||
// smooth scale animation
|
// smooth scale animation
|
||||||
function animate() {
|
function animate() {
|
||||||
if (!window.__skyvern_decoration_mouse_follower) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const follower = window.__skyvern_decoration_mouse_follower;
|
|
||||||
|
|
||||||
scale += (targetScale - scale) * 0.2;
|
|
||||||
|
|
||||||
if (Math.abs(targetScale - scale) > 0.001) {
|
|
||||||
requestAnimationFrame(animate);
|
|
||||||
} else {
|
|
||||||
scale = targetScale;
|
|
||||||
}
|
|
||||||
|
|
||||||
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update follower position on mouse move
|
|
||||||
document.addEventListener(
|
|
||||||
"mousemove",
|
|
||||||
(e) => {
|
|
||||||
if (!window.__skyvern_decoration_mouse_follower) {
|
if (!window.__skyvern_decoration_mouse_follower) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const follower = window.__skyvern_decoration_mouse_follower;
|
const follower = window.__skyvern_decoration_mouse_follower;
|
||||||
mouseX = e.clientX;
|
|
||||||
mouseY = e.clientY;
|
scale += (targetScale - scale) * 0.2;
|
||||||
|
|
||||||
|
if (Math.abs(targetScale - scale) > 0.001) {
|
||||||
|
requestAnimationFrame(animate);
|
||||||
|
} else {
|
||||||
|
scale = targetScale;
|
||||||
|
}
|
||||||
|
|
||||||
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
||||||
},
|
}
|
||||||
true,
|
|
||||||
);
|
|
||||||
|
|
||||||
// expand follower on mouse down
|
// update follower position on mouse move
|
||||||
document.addEventListener(
|
document.addEventListener(
|
||||||
"mousedown",
|
"mousemove",
|
||||||
() => {
|
(e) => {
|
||||||
if (!window.__skyvern_decoration_mouse_follower) {
|
if (!window.__skyvern_decoration_mouse_follower) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
targetScale = 50 / 30;
|
const follower = window.__skyvern_decoration_mouse_follower;
|
||||||
requestAnimationFrame(animate);
|
mouseX = e.clientX;
|
||||||
},
|
mouseY = e.clientY;
|
||||||
true,
|
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
||||||
);
|
},
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
|
||||||
// return follower to original size on mouse up
|
// expand follower on mouse down
|
||||||
document.addEventListener(
|
document.addEventListener(
|
||||||
"mouseup",
|
"mousedown",
|
||||||
() => {
|
() => {
|
||||||
if (!window.__skyvern_decoration_mouse_follower) {
|
if (!window.__skyvern_decoration_mouse_follower) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
targetScale = 1;
|
targetScale = 50 / 30;
|
||||||
requestAnimationFrame(animate);
|
requestAnimationFrame(animate);
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// return follower to original size on mouse up
|
||||||
|
document.addEventListener(
|
||||||
|
"mouseup",
|
||||||
|
() => {
|
||||||
|
if (!window.__skyvern_decoration_mouse_follower) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
targetScale = 1;
|
||||||
|
requestAnimationFrame(animate);
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
window.__skyvern_create_mouse_follower();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (document.body) {
|
||||||
|
console.log("[SYS] decorate: document already loaded, initiating");
|
||||||
|
initiate();
|
||||||
} else {
|
} else {
|
||||||
window.__skyvern_create_mouse_follower();
|
console.log("[SYS] decorate: waiting for DOMContentLoaded to initiate");
|
||||||
|
document.addEventListener("DOMContentLoaded", initiate);
|
||||||
}
|
}
|
||||||
})();
|
})();
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
(function () {
|
(function () {
|
||||||
|
console.log("[SYS] exfiltration: evaluated");
|
||||||
if (!window.__skyvern_exfiltration_initialized) {
|
if (!window.__skyvern_exfiltration_initialized) {
|
||||||
|
console.log("[SYS] exfiltration: initializing");
|
||||||
window.__skyvern_exfiltration_initialized = true;
|
window.__skyvern_exfiltration_initialized = true;
|
||||||
|
|
||||||
[
|
[
|
||||||
@@ -55,6 +57,10 @@
|
|||||||
const getElementText = (element) => {
|
const getElementText = (element) => {
|
||||||
const textSources = [];
|
const textSources = [];
|
||||||
|
|
||||||
|
if (!element.getAttribute) {
|
||||||
|
return textSources;
|
||||||
|
}
|
||||||
|
|
||||||
if (element.getAttribute("aria-label")) {
|
if (element.getAttribute("aria-label")) {
|
||||||
textSources.push(element.getAttribute("aria-label"));
|
textSources.push(element.getAttribute("aria-label"));
|
||||||
}
|
}
|
||||||
@@ -96,6 +102,52 @@
|
|||||||
return textSources.length > 0 ? textSources : [];
|
return textSources.length > 0 ? textSources : [];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const skyId = e.target?.dataset?.skyId || null;
|
||||||
|
|
||||||
|
if (!skyId && e.target?.tagName !== "HTML") {
|
||||||
|
console.log("[SYS] exfiltration: target element has no skyId.");
|
||||||
|
|
||||||
|
if (window.__skyvern_generateUniqueId && e.target?.dataset) {
|
||||||
|
const newSkyId = window.__skyvern_generateUniqueId();
|
||||||
|
e.target.dataset.skyId = newSkyId;
|
||||||
|
console.log(
|
||||||
|
`[SYS] exfiltration: assigned new skyId to target element: ${newSkyId}`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
console.log(
|
||||||
|
"[SYS] exfiltration: cannot assign skyId, generator not found.",
|
||||||
|
);
|
||||||
|
|
||||||
|
const info = {
|
||||||
|
tagName: e.target?.tagName,
|
||||||
|
target: e.target,
|
||||||
|
targetType: typeof e.target,
|
||||||
|
eventType,
|
||||||
|
id: e.target?.id,
|
||||||
|
className: e.target?.className,
|
||||||
|
value: e.target?.value,
|
||||||
|
text: getElementText(e.target),
|
||||||
|
labels: getAssociatedLabels(e.target),
|
||||||
|
skyId: e.target?.dataset?.skyId,
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const infoS = JSON.stringify(info, null, 2);
|
||||||
|
console.log(
|
||||||
|
`[SYS] exfiltration: target element info: ${infoS}`,
|
||||||
|
);
|
||||||
|
} catch (err) {
|
||||||
|
console.log(
|
||||||
|
"[SYS] exfiltration: target element info: [unserializable]",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const classText = String(
|
||||||
|
e.target.classList?.value ?? e.target.getAttribute("class") ?? "",
|
||||||
|
);
|
||||||
|
|
||||||
const eventData = {
|
const eventData = {
|
||||||
url: window.location.href,
|
url: window.location.href,
|
||||||
type: eventType,
|
type: eventType,
|
||||||
@@ -103,10 +155,13 @@
|
|||||||
target: {
|
target: {
|
||||||
tagName: e.target?.tagName,
|
tagName: e.target?.tagName,
|
||||||
id: e.target?.id,
|
id: e.target?.id,
|
||||||
className: e.target?.className,
|
isHtml: e.target instanceof HTMLElement,
|
||||||
|
isSvg: e.target instanceof SVGElement,
|
||||||
|
className: classText,
|
||||||
value: e.target?.value,
|
value: e.target?.value,
|
||||||
text: getElementText(e.target),
|
text: getElementText(e.target),
|
||||||
labels: getAssociatedLabels(e.target),
|
labels: getAssociatedLabels(e.target),
|
||||||
|
skyId: e.target?.dataset?.skyId,
|
||||||
},
|
},
|
||||||
inputValue: ["input", "focus", "blur"].includes(eventType)
|
inputValue: ["input", "focus", "blur"].includes(eventType)
|
||||||
? e.target?.value
|
? e.target?.value
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
(function () {
|
(function () {
|
||||||
|
console.log("[SYS] undecorate: evaluated");
|
||||||
|
|
||||||
const followers = document.querySelectorAll("#__skyvern_mouse_follower");
|
const followers = document.querySelectorAll("#__skyvern_mouse_follower");
|
||||||
|
|
||||||
for (const follower of followers) {
|
for (const follower of followers) {
|
||||||
|
|||||||
@@ -122,6 +122,7 @@ class MessageOutExfiltratedEvent(Message):
|
|||||||
# TODO(jdo): improve typing for params
|
# TODO(jdo): improve typing for params
|
||||||
params: dict = dataclasses.field(default_factory=dict)
|
params: dict = dataclasses.field(default_factory=dict)
|
||||||
source: ExfiltratedEventSource = ExfiltratedEventSource.NOT_SPECIFIED
|
source: ExfiltratedEventSource = ExfiltratedEventSource.NOT_SPECIFIED
|
||||||
|
timestamp: float = dataclasses.field(default_factory=lambda: 0.0) # seconds since epoch
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
@@ -433,6 +434,7 @@ async def loop_stream_messages(message_channel: MessageChannel) -> None:
|
|||||||
event_name=event.event_name,
|
event_name=event.event_name,
|
||||||
params=event.params,
|
params=event.params,
|
||||||
source=t.cast(ExfiltratedEventSource, event.source or ExfiltratedEventSource.NOT_SPECIFIED),
|
source=t.cast(ExfiltratedEventSource, event.source or ExfiltratedEventSource.NOT_SPECIFIED),
|
||||||
|
timestamp=event.timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
message_channel.send_nowait(messages=[message_out_exfiltrated_event])
|
message_channel.send_nowait(messages=[message_out_exfiltrated_event])
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from skyvern.client.types.workflow_definition_yaml_blocks_item import WorkflowDefinitionYamlBlocksItem
|
||||||
|
from skyvern.client.types.workflow_definition_yaml_parameters_item import WorkflowDefinitionYamlParametersItem_Workflow
|
||||||
from skyvern.schemas.docs.doc_strings import PROXY_LOCATION_DOC_STRING
|
from skyvern.schemas.docs.doc_strings import PROXY_LOCATION_DOC_STRING
|
||||||
from skyvern.schemas.runs import ProxyLocation
|
from skyvern.schemas.runs import ProxyLocation
|
||||||
|
|
||||||
@@ -19,3 +21,25 @@ class CreateBrowserSessionRequest(BaseModel):
|
|||||||
default=None,
|
default=None,
|
||||||
description=PROXY_LOCATION_DOC_STRING,
|
description=PROXY_LOCATION_DOC_STRING,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ProcessBrowserSessionRecordingRequest(BaseModel):
|
||||||
|
compressed_chunks: list[str] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of base64 encoded and compressed (gzip) event strings representing the browser session recording.",
|
||||||
|
)
|
||||||
|
workflow_permanent_id: str = Field(
|
||||||
|
default="no-such-wpid",
|
||||||
|
description="Permanent ID of the workflow associated with the browser session recording.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ProcessBrowserSessionRecordingResponse(BaseModel):
|
||||||
|
blocks: list[WorkflowDefinitionYamlBlocksItem] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of workflow blocks generated from the processed browser session recording.",
|
||||||
|
)
|
||||||
|
parameters: list[WorkflowDefinitionYamlParametersItem_Workflow] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of workflow parameters generated from the processed browser session recording.",
|
||||||
|
)
|
||||||
|
|||||||
3
skyvern/services/browser_recording/__init__.py
Normal file
3
skyvern/services/browser_recording/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from .service import BrowserSessionRecordingService
|
||||||
|
|
||||||
|
__all__ = ["BrowserSessionRecordingService"]
|
||||||
491
skyvern/services/browser_recording/service.py
Normal file
491
skyvern/services/browser_recording/service.py
Normal file
@@ -0,0 +1,491 @@
|
|||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
import typing as t
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
import skyvern.services.browser_recording.state_machines as sm
|
||||||
|
from skyvern.client.types.workflow_definition_yaml_blocks_item import (
|
||||||
|
WorkflowDefinitionYamlBlocksItem_Action,
|
||||||
|
WorkflowDefinitionYamlBlocksItem_GotoUrl,
|
||||||
|
WorkflowDefinitionYamlBlocksItem_Wait,
|
||||||
|
)
|
||||||
|
from skyvern.client.types.workflow_definition_yaml_parameters_item import WorkflowDefinitionYamlParametersItem_Workflow
|
||||||
|
from skyvern.forge import app
|
||||||
|
from skyvern.forge.prompts import prompt_engine
|
||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ActionBlockable,
|
||||||
|
ActionKind,
|
||||||
|
ActionUrlChange,
|
||||||
|
ActionWait,
|
||||||
|
ExfiltratedCdpEvent,
|
||||||
|
ExfiltratedConsoleEvent,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
OutputBlock,
|
||||||
|
)
|
||||||
|
|
||||||
|
LOG = structlog.get_logger(__name__)
|
||||||
|
|
||||||
|
# avoid decompression bombs
|
||||||
|
MAX_BASE64_SIZE = 14 * 1024 * 1024 # ~10MB compressed + base64 overhead
|
||||||
|
|
||||||
|
|
||||||
|
class Processor:
|
||||||
|
"""
|
||||||
|
Process browser session recordings into workflow definition blocks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
browser_session_id: str,
|
||||||
|
organization_id: str,
|
||||||
|
workflow_permanent_id: str,
|
||||||
|
) -> None:
|
||||||
|
self.browser_session_id = browser_session_id
|
||||||
|
self.organization_id = organization_id
|
||||||
|
self.workflow_permanent_id = workflow_permanent_id
|
||||||
|
|
||||||
|
@property
|
||||||
|
def class_name(self) -> str:
|
||||||
|
return self.__class__.__name__
|
||||||
|
|
||||||
|
@property
|
||||||
|
def identity(self) -> dict[str, str]:
|
||||||
|
return dict(
|
||||||
|
browser_session_id=self.browser_session_id,
|
||||||
|
organization_id=self.organization_id,
|
||||||
|
workflow_permanent_id=self.workflow_permanent_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
def decompress(self, base64_payload: str) -> bytes | None:
|
||||||
|
"""
|
||||||
|
Decode a base64 string, decompress it using gzip, and return it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if len(base64_payload) > MAX_BASE64_SIZE:
|
||||||
|
LOG.warning(f"{self.class_name}: base64 payload too large: {len(base64_payload)} bytes", **self.identity)
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
# base64 decode -> gzip binary data
|
||||||
|
#
|
||||||
|
# NOTE(llm): The data sent from btoa() is technically a "non-standard"
|
||||||
|
# Base64, but Python's standard decoder is usually robust enough to
|
||||||
|
# handle it.
|
||||||
|
compressed_data: bytes = base64.b64decode(base64_payload)
|
||||||
|
except Exception as ex:
|
||||||
|
LOG.warning(f"{self.class_name} failed to decode Base64 payload", exc_info=ex, **self.identity)
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
# gzip decompression -> bytes
|
||||||
|
#
|
||||||
|
# NOTE(llm): We use zlib.decompress with wbits=16 + zlib.MAX_WBITS (31).
|
||||||
|
# This tells zlib to automatically detect and handle Gzip headers,
|
||||||
|
# which is essential since the browser used CompressionStream('gzip').
|
||||||
|
# Using zlib is often faster than the higher-level gzip module for this
|
||||||
|
# purpose.
|
||||||
|
decompressed_bytes: bytes = zlib.decompress(compressed_data, wbits=16 + zlib.MAX_WBITS)
|
||||||
|
except zlib.error as e:
|
||||||
|
LOG.warning(f"{self.class_name} decompression error: {e}", **self.identity)
|
||||||
|
# Log the error, maybe log the first few characters of the payload for debugging
|
||||||
|
return None
|
||||||
|
|
||||||
|
return decompressed_bytes
|
||||||
|
|
||||||
|
def serialize(self, decompressed_bytes: bytes | None) -> list[dict[str, t.Any]]:
|
||||||
|
"""
|
||||||
|
Convert decompressed bytes into a list of events (Python list/dictionary).
|
||||||
|
"""
|
||||||
|
if not decompressed_bytes:
|
||||||
|
LOG.warning(f"{self.class_name} No decompressed bytes to serialize", **self.identity)
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# bytes -> JSON string
|
||||||
|
json_string: str = decompressed_bytes.decode("utf-8")
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warning(f"{self.class_name} decode error: {e}", **self.identity)
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# JSON string -> list of dicts
|
||||||
|
events_list: list[dict[str, t.Any]] = json.loads(json_string)
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warning(f"{self.class_name} JSON parsing error: {e}", **self.identity)
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not isinstance(events_list, list):
|
||||||
|
LOG.warning(f"{self.class_name} Expected a list of events, got:", type(events_list), **self.identity)
|
||||||
|
return []
|
||||||
|
|
||||||
|
return events_list
|
||||||
|
|
||||||
|
def reify(self, events_list: list[dict[str, t.Any]]) -> list[ExfiltratedEvent]:
|
||||||
|
"""
|
||||||
|
Convert a list of event dictionaries into a list of `ExfiltratedEvent`s.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not events_list:
|
||||||
|
LOG.warning(f"{self.class_name} No events to reify", **self.identity)
|
||||||
|
return []
|
||||||
|
|
||||||
|
reified_events: list[ExfiltratedEvent] = []
|
||||||
|
for event in events_list:
|
||||||
|
if event.get("source") == "cdp":
|
||||||
|
try:
|
||||||
|
reified_event = ExfiltratedCdpEvent(**event)
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warning(f"{self.class_name} Failed to reify CDP event: {e}", **self.identity)
|
||||||
|
continue
|
||||||
|
elif event.get("source") == "console":
|
||||||
|
try:
|
||||||
|
reified_event = ExfiltratedConsoleEvent(**event)
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warning(f"{self.class_name} Failed to reify console event: {e}", **self.identity)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
LOG.error(f"{self.class_name} Unknown event source: {event.get('source')}", **self.identity)
|
||||||
|
continue
|
||||||
|
reified_events.append(reified_event)
|
||||||
|
|
||||||
|
return reified_events
|
||||||
|
|
||||||
|
def compressed_chunks_to_events(self, compressed_chunks: list[str]) -> list[ExfiltratedEvent]:
|
||||||
|
"""
|
||||||
|
Convert a list of base64 encoded and compressed (gzip) event strings into
|
||||||
|
a list of `ExfiltratedEvent`s.
|
||||||
|
"""
|
||||||
|
all_events: list[ExfiltratedEvent] = []
|
||||||
|
|
||||||
|
for compressed_chunk in compressed_chunks:
|
||||||
|
decompressed = self.decompress(compressed_chunk)
|
||||||
|
serialized = self.serialize(decompressed)
|
||||||
|
reified = self.reify(serialized)
|
||||||
|
all_events.extend(reified)
|
||||||
|
|
||||||
|
return all_events
|
||||||
|
|
||||||
|
def events_to_actions(
|
||||||
|
self,
|
||||||
|
events: list[ExfiltratedEvent],
|
||||||
|
machines: list[sm.StateMachine] | None = None,
|
||||||
|
) -> list[Action]:
|
||||||
|
"""
|
||||||
|
Convert a list of `ExfiltratedEvent`s into `Action`s.
|
||||||
|
"""
|
||||||
|
actions: list[Action] = []
|
||||||
|
|
||||||
|
machines = machines or [
|
||||||
|
sm.Click(),
|
||||||
|
sm.Hover(),
|
||||||
|
sm.InputText(),
|
||||||
|
sm.UrlChange(),
|
||||||
|
sm.Wait(),
|
||||||
|
]
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
for machine in machines:
|
||||||
|
action = machine.tick(event, actions)
|
||||||
|
|
||||||
|
if not action:
|
||||||
|
continue
|
||||||
|
|
||||||
|
allow_action = True
|
||||||
|
|
||||||
|
for m in machines:
|
||||||
|
if not m.on_action(action, actions):
|
||||||
|
allow_action = False
|
||||||
|
LOG.debug(
|
||||||
|
f"{self.class_name} action vetoed by state machine {m.__class__.__name__}",
|
||||||
|
action=action,
|
||||||
|
**self.identity,
|
||||||
|
)
|
||||||
|
|
||||||
|
if allow_action:
|
||||||
|
actions.append(action)
|
||||||
|
else:
|
||||||
|
# if an action was vetoed, we do not allow further processing
|
||||||
|
# of this event through subsequent state machines
|
||||||
|
break
|
||||||
|
|
||||||
|
return actions
|
||||||
|
|
||||||
|
def dedupe_block_labels(self, suspects: list[OutputBlock]) -> list[OutputBlock]:
|
||||||
|
"""
|
||||||
|
Detect if any block labels are duplicated, and, if so, rename them for
|
||||||
|
uniqueness.
|
||||||
|
"""
|
||||||
|
|
||||||
|
blocks: list[OutputBlock] = []
|
||||||
|
labels: set[str] = set()
|
||||||
|
|
||||||
|
for block in suspects:
|
||||||
|
if block.label not in labels:
|
||||||
|
labels.add(block.label)
|
||||||
|
blocks.append(block)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
original_label = block.label
|
||||||
|
count = 0
|
||||||
|
while True:
|
||||||
|
new_label = f"{original_label}_{count}"
|
||||||
|
if new_label not in labels:
|
||||||
|
cls = block.__class__
|
||||||
|
data = block.model_dump() | {"label": new_label}
|
||||||
|
new_block = cls(**data)
|
||||||
|
blocks.append(new_block)
|
||||||
|
labels.add(new_label)
|
||||||
|
break
|
||||||
|
count += 1
|
||||||
|
|
||||||
|
return blocks
|
||||||
|
|
||||||
|
async def actions_to_blocks(self, actions: list[Action]) -> list[OutputBlock]:
|
||||||
|
"""
|
||||||
|
Convert a list of `Action` objects into workflow definition (YAML) blocks.
|
||||||
|
"""
|
||||||
|
tasks: list[asyncio.Task] = []
|
||||||
|
|
||||||
|
for action in actions:
|
||||||
|
action_kind = action.kind.value
|
||||||
|
|
||||||
|
match action.kind:
|
||||||
|
case ActionKind.CLICK | ActionKind.HOVER | ActionKind.INPUT_TEXT:
|
||||||
|
task = asyncio.create_task(self.create_action_block(action))
|
||||||
|
tasks.append(task)
|
||||||
|
case ActionKind.URL_CHANGE:
|
||||||
|
task = asyncio.create_task(self.create_url_block(action))
|
||||||
|
tasks.append(task)
|
||||||
|
case ActionKind.WAIT:
|
||||||
|
task = asyncio.create_task(self.create_wait_block(action))
|
||||||
|
tasks.append(task)
|
||||||
|
case _:
|
||||||
|
LOG.warning(
|
||||||
|
f"{self.class_name} Unknown action kind: {action_kind}",
|
||||||
|
action=action,
|
||||||
|
**self.identity,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
blocks: list[OutputBlock] = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
blocks = self.dedupe_block_labels(blocks)
|
||||||
|
|
||||||
|
return blocks
|
||||||
|
|
||||||
|
def blocks_to_parameters(self, blocks: list[OutputBlock]) -> list[WorkflowDefinitionYamlParametersItem_Workflow]:
|
||||||
|
"""
|
||||||
|
Convert a list of workflow definition (YAML) blocks into workflow definition (YAML) parameters.
|
||||||
|
"""
|
||||||
|
parameter_names: set[str] = set()
|
||||||
|
|
||||||
|
for block in blocks:
|
||||||
|
if isinstance(block, WorkflowDefinitionYamlBlocksItem_Action):
|
||||||
|
for param_name in block.parameter_keys or []:
|
||||||
|
parameter_names.add(param_name)
|
||||||
|
|
||||||
|
parameters: list[WorkflowDefinitionYamlParametersItem_Workflow] = []
|
||||||
|
|
||||||
|
for param_name in parameter_names:
|
||||||
|
parameter = WorkflowDefinitionYamlParametersItem_Workflow(
|
||||||
|
key=param_name,
|
||||||
|
workflow_parameter_type="string",
|
||||||
|
default_value="",
|
||||||
|
description="",
|
||||||
|
)
|
||||||
|
parameters.append(parameter)
|
||||||
|
|
||||||
|
return parameters
|
||||||
|
|
||||||
|
async def create_action_block(self, action: ActionBlockable) -> WorkflowDefinitionYamlBlocksItem_Action:
|
||||||
|
"""
|
||||||
|
Create a YAML action block from an `ActionBlockable`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
DEFAULT_BLOCK_TITLE = "Browser Action"
|
||||||
|
|
||||||
|
if action.kind == ActionKind.INPUT_TEXT:
|
||||||
|
prompt_name = "recording-action-block-prompt-input-text"
|
||||||
|
else:
|
||||||
|
prompt_name = "recording-action-block-prompt"
|
||||||
|
|
||||||
|
metadata_prompt = prompt_engine.load_prompt(
|
||||||
|
prompt_name,
|
||||||
|
action=action,
|
||||||
|
)
|
||||||
|
|
||||||
|
metadata_response = await app.LLM_API_HANDLER(
|
||||||
|
prompt=metadata_prompt,
|
||||||
|
prompt_name=prompt_name,
|
||||||
|
organization_id=self.organization_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
block_label: str = metadata_response.get("block_label", None) or "act"
|
||||||
|
title: str = metadata_response.get("title", None) or DEFAULT_BLOCK_TITLE
|
||||||
|
navigation_goal: str = metadata_response.get("prompt", "")
|
||||||
|
parameter_name: dict | None = metadata_response.get("parameter_name", None)
|
||||||
|
|
||||||
|
block = WorkflowDefinitionYamlBlocksItem_Action(
|
||||||
|
label=block_label,
|
||||||
|
title=title,
|
||||||
|
navigation_goal=navigation_goal,
|
||||||
|
error_code_mapping=None,
|
||||||
|
parameters=[parameter_name] if parameter_name else [], # sic(jdo): the frontend requires this
|
||||||
|
parameter_keys=[parameter_name.get("key")] if parameter_name else [],
|
||||||
|
)
|
||||||
|
|
||||||
|
return block
|
||||||
|
|
||||||
|
async def create_url_block(self, action: ActionUrlChange) -> WorkflowDefinitionYamlBlocksItem_GotoUrl:
|
||||||
|
"""
|
||||||
|
Create a YAML goto URL block from an `ActionUrlChange`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt_name = "recording-go-to-url-block-prompt"
|
||||||
|
|
||||||
|
metadata_prompt = prompt_engine.load_prompt(
|
||||||
|
prompt_name,
|
||||||
|
action=action,
|
||||||
|
)
|
||||||
|
|
||||||
|
metadata_response = await app.LLM_API_HANDLER(
|
||||||
|
prompt=metadata_prompt,
|
||||||
|
prompt_name=prompt_name,
|
||||||
|
organization_id=self.organization_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
block_label: str = metadata_response.get("block_label", None) or "goto_url"
|
||||||
|
|
||||||
|
block = WorkflowDefinitionYamlBlocksItem_GotoUrl(
|
||||||
|
label=block_label,
|
||||||
|
url=action.url,
|
||||||
|
)
|
||||||
|
|
||||||
|
return block
|
||||||
|
|
||||||
|
async def create_wait_block(self, action: ActionWait) -> WorkflowDefinitionYamlBlocksItem_Wait:
|
||||||
|
"""
|
||||||
|
Create a YAML wait block from an `ActionWait`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt_name = "recording-wait-block-prompt"
|
||||||
|
|
||||||
|
metadata_prompt = prompt_engine.load_prompt(
|
||||||
|
prompt_name,
|
||||||
|
action=action,
|
||||||
|
)
|
||||||
|
|
||||||
|
metadata_response = await app.LLM_API_HANDLER(
|
||||||
|
prompt=metadata_prompt,
|
||||||
|
prompt_name=prompt_name,
|
||||||
|
organization_id=self.organization_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
block_label: str = metadata_response.get("block_label", None) or "wait"
|
||||||
|
|
||||||
|
block = WorkflowDefinitionYamlBlocksItem_Wait(
|
||||||
|
label=block_label,
|
||||||
|
wait_sec=int(max(action.duration_ms / 1000.0, ActionWait.MIN_DURATION_THRESHOLD_MS / 1000.0)),
|
||||||
|
)
|
||||||
|
|
||||||
|
return block
|
||||||
|
|
||||||
|
async def process(
|
||||||
|
self, compressed_chunks: list[str]
|
||||||
|
) -> tuple[list[OutputBlock], list[WorkflowDefinitionYamlParametersItem_Workflow]]:
|
||||||
|
"""
|
||||||
|
Process the compressed browser session recording into workflow definition blocks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
events = self.compressed_chunks_to_events(compressed_chunks)
|
||||||
|
actions = self.events_to_actions(events)
|
||||||
|
blocks = await self.actions_to_blocks(actions)
|
||||||
|
parameters = self.blocks_to_parameters(blocks)
|
||||||
|
|
||||||
|
return blocks, parameters
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserSessionRecordingService:
|
||||||
|
async def process_recording(
|
||||||
|
self,
|
||||||
|
browser_session_id: str,
|
||||||
|
organization_id: str,
|
||||||
|
workflow_permanent_id: str,
|
||||||
|
compressed_chunks: list[str],
|
||||||
|
) -> tuple[list[OutputBlock], list[WorkflowDefinitionYamlParametersItem_Workflow]]:
|
||||||
|
"""
|
||||||
|
Process compressed browser session recording events into workflow definition blocks.
|
||||||
|
"""
|
||||||
|
processor = Processor(
|
||||||
|
browser_session_id,
|
||||||
|
organization_id,
|
||||||
|
workflow_permanent_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
return await processor.process(compressed_chunks)
|
||||||
|
|
||||||
|
|
||||||
|
async def smoke() -> None:
|
||||||
|
with open(pathlib.Path("/path/to/uncompressed/events.json")) as f:
|
||||||
|
raw_events: list[dict] = json.load(f)
|
||||||
|
|
||||||
|
events: list[ExfiltratedEvent] = []
|
||||||
|
|
||||||
|
for i, raw_event in enumerate(raw_events):
|
||||||
|
if not isinstance(raw_event, dict):
|
||||||
|
LOG.debug(f"~ skipping non-dict event: {raw_event}")
|
||||||
|
continue
|
||||||
|
if raw_event.get("source") == "cdp":
|
||||||
|
try:
|
||||||
|
event = ExfiltratedCdpEvent(**raw_event)
|
||||||
|
except Exception:
|
||||||
|
LOG.exception(f"{i} Failed to parse exfiltrated CDP event")
|
||||||
|
LOG.debug(f"~ raw event: {json.dumps(raw_event, sort_keys=True, indent=2)}")
|
||||||
|
continue
|
||||||
|
events.append(event)
|
||||||
|
elif raw_event.get("source") == "console":
|
||||||
|
event = ExfiltratedConsoleEvent(**raw_event)
|
||||||
|
events.append(event)
|
||||||
|
|
||||||
|
LOG.debug(f"{len(events)} events.")
|
||||||
|
|
||||||
|
my_local_org_id = "o_389844905020748346"
|
||||||
|
processor = Processor("pbs_123", my_local_org_id, "wpid_123")
|
||||||
|
actions = processor.events_to_actions(events)
|
||||||
|
|
||||||
|
LOG.debug(f"{len(actions)} actions:")
|
||||||
|
|
||||||
|
for action in actions:
|
||||||
|
id = action.target.sky_id if action.target.sky_id else action.target.id
|
||||||
|
text = ",".join(action.target.texts or [])
|
||||||
|
LOG.debug(f" {action.kind} [{id}] [{text}] @ {action.url}")
|
||||||
|
|
||||||
|
blocks = await processor.actions_to_blocks(actions)
|
||||||
|
|
||||||
|
LOG.debug(f"{len(blocks)} blocks:")
|
||||||
|
|
||||||
|
for block in blocks:
|
||||||
|
LOG.debug(f" {block.label}")
|
||||||
|
|
||||||
|
if isinstance(block, WorkflowDefinitionYamlBlocksItem_Action):
|
||||||
|
LOG.debug(f" title: {block.title}")
|
||||||
|
LOG.debug(f" nav goal: {block.navigation_goal}")
|
||||||
|
|
||||||
|
if isinstance(block, WorkflowDefinitionYamlBlocksItem_GotoUrl):
|
||||||
|
LOG.debug(f" url: {block.url}")
|
||||||
|
|
||||||
|
if isinstance(block, WorkflowDefinitionYamlBlocksItem_Wait):
|
||||||
|
LOG.debug(f" wait sec: {block.wait_sec}")
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == "__main__":
|
||||||
|
# from skyvern.forge.forge_app_initializer import start_forge_app
|
||||||
|
|
||||||
|
# start_forge_app()
|
||||||
|
|
||||||
|
# asyncio.run(smoke())
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
from .click import StateMachineClick as Click
|
||||||
|
from .hover import StateMachineHover as Hover
|
||||||
|
from .input_text import StateMachineInputText as InputText
|
||||||
|
from .state_machine import StateMachine
|
||||||
|
from .url_change import StateMachineUrlChange as UrlChange
|
||||||
|
from .wait import StateMachineWait as Wait
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Click",
|
||||||
|
"Hover",
|
||||||
|
"InputText",
|
||||||
|
"StateMachine",
|
||||||
|
"UrlChange",
|
||||||
|
"Wait",
|
||||||
|
]
|
||||||
89
skyvern/services/browser_recording/state_machines/click.py
Normal file
89
skyvern/services/browser_recording/state_machines/click.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
import typing as t
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ActionClick,
|
||||||
|
ActionKind,
|
||||||
|
ActionTarget,
|
||||||
|
EventTarget,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
Mouse,
|
||||||
|
MousePosition,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .state_machine import StateMachine
|
||||||
|
|
||||||
|
LOG = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachineClick(StateMachine):
|
||||||
|
state: t.Literal["void"] = "void"
|
||||||
|
target: EventTarget | None = None
|
||||||
|
timestamp: float | None = None
|
||||||
|
mouse: MousePosition | None = None
|
||||||
|
url: str | None = None
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionClick | None:
|
||||||
|
if event.source != "console":
|
||||||
|
return None
|
||||||
|
|
||||||
|
if event.params.type != "click":
|
||||||
|
if event.params.mousePosition:
|
||||||
|
if event.params.mousePosition.xp is not None and event.params.mousePosition.yp is not None:
|
||||||
|
self.mouse = event.params.mousePosition
|
||||||
|
return None
|
||||||
|
|
||||||
|
LOG.debug(f"~ click detected [{event.params.target.skyId or event.params.target.id}]")
|
||||||
|
|
||||||
|
self.target = event.params.target
|
||||||
|
self.timestamp = event.params.timestamp
|
||||||
|
self.url = event.params.url
|
||||||
|
|
||||||
|
if event.params.mousePosition:
|
||||||
|
self.mouse = event.params.mousePosition
|
||||||
|
|
||||||
|
return self.emit(event)
|
||||||
|
|
||||||
|
def emit(self, event: ExfiltratedEvent) -> ActionClick | None:
|
||||||
|
if not self.target:
|
||||||
|
LOG.debug("~ cannot emit click, missing target; resetting")
|
||||||
|
self.reset()
|
||||||
|
return None
|
||||||
|
|
||||||
|
xp = (self.mouse.xp or -1) if self.mouse else None
|
||||||
|
yp = (self.mouse.yp or -1) if self.mouse else None
|
||||||
|
|
||||||
|
LOG.debug("~ emitting click action", exfiltrated_event=event)
|
||||||
|
|
||||||
|
action_target = ActionTarget(
|
||||||
|
class_name=self.target.className,
|
||||||
|
id=self.target.id,
|
||||||
|
mouse=Mouse(xp=xp, yp=yp),
|
||||||
|
sky_id=self.target.skyId,
|
||||||
|
tag_name=self.target.tagName,
|
||||||
|
texts=self.target.text,
|
||||||
|
)
|
||||||
|
|
||||||
|
action = ActionClick(
|
||||||
|
kind=ActionKind.CLICK.value,
|
||||||
|
target=action_target,
|
||||||
|
timestamp_start=self.timestamp,
|
||||||
|
timestamp_end=self.timestamp,
|
||||||
|
url=self.url,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return action
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
self.state = "void"
|
||||||
|
self.target = None
|
||||||
|
self.timestamp = None
|
||||||
|
self.mouse = None
|
||||||
|
self.url = None
|
||||||
154
skyvern/services/browser_recording/state_machines/hover.py
Normal file
154
skyvern/services/browser_recording/state_machines/hover.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
import typing as t
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ActionHover,
|
||||||
|
ActionKind,
|
||||||
|
ActionTarget,
|
||||||
|
EventTarget,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
Mouse,
|
||||||
|
MousePosition,
|
||||||
|
target_has_changed,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .state_machine import StateMachine
|
||||||
|
|
||||||
|
LOG = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachineHover(StateMachine):
|
||||||
|
state: t.Literal["mouseenter", "next-event"] = "mouseenter"
|
||||||
|
target: EventTarget | None = None
|
||||||
|
timestamp_start: float | None = None
|
||||||
|
timestamp_end: float | None = None
|
||||||
|
mouse: MousePosition | None = None
|
||||||
|
# --
|
||||||
|
threshold_ms: int = ActionHover.DURATION_THRESHOLD_MS
|
||||||
|
|
||||||
|
def __init__(self, threshold_ms: int | None = None) -> None:
|
||||||
|
self.threshold_ms = max(
|
||||||
|
threshold_ms or ActionHover.DURATION_THRESHOLD_MS,
|
||||||
|
ActionHover.MIN_DURATION_THRESHOLD_MS,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionHover | None:
|
||||||
|
if event.source != "console":
|
||||||
|
return None
|
||||||
|
|
||||||
|
match self.state:
|
||||||
|
case "mouseenter":
|
||||||
|
if event.params.mousePosition:
|
||||||
|
if event.params.mousePosition.xp is not None and event.params.mousePosition.yp is not None:
|
||||||
|
self.mouse = event.params.mousePosition
|
||||||
|
|
||||||
|
if event.params.type != "mouseenter":
|
||||||
|
return None
|
||||||
|
|
||||||
|
text = ",".join(event.params.target.text or [])
|
||||||
|
LOG.debug(
|
||||||
|
f"~ hover: mouseenter detected [{event.params.target.skyId or event.params.target.id}] [{text}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.target = event.params.target
|
||||||
|
self.state = "next-event"
|
||||||
|
self.timestamp_start = event.params.timestamp
|
||||||
|
|
||||||
|
case "next-event":
|
||||||
|
if event.params.type == "mouseenter":
|
||||||
|
if target_has_changed(self.target, event.params.target):
|
||||||
|
self.timestamp_start = event.params.timestamp
|
||||||
|
self.target = event.params.target
|
||||||
|
return None
|
||||||
|
|
||||||
|
event_end = event.params.timestamp
|
||||||
|
|
||||||
|
if not self.timestamp_start:
|
||||||
|
LOG.debug("~ missing hover start timestamp, resetting")
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
duration_ms = int(event_end - self.timestamp_start)
|
||||||
|
|
||||||
|
if event.params.type == "mousemove":
|
||||||
|
target = event.params.target
|
||||||
|
|
||||||
|
if not target_has_changed(self.target, target):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if duration_ms < self.threshold_ms:
|
||||||
|
LOG.debug(f"~ hover duration {duration_ms}ms below threshold {self.threshold_ms}ms, resetting")
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
self.timestamp_end = event.params.timestamp
|
||||||
|
LOG.debug(
|
||||||
|
f"~ hover duration {duration_ms}ms meets threshold {self.threshold_ms}ms, emitting",
|
||||||
|
start=self.timestamp_start,
|
||||||
|
end=self.timestamp_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
# dedupe consecutive hover actions on same target
|
||||||
|
if current_actions:
|
||||||
|
last_action = current_actions[-1]
|
||||||
|
|
||||||
|
if last_action.kind == ActionKind.HOVER and self.target:
|
||||||
|
target_id = self.target.skyId or self.target.id
|
||||||
|
last_action_target_id = last_action.target.sky_id or last_action.target.id
|
||||||
|
|
||||||
|
if target_id:
|
||||||
|
if target_id == last_action_target_id:
|
||||||
|
LOG.debug("~ hover: duplicate hover action - skipping", target=self.target)
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self.emit(event)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def emit(self, event: ExfiltratedEvent) -> ActionHover | None:
|
||||||
|
if not self.target:
|
||||||
|
LOG.debug("~ cannot emit hover, missing target; resetting")
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
xp = (self.mouse.xp or -1) if self.mouse else None
|
||||||
|
yp = (self.mouse.yp or -1) if self.mouse else None
|
||||||
|
|
||||||
|
LOG.debug("~ emitting hover action", exfiltrated_event=event)
|
||||||
|
|
||||||
|
action_target = ActionTarget(
|
||||||
|
class_name=self.target.className,
|
||||||
|
id=self.target.id,
|
||||||
|
mouse=Mouse(xp=xp, yp=yp),
|
||||||
|
sky_id=self.target.skyId,
|
||||||
|
tag_name=self.target.tagName,
|
||||||
|
texts=self.target.text,
|
||||||
|
)
|
||||||
|
|
||||||
|
action = ActionHover(
|
||||||
|
kind=ActionKind.HOVER.value,
|
||||||
|
target=action_target,
|
||||||
|
timestamp_start=self.timestamp_start or -1,
|
||||||
|
timestamp_end=self.timestamp_end or -1,
|
||||||
|
url=event.params.url,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return action
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
self.state = "mouseenter"
|
||||||
|
self.target = None
|
||||||
|
self.timestamp_start = None
|
||||||
|
self.timestamp_end = None
|
||||||
|
self.mouse = None
|
||||||
156
skyvern/services/browser_recording/state_machines/input_text.py
Normal file
156
skyvern/services/browser_recording/state_machines/input_text.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
import typing as t
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ActionInputText,
|
||||||
|
ActionKind,
|
||||||
|
ActionTarget,
|
||||||
|
EventTarget,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
Mouse,
|
||||||
|
MousePosition,
|
||||||
|
target_has_changed,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .state_machine import StateMachine
|
||||||
|
|
||||||
|
LOG = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachineInputText(StateMachine):
|
||||||
|
state: t.Literal["focus", "keydown", "blur"] = "focus"
|
||||||
|
target: EventTarget | None = None
|
||||||
|
timestamp_start: float | None = None
|
||||||
|
mouse: MousePosition | None = None
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionInputText | None:
|
||||||
|
if event.source != "console":
|
||||||
|
return None
|
||||||
|
|
||||||
|
match self.state:
|
||||||
|
case "focus":
|
||||||
|
if event.params.type != "focus":
|
||||||
|
if event.params.mousePosition:
|
||||||
|
if event.params.mousePosition.xp is not None and event.params.mousePosition.yp is not None:
|
||||||
|
self.mouse = event.params.mousePosition
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
LOG.debug(f"~ focus detected [{event.params.target.skyId or event.params.target.id}]")
|
||||||
|
|
||||||
|
if event.params.type == "focus":
|
||||||
|
self.target = event.params.target
|
||||||
|
self.state = "keydown"
|
||||||
|
self.timestamp_start = event.params.timestamp
|
||||||
|
|
||||||
|
case "keydown":
|
||||||
|
if event.params.type != "keydown":
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.update_target(event.params.target):
|
||||||
|
return None
|
||||||
|
|
||||||
|
LOG.debug(f"~ initial keydown detected '{event.params.key}'")
|
||||||
|
|
||||||
|
self.state = "blur"
|
||||||
|
case "blur":
|
||||||
|
if event.params.type == "keydown":
|
||||||
|
LOG.debug(f"~ input-text: subsequent keydown detected '{event.params.key}'")
|
||||||
|
|
||||||
|
if event.params.key == "Enter":
|
||||||
|
return self.emit(event)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
if event.params.type != "blur":
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.update_target(event.params.target):
|
||||||
|
return None
|
||||||
|
|
||||||
|
LOG.debug("~ blur detected")
|
||||||
|
|
||||||
|
return self.emit(event)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def update_target(self, event_target: EventTarget) -> bool:
|
||||||
|
if target_has_changed(self.target, event_target):
|
||||||
|
self.target = event_target
|
||||||
|
|
||||||
|
LOG.debug("~ input-text target changed, resetting state machine")
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def emit(self, event: ExfiltratedEvent) -> ActionInputText | None:
|
||||||
|
if not self.target:
|
||||||
|
LOG.debug("~ cannot emit, missing target or mouse; resetting")
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
return None
|
||||||
|
|
||||||
|
xp = (self.mouse.xp or -1) if self.mouse else None
|
||||||
|
yp = (self.mouse.yp or -1) if self.mouse else None
|
||||||
|
|
||||||
|
LOG.debug("~ emitting input text action", exfiltrated_event=event)
|
||||||
|
|
||||||
|
input_value = event.params.target.value
|
||||||
|
|
||||||
|
if input_value is None:
|
||||||
|
LOG.debug("~ cannot emit, missing input value; resetting")
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
return None
|
||||||
|
|
||||||
|
action_target = ActionTarget(
|
||||||
|
class_name=self.target.className,
|
||||||
|
id=self.target.id,
|
||||||
|
mouse=Mouse(xp=xp, yp=yp),
|
||||||
|
sky_id=self.target.skyId,
|
||||||
|
tag_name=self.target.tagName,
|
||||||
|
texts=self.target.text,
|
||||||
|
)
|
||||||
|
|
||||||
|
action = ActionInputText(
|
||||||
|
kind=ActionKind.INPUT_TEXT.value,
|
||||||
|
target=action_target,
|
||||||
|
timestamp_start=self.timestamp_start,
|
||||||
|
timestamp_end=event.params.timestamp,
|
||||||
|
url=event.params.url,
|
||||||
|
input_value=str(input_value),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return action
|
||||||
|
|
||||||
|
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||||
|
if action.kind == ActionKind.CLICK:
|
||||||
|
# NOTE(jdo): skipping self.reset here; a focus event on an element can often be followed by a
|
||||||
|
# click event, and the identity doesn't always match due to nesting. I think a more precise
|
||||||
|
# check would be to:
|
||||||
|
# - check identity match for click and focus; if not matching:
|
||||||
|
# - ask the browser via cdp if there is a nesting relation between the two elements
|
||||||
|
# - if yes, allow and carry on, otherwise reset
|
||||||
|
# That's another round trip. It's likely pretty fast, tho. For now, we'll just assume a click does
|
||||||
|
# not invalidate the state of the input text state machine.
|
||||||
|
return True
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
self.state = "focus"
|
||||||
|
self.target = None
|
||||||
|
self.mouse = None
|
||||||
|
self.timestamp_start = None
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachine:
|
||||||
|
"""
|
||||||
|
A minimal, concrete StateMachineProtocol.
|
||||||
|
"""
|
||||||
|
|
||||||
|
state: str
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> Action | None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||||
|
"""
|
||||||
|
Optional callback when an action is emitted by _any_ state machine.
|
||||||
|
Default is that a state machine resets.
|
||||||
|
|
||||||
|
Return `True` (the default) to allow the action to proceed; return `False`
|
||||||
|
to veto the action.
|
||||||
|
"""
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
pass
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
import typing as t
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ActionKind,
|
||||||
|
ActionTarget,
|
||||||
|
ActionUrlChange,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
Mouse,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .state_machine import StateMachine
|
||||||
|
|
||||||
|
LOG = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachineUrlChange(StateMachine):
|
||||||
|
state: t.Literal["void"] = "void"
|
||||||
|
last_url: str | None = None
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionUrlChange | None:
|
||||||
|
if event.source != "cdp":
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not event.event_name.startswith("nav:"):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if event.event_name == "nav:frame_started_navigating":
|
||||||
|
url = event.params.url
|
||||||
|
|
||||||
|
if url == self.last_url:
|
||||||
|
LOG.debug("~ ignoring navigation to same URL", url=url)
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
if event.params.frame:
|
||||||
|
self.last_url = event.params.frame.url
|
||||||
|
elif event.params.url:
|
||||||
|
self.last_url = event.params.url
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not url:
|
||||||
|
return None
|
||||||
|
|
||||||
|
self.last_url = url
|
||||||
|
|
||||||
|
LOG.debug("~ emitting URL change action", url=url)
|
||||||
|
|
||||||
|
action_target = ActionTarget(
|
||||||
|
class_name=None,
|
||||||
|
id=None,
|
||||||
|
mouse=Mouse(xp=None, yp=None),
|
||||||
|
sky_id=None,
|
||||||
|
tag_name=None,
|
||||||
|
texts=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
return ActionUrlChange(
|
||||||
|
kind=ActionKind.URL_CHANGE.value,
|
||||||
|
target=action_target,
|
||||||
|
timestamp_start=event.timestamp,
|
||||||
|
timestamp_end=event.timestamp,
|
||||||
|
url=url,
|
||||||
|
)
|
||||||
|
|
||||||
|
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||||
|
if action.kind != ActionKind.URL_CHANGE:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if not current_actions:
|
||||||
|
return True
|
||||||
|
|
||||||
|
last_action = current_actions[-1]
|
||||||
|
|
||||||
|
if last_action.kind != ActionKind.URL_CHANGE:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if last_action.url == action.url:
|
||||||
|
LOG.debug("~ vetoing duplicate URL change action", url=action.url)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
self.state = "void"
|
||||||
|
self.last_url = None
|
||||||
78
skyvern/services/browser_recording/state_machines/wait.py
Normal file
78
skyvern/services/browser_recording/state_machines/wait.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
import typing as t
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from skyvern.services.browser_recording.types import (
|
||||||
|
Action,
|
||||||
|
ActionKind,
|
||||||
|
ActionTarget,
|
||||||
|
ActionWait,
|
||||||
|
ExfiltratedEvent,
|
||||||
|
Mouse,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .state_machine import StateMachine
|
||||||
|
|
||||||
|
LOG = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachineWait(StateMachine):
|
||||||
|
state: t.Literal["void"] = "void"
|
||||||
|
last_event_timestamp: float | None = None
|
||||||
|
threshold_ms: int = ActionWait.MIN_DURATION_THRESHOLD_MS
|
||||||
|
|
||||||
|
def __init__(self, threshold_ms: int | None = None) -> None:
|
||||||
|
self.threshold_ms = max(
|
||||||
|
threshold_ms or ActionWait.MIN_DURATION_THRESHOLD_MS,
|
||||||
|
ActionWait.MIN_DURATION_THRESHOLD_MS,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionWait | None:
|
||||||
|
if event.source != "console":
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.last_event_timestamp is not None:
|
||||||
|
duration_ms = int(event.params.timestamp - self.last_event_timestamp)
|
||||||
|
|
||||||
|
if duration_ms >= self.threshold_ms:
|
||||||
|
LOG.debug("~ emitting wait action", duration_ms=duration_ms)
|
||||||
|
|
||||||
|
action_target = ActionTarget(
|
||||||
|
class_name=None,
|
||||||
|
id=None,
|
||||||
|
mouse=Mouse(xp=None, yp=None),
|
||||||
|
sky_id=None,
|
||||||
|
tag_name=None,
|
||||||
|
texts=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
action = ActionWait(
|
||||||
|
kind=ActionKind.WAIT.value,
|
||||||
|
target=action_target,
|
||||||
|
timestamp_start=self.last_event_timestamp,
|
||||||
|
timestamp_end=event.params.timestamp,
|
||||||
|
url=event.params.url,
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return action
|
||||||
|
|
||||||
|
self.last_event_timestamp = event.params.timestamp
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||||
|
if action.kind == ActionKind.HOVER:
|
||||||
|
return True
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
self.state = "void"
|
||||||
|
self.last_event_timestamp = None
|
||||||
243
skyvern/services/browser_recording/types.py
Normal file
243
skyvern/services/browser_recording/types.py
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
"""
|
||||||
|
A types module for browser recording actions and events.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import enum
|
||||||
|
import typing as t
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from skyvern.client.types.workflow_definition_yaml_blocks_item import (
|
||||||
|
WorkflowDefinitionYamlBlocksItem_Action,
|
||||||
|
WorkflowDefinitionYamlBlocksItem_GotoUrl,
|
||||||
|
WorkflowDefinitionYamlBlocksItem_Wait,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionKind(enum.StrEnum):
|
||||||
|
CLICK = "click"
|
||||||
|
HOVER = "hover"
|
||||||
|
INPUT_TEXT = "input_text"
|
||||||
|
URL_CHANGE = "url_change"
|
||||||
|
WAIT = "wait"
|
||||||
|
|
||||||
|
|
||||||
|
class ActionBase(BaseModel):
|
||||||
|
kind: ActionKind
|
||||||
|
# --
|
||||||
|
target: "ActionTarget"
|
||||||
|
timestamp_start: float
|
||||||
|
timestamp_end: float
|
||||||
|
url: str
|
||||||
|
|
||||||
|
|
||||||
|
class ActionClick(ActionBase):
|
||||||
|
kind: t.Literal[ActionKind.CLICK]
|
||||||
|
|
||||||
|
|
||||||
|
class ActionHover(ActionBase):
|
||||||
|
kind: t.Literal[ActionKind.HOVER]
|
||||||
|
# --
|
||||||
|
DURATION_THRESHOLD_MS: t.ClassVar[int] = 2000
|
||||||
|
MIN_DURATION_THRESHOLD_MS: t.ClassVar[int] = 1000
|
||||||
|
|
||||||
|
|
||||||
|
class ActionInputText(ActionBase):
|
||||||
|
kind: t.Literal[ActionKind.INPUT_TEXT]
|
||||||
|
# --
|
||||||
|
input_value: str
|
||||||
|
|
||||||
|
|
||||||
|
class ActionUrlChange(ActionBase):
|
||||||
|
kind: t.Literal[ActionKind.URL_CHANGE]
|
||||||
|
|
||||||
|
|
||||||
|
class ActionWait(ActionBase):
|
||||||
|
kind: t.Literal[ActionKind.WAIT]
|
||||||
|
# --
|
||||||
|
duration_ms: int
|
||||||
|
MIN_DURATION_THRESHOLD_MS: t.ClassVar[int] = 5000
|
||||||
|
|
||||||
|
|
||||||
|
Action = ActionClick | ActionHover | ActionInputText | ActionUrlChange | ActionWait
|
||||||
|
|
||||||
|
ActionBlockable = ActionClick | ActionHover | ActionInputText
|
||||||
|
|
||||||
|
|
||||||
|
class ActionTarget(BaseModel):
|
||||||
|
class_name: str | None = None
|
||||||
|
id: str | None = None
|
||||||
|
mouse: "Mouse"
|
||||||
|
sky_id: str | None = None
|
||||||
|
tag_name: str | None = None
|
||||||
|
texts: list[str] = []
|
||||||
|
|
||||||
|
|
||||||
|
class Mouse(BaseModel):
|
||||||
|
xp: float | None = None
|
||||||
|
"""
|
||||||
|
0 to 1.0 inclusive, percentage across the viewport
|
||||||
|
"""
|
||||||
|
yp: float | None = None
|
||||||
|
"""
|
||||||
|
0 to 1.0 inclusive, percentage down the viewport
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
OutputBlock = t.Union[
|
||||||
|
WorkflowDefinitionYamlBlocksItem_Action,
|
||||||
|
WorkflowDefinitionYamlBlocksItem_GotoUrl,
|
||||||
|
WorkflowDefinitionYamlBlocksItem_Wait,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class TargetInfo(BaseModel):
|
||||||
|
attached: bool | None = None
|
||||||
|
browserContextId: str | None = None
|
||||||
|
canAccessOpener: bool | None = None
|
||||||
|
targetId: str | None = None
|
||||||
|
title: str | None = None
|
||||||
|
type: str | None = None
|
||||||
|
url: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class CdpEventFrame(BaseModel):
|
||||||
|
url: str | None = None
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
extra = "allow"
|
||||||
|
|
||||||
|
|
||||||
|
class ExfiltratedEventCdpParams(BaseModel):
|
||||||
|
# target_info_changed events
|
||||||
|
targetInfo: TargetInfo | None = None
|
||||||
|
|
||||||
|
# frame_requested_navigation events
|
||||||
|
disposition: str | None = None
|
||||||
|
frameId: str | None = None
|
||||||
|
reason: str | None = None
|
||||||
|
url: str | None = None
|
||||||
|
|
||||||
|
# frame_navigated events
|
||||||
|
frame: CdpEventFrame | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class EventTarget(BaseModel):
|
||||||
|
className: str | None = None
|
||||||
|
id: str | None = None
|
||||||
|
isHtml: bool = False
|
||||||
|
isSvg: bool = False
|
||||||
|
innerText: str | None = None
|
||||||
|
skyId: str | None = None
|
||||||
|
tagName: str | None = None
|
||||||
|
text: list[str] = []
|
||||||
|
value: str | int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MousePosition(BaseModel):
|
||||||
|
xa: float | None = None
|
||||||
|
ya: float | None = None
|
||||||
|
xp: float | None = None
|
||||||
|
yp: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class BoundingRect(BaseModel):
|
||||||
|
bottom: float
|
||||||
|
height: float
|
||||||
|
left: float
|
||||||
|
right: float
|
||||||
|
top: float
|
||||||
|
width: float
|
||||||
|
x: float
|
||||||
|
y: float
|
||||||
|
|
||||||
|
|
||||||
|
class Scroll(BaseModel):
|
||||||
|
clientHeight: float
|
||||||
|
clientWidth: float
|
||||||
|
scrollHeight: float
|
||||||
|
scrollLeft: float
|
||||||
|
scrollTop: float
|
||||||
|
scrollWidth: float
|
||||||
|
|
||||||
|
|
||||||
|
class ActiveElement(BaseModel):
|
||||||
|
boundingRect: BoundingRect | None = None
|
||||||
|
className: str | None = None
|
||||||
|
id: str | None = None
|
||||||
|
scroll: Scroll | None = None
|
||||||
|
tagName: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class Window(BaseModel):
|
||||||
|
height: float
|
||||||
|
scrollX: float
|
||||||
|
scrollY: float
|
||||||
|
width: float
|
||||||
|
|
||||||
|
|
||||||
|
class ExfiltratedEventConsoleParams(BaseModel):
|
||||||
|
activeElement: ActiveElement
|
||||||
|
code: str | None = None
|
||||||
|
inputValue: str | None = None
|
||||||
|
key: str | None = None
|
||||||
|
mousePosition: MousePosition
|
||||||
|
target: EventTarget
|
||||||
|
timestamp: float
|
||||||
|
type: str
|
||||||
|
url: str
|
||||||
|
window: Window
|
||||||
|
|
||||||
|
|
||||||
|
class ExfiltratedCdpEvent(BaseModel):
|
||||||
|
kind: Literal["exfiltrated-event"]
|
||||||
|
event_name: str
|
||||||
|
params: ExfiltratedEventCdpParams
|
||||||
|
source: Literal["cdp"]
|
||||||
|
timestamp: float
|
||||||
|
|
||||||
|
|
||||||
|
class ExfiltratedConsoleEvent(BaseModel):
|
||||||
|
kind: Literal["exfiltrated-event"]
|
||||||
|
event_name: str
|
||||||
|
params: ExfiltratedEventConsoleParams
|
||||||
|
source: Literal["console"]
|
||||||
|
timestamp: float
|
||||||
|
|
||||||
|
|
||||||
|
ExfiltratedEvent = ExfiltratedCdpEvent | ExfiltratedConsoleEvent
|
||||||
|
|
||||||
|
|
||||||
|
class StateMachineProtocol(t.Protocol):
|
||||||
|
state: str
|
||||||
|
|
||||||
|
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> Action | None: ...
|
||||||
|
|
||||||
|
def on_action(self, action: Action, current_actions: list[Action]) -> bool: ...
|
||||||
|
|
||||||
|
def reset(self) -> None: ...
|
||||||
|
|
||||||
|
|
||||||
|
# -- guards, predicates, etc.
|
||||||
|
|
||||||
|
|
||||||
|
def target_has_changed(current_target: EventTarget | None, event_target: EventTarget) -> bool:
|
||||||
|
if not current_target:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not event_target.skyId and not event_target.id:
|
||||||
|
return True # sic: we cannot compare, so assume changed
|
||||||
|
|
||||||
|
if not current_target.skyId and not current_target.id:
|
||||||
|
return True # sic: we cannot compare, so assume changed
|
||||||
|
|
||||||
|
if current_target.id and event_target.id:
|
||||||
|
if current_target.id != event_target.id:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if current_target.skyId and event_target.skyId:
|
||||||
|
if current_target.skyId != event_target.skyId:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
Reference in New Issue
Block a user