add browser console log artifact (#1097)

This commit is contained in:
LawyZheng
2024-10-31 23:10:11 +08:00
committed by GitHub
parent 00549c921b
commit de8e887e0f
10 changed files with 94 additions and 2 deletions

View File

@@ -48,6 +48,7 @@ repos:
- alembic - alembic
- "sqlalchemy[mypy]" - "sqlalchemy[mypy]"
- types-PyYAML - types-PyYAML
- types-aiofiles
exclude: | exclude: |
(?x)( (?x)(
^tests.*| ^tests.*|

View File

@@ -19,6 +19,7 @@ COPY . /app
ENV PYTHONPATH="/app:$PYTHONPATH" ENV PYTHONPATH="/app:$PYTHONPATH"
ENV VIDEO_PATH=/data/videos ENV VIDEO_PATH=/data/videos
ENV HAR_PATH=/data/har ENV HAR_PATH=/data/har
ENV LOG_PATH=/data/log
ENV ARTIFACT_STORAGE_PATH=/data/artifacts ENV ARTIFACT_STORAGE_PATH=/data/artifacts
COPY ./entrypoint-skyvern.sh /app/entrypoint-skyvern.sh COPY ./entrypoint-skyvern.sh /app/entrypoint-skyvern.sh

View File

@@ -30,6 +30,7 @@ services:
- ./artifacts:/data/artifacts - ./artifacts:/data/artifacts
- ./videos:/data/videos - ./videos:/data/videos
- ./har:/data/har - ./har:/data/har
- ./log:/data/log
- ./.streamlit:/app/.streamlit - ./.streamlit:/app/.streamlit
environment: environment:
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern - DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern

View File

@@ -13,6 +13,7 @@ class Settings(BaseSettings):
MAX_SCRAPING_RETRIES: int = 0 MAX_SCRAPING_RETRIES: int = 0
VIDEO_PATH: str | None = None VIDEO_PATH: str | None = None
HAR_PATH: str | None = "./har" HAR_PATH: str | None = "./har"
LOG_PATH: str = "./log"
BROWSER_ACTION_TIMEOUT_MS: int = 5000 BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000 BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 120000 BROWSER_LOADING_TIMEOUT_MS: int = 120000

View File

@@ -1517,6 +1517,16 @@ class ForgeAgent:
data=har_data, data=har_data,
) )
browser_log = await app.BROWSER_MANAGER.get_browser_console_log(
task_id=task.task_id, browser_state=browser_state
)
if browser_log:
await app.ARTIFACT_MANAGER.create_artifact(
step=last_step,
artifact_type=ArtifactType.BROWSER_CONSOLE_LOG,
data=browser_log,
)
if browser_state.browser_context and browser_state.browser_artifacts.traces_dir: if browser_state.browser_context and browser_state.browser_artifacts.traces_dir:
trace_path = f"{browser_state.browser_artifacts.traces_dir}/{task.task_id}.zip" trace_path = f"{browser_state.browser_artifacts.traces_dir}/{task.task_id}.zip"
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(

View File

@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field, field_serializer
class ArtifactType(StrEnum): class ArtifactType(StrEnum):
RECORDING = "recording" RECORDING = "recording"
BROWSER_CONSOLE_LOG = "browser_console_log"
# DEPRECATED. pls use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL # DEPRECATED. pls use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL
SCREENSHOT = "screenshot" SCREENSHOT = "screenshot"

View File

@@ -6,6 +6,7 @@ from skyvern.forge.sdk.models import Step
# TODO: This should be a part of the ArtifactType model # TODO: This should be a part of the ArtifactType model
FILE_EXTENTSION_MAP: dict[ArtifactType, str] = { FILE_EXTENTSION_MAP: dict[ArtifactType, str] = {
ArtifactType.RECORDING: "webm", ArtifactType.RECORDING: "webm",
ArtifactType.BROWSER_CONSOLE_LOG: "log",
ArtifactType.SCREENSHOT_LLM: "png", ArtifactType.SCREENSHOT_LLM: "png",
ArtifactType.SCREENSHOT_ACTION: "png", ArtifactType.SCREENSHOT_ACTION: "png",
ArtifactType.SCREENSHOT_FINAL: "png", ArtifactType.SCREENSHOT_FINAL: "png",

View File

@@ -843,6 +843,25 @@ class WorkflowService:
data=har_data, data=har_data,
) )
async def persist_browser_console_log(
    self,
    browser_state: BrowserState,
    last_step: Step,
    workflow: Workflow,
    workflow_run: WorkflowRun,
) -> None:
    """Save the browser console log of a workflow run as an artifact of ``last_step``.

    The log is fetched through ``app.BROWSER_MANAGER``; when it comes back
    empty, no artifact is created.
    """
    console_log = await app.BROWSER_MANAGER.get_browser_console_log(
        workflow_id=workflow.workflow_id,
        workflow_run_id=workflow_run.workflow_run_id,
        browser_state=browser_state,
    )
    # Guard clause: an empty log is not worth an artifact.
    if not console_log:
        return

    await app.ARTIFACT_MANAGER.create_artifact(
        step=last_step,
        artifact_type=ArtifactType.BROWSER_CONSOLE_LOG,
        data=console_log,
    )
async def persist_tracing_data( async def persist_tracing_data(
self, browser_state: BrowserState, last_step: Step, workflow_run: WorkflowRun self, browser_state: BrowserState, last_step: Step, workflow_run: WorkflowRun
) -> None: ) -> None:

View File

@@ -8,9 +8,10 @@ import uuid
from datetime import datetime from datetime import datetime
from typing import Any, Awaitable, Callable, Protocol from typing import Any, Awaitable, Callable, Protocol
import aiofiles
import structlog import structlog
from playwright.async_api import BrowserContext, Error, Page, Playwright, async_playwright from playwright.async_api import BrowserContext, ConsoleMessage, Error, Page, Playwright, async_playwright
from pydantic import BaseModel from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import REPO_ROOT_DIR from skyvern.constants import REPO_ROOT_DIR
@@ -40,6 +41,23 @@ def get_download_dir(workflow_run_id: str | None, task_id: str | None) -> str:
return download_dir return download_dir
def set_browser_console_log(browser_context: BrowserContext, browser_artifacts: BrowserArtifacts) -> str:
    """Register a console listener on ``browser_context`` that appends to a log file.

    When ``browser_artifacts`` has no console log path yet, a new path of the form
    ``<settings.LOG_PATH>/<UTC date>/<uuid4>.log`` is generated, its parent
    directory and an empty file are created, and the path is stored on
    ``browser_artifacts``. An already-set path is reused untouched.

    Args:
        browser_context: Playwright context whose ``console`` events are recorded.
        browser_artifacts: Artifact holder that owns the log path and performs
            the actual appends.

    Returns:
        The path of the console log file.
    """
    if browser_artifacts.browser_console_log_path is None:
        log_path = f"{settings.LOG_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{uuid.uuid4()}.log"
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        # Create the (empty) file up front: a page that never writes to the
        # console would otherwise leave a path that points at no file, and
        # opening it for reading later would raise FileNotFoundError.
        with open(log_path, "a", encoding="utf-8"):
            pass
        browser_artifacts.browser_console_log_path = log_path

    async def browser_console_log(msg: ConsoleMessage) -> None:
        # The trailing "Z" marks the timestamp as UTC, so it must be taken from
        # the UTC clock (same clock as the dated directory above), not local time.
        current_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        key_values = " ".join(f"{key}={value}" for key, value in msg.location.items())
        format_log = f"{current_time}[{msg.type}]{msg.text} {key_values}\n"
        await browser_artifacts.append_browser_console_log(format_log)

    LOG.info("browser console log is saved", log_path=browser_artifacts.browser_console_log_path)
    browser_context.on("console", browser_console_log)
    return browser_artifacts.browser_console_log_path
class BrowserContextCreator(Protocol): class BrowserContextCreator(Protocol):
def __call__( def __call__(
self, playwright: Playwright, **kwargs: dict[str, Any] self, playwright: Playwright, **kwargs: dict[str, Any]
@@ -91,12 +109,14 @@ class BrowserContextFactory:
har_path: str | None = None, har_path: str | None = None,
traces_dir: str | None = None, traces_dir: str | None = None,
browser_session_dir: str | None = None, browser_session_dir: str | None = None,
browser_console_log_path: str | None = None,
) -> BrowserArtifacts: ) -> BrowserArtifacts:
return BrowserArtifacts( return BrowserArtifacts(
video_artifacts=video_artifacts or [], video_artifacts=video_artifacts or [],
har_path=har_path, har_path=har_path,
traces_dir=traces_dir, traces_dir=traces_dir,
browser_session_dir=browser_session_dir, browser_session_dir=browser_session_dir,
browser_console_log_path=browser_console_log_path,
) )
@classmethod @classmethod
@@ -113,6 +133,7 @@ class BrowserContextFactory:
if not creator: if not creator:
raise UnknownBrowserType(browser_type) raise UnknownBrowserType(browser_type)
browser_context, browser_artifacts, cleanup_func = await creator(playwright, **kwargs) browser_context, browser_artifacts, cleanup_func = await creator(playwright, **kwargs)
set_browser_console_log(browser_context=browser_context, browser_artifacts=browser_artifacts)
return browser_context, browser_artifacts, cleanup_func return browser_context, browser_artifacts, cleanup_func
except UnknownBrowserType as e: except UnknownBrowserType as e:
raise e raise e
@@ -141,6 +162,24 @@ class BrowserArtifacts(BaseModel):
har_path: str | None = None har_path: str | None = None
traces_dir: str | None = None traces_dir: str | None = None
browser_session_dir: str | None = None browser_session_dir: str | None = None
browser_console_log_path: str | None = None
_browser_console_log_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)
async def append_browser_console_log(self, msg: str) -> int:
    """Append ``msg`` to the browser console log file.

    Args:
        msg: Already-formatted log text to append.

    Returns:
        The value returned by the file's ``write`` call, or ``0`` when no
        console log path is configured.
    """
    if self.browser_console_log_path is None:
        return 0

    # The lock serializes access to the file from this object's own methods.
    async with self._browser_console_log_lock:
        # Console messages can contain arbitrary page text; pin the encoding so
        # non-ASCII output does not depend on (or fail under) the locale default.
        async with aiofiles.open(self.browser_console_log_path, "a", encoding="utf-8") as f:
            return await f.write(msg)
async def read_browser_console_log(self) -> bytes:
if self.browser_console_log_path is None:
return b""
async with self._browser_console_log_lock:
async with aiofiles.open(self.browser_console_log_path, "rb") as f:
return await f.read()
async def _create_headless_chromium( async def _create_headless_chromium(

View File

@@ -169,6 +169,24 @@ class BrowserManager:
) )
return b"" return b""
async def get_browser_console_log(
    self,
    browser_state: BrowserState,
    task_id: str = "",
    workflow_id: str = "",
    workflow_run_id: str = "",
) -> bytes:
    """Return the console log bytes recorded for ``browser_state``.

    ``task_id``, ``workflow_id`` and ``workflow_run_id`` are only used as
    context in the warning that is logged when the browser artifacts carry
    no console log path; in that case ``b""`` is returned.
    """
    artifacts = browser_state.browser_artifacts
    if artifacts.browser_console_log_path is not None:
        return await artifacts.read_browser_console_log()

    LOG.warning(
        "browser console log not found for task",
        task_id=task_id,
        workflow_id=workflow_id,
        workflow_run_id=workflow_run_id,
    )
    return b""
@classmethod @classmethod
async def close(cls) -> None: async def close(cls) -> None:
LOG.info("Closing BrowserManager") LOG.info("Closing BrowserManager")