add browser console log artifact (#1097)

This commit is contained in:
LawyZheng
2024-10-31 23:10:11 +08:00
committed by GitHub
parent 00549c921b
commit de8e887e0f
10 changed files with 94 additions and 2 deletions

View File

@@ -48,6 +48,7 @@ repos:
- alembic
- "sqlalchemy[mypy]"
- types-PyYAML
- types-aiofiles
exclude: |
(?x)(
^tests.*|

View File

@@ -19,6 +19,7 @@ COPY . /app
ENV PYTHONPATH="/app:$PYTHONPATH"
ENV VIDEO_PATH=/data/videos
ENV HAR_PATH=/data/har
ENV LOG_PATH=/data/log
ENV ARTIFACT_STORAGE_PATH=/data/artifacts
COPY ./entrypoint-skyvern.sh /app/entrypoint-skyvern.sh

View File

@@ -30,6 +30,7 @@ services:
- ./artifacts:/data/artifacts
- ./videos:/data/videos
- ./har:/data/har
- ./log:/data/log
- ./.streamlit:/app/.streamlit
environment:
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern

View File

@@ -13,6 +13,7 @@ class Settings(BaseSettings):
MAX_SCRAPING_RETRIES: int = 0
VIDEO_PATH: str | None = None
HAR_PATH: str | None = "./har"
LOG_PATH: str = "./log"
BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 120000

View File

@@ -1517,6 +1517,16 @@ class ForgeAgent:
data=har_data,
)
browser_log = await app.BROWSER_MANAGER.get_browser_console_log(
task_id=task.task_id, browser_state=browser_state
)
if browser_log:
await app.ARTIFACT_MANAGER.create_artifact(
step=last_step,
artifact_type=ArtifactType.BROWSER_CONSOLE_LOG,
data=browser_log,
)
if browser_state.browser_context and browser_state.browser_artifacts.traces_dir:
trace_path = f"{browser_state.browser_artifacts.traces_dir}/{task.task_id}.zip"
await app.ARTIFACT_MANAGER.create_artifact(

View File

@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field, field_serializer
class ArtifactType(StrEnum):
RECORDING = "recording"
BROWSER_CONSOLE_LOG = "browser_console_log"
# DEPRECATED. Please use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL instead.
SCREENSHOT = "screenshot"

View File

@@ -6,6 +6,7 @@ from skyvern.forge.sdk.models import Step
# TODO: This should be a part of the ArtifactType model
FILE_EXTENTSION_MAP: dict[ArtifactType, str] = {
ArtifactType.RECORDING: "webm",
ArtifactType.BROWSER_CONSOLE_LOG: "log",
ArtifactType.SCREENSHOT_LLM: "png",
ArtifactType.SCREENSHOT_ACTION: "png",
ArtifactType.SCREENSHOT_FINAL: "png",

View File

@@ -843,6 +843,25 @@ class WorkflowService:
data=har_data,
)
async def persist_browser_console_log(
    self,
    browser_state: BrowserState,
    last_step: Step,
    workflow: Workflow,
    workflow_run: WorkflowRun,
) -> None:
    """Store the browser console log of a workflow run as an artifact on ``last_step``.

    The log is fetched through ``app.BROWSER_MANAGER.get_browser_console_log``; when
    the returned value is empty, no artifact is created.
    """
    console_log = await app.BROWSER_MANAGER.get_browser_console_log(
        workflow_id=workflow.workflow_id,
        workflow_run_id=workflow_run.workflow_run_id,
        browser_state=browser_state,
    )
    # Nothing captured: skip creating an empty artifact.
    if not console_log:
        return

    await app.ARTIFACT_MANAGER.create_artifact(
        step=last_step,
        artifact_type=ArtifactType.BROWSER_CONSOLE_LOG,
        data=console_log,
    )
async def persist_tracing_data(
self, browser_state: BrowserState, last_step: Step, workflow_run: WorkflowRun
) -> None:

View File

@@ -8,9 +8,10 @@ import uuid
from datetime import datetime
from typing import Any, Awaitable, Callable, Protocol
import aiofiles
import structlog
from playwright.async_api import BrowserContext, Error, Page, Playwright, async_playwright
from pydantic import BaseModel
from playwright.async_api import BrowserContext, ConsoleMessage, Error, Page, Playwright, async_playwright
from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings
from skyvern.constants import REPO_ROOT_DIR
@@ -40,6 +41,23 @@ def get_download_dir(workflow_run_id: str | None, task_id: str | None) -> str:
return download_dir
def set_browser_console_log(browser_context: BrowserContext, browser_artifacts: BrowserArtifacts) -> str:
    """Register a console listener on ``browser_context`` that appends each message to a log file.

    If ``browser_artifacts`` has no console log path yet, a new path of the form
    ``<settings.LOG_PATH>/<UTC date>/<uuid4>.log`` is generated, its parent directory
    is created, and the path is stored on ``browser_artifacts``.

    Each console message is written as one line:
    ``<UTC timestamp>[<type>]<text> <key=value pairs of the message location>``.

    Returns:
        The console log path stored on ``browser_artifacts``.
    """
    # Function-scope import: the module imports only ``datetime`` from ``datetime``.
    from datetime import timezone

    if browser_artifacts.browser_console_log_path is None:
        utc_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        log_path = f"{settings.LOG_PATH}/{utc_date}/{uuid.uuid4()}.log"
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        browser_artifacts.browser_console_log_path = log_path

    async def browser_console_log(msg: ConsoleMessage) -> None:
        # The format string ends in a literal "Z", so the clock must be read in UTC;
        # a naive local-time reading would produce a mislabeled timestamp.
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        key_values = " ".join(f"{key}={value}" for key, value in msg.location.items())
        format_log = f"{current_time}[{msg.type}]{msg.text} {key_values}\n"
        await browser_artifacts.append_browser_console_log(format_log)

    LOG.info("browser console log is saved", log_path=browser_artifacts.browser_console_log_path)
    browser_context.on("console", browser_console_log)
    return browser_artifacts.browser_console_log_path
class BrowserContextCreator(Protocol):
def __call__(
self, playwright: Playwright, **kwargs: dict[str, Any]
@@ -91,12 +109,14 @@ class BrowserContextFactory:
har_path: str | None = None,
traces_dir: str | None = None,
browser_session_dir: str | None = None,
browser_console_log_path: str | None = None,
) -> BrowserArtifacts:
return BrowserArtifacts(
video_artifacts=video_artifacts or [],
har_path=har_path,
traces_dir=traces_dir,
browser_session_dir=browser_session_dir,
browser_console_log_path=browser_console_log_path,
)
@classmethod
@@ -113,6 +133,7 @@ class BrowserContextFactory:
if not creator:
raise UnknownBrowserType(browser_type)
browser_context, browser_artifacts, cleanup_func = await creator(playwright, **kwargs)
set_browser_console_log(browser_context=browser_context, browser_artifacts=browser_artifacts)
return browser_context, browser_artifacts, cleanup_func
except UnknownBrowserType as e:
raise e
@@ -141,6 +162,24 @@ class BrowserArtifacts(BaseModel):
har_path: str | None = None
traces_dir: str | None = None
browser_session_dir: str | None = None
browser_console_log_path: str | None = None
_browser_console_log_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)
async def append_browser_console_log(self, msg: str) -> int:
    """Append ``msg`` to the browser console log file.

    Returns:
        The value returned by the file's ``write`` call, or ``0`` when no console
        log path is configured.
    """
    if self.browser_console_log_path is None:
        return 0

    # Serialize file access through the per-instance lock.
    async with self._browser_console_log_lock:
        # Console text may contain arbitrary Unicode; pin UTF-8 so the write does not
        # depend on (or fail under) the platform's default locale encoding.
        async with aiofiles.open(self.browser_console_log_path, "a", encoding="utf-8") as f:
            return await f.write(msg)
async def read_browser_console_log(self) -> bytes:
    """Return the raw bytes of the browser console log file.

    Returns:
        The file content, or ``b""`` when no console log path is configured or the
        log file does not exist yet.
    """
    if self.browser_console_log_path is None:
        return b""

    # Serialize file access through the per-instance lock.
    async with self._browser_console_log_lock:
        try:
            async with aiofiles.open(self.browser_console_log_path, "rb") as f:
                return await f.read()
        except FileNotFoundError:
            # The file is opened in append mode only when a message is written, so a
            # session that produced no console output has a path but no file on disk.
            return b""
async def _create_headless_chromium(

View File

@@ -169,6 +169,24 @@ class BrowserManager:
)
return b""
async def get_browser_console_log(
    self,
    browser_state: BrowserState,
    task_id: str = "",
    workflow_id: str = "",
    workflow_run_id: str = "",
) -> bytes:
    """Return the browser console log bytes recorded for ``browser_state``.

    ``task_id``, ``workflow_id`` and ``workflow_run_id`` are used only as context
    fields on the warning logged when no console log path is set; in that case
    ``b""`` is returned.
    """
    artifacts = browser_state.browser_artifacts
    if artifacts.browser_console_log_path is not None:
        return await artifacts.read_browser_console_log()

    LOG.warning(
        "browser console log not found for task",
        task_id=task_id,
        workflow_id=workflow_id,
        workflow_run_id=workflow_run_id,
    )
    return b""
@classmethod
async def close(cls) -> None:
LOG.info("Closing BrowserManager")