From f0007935915e73d54793a8ad1f3fe9b82a2a414a Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Thu, 25 Apr 2024 09:38:39 +0800 Subject: [PATCH] Upgrade context limit (#235) --- skyvern/webeye/browser_manager.py | 7 ++++--- skyvern/webeye/scraper/domUtils.js | 15 ++++----------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/skyvern/webeye/browser_manager.py b/skyvern/webeye/browser_manager.py index f0fd697d..93922bad 100644 --- a/skyvern/webeye/browser_manager.py +++ b/skyvern/webeye/browser_manager.py @@ -1,5 +1,7 @@ from __future__ import annotations +import random + import structlog from playwright.async_api import Browser, Playwright, async_playwright @@ -52,9 +54,8 @@ class BrowserManager: self.pages[task.task_id] = self.pages[task.workflow_run_id] return self.pages[task.task_id] - # TODO: percentage to use new context tree, starting from 20% - # new_ctx = random.choices([False, True], weights=[0.8, 0.2], k=1)[0] - new_ctx = False + # TODO: percentage (50%) to use new context tree + new_ctx = random.choices([False, True], weights=[0.5, 0.5], k=1)[0] LOG.info("Creating browser state for task", task_id=task.task_id, new_ctx=new_ctx) browser_state = await self._create_browser_state(task.proxy_location, task.url, new_ctx) diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 1c9df42a..314086e6 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -469,11 +469,6 @@ function getElementContext(element) { if (childContext.length > 0) { fullContext.push(childContext); } - - const charLimit = 1000; - if (fullContext.join(";").length > charLimit) { - fullContext = new Array(); - } } return fullContext.join(";"); } @@ -509,10 +504,9 @@ function getElementContent(element, skipped_element = null) { nodeContent = cleanupText(nodeTextContentList.join(";")); } let finalTextContent = cleanupText(textContent); - // Currently we don't support too much context. Character limit is 1000 per element. // we don't think element context has to be that big - const charLimit = 1000; + const charLimit = 5000; if (finalTextContent.length > charLimit) { if (nodeContent.length <= charLimit) { finalTextContent = nodeContent; @@ -842,13 +836,13 @@ function buildTreeFromBody(new_ctx = false) { ) { let grandParentElement = parentElement.parentElement; if (grandParentElement) { - let context = getElementContext(grandParentElement, element.context); + let context = getElementContext(grandParentElement); if (context.length > 0) { ctx.push(context); } } } - let context = getElementContext(parentElement, element.context); + let context = getElementContext(parentElement); if (context.length > 0) { ctx.push(context); } @@ -941,8 +935,7 @@ function buildTreeFromBody(new_ctx = false) { ctxList = getContextByParent(element, ctxList); ctxList = getContextByTable(element, ctxList); const context = ctxList.join(";"); - // const context = getContextByParent(element) - if (context && context.length <= 1000) { + if (context && context.length <= 5000) { element.context = context; }