Upgrade context limit (#235)

This commit is contained in:
LawyZheng
2024-04-25 09:38:39 +08:00
committed by GitHub
parent fdef12dac3
commit f000793591
2 changed files with 8 additions and 14 deletions

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import random
import structlog
from playwright.async_api import Browser, Playwright, async_playwright
@@ -52,9 +54,8 @@ class BrowserManager:
self.pages[task.task_id] = self.pages[task.workflow_run_id]
return self.pages[task.task_id]
# TODO: percentage to use new context tree, starting from 20%
# new_ctx = random.choices([False, True], weights=[0.8, 0.2], k=1)[0]
new_ctx = False
# TODO: tune the rollout percentage; the new context tree is currently chosen with 50% probability
new_ctx = random.choices([False, True], weights=[0.5, 0.5], k=1)[0]
LOG.info("Creating browser state for task", task_id=task.task_id, new_ctx=new_ctx)
browser_state = await self._create_browser_state(task.proxy_location, task.url, new_ctx)

View File

@@ -469,11 +469,6 @@ function getElementContext(element) {
if (childContext.length > 0) {
fullContext.push(childContext);
}
const charLimit = 1000;
if (fullContext.join(";").length > charLimit) {
fullContext = new Array();
}
}
return fullContext.join(";");
}
@@ -509,10 +504,9 @@ function getElementContent(element, skipped_element = null) {
nodeContent = cleanupText(nodeTextContentList.join(";"));
}
let finalTextContent = cleanupText(textContent);
// We do not support very large text content: each element's text is capped at `charLimit` characters.
// Element context should not need to be larger than that.
const charLimit = 1000;
const charLimit = 5000;
if (finalTextContent.length > charLimit) {
if (nodeContent.length <= charLimit) {
finalTextContent = nodeContent;
@@ -842,13 +836,13 @@ function buildTreeFromBody(new_ctx = false) {
) {
let grandParentElement = parentElement.parentElement;
if (grandParentElement) {
let context = getElementContext(grandParentElement, element.context);
let context = getElementContext(grandParentElement);
if (context.length > 0) {
ctx.push(context);
}
}
}
let context = getElementContext(parentElement, element.context);
let context = getElementContext(parentElement);
if (context.length > 0) {
ctx.push(context);
}
@@ -941,8 +935,7 @@ function buildTreeFromBody(new_ctx = false) {
ctxList = getContextByParent(element, ctxList);
ctxList = getContextByTable(element, ctxList);
const context = ctxList.join(";");
// const context = getContextByParent(element)
if (context && context.length <= 1000) {
if (context && context.length <= 5000) {
element.context = context;
}