Upgrade context limit (#235)
This commit is contained in:
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
from playwright.async_api import Browser, Playwright, async_playwright
|
from playwright.async_api import Browser, Playwright, async_playwright
|
||||||
|
|
||||||
@@ -52,9 +54,8 @@ class BrowserManager:
|
|||||||
self.pages[task.task_id] = self.pages[task.workflow_run_id]
|
self.pages[task.task_id] = self.pages[task.workflow_run_id]
|
||||||
return self.pages[task.task_id]
|
return self.pages[task.task_id]
|
||||||
|
|
||||||
# TODO: percentage to use new context tree, starting from 20%
|
# TODO: percentage (50%) to use new context tree
|
||||||
# new_ctx = random.choices([False, True], weights=[0.8, 0.2], k=1)[0]
|
new_ctx = random.choices([False, True], weights=[0.5, 0.5], k=1)[0]
|
||||||
new_ctx = False
|
|
||||||
LOG.info("Creating browser state for task", task_id=task.task_id, new_ctx=new_ctx)
|
LOG.info("Creating browser state for task", task_id=task.task_id, new_ctx=new_ctx)
|
||||||
browser_state = await self._create_browser_state(task.proxy_location, task.url, new_ctx)
|
browser_state = await self._create_browser_state(task.proxy_location, task.url, new_ctx)
|
||||||
|
|
||||||
|
|||||||
@@ -469,11 +469,6 @@ function getElementContext(element) {
|
|||||||
if (childContext.length > 0) {
|
if (childContext.length > 0) {
|
||||||
fullContext.push(childContext);
|
fullContext.push(childContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
const charLimit = 1000;
|
|
||||||
if (fullContext.join(";").length > charLimit) {
|
|
||||||
fullContext = new Array();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return fullContext.join(";");
|
return fullContext.join(";");
|
||||||
}
|
}
|
||||||
@@ -509,10 +504,9 @@ function getElementContent(element, skipped_element = null) {
|
|||||||
nodeContent = cleanupText(nodeTextContentList.join(";"));
|
nodeContent = cleanupText(nodeTextContentList.join(";"));
|
||||||
}
|
}
|
||||||
let finalTextContent = cleanupText(textContent);
|
let finalTextContent = cleanupText(textContent);
|
||||||
|
|
||||||
// Currently we don't support too much context. Character limit is 1000 per element.
|
// Currently we don't support too much context. Character limit is 1000 per element.
|
||||||
// we don't think element context has to be that big
|
// we don't think element context has to be that big
|
||||||
const charLimit = 1000;
|
const charLimit = 5000;
|
||||||
if (finalTextContent.length > charLimit) {
|
if (finalTextContent.length > charLimit) {
|
||||||
if (nodeContent.length <= charLimit) {
|
if (nodeContent.length <= charLimit) {
|
||||||
finalTextContent = nodeContent;
|
finalTextContent = nodeContent;
|
||||||
@@ -842,13 +836,13 @@ function buildTreeFromBody(new_ctx = false) {
|
|||||||
) {
|
) {
|
||||||
let grandParentElement = parentElement.parentElement;
|
let grandParentElement = parentElement.parentElement;
|
||||||
if (grandParentElement) {
|
if (grandParentElement) {
|
||||||
let context = getElementContext(grandParentElement, element.context);
|
let context = getElementContext(grandParentElement);
|
||||||
if (context.length > 0) {
|
if (context.length > 0) {
|
||||||
ctx.push(context);
|
ctx.push(context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let context = getElementContext(parentElement, element.context);
|
let context = getElementContext(parentElement);
|
||||||
if (context.length > 0) {
|
if (context.length > 0) {
|
||||||
ctx.push(context);
|
ctx.push(context);
|
||||||
}
|
}
|
||||||
@@ -941,8 +935,7 @@ function buildTreeFromBody(new_ctx = false) {
|
|||||||
ctxList = getContextByParent(element, ctxList);
|
ctxList = getContextByParent(element, ctxList);
|
||||||
ctxList = getContextByTable(element, ctxList);
|
ctxList = getContextByTable(element, ctxList);
|
||||||
const context = ctxList.join(";");
|
const context = ctxList.join(";");
|
||||||
// const context = getContextByParent(element)
|
if (context && context.length <= 5000) {
|
||||||
if (context && context.length <= 1000) {
|
|
||||||
element.context = context;
|
element.context = context;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user