feat: clean auto list data extraction
This commit is contained in:
@@ -304,8 +304,6 @@ export const BrowserWindow = () => {
|
|||||||
|
|
||||||
const createFieldsFromChildSelectors = useCallback(
|
const createFieldsFromChildSelectors = useCallback(
|
||||||
(childSelectors: string[], listSelector: string) => {
|
(childSelectors: string[], listSelector: string) => {
|
||||||
if (!childSelectors.length || !currentSnapshot) return {};
|
|
||||||
|
|
||||||
const iframeElement = document.querySelector(
|
const iframeElement = document.querySelector(
|
||||||
"#dom-browser-iframe"
|
"#dom-browser-iframe"
|
||||||
) as HTMLIFrameElement;
|
) as HTMLIFrameElement;
|
||||||
@@ -323,7 +321,6 @@ export const BrowserWindow = () => {
|
|||||||
|
|
||||||
const uniqueChildSelectors = [...new Set(childSelectors)];
|
const uniqueChildSelectors = [...new Set(childSelectors)];
|
||||||
|
|
||||||
// Filter child selectors that occur in at least 2 out of first 10 list elements
|
|
||||||
const validateChildSelectors = (selectors: string[]): string[] => {
|
const validateChildSelectors = (selectors: string[]): string[] => {
|
||||||
try {
|
try {
|
||||||
// Get first 10 list elements
|
// Get first 10 list elements
|
||||||
@@ -352,13 +349,10 @@ export const BrowserWindow = () => {
|
|||||||
|
|
||||||
// If we can't access the element, it's likely in shadow DOM - include it
|
// If we can't access the element, it's likely in shadow DOM - include it
|
||||||
if (!testElement) {
|
if (!testElement) {
|
||||||
console.log(`Including potentially shadow DOM selector: ${selector}`);
|
|
||||||
validSelectors.push(selector);
|
validSelectors.push(selector);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} catch (accessError) {
|
} catch (accessError) {
|
||||||
// If there's an error accessing, assume shadow DOM and include it
|
|
||||||
console.log(`Including selector due to access error: ${selector}`);
|
|
||||||
validSelectors.push(selector);
|
validSelectors.push(selector);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -395,7 +389,6 @@ export const BrowserWindow = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Enhanced XPath evaluation for multiple elements
|
|
||||||
const evaluateXPathAllWithShadowSupport = (
|
const evaluateXPathAllWithShadowSupport = (
|
||||||
document: Document,
|
document: Document,
|
||||||
xpath: string,
|
xpath: string,
|
||||||
@@ -423,8 +416,6 @@ export const BrowserWindow = () => {
|
|||||||
return elements;
|
return elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal
|
|
||||||
// This is a simplified version - for multiple elements, we'll primarily rely on regular XPath
|
|
||||||
return elements;
|
return elements;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("XPath evaluation failed:", xpath, err);
|
console.error("XPath evaluation failed:", xpath, err);
|
||||||
@@ -432,7 +423,9 @@ export const BrowserWindow = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors);
|
const isValidData = (text: string | null | undefined): boolean => {
|
||||||
|
return !!text && text.trim().length > 0;
|
||||||
|
};
|
||||||
|
|
||||||
const isElementVisible = (element: HTMLElement): boolean => {
|
const isElementVisible = (element: HTMLElement): boolean => {
|
||||||
try {
|
try {
|
||||||
@@ -443,443 +436,119 @@ export const BrowserWindow = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const isValidData = (data: string): boolean => {
|
const createFieldData = (element: HTMLElement, selector: string, forceAttribute?: string) => {
|
||||||
if (!data || data.trim().length === 0) return false;
|
const tagName = element.tagName.toLowerCase();
|
||||||
|
let data = '';
|
||||||
|
let attribute = forceAttribute || 'innerText';
|
||||||
|
|
||||||
const trimmed = data.trim();
|
if (forceAttribute) {
|
||||||
|
if (forceAttribute === 'href') {
|
||||||
// Filter out single letters
|
data = element.getAttribute('href') || '';
|
||||||
if (trimmed.length === 1) {
|
} else if (forceAttribute === 'innerText') {
|
||||||
return false;
|
data = (element.textContent || '').trim();
|
||||||
}
|
|
||||||
|
|
||||||
// Filter out pure symbols/punctuation
|
|
||||||
if (trimmed.length < 3 && /^[^\w\s]+$/.test(trimmed)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Filter out whitespace and punctuation only
|
|
||||||
if (/^[\s\p{P}\p{S}]*$/u.test(trimmed)) return false;
|
|
||||||
|
|
||||||
return trimmed.length > 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Enhanced shadow DOM-aware element evaluation
|
|
||||||
const evaluateXPathWithShadowSupport = (
|
|
||||||
document: Document,
|
|
||||||
xpath: string,
|
|
||||||
isShadow: boolean = false
|
|
||||||
): Element | null => {
|
|
||||||
try {
|
|
||||||
// First try regular XPath evaluation
|
|
||||||
const result = document.evaluate(
|
|
||||||
xpath,
|
|
||||||
document,
|
|
||||||
null,
|
|
||||||
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
|
||||||
null
|
|
||||||
).singleNodeValue as Element | null;
|
|
||||||
|
|
||||||
if (!isShadow || result) {
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
} else if (tagName === 'img') {
|
||||||
// If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal
|
data = element.getAttribute('src') || '';
|
||||||
let cleanPath = xpath;
|
attribute = 'src';
|
||||||
let isIndexed = false;
|
} else if (tagName === 'a') {
|
||||||
|
const href = element.getAttribute('href') || '';
|
||||||
const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
|
const text = (element.textContent || '').trim();
|
||||||
if (indexedMatch) {
|
if (href && href !== '#' && !href.startsWith('javascript:')) {
|
||||||
cleanPath = indexedMatch[1] + indexedMatch[3];
|
data = href;
|
||||||
isIndexed = true;
|
attribute = 'href';
|
||||||
|
} else if (text) {
|
||||||
|
data = text;
|
||||||
|
attribute = 'innerText';
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
const pathParts = cleanPath
|
data = (element.textContent || '').trim();
|
||||||
.replace(/^\/\//, "")
|
attribute = 'innerText';
|
||||||
.split("/")
|
|
||||||
.map((p) => p.trim())
|
|
||||||
.filter((p) => p.length > 0);
|
|
||||||
|
|
||||||
let currentContexts: (Document | Element | ShadowRoot)[] = [document];
|
|
||||||
|
|
||||||
for (let i = 0; i < pathParts.length; i++) {
|
|
||||||
const part = pathParts[i];
|
|
||||||
const nextContexts: (Element | ShadowRoot)[] = [];
|
|
||||||
|
|
||||||
for (const ctx of currentContexts) {
|
|
||||||
const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/);
|
|
||||||
let partWithoutPosition = part;
|
|
||||||
let requestedPosition: number | null = null;
|
|
||||||
|
|
||||||
if (positionalMatch) {
|
|
||||||
partWithoutPosition = positionalMatch[1];
|
|
||||||
requestedPosition = parseInt(positionalMatch[2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
const matched = queryInsideContext(ctx, partWithoutPosition);
|
|
||||||
|
|
||||||
let elementsToAdd = matched;
|
|
||||||
if (requestedPosition !== null) {
|
|
||||||
const index = requestedPosition - 1;
|
|
||||||
if (index >= 0 && index < matched.length) {
|
|
||||||
elementsToAdd = [matched[index]];
|
|
||||||
} else {
|
|
||||||
elementsToAdd = [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
elementsToAdd.forEach((el) => {
|
|
||||||
nextContexts.push(el);
|
|
||||||
if (el.shadowRoot) {
|
|
||||||
nextContexts.push(el.shadowRoot);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nextContexts.length === 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
currentContexts = nextContexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (currentContexts.length > 0) {
|
|
||||||
if (isIndexed && indexedMatch) {
|
|
||||||
const requestedIndex = parseInt(indexedMatch[2]) - 1;
|
|
||||||
if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
|
|
||||||
return currentContexts[requestedIndex] as Element;
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return currentContexts[0] as Element;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
} catch (err) {
|
|
||||||
console.error("XPath evaluation failed:", xpath, err);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const queryInsideContext = (
|
|
||||||
context: Document | Element | ShadowRoot,
|
|
||||||
part: string
|
|
||||||
): Element[] => {
|
|
||||||
try {
|
|
||||||
const { tagName, conditions } = parseXPathPart(part);
|
|
||||||
|
|
||||||
const candidateElements = Array.from(context.querySelectorAll(tagName));
|
|
||||||
if (candidateElements.length === 0) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
const matchingElements = candidateElements.filter((el) => {
|
|
||||||
return elementMatchesConditions(el, conditions);
|
|
||||||
});
|
|
||||||
|
|
||||||
return matchingElements;
|
|
||||||
} catch (err) {
|
|
||||||
console.error("Error in queryInsideContext:", err);
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const parseXPathPart = (
|
|
||||||
part: string
|
|
||||||
): { tagName: string; conditions: string[] } => {
|
|
||||||
const tagMatch = part.match(/^([a-zA-Z0-9-]+)/);
|
|
||||||
const tagName = tagMatch ? tagMatch[1] : "*";
|
|
||||||
|
|
||||||
const conditionMatches = part.match(/\[([^\]]+)\]/g);
|
|
||||||
const conditions = conditionMatches
|
|
||||||
? conditionMatches.map((c) => c.slice(1, -1))
|
|
||||||
: [];
|
|
||||||
|
|
||||||
return { tagName, conditions };
|
|
||||||
};
|
|
||||||
|
|
||||||
const elementMatchesConditions = (
|
|
||||||
element: Element,
|
|
||||||
conditions: string[]
|
|
||||||
): boolean => {
|
|
||||||
for (const condition of conditions) {
|
|
||||||
if (!elementMatchesCondition(element, condition)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
const elementMatchesCondition = (
|
|
||||||
element: Element,
|
|
||||||
condition: string
|
|
||||||
): boolean => {
|
|
||||||
condition = condition.trim();
|
|
||||||
|
|
||||||
if (/^\d+$/.test(condition)) {
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle @attribute="value"
|
if (!data) return null;
|
||||||
const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/);
|
|
||||||
if (attrMatch) {
|
|
||||||
const [, attr, value] = attrMatch;
|
|
||||||
const elementValue = element.getAttribute(attr);
|
|
||||||
return elementValue === value;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle contains(@class, 'value')
|
return {
|
||||||
const classContainsMatch = condition.match(
|
data,
|
||||||
/^contains\(@class,\s*["']([^"']+)["']\)$/
|
selectorObj: {
|
||||||
);
|
selector,
|
||||||
if (classContainsMatch) {
|
attribute,
|
||||||
const className = classContainsMatch[1];
|
tag: tagName.toUpperCase(),
|
||||||
return element.classList.contains(className);
|
isShadow: element.getRootNode() instanceof ShadowRoot
|
||||||
}
|
|
||||||
|
|
||||||
// Handle contains(@attribute, 'value')
|
|
||||||
const attrContainsMatch = condition.match(
|
|
||||||
/^contains\(@([^,]+),\s*["']([^"']+)["']\)$/
|
|
||||||
);
|
|
||||||
if (attrContainsMatch) {
|
|
||||||
const [, attr, value] = attrContainsMatch;
|
|
||||||
const elementValue = element.getAttribute(attr) || "";
|
|
||||||
return elementValue.includes(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle text()="value"
|
|
||||||
const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/);
|
|
||||||
if (textMatch) {
|
|
||||||
const expectedText = textMatch[1];
|
|
||||||
const elementText = element.textContent?.trim() || "";
|
|
||||||
return elementText === expectedText;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle contains(text(), 'value')
|
|
||||||
const textContainsMatch = condition.match(
|
|
||||||
/^contains\(text\(\),\s*["']([^"']+)["']\)$/
|
|
||||||
);
|
|
||||||
if (textContainsMatch) {
|
|
||||||
const expectedText = textContainsMatch[1];
|
|
||||||
const elementText = element.textContent?.trim() || "";
|
|
||||||
return elementText.includes(expectedText);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle count(*)=0 (element has no children)
|
|
||||||
if (condition === "count(*)=0") {
|
|
||||||
return element.children.length === 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle other count conditions
|
|
||||||
const countMatch = condition.match(/^count\(\*\)=(\d+)$/);
|
|
||||||
if (countMatch) {
|
|
||||||
const expectedCount = parseInt(countMatch[1]);
|
|
||||||
return element.children.length === expectedCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Enhanced value extraction with shadow DOM support
|
|
||||||
const extractValueWithShadowSupport = (
|
|
||||||
element: Element,
|
|
||||||
attribute: string
|
|
||||||
): string | null => {
|
|
||||||
if (!element) return null;
|
|
||||||
|
|
||||||
const baseURL =
|
|
||||||
element.ownerDocument?.location?.href || window.location.origin;
|
|
||||||
|
|
||||||
// Check shadow DOM content first
|
|
||||||
if (element.shadowRoot) {
|
|
||||||
const shadowContent = element.shadowRoot.textContent;
|
|
||||||
if (shadowContent?.trim()) {
|
|
||||||
return shadowContent.trim();
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (attribute === "innerText") {
|
|
||||||
let textContent =
|
|
||||||
(element as HTMLElement).innerText?.trim() ||
|
|
||||||
(element as HTMLElement).textContent?.trim();
|
|
||||||
|
|
||||||
if (!textContent) {
|
|
||||||
const dataAttributes = [
|
|
||||||
"data-600",
|
|
||||||
"data-text",
|
|
||||||
"data-label",
|
|
||||||
"data-value",
|
|
||||||
"data-content",
|
|
||||||
];
|
|
||||||
for (const attr of dataAttributes) {
|
|
||||||
const dataValue = element.getAttribute(attr);
|
|
||||||
if (dataValue && dataValue.trim()) {
|
|
||||||
textContent = dataValue.trim();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return textContent || null;
|
|
||||||
} else if (attribute === "innerHTML") {
|
|
||||||
return element.innerHTML?.trim() || null;
|
|
||||||
} else if (attribute === "href") {
|
|
||||||
let anchorElement = element;
|
|
||||||
|
|
||||||
if (element.tagName !== "A") {
|
|
||||||
anchorElement =
|
|
||||||
element.closest("a") ||
|
|
||||||
element.parentElement?.closest("a") ||
|
|
||||||
element;
|
|
||||||
}
|
|
||||||
|
|
||||||
const hrefValue = anchorElement.getAttribute("href");
|
|
||||||
if (!hrefValue || hrefValue.trim() === "") {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
return new URL(hrefValue, baseURL).href;
|
|
||||||
} catch (e) {
|
|
||||||
console.warn("Error creating URL from", hrefValue, e);
|
|
||||||
return hrefValue;
|
|
||||||
}
|
|
||||||
} else if (attribute === "src") {
|
|
||||||
const attrValue = element.getAttribute(attribute);
|
|
||||||
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
|
||||||
|
|
||||||
if (!dataAttr || dataAttr.trim() === "") {
|
|
||||||
const style = window.getComputedStyle(element as HTMLElement);
|
|
||||||
const bgImage = style.backgroundImage;
|
|
||||||
if (bgImage && bgImage !== "none") {
|
|
||||||
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
|
||||||
return matches ? new URL(matches[1], baseURL).href : null;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
return new URL(dataAttr, baseURL).href;
|
|
||||||
} catch (e) {
|
|
||||||
console.warn("Error creating URL from", dataAttr, e);
|
|
||||||
return dataAttr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return element.getAttribute(attribute);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Simple deepest child finder - limit depth to prevent hanging
|
|
||||||
const findDeepestChild = (element: HTMLElement): HTMLElement => {
|
|
||||||
let deepest = element;
|
|
||||||
let maxDepth = 0;
|
|
||||||
|
|
||||||
const traverse = (el: HTMLElement, depth: number) => {
|
|
||||||
if (depth > 3) return;
|
|
||||||
|
|
||||||
const text = el.textContent?.trim() || "";
|
|
||||||
if (isValidData(text) && depth > maxDepth) {
|
|
||||||
maxDepth = depth;
|
|
||||||
deepest = el;
|
|
||||||
}
|
|
||||||
|
|
||||||
const children = Array.from(el.children).slice(0, 3);
|
|
||||||
children.forEach((child) => {
|
|
||||||
if (child instanceof HTMLElement) {
|
|
||||||
traverse(child, depth + 1);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
traverse(element, 0);
|
|
||||||
return deepest;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
validatedChildSelectors.forEach((childSelector, index) => {
|
const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors);
|
||||||
|
|
||||||
|
validatedChildSelectors.forEach((selector, index) => {
|
||||||
try {
|
try {
|
||||||
// Detect if this selector should use shadow DOM traversal
|
const elements = evaluateXPathAllWithShadowSupport(
|
||||||
const isShadowSelector = childSelector.includes('>>') ||
|
|
||||||
childSelector.startsWith('//') &&
|
|
||||||
(listSelector.includes('>>') || currentSnapshot?.snapshot);
|
|
||||||
|
|
||||||
const element = evaluateXPathWithShadowSupport(
|
|
||||||
iframeElement.contentDocument!,
|
iframeElement.contentDocument!,
|
||||||
childSelector,
|
selector,
|
||||||
isShadowSelector
|
selector.includes(">>") || selector.startsWith("//")
|
||||||
) as HTMLElement;
|
);
|
||||||
|
|
||||||
if (element && isElementVisible(element)) {
|
if (elements.length === 0) return;
|
||||||
|
|
||||||
|
const element = elements[0] as HTMLElement;
|
||||||
|
const tagName = element.tagName.toLowerCase();
|
||||||
|
const isShadow = element.getRootNode() instanceof ShadowRoot;
|
||||||
|
|
||||||
|
if (isElementVisible(element)) {
|
||||||
const rect = element.getBoundingClientRect();
|
const rect = element.getBoundingClientRect();
|
||||||
const position = { x: rect.left, y: rect.top };
|
const position = { x: rect.left, y: rect.top };
|
||||||
|
|
||||||
const tagName = element.tagName.toLowerCase();
|
if (tagName === 'a') {
|
||||||
const isShadow = element.getRootNode() instanceof ShadowRoot;
|
const href = element.getAttribute('href');
|
||||||
|
const text = (element.textContent || '').trim();
|
||||||
if (tagName === "a") {
|
|
||||||
const anchor = element as HTMLAnchorElement;
|
|
||||||
const href = extractValueWithShadowSupport(anchor, "href");
|
|
||||||
const text = extractValueWithShadowSupport(anchor, "innerText");
|
|
||||||
|
|
||||||
if (
|
|
||||||
href &&
|
|
||||||
href.trim() !== "" &&
|
|
||||||
href !== window.location.href &&
|
|
||||||
!href.startsWith("javascript:") &&
|
|
||||||
!href.startsWith("#")
|
|
||||||
) {
|
|
||||||
const fieldIdHref = Date.now() + index * 1000;
|
|
||||||
|
|
||||||
candidateFields.push({
|
|
||||||
id: fieldIdHref,
|
|
||||||
element: element,
|
|
||||||
isLeaf: true,
|
|
||||||
depth: 0,
|
|
||||||
position: position,
|
|
||||||
field: {
|
|
||||||
id: fieldIdHref,
|
|
||||||
type: "text",
|
|
||||||
label: `Label ${index * 2 + 1}`,
|
|
||||||
data: href,
|
|
||||||
selectorObj: {
|
|
||||||
selector: childSelector,
|
|
||||||
tag: element.tagName,
|
|
||||||
isShadow: isShadow,
|
|
||||||
attribute: "href",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const fieldIdText = Date.now() + index * 1000 + 1;
|
|
||||||
|
|
||||||
if (text && isValidData(text)) {
|
if (text && isValidData(text)) {
|
||||||
candidateFields.push({
|
const textField = createFieldData(element, selector, 'innerText');
|
||||||
id: fieldIdText,
|
if (textField && textField.data) {
|
||||||
element: element,
|
const fieldId = Date.now() + index * 1000;
|
||||||
isLeaf: true,
|
|
||||||
depth: 0,
|
candidateFields.push({
|
||||||
position: position,
|
id: fieldId,
|
||||||
field: {
|
element: element,
|
||||||
id: fieldIdText,
|
isLeaf: true,
|
||||||
type: "text",
|
depth: 0,
|
||||||
label: `Label ${index * 2 + 2}`,
|
position: position,
|
||||||
data: text,
|
field: {
|
||||||
selectorObj: {
|
id: fieldId,
|
||||||
selector: childSelector,
|
type: "text",
|
||||||
tag: element.tagName,
|
label: `Label ${index * 2 + 1}`,
|
||||||
isShadow: isShadow,
|
data: textField.data,
|
||||||
attribute: "innerText",
|
selectorObj: textField.selectorObj
|
||||||
},
|
}
|
||||||
},
|
});
|
||||||
});
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (href && href !== '#' && !href.startsWith('javascript:')) {
|
||||||
|
const hrefField = createFieldData(element, selector, 'href');
|
||||||
|
if (hrefField && hrefField.data) {
|
||||||
|
const fieldId = Date.now() + index * 1000 + 1;
|
||||||
|
|
||||||
|
candidateFields.push({
|
||||||
|
id: fieldId,
|
||||||
|
element: element,
|
||||||
|
isLeaf: true,
|
||||||
|
depth: 0,
|
||||||
|
position: position,
|
||||||
|
field: {
|
||||||
|
id: fieldId,
|
||||||
|
type: "text",
|
||||||
|
label: `Label ${index * 2 + 2}`,
|
||||||
|
data: hrefField.data,
|
||||||
|
selectorObj: hrefField.selectorObj
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (tagName === "img") {
|
} else if (tagName === "img") {
|
||||||
const img = element as HTMLImageElement;
|
const src = element.getAttribute("src");
|
||||||
const src = extractValueWithShadowSupport(img, "src");
|
|
||||||
const alt = extractValueWithShadowSupport(img, "alt");
|
|
||||||
|
|
||||||
if (src && !src.startsWith("data:") && src.length > 10) {
|
if (src && isValidData(src)) {
|
||||||
const fieldId = Date.now() + index * 1000;
|
const fieldId = Date.now() + index * 1000;
|
||||||
|
|
||||||
candidateFields.push({
|
candidateFields.push({
|
||||||
@@ -894,7 +563,7 @@ export const BrowserWindow = () => {
|
|||||||
label: `Label ${index + 1}`,
|
label: `Label ${index + 1}`,
|
||||||
data: src,
|
data: src,
|
||||||
selectorObj: {
|
selectorObj: {
|
||||||
selector: childSelector,
|
selector: selector,
|
||||||
tag: element.tagName,
|
tag: element.tagName,
|
||||||
isShadow: isShadow,
|
isShadow: isShadow,
|
||||||
attribute: "src",
|
attribute: "src",
|
||||||
@@ -902,9 +571,11 @@ export const BrowserWindow = () => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const fieldData = createFieldData(element, selector);
|
||||||
|
|
||||||
if (alt && isValidData(alt)) {
|
if (fieldData && fieldData.data && isValidData(fieldData.data)) {
|
||||||
const fieldId = Date.now() + index * 1000 + 1;
|
const fieldId = Date.now() + index * 1000;
|
||||||
|
|
||||||
candidateFields.push({
|
candidateFields.push({
|
||||||
id: fieldId,
|
id: fieldId,
|
||||||
@@ -912,61 +583,19 @@ export const BrowserWindow = () => {
|
|||||||
isLeaf: true,
|
isLeaf: true,
|
||||||
depth: 0,
|
depth: 0,
|
||||||
position: position,
|
position: position,
|
||||||
field: {
|
|
||||||
id: fieldId,
|
|
||||||
type: "text",
|
|
||||||
label: `Label ${index + 2}`,
|
|
||||||
data: alt,
|
|
||||||
selectorObj: {
|
|
||||||
selector: childSelector,
|
|
||||||
tag: element.tagName,
|
|
||||||
isShadow: isShadow,
|
|
||||||
attribute: "alt",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const deepestElement = findDeepestChild(element);
|
|
||||||
const data = extractValueWithShadowSupport(deepestElement, "innerText");
|
|
||||||
|
|
||||||
if (data && isValidData(data)) {
|
|
||||||
const isLeaf = isLeafElement(deepestElement);
|
|
||||||
const depth = getElementDepthFromList(
|
|
||||||
deepestElement,
|
|
||||||
listSelector,
|
|
||||||
iframeElement.contentDocument!
|
|
||||||
);
|
|
||||||
|
|
||||||
const fieldId = Date.now() + index;
|
|
||||||
|
|
||||||
candidateFields.push({
|
|
||||||
id: fieldId,
|
|
||||||
element: deepestElement,
|
|
||||||
isLeaf: isLeaf,
|
|
||||||
depth: depth,
|
|
||||||
position: position,
|
|
||||||
field: {
|
field: {
|
||||||
id: fieldId,
|
id: fieldId,
|
||||||
type: "text",
|
type: "text",
|
||||||
label: `Label ${index + 1}`,
|
label: `Label ${index + 1}`,
|
||||||
data: data,
|
data: fieldData.data,
|
||||||
selectorObj: {
|
selectorObj: fieldData.selectorObj
|
||||||
selector: childSelector,
|
}
|
||||||
tag: deepestElement.tagName,
|
|
||||||
isShadow: deepestElement.getRootNode() instanceof ShadowRoot,
|
|
||||||
attribute: "innerText",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn(
|
console.warn(`Failed to process child selector ${selector}:`, error);
|
||||||
`Failed to process child selector ${childSelector}:`,
|
|
||||||
error
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -981,58 +610,12 @@ export const BrowserWindow = () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const filteredCandidates = removeParentChildDuplicates(candidateFields);
|
const filteredCandidates = removeParentChildDuplicates(candidateFields);
|
||||||
|
|
||||||
const finalFields = removeDuplicateContent(filteredCandidates);
|
const finalFields = removeDuplicateContent(filteredCandidates);
|
||||||
return finalFields;
|
return finalFields;
|
||||||
},
|
},
|
||||||
[currentSnapshot]
|
[currentSnapshot]
|
||||||
);
|
);
|
||||||
|
|
||||||
const isLeafElement = (element: HTMLElement): boolean => {
|
|
||||||
const children = Array.from(element.children) as HTMLElement[];
|
|
||||||
|
|
||||||
if (children.length === 0) return true;
|
|
||||||
|
|
||||||
const hasContentfulChildren = children.some((child) => {
|
|
||||||
const text = child.textContent?.trim() || "";
|
|
||||||
return text.length > 0 && text !== element.textContent?.trim();
|
|
||||||
});
|
|
||||||
|
|
||||||
return !hasContentfulChildren;
|
|
||||||
};
|
|
||||||
|
|
||||||
const getElementDepthFromList = (
|
|
||||||
element: HTMLElement,
|
|
||||||
listSelector: string,
|
|
||||||
document: Document
|
|
||||||
): number => {
|
|
||||||
try {
|
|
||||||
const listResult = document.evaluate(
|
|
||||||
listSelector,
|
|
||||||
document,
|
|
||||||
null,
|
|
||||||
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
|
||||||
null
|
|
||||||
);
|
|
||||||
|
|
||||||
const listElement = listResult.singleNodeValue as HTMLElement;
|
|
||||||
if (!listElement) return 0;
|
|
||||||
|
|
||||||
let depth = 0;
|
|
||||||
let current = element;
|
|
||||||
|
|
||||||
while (current && current !== listElement && current.parentElement) {
|
|
||||||
depth++;
|
|
||||||
current = current.parentElement;
|
|
||||||
if (depth > 20) break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return current === listElement ? depth : 0;
|
|
||||||
} catch (error) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const removeParentChildDuplicates = (
|
const removeParentChildDuplicates = (
|
||||||
candidates: Array<{
|
candidates: Array<{
|
||||||
id: number;
|
id: number;
|
||||||
|
|||||||
@@ -2499,34 +2499,24 @@ class ClientSelectorGenerator {
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (parentElements.length > 10) {
|
const maxItems = 10;
|
||||||
parentElements = parentElements.slice(0, 10);
|
const limitedParents = parentElements.slice(0, Math.min(maxItems, parentElements.length));
|
||||||
}
|
|
||||||
|
|
||||||
const allChildSelectors = new Set<string>();
|
const allChildSelectors: string[] = [];
|
||||||
const processedParents = new Set<HTMLElement>();
|
|
||||||
|
|
||||||
for (const parentElement of parentElements) {
|
for (let i = 0; i < limitedParents.length; i++) {
|
||||||
if (processedParents.has(parentElement)) continue;
|
const parent = limitedParents[i];
|
||||||
processedParents.add(parentElement);
|
const otherListElements = limitedParents.filter((_, index) => index !== i);
|
||||||
|
|
||||||
const otherListElements = parentElements.filter(
|
const selectors = this.generateOptimizedChildXPaths(
|
||||||
(el) => el !== parentElement
|
parent,
|
||||||
);
|
|
||||||
|
|
||||||
const childSelectors = this.generateOptimizedChildXPaths(
|
|
||||||
parentElement,
|
|
||||||
parentSelector,
|
parentSelector,
|
||||||
iframeDoc,
|
|
||||||
otherListElements
|
otherListElements
|
||||||
);
|
);
|
||||||
|
allChildSelectors.push(...selectors);
|
||||||
for (const selector of childSelectors) {
|
|
||||||
allChildSelectors.add(selector);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = Array.from(allChildSelectors).sort();
|
const result = Array.from(new Set(allChildSelectors)).sort();
|
||||||
this.selectorCache.set(cacheKey, result);
|
this.selectorCache.set(cacheKey, result);
|
||||||
return result;
|
return result;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -2609,7 +2599,6 @@ class ClientSelectorGenerator {
|
|||||||
private generateOptimizedChildXPaths(
|
private generateOptimizedChildXPaths(
|
||||||
parentElement: HTMLElement,
|
parentElement: HTMLElement,
|
||||||
listSelector: string,
|
listSelector: string,
|
||||||
document: Document,
|
|
||||||
otherListElements: HTMLElement[] = []
|
otherListElements: HTMLElement[] = []
|
||||||
): string[] {
|
): string[] {
|
||||||
const selectors: string[] = [];
|
const selectors: string[] = [];
|
||||||
|
|||||||
Reference in New Issue
Block a user