fix: auto extract all fields
This commit is contained in:
@@ -482,12 +482,38 @@ export const BrowserWindow = () => {
|
|||||||
|
|
||||||
validatedChildSelectors.forEach((selector, index) => {
|
validatedChildSelectors.forEach((selector, index) => {
|
||||||
try {
|
try {
|
||||||
const elements = evaluateXPathAllWithShadowSupport(
|
const listElements = evaluateXPathAllWithShadowSupport(
|
||||||
|
iframeElement.contentDocument!,
|
||||||
|
listSelector,
|
||||||
|
listSelector.includes(">>") || listSelector.startsWith("//")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (listElements.length === 0) return;
|
||||||
|
|
||||||
|
const hasNumericPredicate = /\[\d+\](?![^\[]*@)/.test(selector);
|
||||||
|
|
||||||
|
if (hasNumericPredicate && listElements.length >= 3) {
|
||||||
|
const allMatches = evaluateXPathAllWithShadowSupport(
|
||||||
iframeElement.contentDocument!,
|
iframeElement.contentDocument!,
|
||||||
selector,
|
selector,
|
||||||
selector.includes(">>") || selector.startsWith("//")
|
selector.includes(">>") || selector.startsWith("//")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const matchRatio = allMatches.length / listElements.length;
|
||||||
|
|
||||||
|
if (matchRatio < 0.6) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const firstListElement = listElements[0];
|
||||||
|
|
||||||
|
const elements = evaluateXPathAllWithShadowSupport(
|
||||||
|
iframeElement.contentDocument!,
|
||||||
|
selector,
|
||||||
|
selector.includes(">>") || selector.startsWith("//")
|
||||||
|
).filter(el => firstListElement.contains(el as Node));
|
||||||
|
|
||||||
if (elements.length === 0) return;
|
if (elements.length === 0) return;
|
||||||
|
|
||||||
const element = elements[0] as HTMLElement;
|
const element = elements[0] as HTMLElement;
|
||||||
@@ -591,10 +617,44 @@ export const BrowserWindow = () => {
|
|||||||
selectorObj: fieldData.selectorObj
|
selectorObj: fieldData.selectorObj
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
const anchorParent = element.closest('a');
|
||||||
|
if (anchorParent) {
|
||||||
|
const href = anchorParent.getAttribute('href');
|
||||||
|
if (href && href !== '#' && !href.startsWith('javascript:') && isValidData(href)) {
|
||||||
|
let anchorSelector = selector;
|
||||||
|
if (selector.includes('/a[')) {
|
||||||
|
const anchorMatch = selector.match(/(.*\/a\[[^\]]+\])/);
|
||||||
|
if (anchorMatch) {
|
||||||
|
anchorSelector = anchorMatch[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const fieldId = Date.now() + index * 1000 + 500;
|
||||||
|
candidateFields.push({
|
||||||
|
id: fieldId,
|
||||||
|
element: anchorParent as HTMLElement,
|
||||||
|
isLeaf: true,
|
||||||
|
depth: 0,
|
||||||
|
position: position,
|
||||||
|
field: {
|
||||||
|
id: fieldId,
|
||||||
|
type: "text",
|
||||||
|
label: `Label ${index + 1} Link`,
|
||||||
|
data: href,
|
||||||
|
selectorObj: {
|
||||||
|
selector: anchorSelector,
|
||||||
|
attribute: 'href',
|
||||||
|
tag: 'A',
|
||||||
|
isShadow: anchorParent.getRootNode() instanceof ShadowRoot,
|
||||||
|
fallbackSelector: generateFallbackSelector(anchorParent as HTMLElement)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
}
|
||||||
|
} catch (error) {
|
||||||
console.warn(`Failed to process child selector ${selector}:`, error);
|
console.warn(`Failed to process child selector ${selector}:`, error);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -610,8 +670,43 @@ export const BrowserWindow = () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const filteredCandidates = removeParentChildDuplicates(candidateFields);
|
const filteredCandidates = removeParentChildDuplicates(candidateFields);
|
||||||
const finalFields = removeDuplicateContent(filteredCandidates);
|
const cleanedCandidates = filteredCandidates.filter((candidate) => {
|
||||||
return finalFields;
|
const data = candidate.field.data.trim();
|
||||||
|
|
||||||
|
const textChildren = Array.from(candidate.element.children).filter(child =>
|
||||||
|
(child.textContent || '').trim().length > 0
|
||||||
|
);
|
||||||
|
|
||||||
|
if (textChildren.length === 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const childCandidates = filteredCandidates.filter((other) => {
|
||||||
|
if (other === candidate) return false;
|
||||||
|
return candidate.element.contains(other.element);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (childCandidates.length === 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let coveredLength = 0;
|
||||||
|
childCandidates.forEach(child => {
|
||||||
|
const childText = child.field.data.trim();
|
||||||
|
if (data.includes(childText)) {
|
||||||
|
coveredLength += childText.length;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const coverageRatio = coveredLength / data.length;
|
||||||
|
const hasMultipleChildTexts = childCandidates.length >= 2;
|
||||||
|
const highCoverage = coverageRatio > 0.7;
|
||||||
|
|
||||||
|
return !(hasMultipleChildTexts && highCoverage);
|
||||||
|
});
|
||||||
|
|
||||||
|
const finalFields = removeDuplicateContent(cleanedCandidates);
|
||||||
|
return finalFields;
|
||||||
},
|
},
|
||||||
[currentSnapshot]
|
[currentSnapshot]
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user