diff --git a/src/components/browser/BrowserWindow.tsx b/src/components/browser/BrowserWindow.tsx index 707776ec..0ad16099 100644 --- a/src/components/browser/BrowserWindow.tsx +++ b/src/components/browser/BrowserWindow.tsx @@ -166,7 +166,7 @@ export const BrowserWindow = () => { const [viewportInfo, setViewportInfo] = useState({ width: browserWidth, height: browserHeight }); const [isLoading, setIsLoading] = useState(false); const [cachedChildSelectors, setCachedChildSelectors] = useState([]); - + const [processingGroupCoordinates, setProcessingGroupCoordinates] = useState>([]); const [listSelector, setListSelector] = useState(null); const [fields, setFields] = useState>({}); const [paginationSelector, setPaginationSelector] = useState(''); @@ -308,6 +308,795 @@ export const BrowserWindow = () => { } }, [isDOMMode, getList, listSelector, paginationMode]); + const createFieldsFromChildSelectors = useCallback( + (childSelectors: string[], listSelector: string) => { + if (!childSelectors.length || !currentSnapshot) return {}; + + const iframeElement = document.querySelector( + "#dom-browser-iframe" + ) as HTMLIFrameElement; + + if (!iframeElement?.contentDocument) return {}; + + const candidateFields: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> = []; + + const uniqueChildSelectors = [...new Set(childSelectors)]; + + // Filter child selectors that occur in at least 2 out of first 10 list elements + const validateChildSelectors = (selectors: string[]): string[] => { + try { + // Get first 10 list elements + const listElements = evaluateXPathAllWithShadowSupport( + iframeElement.contentDocument!, + listSelector, + listSelector.includes('>>') || listSelector.startsWith('//') + ).slice(0, 10); + + if (listElements.length < 2) { + return selectors; + } + + const validSelectors: string[] = []; + + for (const selector of selectors) { + let occurrenceCount = 0; + + // Get all elements that match this child selector + const childElements = evaluateXPathAllWithShadowSupport( + iframeElement.contentDocument!, + selector, + selector.includes('>>') || selector.startsWith('//') + ); + + // Check how many of these child elements are contained within our list elements + for (const childElement of childElements) { + for (const listElement of listElements) { + if (listElement.contains(childElement)) { + occurrenceCount++; + break; + } + } + } + + // Only include selectors that occur in at least 2 list elements + if (occurrenceCount >= 2) { + validSelectors.push(selector); + } + } + + return validSelectors; + } catch (error) { + console.warn("Failed to validate child selectors:", error); + return selectors; + } + }; + + // Enhanced XPath evaluation for multiple elements + const evaluateXPathAllWithShadowSupport = ( + document: Document, + xpath: string, + isShadow: boolean = false + ): Element[] => { + try { + // First try regular XPath evaluation + const result = document.evaluate( + xpath, + document, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null + ); + + const elements: Element[] = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node as Element); + } + } + + if (!isShadow || elements.length > 0) { + return elements; + } + + // If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal + // This is a simplified version - for multiple elements, we'll primarily rely on regular XPath + return elements; + } catch (err) { + console.error("XPath evaluation failed:", xpath, err); + return []; + } + }; + + const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors); + + const isElementVisible = (element: HTMLElement): boolean => { + try { + const rect = element.getBoundingClientRect(); + return rect.width > 0 && rect.height > 0; + } catch (error) { + return false; + } + }; + + const isValidData = (data: string): boolean => { + if (!data || data.trim().length === 0) return false; + + const trimmed = data.trim(); + + // Filter out single letters + if (trimmed.length === 1) { + return false; + } + + // Filter out pure symbols/punctuation + if (trimmed.length < 3 && /^[^\w\s]+$/.test(trimmed)) { + return false; + } + + // Filter out whitespace and punctuation only + if (/^[\s\p{P}\p{S}]*$/u.test(trimmed)) return false; + + return trimmed.length > 0; + }; + + // Enhanced shadow DOM-aware element evaluation + const evaluateXPathWithShadowSupport = ( + document: Document, + xpath: string, + isShadow: boolean = false + ): Element | null => { + try { + // First try regular XPath evaluation + const result = document.evaluate( + xpath, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null + ).singleNodeValue as Element | null; + + if (!isShadow || result) { + return result; + } + + // If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal + let cleanPath = xpath; + let isIndexed = false; + + const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/); + if (indexedMatch) { + cleanPath = indexedMatch[1] + indexedMatch[3]; + isIndexed = true; + } + + const pathParts = cleanPath + .replace(/^\/\//, "") + .split("/") + .map((p) => p.trim()) + .filter((p) => p.length > 0); + + let currentContexts: (Document | Element | ShadowRoot)[] = [document]; + + for (let i = 0; i < pathParts.length; i++) { + const part = pathParts[i]; + const nextContexts: (Element | ShadowRoot)[] = []; + + for (const ctx of currentContexts) { + const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/); + let partWithoutPosition = part; + let requestedPosition: number | null = null; + + if (positionalMatch) { + partWithoutPosition = positionalMatch[1]; + requestedPosition = parseInt(positionalMatch[2]); + } + + const matched = queryInsideContext(ctx, partWithoutPosition); + + let elementsToAdd = matched; + if (requestedPosition !== null) { + const index = requestedPosition - 1; + if (index >= 0 && index < matched.length) { + elementsToAdd = [matched[index]]; + } else { + elementsToAdd = []; + } + } + + elementsToAdd.forEach((el) => { + nextContexts.push(el); + if (el.shadowRoot) { + nextContexts.push(el.shadowRoot); + } + }); + } + + if (nextContexts.length === 0) { + return null; + } + + currentContexts = nextContexts; + } + + if (currentContexts.length > 0) { + if (isIndexed && indexedMatch) { + const requestedIndex = parseInt(indexedMatch[2]) - 1; + if (requestedIndex >= 0 && requestedIndex < currentContexts.length) { + return currentContexts[requestedIndex] as Element; + } else { + return null; + } + } + + return currentContexts[0] as Element; + } + + return null; + } catch (err) { + console.error("XPath evaluation failed:", xpath, err); + return null; + } + }; + + const queryInsideContext = ( + context: Document | Element | ShadowRoot, + part: string + ): Element[] => { + try { + const { tagName, conditions } = parseXPathPart(part); + + const candidateElements = Array.from(context.querySelectorAll(tagName)); + if (candidateElements.length === 0) { + return []; + } + + const matchingElements = candidateElements.filter((el) => { + return elementMatchesConditions(el, conditions); + }); + + return matchingElements; + } catch (err) { + console.error("Error in queryInsideContext:", err); + return []; + } + }; + + const parseXPathPart = ( + part: string + ): { tagName: string; conditions: string[] } => { + const tagMatch = part.match(/^([a-zA-Z0-9-]+)/); + const tagName = tagMatch ? tagMatch[1] : "*"; + + const conditionMatches = part.match(/\[([^\]]+)\]/g); + const conditions = conditionMatches + ? conditionMatches.map((c) => c.slice(1, -1)) + : []; + + return { tagName, conditions }; + }; + + const elementMatchesConditions = ( + element: Element, + conditions: string[] + ): boolean => { + for (const condition of conditions) { + if (!elementMatchesCondition(element, condition)) { + return false; + } + } + return true; + }; + + const elementMatchesCondition = ( + element: Element, + condition: string + ): boolean => { + condition = condition.trim(); + + if (/^\d+$/.test(condition)) { + return true; + } + + // Handle @attribute="value" + const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/); + if (attrMatch) { + const [, attr, value] = attrMatch; + const elementValue = element.getAttribute(attr); + return elementValue === value; + } + + // Handle contains(@class, 'value') + const classContainsMatch = condition.match( + /^contains\(@class,\s*["']([^"']+)["']\)$/ + ); + if (classContainsMatch) { + const className = classContainsMatch[1]; + return element.classList.contains(className); + } + + // Handle contains(@attribute, 'value') + const attrContainsMatch = condition.match( + /^contains\(@([^,]+),\s*["']([^"']+)["']\)$/ + ); + if (attrContainsMatch) { + const [, attr, value] = attrContainsMatch; + const elementValue = element.getAttribute(attr) || ""; + return elementValue.includes(value); + } + + // Handle text()="value" + const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/); + if (textMatch) { + const expectedText = textMatch[1]; + const elementText = element.textContent?.trim() || ""; + return elementText === expectedText; + } + + // Handle contains(text(), 'value') + const textContainsMatch = condition.match( + /^contains\(text\(\),\s*["']([^"']+)["']\)$/ + ); + if (textContainsMatch) { + const expectedText = textContainsMatch[1]; + const elementText = element.textContent?.trim() || ""; + return elementText.includes(expectedText); + } + + // Handle count(*)=0 (element has no children) + if (condition === "count(*)=0") { + return element.children.length === 0; + } + + // Handle other count conditions + const countMatch = condition.match(/^count\(\*\)=(\d+)$/); + if (countMatch) { + const expectedCount = parseInt(countMatch[1]); + return element.children.length === expectedCount; + } + + return true; + }; + + // Enhanced value extraction with shadow DOM support + const extractValueWithShadowSupport = ( + element: Element, + attribute: string + ): string | null => { + if (!element) return null; + + const baseURL = + element.ownerDocument?.location?.href || window.location.origin; + + // Check shadow DOM content first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === "innerText") { + let textContent = + (element as HTMLElement).innerText?.trim() || + (element as HTMLElement).textContent?.trim(); + + if (!textContent) { + const dataAttributes = [ + "data-600", + "data-text", + "data-label", + "data-value", + "data-content", + ]; + for (const attr of dataAttributes) { + const dataValue = element.getAttribute(attr); + if (dataValue && dataValue.trim()) { + textContent = dataValue.trim(); + break; + } + } + } + + return textContent || null; + } else if (attribute === "innerHTML") { + return element.innerHTML?.trim() || null; + } else if (attribute === "href") { + let anchorElement = element; + + if (element.tagName !== "A") { + anchorElement = + element.closest("a") || + element.parentElement?.closest("a") || + element; + } + + const hrefValue = anchorElement.getAttribute("href"); + if (!hrefValue || hrefValue.trim() === "") { + return null; + } + + try { + return new URL(hrefValue, baseURL).href; + } catch (e) { + console.warn("Error creating URL from", hrefValue, e); + return hrefValue; + } + } else if (attribute === "src") { + const attrValue = element.getAttribute(attribute); + const dataAttr = attrValue || element.getAttribute("data-" + attribute); + + if (!dataAttr || dataAttr.trim() === "") { + const style = window.getComputedStyle(element as HTMLElement); + const bgImage = style.backgroundImage; + if (bgImage && bgImage !== "none") { + const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); + return matches ? new URL(matches[1], baseURL).href : null; + } + return null; + } + + try { + return new URL(dataAttr, baseURL).href; + } catch (e) { + console.warn("Error creating URL from", dataAttr, e); + return dataAttr; + } + } + return element.getAttribute(attribute); + }; + + // Simple deepest child finder - limit depth to prevent hanging + const findDeepestChild = (element: HTMLElement): HTMLElement => { + let deepest = element; + let maxDepth = 0; + + const traverse = (el: HTMLElement, depth: number) => { + if (depth > 3) return; + + const text = el.textContent?.trim() || ""; + if (isValidData(text) && depth > maxDepth) { + maxDepth = depth; + deepest = el; + } + + const children = Array.from(el.children).slice(0, 3); + children.forEach((child) => { + if (child instanceof HTMLElement) { + traverse(child, depth + 1); + } + }); + }; + + traverse(element, 0); + return deepest; + }; + + validatedChildSelectors.forEach((childSelector, index) => { + try { + // Detect if this selector should use shadow DOM traversal + const isShadowSelector = childSelector.includes('>>') || + childSelector.startsWith('//') && + (listSelector.includes('>>') || currentSnapshot?.snapshot); + + const element = evaluateXPathWithShadowSupport( + iframeElement.contentDocument!, + childSelector, + isShadowSelector + ) as HTMLElement; + + if (element && isElementVisible(element)) { + const rect = element.getBoundingClientRect(); + const position = { x: rect.left, y: rect.top }; + + const tagName = element.tagName.toLowerCase(); + const isShadow = element.getRootNode() instanceof ShadowRoot; + + if (tagName === "a") { + const anchor = element as HTMLAnchorElement; + const href = extractValueWithShadowSupport(anchor, "href"); + const text = extractValueWithShadowSupport(anchor, "innerText"); + + if ( + href && + href.trim() !== "" && + href !== window.location.href && + !href.startsWith("javascript:") && + !href.startsWith("#") + ) { + const fieldIdHref = Date.now() + index * 1000; + + candidateFields.push({ + id: fieldIdHref, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldIdHref, + type: "text", + label: `Label ${index * 2 + 1}`, + data: href, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "href", + }, + }, + }); + } + + const fieldIdText = Date.now() + index * 1000 + 1; + + if (text && isValidData(text)) { + candidateFields.push({ + id: fieldIdText, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldIdText, + type: "text", + label: `Label ${index * 2 + 2}`, + data: text, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "innerText", + }, + }, + }); + } + } else if (tagName === "img") { + const img = element as HTMLImageElement; + const src = extractValueWithShadowSupport(img, "src"); + const alt = extractValueWithShadowSupport(img, "alt"); + + if (src && !src.startsWith("data:") && src.length > 10) { + const fieldId = Date.now() + index * 1000; + + candidateFields.push({ + id: fieldId, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index + 1}`, + data: src, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "src", + }, + }, + }); + } + + if (alt && isValidData(alt)) { + const fieldId = Date.now() + index * 1000 + 1; + + candidateFields.push({ + id: fieldId, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index + 2}`, + data: alt, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "alt", + }, + }, + }); + } + } else { + const deepestElement = findDeepestChild(element); + const data = extractValueWithShadowSupport(deepestElement, "innerText"); + + if (data && isValidData(data)) { + const isLeaf = isLeafElement(deepestElement); + const depth = getElementDepthFromList( + deepestElement, + listSelector, + iframeElement.contentDocument! + ); + + const fieldId = Date.now() + index; + + candidateFields.push({ + id: fieldId, + element: deepestElement, + isLeaf: isLeaf, + depth: depth, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index + 1}`, + data: data, + selectorObj: { + selector: childSelector, + tag: deepestElement.tagName, + isShadow: deepestElement.getRootNode() instanceof ShadowRoot, + attribute: "innerText", + }, + }, + }); + } + } + } + } catch (error) { + console.warn( + `Failed to process child selector ${childSelector}:`, + error + ); + } + }); + + candidateFields.sort((a, b) => { + const yDiff = a.position.y - b.position.y; + + if (Math.abs(yDiff) <= 5) { + return a.position.x - b.position.x; + } + + return yDiff; + }); + + const filteredCandidates = removeParentChildDuplicates(candidateFields); + + const finalFields = removeDuplicateContent(filteredCandidates); + return finalFields; + }, + [currentSnapshot] + ); + + const isLeafElement = (element: HTMLElement): boolean => { + const children = Array.from(element.children) as HTMLElement[]; + + if (children.length === 0) return true; + + const hasContentfulChildren = children.some((child) => { + const text = child.textContent?.trim() || ""; + return text.length > 0 && text !== element.textContent?.trim(); + }); + + return !hasContentfulChildren; + }; + + const getElementDepthFromList = ( + element: HTMLElement, + listSelector: string, + document: Document + ): number => { + try { + const listResult = document.evaluate( + listSelector, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null + ); + + const listElement = listResult.singleNodeValue as HTMLElement; + if (!listElement) return 0; + + let depth = 0; + let current = element; + + while (current && current !== listElement && current.parentElement) { + depth++; + current = current.parentElement; + if (depth > 20) break; + } + + return current === listElement ? depth : 0; + } catch (error) { + return 0; + } + }; + + const removeParentChildDuplicates = ( + candidates: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> + ): Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> => { + const filtered: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> = []; + + for (const candidate of candidates) { + let shouldInclude = true; + + for (const existing of filtered) { + if (candidate.element.contains(existing.element)) { + shouldInclude = false; + break; + } else if (existing.element.contains(candidate.element)) { + const existingIndex = filtered.indexOf(existing); + filtered.splice(existingIndex, 1); + break; + } + } + + if (candidate.element.tagName.toLowerCase() === "a") { + shouldInclude = true; + } + + if (shouldInclude) { + filtered.push(candidate); + } + } + + return filtered; + }; + + const removeDuplicateContent = ( + candidates: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> + ): Record => { + const finalFields: Record = {}; + const seenContent = new Set(); + let labelCounter = 1; + + for (const candidate of candidates) { + const content = candidate.field.data.trim().toLowerCase(); + + if (!seenContent.has(content)) { + seenContent.add(content); + finalFields[candidate.id] = { + ...candidate.field, + label: `Label ${labelCounter++}`, + }; + } + } + + return finalFields; + }; + useEffect(() => { if (isDOMMode && listSelector) { socket?.emit("setGetList", { getList: true }); @@ -339,6 +1128,25 @@ export const BrowserWindow = () => { ); setCachedChildSelectors(childSelectors); + + const autoFields = createFieldsFromChildSelectors( + childSelectors, + listSelector + ); + + if (Object.keys(autoFields).length > 0) { + setFields(autoFields); + + addListStep( + listSelector, + autoFields, + currentListId || Date.now(), + currentListActionId || `list-${crypto.randomUUID()}`, + { type: "", selector: paginationSelector }, + undefined, + false + ); + } } catch (error) { console.error("Error during child selector caching:", error); } finally { @@ -787,6 +1595,15 @@ export const BrowserWindow = () => { !listSelector && highlighterData.groupInfo?.isGroupElement ) { + if (highlighterData?.groupInfo.groupElements) { + setProcessingGroupCoordinates( + highlighterData.groupInfo.groupElements.map((element) => ({ + element, + rect: element.getBoundingClientRect(), + })) + ); + } + let cleanedSelector = highlighterData.selector; setListSelector(cleanedSelector); @@ -1499,6 +2316,8 @@ export const BrowserWindow = () => { {/* Loading overlay positioned specifically over DOM content */} {isCachingChildSelectors && ( + <> + {/* Background overlay */}
{ width: "100%", height: "100%", background: "rgba(255, 255, 255, 0.8)", - display: "flex", - alignItems: "center", - justifyContent: "center", zIndex: 9999, pointerEvents: "none", - borderRadius: "0px 0px 5px 5px", // Match the DOM renderer border radius + borderRadius: "0px 0px 5px 5px", }} - > + /> + + {/* Use processing coordinates captured before listSelector was set */} + {processingGroupCoordinates.map((groupElement, index) => ( + + {/* Original highlight box */} +
+ + {/* Label */} +
+ List item {index + 1} +
+ + {/* Scanning animation */} +
+
+
+ + + + ))} + + {/* Fallback loader */} + {processingGroupCoordinates.length === 0 && (
-
- )} + > +
+
+ )} + + )}
) : ( /* Screenshot mode canvas */ diff --git a/src/components/recorder/DOMBrowserRenderer.tsx b/src/components/recorder/DOMBrowserRenderer.tsx index 2828a114..09df5a34 100644 --- a/src/components/recorder/DOMBrowserRenderer.tsx +++ b/src/components/recorder/DOMBrowserRenderer.tsx @@ -667,6 +667,12 @@ export const DOMBrowserRenderer: React.FC = ({ return; } + if (isCachingChildSelectors) { + e.preventDefault(); + e.stopPropagation(); + return; + } + e.preventDefault(); if (!isInCaptureMode) { @@ -1009,7 +1015,8 @@ export const DOMBrowserRenderer: React.FC = ({ height: "100%", border: "none", display: "block", - overflow: "hidden !important", + overflow: isCachingChildSelectors ? "hidden !important" : "hidden !important", + pointerEvents: isCachingChildSelectors ? "none" : "auto", }} sandbox="allow-same-origin allow-forms allow-scripts" title="DOM Browser Content"