From 0d763f78213671d5809102b9b501920921418ffd Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 20 Dec 2024 18:32:08 +0530 Subject: [PATCH 001/111] feat: iframe support for get element info --- server/src/workflow-management/selector.ts | 214 ++++++++++++++++----- 1 file changed, 168 insertions(+), 46 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 240f8921..16979487 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,10 +23,8 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + // Helper function to get element info + const getElementInfo = (element: HTMLElement) => { let info: { tagName: string; hasOnlyText?: boolean; @@ -36,9 +34,12 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + fromIframe?: boolean; + iframePath?: string[]; } = { tagName: element?.tagName ?? '', }; + if (element) { info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { @@ -48,7 +49,7 @@ export const getElementInformation = async ( {} as Record ); } - // Gather specific information based on the tag + if (element?.tagName === 'A') { info.url = (element as HTMLAnchorElement).href; info.innerText = element.innerText ?? ''; @@ -61,29 +62,186 @@ export const getElementInformation = async ( ...info.attributes, selectedValue: selectElement.value, }; - } else if (element?.tagName === 'INPUT' && (element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date') { + } else if (element?.tagName === 'INPUT' && + ((element as HTMLInputElement).type === 'time' || + (element as HTMLInputElement).type === 'date')) { info.innerText = (element as HTMLInputElement).value; } else { info.hasOnlyText = element?.children?.length === 0 && element?.innerText?.length > 0; info.innerText = element?.innerText ?? ''; } + info.innerHTML = element.innerHTML; info.outerHTML = element.outerHTML; return info; + }; + + // Helper function to search in iframe + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + const { parentElement } = el; + const element = parentElement?.tagName === 'A' ? parentElement : el; + + const info = getElementInfo(element); + info.fromIframe = true; + info.iframePath = iframePath; + + return info; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + + const el = document.elementFromPoint(x, y) as HTMLElement; + if (el) { + // Check if the element is an iframe + if (el.tagName === 'IFRAME') { + const iframe = el as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + + const { parentElement } = el; + const element = parentElement?.tagName === 'A' ? parentElement : el; + return getElementInfo(element); } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } else { const elementInfo = await page.evaluate( async ({ x, y }) => { + // Helper function to get element info (same as above) + const getElementInfo = (element: HTMLElement) => { + let info: { + tagName: string; + hasOnlyText?: boolean; + innerText?: string; + url?: string; + imageUrl?: string; + attributes?: Record; + innerHTML?: string; + outerHTML?: string; + fromIframe?: boolean; + iframePath?: string[]; + } = { + tagName: element?.tagName ?? '', + }; + + if (element) { + info.attributes = Array.from(element.attributes).reduce( + (acc, attr) => { + acc[attr.name] = attr.value; + return acc; + }, + {} as Record + ); + } + + if (element?.tagName === 'A') { + info.url = (element as HTMLAnchorElement).href; + info.innerText = element.innerText ?? ''; + } else if (element?.tagName === 'IMG') { + info.imageUrl = (element as HTMLImageElement).src; + } else { + info.hasOnlyText = element?.children?.length === 0 && + element?.innerText?.length > 0; + info.innerText = element?.innerText ?? ''; + } + + info.innerHTML = element.innerHTML; + info.outerHTML = element.outerHTML; + return info; + }; + + // Helper function to search in iframe (same as above) + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + let element = el; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } + + const info = getElementInfo(element); + info.fromIframe = true; + info.iframePath = iframePath; + + return info; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; if (originalEl) { - let element = originalEl; + // Check if the element is an iframe + if (originalEl.tagName === 'IFRAME') { + const iframe = originalEl as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + let element = originalEl; while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -105,47 +263,11 @@ export const getElementInformation = async ( } } - let info: { - tagName: string; - hasOnlyText?: boolean; - innerText?: string; - url?: string; - imageUrl?: string; - attributes?: Record; - innerHTML?: string; - outerHTML?: string; - } = { - tagName: element?.tagName ?? '', - }; - - if (element) { - info.attributes = Array.from(element.attributes).reduce( - (acc, attr) => { - acc[attr.name] = attr.value; - return acc; - }, - {} as Record - ); - } - - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; - return info; + return getElementInfo(element); } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } From 6904933036bc48bc09fc331479efbfe174181c78 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 20 Dec 2024 20:28:11 +0530 Subject: [PATCH 002/111] feat: iframe support for getRect --- server/src/workflow-management/selector.ts | 189 ++++++++++++++++++--- 1 file changed, 166 insertions(+), 23 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 16979487..83491042 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -292,25 +292,90 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { + // Helper function to convert rectangle to plain object + const getRectangleInfo = (rectangle: DOMRect) => { + const info = { + x: rectangle.x, + y: rectangle.y, + width: rectangle.width, + height: rectangle.height, + top: rectangle.top, + right: rectangle.right, + bottom: rectangle.bottom, + left: rectangle.left, + fromIframe: false, + iframePath: [] as string[] + }; + return info; + }; + + // Helper function to search in iframe + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + const { parentElement } = el; + const element = parentElement?.tagName === 'A' ? parentElement : el; + const rectangle = element?.getBoundingClientRect(); + + if (rectangle) { + const iframeRect = iframe.getBoundingClientRect(); + const rectInfo = getRectangleInfo(rectangle); + + // Adjust coordinates relative to the main document + rectInfo.x += iframeRect.x; + rectInfo.y += iframeRect.y; + rectInfo.top += iframeRect.top; + rectInfo.right += iframeRect.left; + rectInfo.bottom += iframeRect.top; + rectInfo.left += iframeRect.left; + rectInfo.fromIframe = true; + rectInfo.iframePath = iframePath; + + return rectInfo; + } + return null; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + const el = document.elementFromPoint(x, y) as HTMLElement; if (el) { + // Check if the element is an iframe + if (el.tagName === 'IFRAME') { + const iframe = el as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + const { parentElement } = el; - // Match the logic in recorder.ts for link clicks const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); + if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + return getRectangleInfo(rectangle); } } + return null; }, { x: coordinates.x, y: coordinates.y }, ); @@ -318,10 +383,98 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } else { const rect = await page.evaluate( async ({ x, y }) => { + // Helper function to convert rectangle to plain object (same as above) + const getRectangleInfo = (rectangle: DOMRect) => ({ + x: rectangle.x, + y: rectangle.y, + width: rectangle.width, + height: rectangle.height, + top: rectangle.top, + right: rectangle.right, + bottom: rectangle.bottom, + left: rectangle.left, + fromIframe: false, + iframePath: [] as string[] + }); + + // Helper function to search in iframe (same as above) + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + let element = el; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } + + const rectangle = element?.getBoundingClientRect(); + if (rectangle) { + const iframeRect = iframe.getBoundingClientRect(); + const rectInfo = getRectangleInfo(rectangle); + + // Adjust coordinates relative to the main document + rectInfo.x += iframeRect.x; + rectInfo.y += iframeRect.y; + rectInfo.top += iframeRect.top; + rectInfo.right += iframeRect.left; + rectInfo.bottom += iframeRect.top; + rectInfo.left += iframeRect.left; + rectInfo.fromIframe = true; + rectInfo.iframePath = iframePath; + + return rectInfo; + } + return null; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; if (originalEl) { - let element = originalEl; + // Check if the element is an iframe + if (originalEl.tagName === 'IFRAME') { + const iframe = originalEl as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + let element = originalEl; while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -344,18 +497,8 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } const rectangle = element?.getBoundingClientRect(); - if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + return getRectangleInfo(rectangle); } } return null; From 8ba928dae6cac4d7e5924bcc799e792068e6734d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 20 Dec 2024 20:28:24 +0530 Subject: [PATCH 003/111] chore: fix format --- server/src/workflow-management/selector.ts | 76 +++++++++++----------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 83491042..dd869f3d 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -62,9 +62,9 @@ export const getElementInformation = async ( ...info.attributes, selectedValue: selectElement.value, }; - } else if (element?.tagName === 'INPUT' && - ((element as HTMLInputElement).type === 'time' || - (element as HTMLInputElement).type === 'date')) { + } else if (element?.tagName === 'INPUT' && + ((element as HTMLInputElement).type === 'time' || + (element as HTMLInputElement).type === 'date')) { info.innerText = (element as HTMLInputElement).value; } else { info.hasOnlyText = element?.children?.length === 0 && @@ -79,20 +79,20 @@ export const getElementInformation = async ( // Helper function to search in iframe const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; - + const info = getElementInfo(element); info.fromIframe = true; info.iframePath = iframePath; @@ -112,11 +112,11 @@ export const getElementInformation = async ( const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; @@ -179,14 +179,14 @@ export const getElementInformation = async ( // Helper function to search in iframe (same as above) const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; @@ -231,11 +231,11 @@ export const getElementInformation = async ( const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; @@ -311,25 +311,25 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector // Helper function to search in iframe const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); - + if (rectangle) { const iframeRect = iframe.getBoundingClientRect(); const rectInfo = getRectangleInfo(rectangle); - + // Adjust coordinates relative to the main document rectInfo.x += iframeRect.x; rectInfo.y += iframeRect.y; @@ -339,7 +339,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector rectInfo.left += iframeRect.left; rectInfo.fromIframe = true; rectInfo.iframePath = iframePath; - + return rectInfo; } return null; @@ -357,11 +357,11 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; @@ -370,7 +370,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); - + if (rectangle) { return getRectangleInfo(rectangle); } @@ -399,14 +399,14 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector // Helper function to search in iframe (same as above) const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; @@ -436,7 +436,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (rectangle) { const iframeRect = iframe.getBoundingClientRect(); const rectInfo = getRectangleInfo(rectangle); - + // Adjust coordinates relative to the main document rectInfo.x += iframeRect.x; rectInfo.y += iframeRect.y; @@ -446,7 +446,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector rectInfo.left += iframeRect.left; rectInfo.fromIframe = true; rectInfo.iframePath = iframePath; - + return rectInfo; } return null; @@ -464,11 +464,11 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; From a131ce6c04d1d9f5b1982493f4200b972e989555 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 23:44:20 +0530 Subject: [PATCH 004/111] feat: shadow dom selection --- server/src/workflow-management/selector.ts | 105 +++++++++++++++++---- 1 file changed, 89 insertions(+), 16 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index c0fa21f1..5a7273df 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,7 +23,28 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; + // Helper function to get element from point including shadow DOM + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Traverse through shadow roots + let current = element; + while (current) { + // Check if element has shadow root + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + // Try to find deeper element in shadow DOM + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + current = shadowElement; + } + return current; + }; + + const el = getDeepestElementFromPoint(x, y); if (el) { const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; @@ -36,9 +57,12 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + isShadowRoot?: boolean; } = { tagName: element?.tagName ?? '', + isShadowRoot: !!element?.shadowRoot }; + if (element) { info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { @@ -48,6 +72,7 @@ export const getElementInformation = async ( {} as Record ); } + // Gather specific information based on the tag if (element?.tagName === 'A') { info.url = (element as HTMLAnchorElement).href; @@ -61,7 +86,7 @@ export const getElementInformation = async ( ...info.attributes, selectedValue: selectElement.value, }; - } else if (element?.tagName === 'INPUT' && (element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date') { + } else if (element?.tagName === 'INPUT' && ((element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date')) { info.innerText = (element as HTMLInputElement).value; } else { info.hasOnlyText = element?.children?.length === 0 && @@ -80,7 +105,26 @@ export const getElementInformation = async ( } else { const elementInfo = await page.evaluate( async ({ x, y }) => { - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + // Helper function to get element from point including shadow DOM + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Traverse through shadow roots + let current = element; + while (current) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + current = shadowElement; + } + return current; + }; + + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; @@ -114,8 +158,10 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + isShadowRoot?: boolean; } = { tagName: element?.tagName ?? '', + isShadowRoot: !!element?.shadowRoot }; if (element) { @@ -156,24 +202,33 @@ export const getElementInformation = async ( } }; -/** - * Returns a {@link Rectangle} object representing - * the coordinates, width, height and corner points of the element. - * If an element is not found, returns null. - * @param page The page instance. - * @param coordinates Coordinates of an element. - * @category WorkflowManagement-Selectors - * @returns {Promise} - */ export const getRect = async (page: Page, coordinates: Coordinates, listSelector: string, getList: boolean) => { try { if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; + // Helper function to get element from point including shadow DOM + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Traverse through shadow roots + let current = element; + while (current) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + current = shadowElement; + } + return current; + }; + + const el = getDeepestElementFromPoint(x, y); if (el) { const { parentElement } = el; - // Match the logic in recorder.ts for link clicks const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); if (rectangle) { @@ -196,7 +251,26 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } else { const rect = await page.evaluate( async ({ x, y }) => { - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + // Helper function to get element from point including shadow DOM + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Traverse through shadow roots + let current = element; + while (current) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + current = shadowElement; + } + return current; + }; + + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; @@ -249,7 +323,6 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } }; - /** * Returns the best and unique css {@link Selectors} for the element on the page. * Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts From 6ac3e19b82cbeada5030755afff4fdb5c1f82a83 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 28 Dec 2024 17:35:57 +0530 Subject: [PATCH 005/111] feat: add del functionality for text and list steps --- src/components/organisms/RightSidePanel.tsx | 56 ++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 12f75028..403f78f6 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -169,6 +169,22 @@ export const RightSidePanel: React.FC = ({ onFinishCapture }); }; + const handleTextStepDelete = (id: number) => { + deleteBrowserStep(id); + setTextLabels(prevLabels => { + const { [id]: _, ...rest } = prevLabels; + return rest; + }); + setConfirmedTextSteps(prev => { + const { [id]: _, ...rest } = prev; + return rest; + }); + setErrors(prevErrors => { + const { [id]: _, ...rest } = prevErrors; + return rest; + }); + }; + const handleListTextFieldConfirm = (listId: number, fieldKey: string) => { setConfirmedListTextFields(prev => ({ ...prev, @@ -195,6 +211,22 @@ export const RightSidePanel: React.FC = ({ onFinishCapture }); }; + const handleListTextFieldDelete = (listId: number, fieldKey: string) => { + removeListTextField(listId, fieldKey); + setConfirmedListTextFields(prev => { + const updatedListFields = { ...(prev[listId] || {}) }; + delete updatedListFields[fieldKey]; + return { + ...prev, + [listId]: updatedListFields + }; + }); + setErrors(prev => { + const { [fieldKey]: _, ...rest } = prev; + return rest; + }); + }; + const getTextSettingsObject = useCallback(() => { const settings: Record = {}; browserSteps.forEach(step => { @@ -526,11 +558,21 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ) }} /> - {!confirmedTextSteps[step.id] && ( + {!confirmedTextSteps[step.id] ? ( + ) : ( + + + )} )} @@ -578,7 +620,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ) }} /> - {!confirmedListTextFields[step.id]?.[key] && ( + {!confirmedListTextFields[step.id]?.[key] ? ( + ) : ( + + + )} ))} From 3cf0b858933f0a9aa0541a7b714677600861939f Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 28 Dec 2024 17:39:04 +0530 Subject: [PATCH 006/111] feat: add lang translation for delete button --- public/locales/de.json | 3 ++- public/locales/en.json | 3 ++- public/locales/es.json | 3 ++- public/locales/ja.json | 3 ++- public/locales/zh.json | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/public/locales/de.json b/public/locales/de.json index 411d8f22..db0ce562 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -162,7 +162,8 @@ "confirm_limit": "Limit bestätigen", "finish_capture": "Erfassung abschließen", "finish": "Fertig", - "cancel": "Abbrechen" + "cancel": "Abbrechen", + "delete": "Löschen" }, "screenshot": { "capture_fullpage": "Vollständige Seite erfassen", diff --git a/public/locales/en.json b/public/locales/en.json index c5a2ff4c..9b4defbc 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -163,7 +163,8 @@ "confirm_limit": "Confirm Limit", "finish_capture": "Finish Capture", "finish": "Finish", - "cancel": "Cancel" + "cancel": "Cancel", + "delete": "Delete" }, "screenshot": { "capture_fullpage": "Capture Fullpage", diff --git a/public/locales/es.json b/public/locales/es.json index 6e52cc6f..e897914e 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -163,7 +163,8 @@ "confirm_limit": "Confirmar Límite", "finish_capture": "Finalizar Captura", "finish": "Finalizar", - "cancel": "Cancelar" + "cancel": "Cancelar", + "delete": "Eliminar" }, "screenshot": { "capture_fullpage": "Capturar Página Completa", diff --git a/public/locales/ja.json b/public/locales/ja.json index 9d2d9a89..9ae226dc 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -163,7 +163,8 @@ "confirm_limit": "制限を確認", "finish_capture": "取得を完了", "finish": "完了", - "cancel": "キャンセル" + "cancel": "キャンセル", + "delete": "削除" }, "screenshot": { "capture_fullpage": "フルページを取得", diff --git a/public/locales/zh.json b/public/locales/zh.json index 69561d5c..344a58a7 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -163,7 +163,8 @@ "confirm_limit": "确认限制", "finish_capture": "完成捕获", "finish": "完成", - "cancel": "取消" + "cancel": "取消", + "delete": "删除" }, "screenshot": { "capture_fullpage": "捕获整页", From fd7e4ab626fe2b862de7fea14819be18c40012d8 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 28 Dec 2024 18:11:24 +0530 Subject: [PATCH 007/111] feat: check confirm capture and render delete button --- src/components/organisms/RightSidePanel.tsx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 403f78f6..c6b3479f 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -56,6 +56,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const [showCaptureText, setShowCaptureText] = useState(true); const [hoverStates, setHoverStates] = useState<{ [id: string]: boolean }>({}); const [browserStepIdList, setBrowserStepIdList] = useState([]); + const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false); const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog } = useGlobalInfoStore(); const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList, startPaginationMode, stopPaginationMode, paginationType, updatePaginationType, limitType, customLimit, updateLimitType, updateCustomLimit, stopLimitMode, startLimitMode, captureStage, setCaptureStage } = useActionContext(); @@ -130,6 +131,11 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const handlePairDelete = () => { } + const handleStartGetText = () => { + setIsCaptureTextConfirmed(false); + startGetText(); + } + const handleTextLabelChange = (id: number, label: string, listId?: number, fieldKey?: string) => { if (listId !== undefined && fieldKey !== undefined) { // Prevent editing if the field is confirmed @@ -256,6 +262,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture if (hasTextSteps) { socket?.emit('action', { action: 'scrapeSchema', settings }); } + setIsCaptureTextConfirmed(true); resetInterpretationLog(); onFinishCapture(); }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog]); @@ -502,7 +509,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture )} - {!getText && !getScreenshot && !getList && showCaptureText && } + {!getText && !getScreenshot && !getList && showCaptureText && } {getText && <> @@ -563,7 +570,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture - ) : ( + ) : !isCaptureTextConfirmed && ( - ) : ( + ) : !isCaptureListConfirmed && ( + )} - + )} From dead389e480cae29978242047a554e3a3f780f30 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 29 Dec 2024 17:55:01 +0530 Subject: [PATCH 011/111] feat: add translation for back button of capture list action --- public/locales/de.json | 1 + public/locales/en.json | 1 + public/locales/es.json | 1 + public/locales/ja.json | 1 + public/locales/zh.json | 1 + 5 files changed, 5 insertions(+) diff --git a/public/locales/de.json b/public/locales/de.json index 411d8f22..c43b46a2 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -161,6 +161,7 @@ "confirm_pagination": "Paginierung bestätigen", "confirm_limit": "Limit bestätigen", "finish_capture": "Erfassung abschließen", + "back": "Zurück", "finish": "Fertig", "cancel": "Abbrechen" }, diff --git a/public/locales/en.json b/public/locales/en.json index c5a2ff4c..1a68faed 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -162,6 +162,7 @@ "confirm_pagination": "Confirm Pagination", "confirm_limit": "Confirm Limit", "finish_capture": "Finish Capture", + "back": "Back", "finish": "Finish", "cancel": "Cancel" }, diff --git a/public/locales/es.json b/public/locales/es.json index 6e52cc6f..b2fb55a4 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -162,6 +162,7 @@ "confirm_pagination": "Confirmar Paginación", "confirm_limit": "Confirmar Límite", "finish_capture": "Finalizar Captura", + "back": "Atrás", "finish": "Finalizar", "cancel": "Cancelar" }, diff --git a/public/locales/ja.json b/public/locales/ja.json index 9d2d9a89..e4073814 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -162,6 +162,7 @@ "confirm_pagination": "ページネーションを確認", "confirm_limit": "制限を確認", "finish_capture": "取得を完了", + "back": "戻る", "finish": "完了", "cancel": "キャンセル" }, diff --git a/public/locales/zh.json b/public/locales/zh.json index 69561d5c..d171c2c9 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -162,6 +162,7 @@ "confirm_pagination": "确认分页", "confirm_limit": "确认限制", "finish_capture": "完成捕获", + "back": "返回", "finish": "完成", "cancel": "取消" }, From a09b03e4a75627d2adc8189f8fdd361b36b8a82b Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 29 Dec 2024 23:36:06 +0530 Subject: [PATCH 012/111] feat: get deepest shadowDOM element selector --- server/src/workflow-management/selector.ts | 243 +++++++++++++++------ 1 file changed, 179 insertions(+), 64 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 5a7273df..9b3af66e 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,31 +23,41 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; // Traverse through shadow roots let current = element; - while (current) { - // Check if element has shadow root - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - - // Try to find deeper element in shadow DOM + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; const el = getDeepestElementFromPoint(x, y); if (el) { const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; + + // Get the containing shadow root if any + const containingShadowRoot = element.getRootNode() as ShadowRoot; + const isShadowRoot = containingShadowRoot instanceof ShadowRoot; + let info: { tagName: string; hasOnlyText?: boolean; @@ -58,11 +68,20 @@ export const getElementInformation = async ( innerHTML?: string; outerHTML?: string; isShadowRoot?: boolean; + shadowRootMode?: string; + shadowRootContent?: string; } = { tagName: element?.tagName ?? '', - isShadowRoot: !!element?.shadowRoot + isShadowRoot: isShadowRoot }; + + if (isShadowRoot) { + // Include shadow root specific information + info.shadowRootMode = containingShadowRoot.mode; + info.shadowRootContent = containingShadowRoot.innerHTML; + } + // Get attributes including those from shadow DOM context if (element) { info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { @@ -71,84 +90,82 @@ export const getElementInformation = async ( }, {} as Record ); + + // Get text content considering shadow DOM context + info.innerText = element.textContent ?? ''; + info.innerHTML = element.innerHTML; + info.outerHTML = element.outerHTML; + info.hasOnlyText = element.children.length === 0 && + (element.textContent !== null && + element.textContent.trim().length > 0); } - // Gather specific information based on the tag - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else if (element?.tagName === 'SELECT') { - const selectElement = element as HTMLSelectElement; - info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; - info.attributes = { - ...info.attributes, - selectedValue: selectElement.value, - }; - } else if (element?.tagName === 'INPUT' && ((element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date')) { - info.innerText = (element as HTMLInputElement).value; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; return info; } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } else { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - + // Traverse through shadow roots let current = element; - while (current) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; - + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; - + + // Handle element hierarchy traversal for list items while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); - + const fullyContained = parentRect.left <= childRect.left && parentRect.right >= childRect.right && parentRect.top <= childRect.top && parentRect.bottom >= childRect.bottom; - + const significantOverlap = (childRect.width * childRect.height) / (parentRect.width * parentRect.height) > 0.5; - + if (fullyContained && significantOverlap) { element = element.parentElement; } else { break; } } - + + // Get the containing shadow root if any + const containingShadowRoot = element.getRootNode() as ShadowRoot; + const isShadowRoot = containingShadowRoot instanceof ShadowRoot; + let info: { tagName: string; hasOnlyText?: boolean; @@ -159,12 +176,21 @@ export const getElementInformation = async ( innerHTML?: string; outerHTML?: string; isShadowRoot?: boolean; + shadowRootMode?: string; + shadowRootContent?: string; } = { tagName: element?.tagName ?? '', - isShadowRoot: !!element?.shadowRoot + isShadowRoot: isShadowRoot }; - + + if (isShadowRoot) { + // Include shadow root specific information + info.shadowRootMode = containingShadowRoot.mode; + info.shadowRootContent = containingShadowRoot.innerHTML; + } + if (element) { + // Get attributes including those from shadow DOM context info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; @@ -172,21 +198,25 @@ export const getElementInformation = async ( }, {} as Record ); + + // Handle specific element types + if (element.tagName === 'A') { + info.url = (element as HTMLAnchorElement).href; + info.innerText = element.textContent ?? ''; + } else if (element.tagName === 'IMG') { + info.imageUrl = (element as HTMLImageElement).src; + } else { + // Handle text content with proper null checking + info.hasOnlyText = element.children.length === 0 && + (element.textContent !== null && + element.textContent.trim().length > 0); + info.innerText = element.textContent ?? ''; + } + + info.innerHTML = element.innerHTML; + info.outerHTML = element.outerHTML; } - - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; + return info; } return null; @@ -793,6 +823,76 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return output; } + const MAX_DEPTH = 10; + + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let current = element; + let deepestElement = current; + let depth = 0; + + while (current && depth < MAX_DEPTH) { + const shadowRoot = current.shadowRoot; + if (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement) break; + + deepestElement = shadowElement; + current = shadowElement; + } else { + break; + } + depth++; + } + + return deepestElement; + }; + + const genSelectorForShadowDOM = (element: HTMLElement) => { + const findShadowContext = (element: HTMLElement): { host: HTMLElement, root: ShadowRoot } | null => { + let current: HTMLElement | null = element; + let depth = 0; + + while (current && depth < MAX_DEPTH) { + // Check if element is inside a shadow root + if (current.parentNode instanceof ShadowRoot) { + return { + host: (current.parentNode as ShadowRoot).host as HTMLElement, + root: current.parentNode as ShadowRoot + }; + } + current = current.parentElement; + depth++; + } + return null; + }; + + const shadowContext = findShadowContext(element); + if (!shadowContext) return null; + + try { + // Generate selector for the shadow host + const hostSelector = finder(shadowContext.host); + + // Generate selector for the element within the shadow DOM + const shadowElementSelector = finder(element, { + root: shadowContext.root as unknown as Element + }); + + return { + fullSelector: `${hostSelector} >>> ${shadowElementSelector}`, + hostSelector, + shadowElementSelector, + mode: shadowContext.root.mode + }; + } catch (e) { + console.warn('Error generating shadow DOM selector:', e); + return null; + } + }; + const genSelectors = (element: HTMLElement | null) => { if (element == null) { return null; @@ -812,6 +912,9 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } catch (e) { } + // Generate shadow DOM specific selector + const shadowSelector = genSelectorForShadowDOM(element); + const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, [ 'name', @@ -858,9 +961,21 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, + // Shadow DOM selector + shadowSelector: shadowSelector ? { + // Full selector that can traverse shadow DOM + full: shadowSelector.fullSelector, + // Individual parts for more flexible usage + host: shadowSelector.hostSelector, + element: shadowSelector.shadowElementSelector, + // Shadow root mode (open/closed) + mode: shadowSelector.mode + } : null }; } + + function genAttributeSet(element: HTMLElement, attributes: string[]) { return new Set( attributes.filter((attr) => { @@ -900,7 +1015,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return char.length === 1 && char.match(/[0-9]/); } - const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; + const hoveredElement = getDeepestElementFromPoint(x, y); if ( hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null From 542f4d31fa43359928d052b9edb3caa5f446c1c3 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 29 Dec 2024 23:41:19 +0530 Subject: [PATCH 013/111] feat: change shadowDOM full selector path --- server/src/workflow-management/selector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 9b3af66e..690fb0b1 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -882,7 +882,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { }); return { - fullSelector: `${hostSelector} >>> ${shadowElementSelector}`, + fullSelector: `${hostSelector} > ${shadowElementSelector}`, hostSelector, shadowElementSelector, mode: shadowContext.root.mode From b60f4b73b8424151f17bfd3389f5698f72c429df Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 01:24:32 +0530 Subject: [PATCH 014/111] feat: add functionality to scrape shadowDOM elements --- maxun-core/src/browserSide/scraper.js | 126 ++++++++++++++++---------- 1 file changed, 80 insertions(+), 46 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index a2009d78..ef979828 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -189,68 +189,102 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} */ window.scrapeSchema = function (lists) { + // These utility functions remain unchanged as they work perfectly function omap(object, f, kf = (x) => x) { - return Object.fromEntries( - Object.entries(object) - .map(([k, v]) => [kf(k), f(v)]), - ); + return Object.fromEntries( + Object.entries(object) + .map(([k, v]) => [kf(k), f(v)]), + ); } function ofilter(object, f) { - return Object.fromEntries( - Object.entries(object) - .filter(([k, v]) => f(k, v)), - ); + return Object.fromEntries( + Object.entries(object) + .filter(([k, v]) => f(k, v)), + ); } - function getSeedKey(listObj) { - const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length))); - return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0]; - } - - function getMBEs(elements) { - return elements.map((element) => { - let candidate = element; - const isUniqueChild = (e) => elements - .filter((elem) => e.parentNode?.contains(elem)) - .length === 1; - - while (candidate && isUniqueChild(candidate)) { - candidate = candidate.parentNode; + function findElement(config) { + // If this is a shadow DOM query + if (config.shadow && config.selector.includes('>>')) { + const [hostSelector, shadowSelector] = config.selector.split('>>').map(s => s.trim()); + const host = document.querySelector(hostSelector); + return host?.shadowRoot?.querySelector(shadowSelector) || null; } + // Otherwise, use regular querySelector + return document.querySelector(config.selector); + } - return candidate; - }); + function findAllElements(config) { + // If this is a shadow DOM query + if (config.shadow && config.selector.includes('>>')) { + const element = findElement(config); + return element ? [element] : []; + } + // Otherwise, use regular querySelectorAll + return Array.from(document.querySelectorAll(config.selector)); + } + + // Modified to use our new element finding functions + function getSeedKey(listObj) { + const maxLength = Math.max(...Object.values( + omap(listObj, (x) => findAllElements(x).length) + )); + return Object.keys( + ofilter(listObj, (_, v) => findAllElements(v).length === maxLength) + )[0]; + } + + // This function remains unchanged as it works with DOM elements + // regardless of how they were found + function getMBEs(elements) { + return elements.map((element) => { + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => e.parentNode?.contains(elem)) + .length === 1; + + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } + + return candidate; + }); } const seedName = getSeedKey(lists); - const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector)); + const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); return MBEs.map((mbe) => omap( - lists, - ({ selector, attribute }, key) => { - const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); - if (!elem) return undefined; + lists, + (config, key) => { + // Use our new findAllElements function + const elem = findAllElements(config) + .find((elem) => mbe.contains(elem)); - switch (attribute) { - case 'href': - const relativeHref = elem.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - case 'src': - const relativeSrc = elem.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - case 'innerText': - return elem.innerText; - case 'textContent': - return elem.textContent; - default: - return elem.innerText; - } - }, - (key) => key // Use the original key in the output + if (!elem) return undefined; + + switch (config.attribute) { + case 'href': { + const relativeHref = elem.getAttribute('href'); + return relativeHref ? new URL(relativeHref, window.location.origin).href : null; + } + case 'src': { + const relativeSrc = elem.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; + } + case 'innerText': + return elem.innerText; + case 'textContent': + return elem.textContent; + default: + return elem.getAttribute(config.attribute) || elem.innerText; + } + }, + (key) => key )) || []; - } + }; /** * Scrapes multiple lists of similar items based on a template item. From 9f9dc4e1030ca3819355245765ecadcc1e2c8d6f Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 01:25:45 +0530 Subject: [PATCH 015/111] feat: add shadow optional field in SelectorObject --- src/context/browserSteps.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index dd211199..fd311a35 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -32,6 +32,7 @@ export interface SelectorObject { selector: string; tag?: string; attribute?: string; + shadow?: boolean; [key: string]: any; } From b696fa568d65a0948edd3b999eb31c4ccf39dad5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 01:28:22 +0530 Subject: [PATCH 016/111] feat: add shadow param for scrapeSchema config --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index c581954d..495ba2db 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -403,7 +403,7 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResults); }, - scrapeSchema: async (schema: Record) => { + scrapeSchema: async (schema: Record) => { await this.ensureScriptsLoaded(page); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); From 415ce02a3d2eb82f7434230239a5c7659b557016 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:39:27 +0530 Subject: [PATCH 017/111] feat: add shadow bool field to text step --- src/components/organisms/BrowserWindow.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index c7e9fc0f..11fe8c55 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -13,6 +13,7 @@ import { useTranslation } from 'react-i18next'; interface ElementInfo { tagName: string; hasOnlyText?: boolean; + isShadowRoot?: boolean; innerText?: string; url?: string; imageUrl?: string; @@ -185,6 +186,7 @@ export const BrowserWindow = () => { addTextStep('', data, { selector: highlighterData.selector, tag: highlighterData.elementInfo?.tagName, + shadow: highlighterData.elementInfo?.isShadowRoot, attribute }); } else { @@ -192,7 +194,7 @@ export const BrowserWindow = () => { setAttributeOptions(options); setSelectedElement({ selector: highlighterData.selector, - info: highlighterData.elementInfo + info: highlighterData.elementInfo, }); setShowAttributeModal(true); } @@ -229,6 +231,7 @@ export const BrowserWindow = () => { selectorObj: { selector: highlighterData.selector, tag: highlighterData.elementInfo?.tagName, + shadow: highlighterData.elementInfo?.isShadowRoot, attribute } }; @@ -276,6 +279,7 @@ export const BrowserWindow = () => { addTextStep('', data, { selector: selectedElement.selector, tag: selectedElement.info?.tagName, + shadow: selectedElement.info?.isShadowRoot, attribute: attribute }); } @@ -288,6 +292,7 @@ export const BrowserWindow = () => { selectorObj: { selector: selectedElement.selector, tag: selectedElement.info?.tagName, + shadow: selectedElement.info?.isShadowRoot, attribute: attribute } }; From 1a6a481b578a7212743ceb199b934585583b5a0e Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:46:24 +0530 Subject: [PATCH 018/111] feat: add shadow selectors field type --- server/src/types/index.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/server/src/types/index.ts b/server/src/types/index.ts index f2e327ef..e882f69d 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -129,6 +129,13 @@ export interface BaseActionInfo { hasOnlyText: boolean; } +interface ShadowSelector { + full: string; + host: string; + element: string; + mode: string; +} + /** * Holds all the possible css selectors that has been found for an element. * @category Types @@ -143,6 +150,7 @@ export interface Selectors { hrefSelector: string|null; accessibilitySelector: string|null; formSelector: string|null; + shadowSelector: ShadowSelector | null; } /** @@ -156,7 +164,7 @@ export interface BaseAction extends BaseActionInfo{ associatedActions: ActionType[]; inputType: string | undefined; value: string | undefined; - selectors: { [key: string]: string | null }; + selectors: Selectors; timestamp: number; isPassword: boolean; /** From c3031811a63d21139c306781a6e64ee09d81b1de Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:52:58 +0530 Subject: [PATCH 019/111] feat: prioritize returning shadow selector --- server/src/workflow-management/utils.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/src/workflow-management/utils.ts b/server/src/workflow-management/utils.ts index b3dadd60..4f747127 100644 --- a/server/src/workflow-management/utils.ts +++ b/server/src/workflow-management/utils.ts @@ -12,6 +12,11 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.Hover: case ActionType.DragAndDrop: { const selectors = action.selectors; + + if (selectors?.shadowSelector?.full) { + return selectors.shadowSelector.full; + } + // less than 25 characters, and element only has text inside const textSelector = selectors?.text?.length != null && @@ -75,6 +80,11 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.Input: case ActionType.Keydown: { const selectors = action.selectors; + + if (selectors?.shadowSelector?.full) { + return selectors.shadowSelector.full; + } + return ( selectors.testIdSelector ?? selectors?.id ?? From cec2397a58256736b60467e40f1cc2e255667394 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:55:21 +0530 Subject: [PATCH 020/111] feat: change shadowDOM full selector path --- server/src/workflow-management/selector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 690fb0b1..164f5220 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -882,7 +882,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { }); return { - fullSelector: `${hostSelector} > ${shadowElementSelector}`, + fullSelector: `${hostSelector} >> ${shadowElementSelector}`, hostSelector, shadowElementSelector, mode: shadowContext.root.mode From 05c7921c9d574d4074b64f56319d6166e84b1dc3 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 03:05:07 +0530 Subject: [PATCH 021/111] feat: add shadowInfo in highlighter data --- .../workflow-management/classes/Generator.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 609541de..d1bccbe4 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -730,15 +730,26 @@ export class WorkflowGenerator { const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList); if (rect) { + const highlighterData = { + rect, + selector: displaySelector, + elementInfo, + // Include shadow DOM specific information + shadowInfo: elementInfo?.isShadowRoot ? { + mode: elementInfo.shadowRootMode, + content: elementInfo.shadowRootContent + } : null + }; + if (this.getList === true) { if (this.listSelector !== '') { const childSelectors = await getChildSelectors(page, this.listSelector || ''); - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo, childSelectors }) + this.socket.emit('highlighter', { ...highlighterData, childSelectors }) } else { - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); + this.socket.emit('highlighter', { ...highlighterData }); } } else { - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); + this.socket.emit('highlighter', { ...highlighterData }); } } } From 4031ded27947f7ac72f40c9203cb1498f0ac4460 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:26:53 +0530 Subject: [PATCH 022/111] feat: confirm instead of confirm pagination --- public/locales/en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/en.json b/public/locales/en.json index 70ded861..cb3f2789 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -159,7 +159,7 @@ "confirm": "Confirm", "discard": "Discard", "confirm_capture": "Confirm Capture", - "confirm_pagination": "Confirm Pagination", + "confirm_pagination": "Confirm", "confirm_limit": "Confirm Limit", "finish_capture": "Finish Capture", "back": "Back", From 8baad8d1f90b9b36594206e91dc37304c68d0a9b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:27:23 +0530 Subject: [PATCH 023/111] feat: confirm instead of confirm limit --- public/locales/en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/en.json b/public/locales/en.json index cb3f2789..bd8acce3 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -160,7 +160,7 @@ "discard": "Discard", "confirm_capture": "Confirm Capture", "confirm_pagination": "Confirm", - "confirm_limit": "Confirm Limit", + "confirm_limit": "Confirm", "finish_capture": "Finish Capture", "back": "Back", "finish": "Finish", From 663a4fd69c0f13998a61f13b1564cb8565bf942b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:29:24 +0530 Subject: [PATCH 024/111] feat(spanish): confirm instead of confirm pagination --- public/locales/es.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/es.json b/public/locales/es.json index 5cde0c70..089c10cd 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -159,7 +159,7 @@ "confirm": "Confirmar", "discard": "Descartar", "confirm_capture": "Confirmar Captura", - "confirm_pagination": "Confirmar Paginación", + "confirm_pagination": "Confirmar", "confirm_limit": "Confirmar Límite", "finish_capture": "Finalizar Captura", "back": "Atrás", From aded4dfebb42e765f8b22d1b01111c2f82fc73f3 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:29:41 +0530 Subject: [PATCH 025/111] feat(spanish): confirm instead of confirm limit --- public/locales/es.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/es.json b/public/locales/es.json index 089c10cd..94210880 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -160,7 +160,7 @@ "discard": "Descartar", "confirm_capture": "Confirmar Captura", "confirm_pagination": "Confirmar", - "confirm_limit": "Confirmar Límite", + "confirm_limit": "Confirmar", "finish_capture": "Finalizar Captura", "back": "Atrás", "finish": "Finalizar", From 09b974ca782e574240beab052c1a1e78e0316eac Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:30:38 +0530 Subject: [PATCH 026/111] feat(japanese): confirm instead of confirm pagination --- public/locales/ja.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/ja.json b/public/locales/ja.json index a0d18c67..e2204e14 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -159,7 +159,7 @@ "confirm": "確認", "discard": "破棄", "confirm_capture": "取得を確認", - "confirm_pagination": "ページネーションを確認", + "confirm_pagination": "確認", "confirm_limit": "制限を確認", "finish_capture": "取得を完了", "back": "戻る", From 20b31f36d99fe327075e9c104637b79b282edf87 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:30:57 +0530 Subject: [PATCH 027/111] feat(japanese): confirm instead of confirm limit --- public/locales/ja.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/ja.json b/public/locales/ja.json index e2204e14..0bcba967 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -160,7 +160,7 @@ "discard": "破棄", "confirm_capture": "取得を確認", "confirm_pagination": "確認", - "confirm_limit": "制限を確認", + "confirm_limit": "確認", "finish_capture": "取得を完了", "back": "戻る", "finish": "完了", From e78a61139d7fa828cf772ee4bc6c63889f77f3f8 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:31:50 +0530 Subject: [PATCH 028/111] feat(german): confirm instead of confirm pagination --- public/locales/de.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/de.json b/public/locales/de.json index debf80f6..e06b784a 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -158,7 +158,7 @@ "confirm": "Bestätigen", "discard": "Verwerfen", "confirm_capture": "Erfassung bestätigen", - "confirm_pagination": "Paginierung bestätigen", + "confirm_pagination": "Bestätigen", "confirm_limit": "Limit bestätigen", "finish_capture": "Erfassung abschließen", "back": "Zurück", From c753ce551200fa4b6431ff034694c6dcd343e516 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:32:16 +0530 Subject: [PATCH 029/111] feat(german): confirm instead of confirm limit --- public/locales/de.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/de.json b/public/locales/de.json index e06b784a..b9b4185b 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -159,7 +159,7 @@ "discard": "Verwerfen", "confirm_capture": "Erfassung bestätigen", "confirm_pagination": "Bestätigen", - "confirm_limit": "Limit bestätigen", + "confirm_limit": "Bestätigen", "finish_capture": "Erfassung abschließen", "back": "Zurück", "finish": "Fertig", From 634daeecf595cd2418913d9ae21689e55e5c2b39 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:32:56 +0530 Subject: [PATCH 030/111] feat(chinese): confirm instead of confirm pagination --- public/locales/zh.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/zh.json b/public/locales/zh.json index e55565f8..805396dc 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -159,7 +159,7 @@ "confirm": "确认", "discard": "放弃", "confirm_capture": "确认捕获", - "confirm_pagination": "确认分页", + "confirm_pagination": "确认", "confirm_limit": "确认限制", "finish_capture": "完成捕获", "back": "返回", From cd7f38f561a1ce2e4b596a25adeb48c5b6342f0f Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:33:12 +0530 Subject: [PATCH 031/111] feat(chinese): confirm instead of confirm limit --- public/locales/zh.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/zh.json b/public/locales/zh.json index 805396dc..a19fe439 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -160,7 +160,7 @@ "discard": "放弃", "confirm_capture": "确认捕获", "confirm_pagination": "确认", - "confirm_limit": "确认限制", + "confirm_limit": "确认", "finish_capture": "完成捕获", "back": "返回", "finish": "完成", From d2ab81e22959acc9fccf65f5845d8962170608d7 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 22:59:28 +0530 Subject: [PATCH 032/111] feat: add logic to get deeply nested shadowDOM elements --- server/src/workflow-management/selector.ts | 87 ++++++++++++---------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 164f5220..d957b879 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -823,7 +823,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return output; } - const MAX_DEPTH = 10; + // const MAX_DEPTH = 10; const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; @@ -832,60 +832,76 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { let current = element; let deepestElement = current; let depth = 0; + const MAX_DEPTH = 4; // Limit to 2 levels of shadow DOM while (current && depth < MAX_DEPTH) { const shadowRoot = current.shadowRoot; - if (shadowRoot) { - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement) break; - - deepestElement = shadowElement; - current = shadowElement; - } else { - break; - } + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepestElement = shadowElement; + current = shadowElement; depth++; } return deepestElement; }; + // Helper function to generate selectors for shadow DOM elements const genSelectorForShadowDOM = (element: HTMLElement) => { - const findShadowContext = (element: HTMLElement): { host: HTMLElement, root: ShadowRoot } | null => { - let current: HTMLElement | null = element; + // Get complete path up to document root + const getShadowPath = (el: HTMLElement) => { + const path = []; + let current = el; let depth = 0; + const MAX_DEPTH = 4; while (current && depth < MAX_DEPTH) { - // Check if element is inside a shadow root - if (current.parentNode instanceof ShadowRoot) { - return { - host: (current.parentNode as ShadowRoot).host as HTMLElement, - root: current.parentNode as ShadowRoot - }; + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; } - current = current.parentElement; - depth++; } - return null; + return path; }; - - const shadowContext = findShadowContext(element); - if (!shadowContext) return null; + + const shadowPath = getShadowPath(element); + if (shadowPath.length === 0) return null; try { - // Generate selector for the shadow host - const hostSelector = finder(shadowContext.host); + const selectorParts: string[] = []; - // Generate selector for the element within the shadow DOM - const shadowElementSelector = finder(element, { - root: shadowContext.root as unknown as Element + // Generate selector for each shadow DOM boundary + shadowPath.forEach((context, index) => { + // Get selector for the host element + const hostSelector = finder(context.host, { + root: index === 0 ? document.body : (shadowPath[index - 1].root as unknown as Element) + }); + + // For the last context, get selector for target element + if (index === shadowPath.length - 1) { + const elementSelector = finder(element, { + root: context.root as unknown as Element + }); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + selectorParts.push(hostSelector); + } }); return { - fullSelector: `${hostSelector} >> ${shadowElementSelector}`, - hostSelector, - shadowElementSelector, - mode: shadowContext.root.mode + fullSelector: selectorParts.join(' >> '), + mode: shadowPath[shadowPath.length - 1].root.mode }; } catch (e) { console.warn('Error generating shadow DOM selector:', e); @@ -963,12 +979,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { formSelector, // Shadow DOM selector shadowSelector: shadowSelector ? { - // Full selector that can traverse shadow DOM full: shadowSelector.fullSelector, - // Individual parts for more flexible usage - host: shadowSelector.hostSelector, - element: shadowSelector.shadowElementSelector, - // Shadow root mode (open/closed) mode: shadowSelector.mode } : null }; From 9287c296922478b77391d0c4930f4b478de4614e Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 23:02:21 +0530 Subject: [PATCH 033/111] feat: rm host and element info for shadow selector --- server/src/types/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/types/index.ts b/server/src/types/index.ts index e882f69d..151e3dd4 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -131,8 +131,6 @@ export interface BaseActionInfo { interface ShadowSelector { full: string; - host: string; - element: string; mode: string; } From e952d8f202278a67e86350e60542576b09260238 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 23:37:16 +0530 Subject: [PATCH 034/111] feat: add nested shadow-root scraping logic for scrapeSchema --- maxun-core/src/browserSide/scraper.js | 115 ++++++++++++++++---------- 1 file changed, 70 insertions(+), 45 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index ef979828..ad9295b8 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -188,8 +188,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @param {Object.} lists The named lists of HTML elements. * @returns {Array.>} */ - window.scrapeSchema = function (lists) { - // These utility functions remain unchanged as they work perfectly + window.scrapeSchema = function(lists) { function omap(object, f, kf = (x) => x) { return Object.fromEntries( Object.entries(object) @@ -203,29 +202,73 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, .filter(([k, v]) => f(k, v)), ); } - - function findElement(config) { - // If this is a shadow DOM query - if (config.shadow && config.selector.includes('>>')) { - const [hostSelector, shadowSelector] = config.selector.split('>>').map(s => s.trim()); - const host = document.querySelector(hostSelector); - return host?.shadowRoot?.querySelector(shadowSelector) || null; - } - // Otherwise, use regular querySelector - return document.querySelector(config.selector); - } - + function findAllElements(config) { - // If this is a shadow DOM query - if (config.shadow && config.selector.includes('>>')) { - const element = findElement(config); - return element ? [element] : []; - } - // Otherwise, use regular querySelectorAll - return Array.from(document.querySelectorAll(config.selector)); + if (!config.shadow || !config.selector.includes('>>')) { + return Array.from(document.querySelectorAll(config.selector)); + } + + // For shadow DOM, we'll get all possible combinations + const parts = config.selector.split('>>').map(s => s.trim()); + let currentElements = [document]; + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const nextElements = []; + + for (const element of currentElements) { + let targets; + if (i === 0) { + // First selector is queried from document + targets = Array.from(element.querySelectorAll(part)) + .filter(el => { + // Only include elements that either: + // 1. Have an open shadow root + // 2. Don't need shadow root (last part of selector) + if (i === parts.length - 1) return true; + const shadowRoot = el.shadowRoot; + return shadowRoot && shadowRoot.mode === 'open'; + }); + } else { + // For subsequent selectors, only use elements with open shadow roots + const shadowRoot = element.shadowRoot; + if (!shadowRoot || shadowRoot.mode !== 'open') continue; + + targets = Array.from(shadowRoot.querySelectorAll(part)); + } + nextElements.push(...targets); + } + + if (nextElements.length === 0) return []; + currentElements = nextElements; + } + + return currentElements; } + + // Helper function to extract value from element based on attribute + function getElementValue(element, attribute) { + if (!element) return null; + + switch (attribute) { + case 'href': { + const relativeHref = element.getAttribute('href'); + return relativeHref ? new URL(relativeHref, window.location.origin).href : null; + } + case 'src': { + const relativeSrc = element.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; + } + case 'innerText': + return element.innerText?.trim(); + case 'textContent': + return element.textContent?.trim(); + default: + return element.getAttribute(attribute) || element.innerText?.trim(); + } + } - // Modified to use our new element finding functions + // Get the seed key based on the maximum number of elements found function getSeedKey(listObj) { const maxLength = Math.max(...Object.values( omap(listObj, (x) => findAllElements(x).length) @@ -235,8 +278,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, )[0]; } - // This function remains unchanged as it works with DOM elements - // regardless of how they were found + // Find minimal bounding elements function getMBEs(elements) { return elements.map((element) => { let candidate = element; @@ -252,35 +294,18 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, }); } + // Main scraping logic const seedName = getSeedKey(lists); const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); return MBEs.map((mbe) => omap( lists, - (config, key) => { - // Use our new findAllElements function + (config) => { const elem = findAllElements(config) .find((elem) => mbe.contains(elem)); - - if (!elem) return undefined; - - switch (config.attribute) { - case 'href': { - const relativeHref = elem.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - } - case 'src': { - const relativeSrc = elem.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - } - case 'innerText': - return elem.innerText; - case 'textContent': - return elem.textContent; - default: - return elem.getAttribute(config.attribute) || elem.innerText; - } + + return elem ? getElementValue(elem, config.attribute) : undefined; }, (key) => key )) || []; From b757d9c4f8b0ea00d6eb6d6fe6c2e7c37407ed92 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 23:38:38 +0530 Subject: [PATCH 035/111] feat: add func to rm shadow selectors from workflow --- maxun-core/src/interpret.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 495ba2db..3cef8c29 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -663,11 +663,28 @@ export default class Interpreter extends EventEmitter { if (isApplicable) { return actionId; } + } } + + private removeShadowSelectors(workflow: Workflow) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + + // Check if step has where and selectors + if (step.where && Array.isArray(step.where.selectors)) { + // Filter out selectors that contain ">>" + step.where.selectors = step.where.selectors.filter(selector => !selector.includes('>>')); + } + } + + return workflow; } private async runLoop(p: Page, workflow: Workflow) { - const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + + // remove shadow selectors + workflowCopy = this.removeShadowSelectors(workflowCopy); // apply ad-blocker to the current page try { From 4b4074b70d352401120bd1fe0b37fbee7838bac5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 31 Dec 2024 01:52:38 +0530 Subject: [PATCH 036/111] feat: add logic to scrape multiple nested shadow dom elements --- maxun-core/src/browserSide/scraper.js | 172 +++++++++++++++----------- 1 file changed, 99 insertions(+), 73 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index ad9295b8..00f8cef7 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -204,69 +204,68 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } function findAllElements(config) { - if (!config.shadow || !config.selector.includes('>>')) { - return Array.from(document.querySelectorAll(config.selector)); - } - - // For shadow DOM, we'll get all possible combinations - const parts = config.selector.split('>>').map(s => s.trim()); - let currentElements = [document]; - - for (let i = 0; i < parts.length; i++) { - const part = parts[i]; - const nextElements = []; - - for (const element of currentElements) { - let targets; - if (i === 0) { - // First selector is queried from document - targets = Array.from(element.querySelectorAll(part)) - .filter(el => { - // Only include elements that either: - // 1. Have an open shadow root - // 2. Don't need shadow root (last part of selector) - if (i === parts.length - 1) return true; - const shadowRoot = el.shadowRoot; - return shadowRoot && shadowRoot.mode === 'open'; - }); - } else { - // For subsequent selectors, only use elements with open shadow roots - const shadowRoot = element.shadowRoot; - if (!shadowRoot || shadowRoot.mode !== 'open') continue; - - targets = Array.from(shadowRoot.querySelectorAll(part)); + if (!config.shadow || !config.selector.includes('>>')) { + return Array.from(document.querySelectorAll(config.selector)); + } + + // For shadow DOM, we'll get all possible combinations + const parts = config.selector.split('>>').map(s => s.trim()); + let currentElements = [document]; + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const nextElements = []; + + for (const element of currentElements) { + let targets; + if (i === 0) { + // First selector is queried from document + targets = Array.from(element.querySelectorAll(part)) + .filter(el => { + // Only include elements that either: + // 1. Have an open shadow root + // 2. Don't need shadow root (last part of selector) + if (i === parts.length - 1) return true; + const shadowRoot = el.shadowRoot; + return shadowRoot && shadowRoot.mode === 'open'; + }); + } else { + // For subsequent selectors, only use elements with open shadow roots + const shadowRoot = element.shadowRoot; + if (!shadowRoot || shadowRoot.mode !== 'open') continue; + + targets = Array.from(shadowRoot.querySelectorAll(part)); + } + nextElements.push(...targets); } - nextElements.push(...targets); - } - - if (nextElements.length === 0) return []; - currentElements = nextElements; - } - - return currentElements; + + if (nextElements.length === 0) return []; + currentElements = nextElements; + } + + return currentElements; } - // Helper function to extract value from element based on attribute - function getElementValue(element, attribute) { - if (!element) return null; - - switch (attribute) { - case 'href': { - const relativeHref = element.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - } - case 'src': { - const relativeSrc = element.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - } - case 'innerText': - return element.innerText?.trim(); - case 'textContent': - return element.textContent?.trim(); - default: - return element.getAttribute(attribute) || element.innerText?.trim(); - } - } + function getElementValue(element, attribute) { + if (!element) return null; + + switch (attribute) { + case 'href': { + const relativeHref = element.getAttribute('href'); + return relativeHref ? new URL(relativeHref, window.location.origin).href : null; + } + case 'src': { + const relativeSrc = element.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; + } + case 'innerText': + return element.innerText?.trim(); + case 'textContent': + return element.textContent?.trim(); + default: + return element.getAttribute(attribute) || element.innerText?.trim(); + } + } // Get the seed key based on the maximum number of elements found function getSeedKey(listObj) { @@ -280,26 +279,26 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, // Find minimal bounding elements function getMBEs(elements) { - return elements.map((element) => { - let candidate = element; - const isUniqueChild = (e) => elements - .filter((elem) => e.parentNode?.contains(elem)) - .length === 1; + return elements.map((element) => { + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => e.parentNode?.contains(elem)) + .length === 1; - while (candidate && isUniqueChild(candidate)) { - candidate = candidate.parentNode; - } + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } - return candidate; - }); + return candidate; + }); } - // Main scraping logic + // First try the MBE approach const seedName = getSeedKey(lists); const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); - - return MBEs.map((mbe) => omap( + + const mbeResults = MBEs.map((mbe) => omap( lists, (config) => { const elem = findAllElements(config) @@ -309,6 +308,33 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, }, (key) => key )) || []; + + // If MBE approach didn't find all elements, try independent scraping + if (mbeResults.some(result => Object.values(result).some(v => v === undefined))) { + // Fall back to independent scraping + const results = []; + const foundElements = new Map(); + + // Find all elements for each selector + Object.entries(lists).forEach(([key, config]) => { + const elements = findAllElements(config); + foundElements.set(key, elements); + }); + + // Create result objects for each found element + foundElements.forEach((elements, key) => { + elements.forEach((element, index) => { + if (!results[index]) { + results[index] = {}; + } + results[index][key] = getElementValue(element, lists[key].attribute); + }); + }); + + return results.filter(result => Object.keys(result).length > 0); + } + + return mbeResults; }; /** From 4a09ea66ff6c3c25c02b7997ed97f0ac4d677cd9 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 31 Dec 2024 12:26:09 +0530 Subject: [PATCH 037/111] feat: get deepest element rect coordinates --- server/src/workflow-management/selector.ts | 53 ++++++++++++++-------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index d957b879..910b3134 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -237,23 +237,30 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; // Traverse through shadow roots let current = element; - while (current) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; const el = getDeepestElementFromPoint(x, y); @@ -274,36 +281,45 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector }; } } + return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return rect; } else { const rect = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; // Traverse through shadow roots let current = element; - while (current) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; + // Handle element hierarchy traversal for list items while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -326,7 +342,6 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } const rectangle = element?.getBoundingClientRect(); - if (rectangle) { return { x: rectangle.x, @@ -342,14 +357,14 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return rect; } } catch (error) { const { message, stack } = error as Error; - logger.log('error', `Error while retrieving selector: ${message}`); - logger.log('error', `Stack: ${stack}`); + console.error('Error while retrieving selector:', message); + console.error('Stack:', stack); } }; From 4c0ad3ceed6a8b192d8e672b36908fd4db3871cd Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 31 Dec 2024 21:26:53 +0530 Subject: [PATCH 038/111] fix: avoid ui shift on api key reveal --- src/components/organisms/ApiKey.tsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/ApiKey.tsx b/src/components/organisms/ApiKey.tsx index 37a72764..0af27934 100644 --- a/src/components/organisms/ApiKey.tsx +++ b/src/components/organisms/ApiKey.tsx @@ -124,7 +124,11 @@ const ApiKeyManager = () => { {apiKeyName} - {showKey ? `${apiKey?.substring(0, 10)}...` : '***************'} + + + {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} + + From a3337d7fcc08a8436edadcb33b0214401a63e28d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 31 Dec 2024 21:27:12 +0530 Subject: [PATCH 039/111] fix: format --- src/components/organisms/ApiKey.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/ApiKey.tsx b/src/components/organisms/ApiKey.tsx index 0af27934..9d54fe5c 100644 --- a/src/components/organisms/ApiKey.tsx +++ b/src/components/organisms/ApiKey.tsx @@ -126,7 +126,7 @@ const ApiKeyManager = () => { {apiKeyName} - {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} + {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} From 42e13066bd7800043e6952ddaae06d62985c2ee4 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 1 Jan 2025 16:13:38 +0530 Subject: [PATCH 040/111] feat: add shadowDOM support for capture list selector generation --- server/src/workflow-management/selector.ts | 343 +++++++++++++++++---- 1 file changed, 286 insertions(+), 57 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 910b3134..713c05bc 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -1076,46 +1076,133 @@ interface SelectorResult { */ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => { + interface ShadowContext { + host: HTMLElement; + root: ShadowRoot; + element: HTMLElement; + } + try { if (!listSelector) { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { - function getNonUniqueSelector(element: HTMLElement): string { - let selector = element.tagName.toLowerCase(); + // Helper function to get deepest element, traversing shadow DOM + function getDeepestElementFromPoint(x: number, y: number): HTMLElement | null { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; - if (element.className) { - const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); - if (classes.length > 0) { - const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); - if (validClasses.length > 0) { - selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); - } - } + let current = element; + let deepestElement = current; + let depth = 0; + const MAX_DEPTH = 4; // Limit shadow DOM traversal depth + + while (current && depth < MAX_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepestElement = shadowElement; + current = shadowElement; + depth++; } + return deepestElement; + } + + // Generate basic selector from element's tag and classes + function getNonUniqueSelector(element: HTMLElement): string { + let selector = element.tagName.toLowerCase(); + + const className = typeof element.className === 'string' ? element.className : ''; + if (className) { + const classes = className.split(/\s+/) + .filter(cls => Boolean(cls) && !cls.startsWith('!') && !cls.includes(':')); + + if (classes.length > 0) { + selector += '.' + classes.map(cls => CSS.escape(cls)).join('.'); + } + } + return selector; } - function getSelectorPath(element: HTMLElement | null): string { - const path: string[] = []; + // Get complete shadow DOM path for an element + function getShadowPath(element: HTMLElement): ShadowContext[] { + const path: ShadowContext[] = []; + let current = element; let depth = 0; - const maxDepth = 2; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; + } + } + return path; + } - while (element && element !== document.body && depth < maxDepth) { - const selector = getNonUniqueSelector(element); + // Generate complete selector path for any element + function getSelectorPath(element: HTMLElement | null): string { + if (!element) return ''; + + // Check for shadow DOM path first + const shadowPath = getShadowPath(element); + if (shadowPath.length > 0) { + const selectorParts: string[] = []; + + // Build complete shadow DOM path + shadowPath.forEach((context, index) => { + const hostSelector = getNonUniqueSelector(context.host); + + if (index === shadowPath.length - 1) { + // For deepest shadow context, include target element + const elementSelector = getNonUniqueSelector(element); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + // For intermediate shadow boundaries + selectorParts.push(hostSelector); + } + }); + + return selectorParts.join(' >> '); + } + + // Regular DOM path generation + const path: string[] = []; + let currentElement = element; + let depth = 0; + const MAX_DEPTH = 2; + + while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) { + const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - element = element.parentElement; + + const parentElement = currentElement.parentElement; + if (!parentElement) break; + currentElement = parentElement; depth++; } return path.join(' > '); } - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + // Main logic to get element and generate selector + const originalEl = getDeepestElementFromPoint(x, y); if (!originalEl) return null; let element = originalEl; - // if (listSelector === '') { + // Handle parent traversal for better element targeting while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -1136,60 +1223,134 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates break; } } - // } const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; + return { generalSelector }; }, coordinates); + return selectors || { generalSelector: '' }; } else { + // When we have a list selector, we need special handling while maintaining shadow DOM support const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { - function getNonUniqueSelector(element: HTMLElement): string { - let selector = element.tagName.toLowerCase(); + // Helper function to get deepest element, traversing shadow DOM + function getDeepestElementFromPoint(x: number, y: number): HTMLElement | null { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; - if (element.className) { - const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); - if (classes.length > 0) { - const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); - if (validClasses.length > 0) { - selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); - } - } + let current = element; + let deepestElement = current; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepestElement = shadowElement; + current = shadowElement; + depth++; } + return deepestElement; + } + + // Generate basic selector from element's tag and classes + function getNonUniqueSelector(element: HTMLElement): string { + let selector = element.tagName.toLowerCase(); + + const className = typeof element.className === 'string' ? element.className : ''; + if (className) { + const classes = className.split(/\s+/) + .filter(cls => Boolean(cls) && !cls.startsWith('!') && !cls.includes(':')); + + if (classes.length > 0) { + selector += '.' + classes.map(cls => CSS.escape(cls)).join('.'); + } + } + return selector; } - function getSelectorPath(element: HTMLElement | null): string { - const path: string[] = []; + // Get complete shadow DOM path for an element + function getShadowPath(element: HTMLElement): ShadowContext[] { + const path: ShadowContext[] = []; + let current = element; let depth = 0; - const maxDepth = 2; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; + } + } + return path; + } - while (element && element !== document.body && depth < maxDepth) { - const selector = getNonUniqueSelector(element); + // Generate selector path specifically for list items + function getListItemSelectorPath(element: HTMLElement | null): string { + if (!element) return ''; + + // Check for shadow DOM path first + const shadowPath = getShadowPath(element); + if (shadowPath.length > 0) { + const selectorParts: string[] = []; + + shadowPath.forEach((context, index) => { + const hostSelector = getNonUniqueSelector(context.host); + + if (index === shadowPath.length - 1) { + const elementSelector = getNonUniqueSelector(element); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + selectorParts.push(hostSelector); + } + }); + + return selectorParts.join(' >> '); + } + + // For list items, we want a shallower path to better match list patterns + const path: string[] = []; + let currentElement = element; + let depth = 0; + const MAX_LIST_DEPTH = 2; // Keeping shallow depth for list items + + while (currentElement && currentElement !== document.body && depth < MAX_LIST_DEPTH) { + const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - element = element.parentElement; + + if (!currentElement.parentElement) break; + currentElement = currentElement.parentElement; depth++; } return path.join(' > '); } - const originalEl = document.elementFromPoint(x, y) as HTMLElement; - if (!originalEl) return null; + // Main logic for list item selection + const originalEl = getDeepestElementFromPoint(x, y); + if (!originalEl) return { generalSelector: '' }; let element = originalEl; - const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; - }, coordinates); - return selectors || { generalSelector: '' }; - } + const generalSelector = getListItemSelectorPath(element); + return { generalSelector }; + }, coordinates); + return selectors || { generalSelector: '' }; + } } catch (error) { console.error('Error in getNonUniqueSelectors:', error); return { generalSelector: '' }; @@ -1218,42 +1379,110 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro } // Function to generate selector path from an element to its parent - function getSelectorPath(element: HTMLElement | null): string { + function getSelectorPath(element: HTMLElement): string { if (!element || !element.parentElement) return ''; const parentSelector = getNonUniqueSelector(element.parentElement); const elementSelector = getNonUniqueSelector(element); + // Check if element is in shadow DOM + const rootNode = element.getRootNode(); + if (rootNode instanceof ShadowRoot) { + const hostSelector = getNonUniqueSelector(rootNode.host as HTMLElement); + return `${hostSelector} >> ${elementSelector}`; + } + return `${parentSelector} > ${elementSelector}`; } - // Function to recursively get all descendant selectors + // Function to get all shadow DOM children of an element + function getShadowChildren(element: HTMLElement): HTMLElement[] { + const children: HTMLElement[] = []; + + // Check if element has shadow root + const shadowRoot = element.shadowRoot; + if (shadowRoot) { + // Get all elements in the shadow DOM + const shadowElements = Array.from(shadowRoot.querySelectorAll('*')) as HTMLElement[]; + children.push(...shadowElements); + } + + return children; + } + + // Function to recursively get all descendant selectors including shadow DOM function getAllDescendantSelectors(element: HTMLElement): string[] { let selectors: string[] = []; + + // Handle regular DOM children const children = Array.from(element.children) as HTMLElement[]; - for (const child of children) { const childPath = getSelectorPath(child); if (childPath) { - selectors.push(childPath); // Add direct child path - selectors = selectors.concat(getAllDescendantSelectors(child)); // Recursively process descendants + selectors.push(childPath); + // Recursively process regular DOM descendants + selectors = selectors.concat(getAllDescendantSelectors(child)); + + // Check for shadow DOM in this child + const shadowChildren = getShadowChildren(child); + for (const shadowChild of shadowChildren) { + const shadowPath = getSelectorPath(shadowChild); + if (shadowPath) { + selectors.push(shadowPath); + // Recursively process shadow DOM descendants + selectors = selectors.concat(getAllDescendantSelectors(shadowChild)); + } + } + } + } + + // Handle direct shadow DOM children of the current element + const shadowChildren = getShadowChildren(element); + for (const shadowChild of shadowChildren) { + const shadowPath = getSelectorPath(shadowChild); + if (shadowPath) { + selectors.push(shadowPath); + selectors = selectors.concat(getAllDescendantSelectors(shadowChild)); } } return selectors; } - // Find all occurrences of the parent selector in the DOM - const parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; - const allChildSelectors = new Set(); // Use a set to ensure uniqueness + // Split the parent selector if it contains shadow DOM parts + const selectorParts = parentSelector.split('>>').map(part => part.trim()); + let parentElements: HTMLElement[] = []; + + // Handle shadow DOM traversal if needed + if (selectorParts.length > 1) { + // Start with the host elements + parentElements = Array.from(document.querySelectorAll(selectorParts[0])) as HTMLElement[]; + + // Traverse through shadow DOM parts + for (let i = 1; i < selectorParts.length; i++) { + const newParentElements: HTMLElement[] = []; + for (const element of parentElements) { + if (element.shadowRoot) { + const shadowChildren = Array.from(element.shadowRoot.querySelectorAll(selectorParts[i])) as HTMLElement[]; + newParentElements.push(...shadowChildren); + } + } + parentElements = newParentElements; + } + } else { + // Regular DOM selector + parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; + } + + const allChildSelectors = new Set(); // Process each parent element and its descendants parentElements.forEach((parentElement) => { const descendantSelectors = getAllDescendantSelectors(parentElement); - descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); // Add selectors to the set + descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); }); - return Array.from(allChildSelectors); // Convert the set back to an array + return Array.from(allChildSelectors); }, parentSelector); return childSelectors || []; From c6105b4ee226a562f80b7054fffb3acac23e9d23 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 1 Jan 2025 16:15:13 +0530 Subject: [PATCH 041/111] feat: generate highlighter for shadoDOM and mixedDOM elements --- src/components/organisms/BrowserWindow.tsx | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 11fe8c55..442b7e50 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -120,7 +120,13 @@ export const BrowserWindow = () => { const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => { if (getList === true) { if (listSelector) { + console.log("LIST SELEECTORRRRR: ", listSelector); + console.log("DATA SELEECTORRRRR: ", data.selector); + console.log("CHILDREEENN SELECORRRR: ", data.childSelectors); socket?.emit('listSelector', { selector: listSelector }); + + const hasValidChildSelectors = Array.isArray(data.childSelectors) && data.childSelectors.length > 0; + if (limitMode) { setHighlighterData(null); } else if (paginationMode) { @@ -133,7 +139,29 @@ export const BrowserWindow = () => { } else if (data.childSelectors && data.childSelectors.includes(data.selector)) { // highlight only valid child elements within the listSelector setHighlighterData(data); - } else { + } else if (data.elementInfo?.isShadowRoot && data.childSelectors) { + // New case: Handle pure Shadow DOM elements + // Check if the selector matches any shadow root child selectors + const isShadowChild = data.childSelectors.some(childSelector => + data.selector.includes('>>') && // Shadow DOM uses >> for piercing + childSelector.split('>>').some(part => + data.selector.includes(part.trim()) + ) + ); + setHighlighterData(isShadowChild ? data : null); + } else if (data.selector.includes('>>') && hasValidChildSelectors) { + // New case: Handle mixed DOM cases + // Split the selector into parts and check each against child selectors + const selectorParts = data.selector.split('>>').map(part => part.trim()); + const isValidMixedSelector = selectorParts.some(part => + // Now we know data.childSelectors is defined + data.childSelectors!.some(childSelector => + childSelector.includes(part) + ) + ); + setHighlighterData(isValidMixedSelector ? data : null); + } + else { // if !valid child in normal mode, clear the highlighter setHighlighterData(null); } From 8db6279f05c25e671098f959bfe0b79f5d06cb4f Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 1 Jan 2025 16:39:36 +0530 Subject: [PATCH 042/111] feat: add shadowDOM support for scraping list --- maxun-core/src/browserSide/scraper.js | 146 ++++++++++++++++++++------ 1 file changed, 113 insertions(+), 33 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 00f8cef7..caa783c8 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -349,27 +349,100 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { const scrapedData = []; - while (scrapedData.length < limit) { - let parentElements = Array.from(document.querySelectorAll(listSelector)); + // Helper function to query through Shadow DOM + const queryShadowDOM = (rootElement, selector) => { + // Split the selector by Shadow DOM delimiter + const parts = selector.split('>>').map(part => part.trim()); + let currentElement = rootElement; + + // Traverse through each part of the selector + for (let i = 0; i < parts.length; i++) { + if (!currentElement) return null; + + // If we're at the document level (first part) + if (!currentElement.querySelector && !currentElement.shadowRoot) { + currentElement = document.querySelector(parts[i]); + continue; + } + + // Try to find element in regular DOM first + let nextElement = currentElement.querySelector(parts[i]); + + // If not found, check shadow DOM + if (!nextElement && currentElement.shadowRoot) { + nextElement = currentElement.shadowRoot.querySelector(parts[i]); + } + + // If still not found, try to find in shadow DOM of all child elements + if (!nextElement) { + const allChildren = Array.from(currentElement.children || []); + for (const child of allChildren) { + if (child.shadowRoot) { + nextElement = child.shadowRoot.querySelector(parts[i]); + if (nextElement) break; + } + } + } + + currentElement = nextElement; + } + + return currentElement; + }; + + // Helper function to query all elements through Shadow DOM + const queryShadowDOMAll = (rootElement, selector) => { + const parts = selector.split('>>').map(part => part.trim()); + let currentElements = [rootElement]; - // If we only got one element or none, try a more generic approach + for (const part of parts) { + const nextElements = []; + + for (const element of currentElements) { + // Check regular DOM + if (element.querySelectorAll) { + nextElements.push(...element.querySelectorAll(part)); + } + + // Check shadow DOM + if (element.shadowRoot) { + nextElements.push(...element.shadowRoot.querySelectorAll(part)); + } + + // Check shadow DOM of children + const children = Array.from(element.children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElements.push(...child.shadowRoot.querySelectorAll(part)); + } + } + } + + currentElements = nextElements; + } + + return currentElements; + }; + + while (scrapedData.length < limit) { + // Use our shadow DOM query function to get parent elements + let parentElements = queryShadowDOMAll(document, listSelector); + parentElements = Array.from(parentElements); + + // Handle the case when we don't find enough elements if (limit > 1 && parentElements.length <= 1) { - const [containerSelector, _] = listSelector.split('>').map(s => s.trim()); - const container = document.querySelector(containerSelector); + const [containerSelector, ...rest] = listSelector.split('>>').map(s => s.trim()); + const container = queryShadowDOM(document, containerSelector); if (container) { - const allChildren = Array.from(container.children); + const allChildren = Array.from(container.children || []); + const firstMatch = queryShadowDOM(document, listSelector); - const firstMatch = document.querySelector(listSelector); if (firstMatch) { - // Get classes from the first matching element - const firstMatchClasses = Array.from(firstMatch.classList); + const firstMatchClasses = Array.from(firstMatch.classList || []); - // Find similar elements by matching most of their classes parentElements = allChildren.filter(element => { - const elementClasses = Array.from(element.classList); - - // Element should share at least 70% of classes with the first match + const elementClasses = Array.from(element.classList || []); const commonClasses = firstMatchClasses.filter(cls => elementClasses.includes(cls)); return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7); @@ -378,42 +451,49 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } } - // Iterate through each parent element + // Process each parent element for (const parent of parentElements) { if (scrapedData.length >= limit) break; const record = {}; - // For each field, select the corresponding element within the parent + // Process each field using shadow DOM querying for (const [label, { selector, attribute }] of Object.entries(fields)) { - const fieldElement = parent.querySelector(selector); + // Use relative selector from parent + const relativeSelector = selector.split('>>').slice(-1)[0]; + const fieldElement = queryShadowDOM(parent, relativeSelector); if (fieldElement) { - if (attribute === 'innerText') { - record[label] = fieldElement.innerText.trim(); - } else if (attribute === 'innerHTML') { - record[label] = fieldElement.innerHTML.trim(); - } else if (attribute === 'src') { - // Handle relative 'src' URLs - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, window.location.origin).href : null; - } else if (attribute === 'href') { - // Handle relative 'href' URLs - const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, window.location.origin).href : null; - } else { - record[label] = fieldElement.getAttribute(attribute); + switch (attribute) { + case 'innerText': + record[label] = fieldElement.innerText?.trim() || ''; + break; + case 'innerHTML': + record[label] = fieldElement.innerHTML?.trim() || ''; + break; + case 'src': + const src = fieldElement.getAttribute('src'); + record[label] = src ? new URL(src, window.location.origin).href : null; + break; + case 'href': + const href = fieldElement.getAttribute('href'); + record[label] = href ? new URL(href, window.location.origin).href : null; + break; + default: + record[label] = fieldElement.getAttribute(attribute); } } } - scrapedData.push(record); + + if (Object.keys(record).length > 0) { + scrapedData.push(record); + } } - // If we've processed all available elements and still haven't reached the limit, - // break to avoid infinite loop if (parentElements.length === 0 || scrapedData.length >= parentElements.length) { break; } } + return scrapedData; }; From 24915a93410aa1f309f27a7e18dd1bb0729f1b6f Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 22:55:33 +0530 Subject: [PATCH 043/111] feat: get notify from global info store --- package.json | 1 - src/components/molecules/ScheduleSettings.tsx | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index e89f13de..fc5e9edb 100644 --- a/package.json +++ b/package.json @@ -46,7 +46,6 @@ "jwt-decode": "^4.0.0", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.7", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", diff --git a/src/components/molecules/ScheduleSettings.tsx b/src/components/molecules/ScheduleSettings.tsx index 3af0072f..ea78720c 100644 --- a/src/components/molecules/ScheduleSettings.tsx +++ b/src/components/molecules/ScheduleSettings.tsx @@ -79,7 +79,7 @@ export const ScheduleSettingsModal = ({ isOpen, handleStart, handleClose, initia 'SUNDAY' ]; - const { recordingId } = useGlobalInfoStore(); + const { recordingId, notify } = useGlobalInfoStore(); const deleteRobotSchedule = () => { if (recordingId) { From 8c2b92483b1a86996d970293f2a167ea05157af6 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 22:57:23 +0530 Subject: [PATCH 044/111] feat: notify on schedule delete --- src/components/molecules/ScheduleSettings.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/components/molecules/ScheduleSettings.tsx b/src/components/molecules/ScheduleSettings.tsx index ea78720c..917696c9 100644 --- a/src/components/molecules/ScheduleSettings.tsx +++ b/src/components/molecules/ScheduleSettings.tsx @@ -85,6 +85,7 @@ export const ScheduleSettingsModal = ({ isOpen, handleStart, handleClose, initia if (recordingId) { deleteSchedule(recordingId); setSchedule(null); + notify('success', t('Schedule deleted successfully')); } else { console.error('No recording id provided'); } From e61798855564427223f4c0177f45fedcbf854814 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:02:17 +0530 Subject: [PATCH 045/111] fix: revert local maxun-core changes --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index fc5e9edb..e89f13de 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "jwt-decode": "^4.0.0", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", + "maxun-core": "^0.0.7", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", From 9b2ea1f5353466f63a92fefc8921bc1271619339 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:29:06 +0530 Subject: [PATCH 046/111] chore: cleanup space --- src/components/molecules/RecordingsTable.tsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/components/molecules/RecordingsTable.tsx b/src/components/molecules/RecordingsTable.tsx index 01bc524b..ecda3c8d 100644 --- a/src/components/molecules/RecordingsTable.tsx +++ b/src/components/molecules/RecordingsTable.tsx @@ -33,10 +33,6 @@ interface Column { format?: (value: string) => string; } - - - - interface Data { id: string; name: string; From cc6cc8ff8d03b17e942335bcd3770ad6d12f49aa Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:30:00 +0530 Subject: [PATCH 047/111] fix: format --- src/components/molecules/RecordingsTable.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/molecules/RecordingsTable.tsx b/src/components/molecules/RecordingsTable.tsx index ecda3c8d..f8a0ba37 100644 --- a/src/components/molecules/RecordingsTable.tsx +++ b/src/components/molecules/RecordingsTable.tsx @@ -437,7 +437,6 @@ const OptionsButton = ({ handleEdit, handleDelete, handleDuplicate }: OptionsBut {t('recordingtable.duplicate')} - ); From 6d2507982077daceebcba4eba02d44c0c28bab58 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:30:46 +0530 Subject: [PATCH 048/111] fix: format --- src/api/storage.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/api/storage.ts b/src/api/storage.ts index 4b2f4e80..9ae3bc47 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -5,11 +5,6 @@ import { ScheduleSettings } from "../components/molecules/ScheduleSettings"; import { CreateRunResponse, ScheduleRunResponse } from "../pages/MainPage"; import { apiUrl } from "../apiConfig"; - - - - - export const getStoredRecordings = async (): Promise => { try { const response = await axios.get(`${apiUrl}/storage/recordings`); From 3b9e30ddae52efa4e450a948c393132dee883b67 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:31:21 +0530 Subject: [PATCH 049/111] fix: format --- src/api/storage.ts | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/api/storage.ts b/src/api/storage.ts index 9ae3bc47..18c793c0 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -77,11 +77,7 @@ export const getStoredRecording = async (id: string) => { } } - - export const checkRunsForRecording = async (id: string): Promise => { - - try { const response = await axios.get(`${apiUrl}/storage/recordings/${id}/runs`); @@ -94,32 +90,26 @@ export const checkRunsForRecording = async (id: string): Promise => { } }; - export const deleteRecordingFromStorage = async (id: string): Promise => { - const hasRuns = await checkRunsForRecording(id); - + if (hasRuns) { - + return false; } try { const response = await axios.delete(`${apiUrl}/storage/recordings/${id}`); if (response.status === 200) { - + return true; } else { throw new Error(`Couldn't delete stored recording ${id}`); } } catch (error: any) { console.log(error); - + return false; } - - - - }; export const deleteRunFromStorage = async (id: string): Promise => { @@ -154,7 +144,7 @@ export const createRunForStoredRecording = async (id: string, settings: RunSetti try { const response = await axios.put( `${apiUrl}/storage/runs/${id}`, - { ...settings }); + { ...settings }); if (response.status === 200) { return response.data; } else { From 52aefd1c0f674cb0cac87e25124dd8c56027712b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:31:37 +0530 Subject: [PATCH 050/111] fix: format --- src/api/workflow.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/api/workflow.ts b/src/api/workflow.ts index 03b677b1..40ac0d99 100644 --- a/src/api/workflow.ts +++ b/src/api/workflow.ts @@ -3,7 +3,7 @@ import { emptyWorkflow } from "../shared/constants"; import { default as axios, AxiosResponse } from "axios"; import { apiUrl } from "../apiConfig"; -export const getActiveWorkflow = async(id: string) : Promise => { +export const getActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/${id}`) if (response.status === 200) { @@ -11,13 +11,13 @@ export const getActiveWorkflow = async(id: string) : Promise => { } else { throw new Error('Something went wrong when fetching a recorded workflow'); } - } catch(error: any) { + } catch (error: any) { console.log(error); return emptyWorkflow; } }; -export const getParamsOfActiveWorkflow = async(id: string) : Promise => { +export const getParamsOfActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/params/${id}`) if (response.status === 200) { @@ -25,15 +25,15 @@ export const getParamsOfActiveWorkflow = async(id: string) : Promise => { +export const deletePair = async (index: number): Promise => { try { - const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); + const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); if (response.status === 200) { return response.data; } else { @@ -45,11 +45,11 @@ export const deletePair = async(index: number): Promise => { } }; -export const AddPair = async(index: number, pair: WhereWhatPair): Promise => { +export const AddPair = async (index: number, pair: WhereWhatPair): Promise => { try { const response = await axios.post(`${apiUrl}/workflow/pair/${index}`, { pair, - }, {headers: {'Content-Type': 'application/json'}}); + }, { headers: { 'Content-Type': 'application/json' } }); if (response.status === 200) { return response.data; } else { @@ -61,11 +61,11 @@ export const AddPair = async(index: number, pair: WhereWhatPair): Promise => { +export const UpdatePair = async (index: number, pair: WhereWhatPair): Promise => { try { const response = await axios.put(`${apiUrl}/workflow/pair/${index}`, { pair, - }, {headers: {'Content-Type': 'application/json'}}); + }, { headers: { 'Content-Type': 'application/json' } }); if (response.status === 200) { return response.data; } else { From 735b33b84727439c4c354021a06016166661902c Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:53:58 +0530 Subject: [PATCH 051/111] fix: typo --- src/components/molecules/NavBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 142d45ab..8577f30e 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -318,7 +318,7 @@ export const NavBar: React.FC = ({ { window.open('https://x.com/maxun_io?ref=app', '_blank'); }}> - Twiiter (X) + Twiter (X) {t('navbar.menu_items.language')} From 22a99ff8b56788635581517bc863a3115b6b79db Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:54:28 +0530 Subject: [PATCH 052/111] fix: twitter typo --- src/components/molecules/NavBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 8577f30e..8aeeb05d 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -318,7 +318,7 @@ export const NavBar: React.FC = ({ { window.open('https://x.com/maxun_io?ref=app', '_blank'); }}> - Twiter (X) + Twitter (X) {t('navbar.menu_items.language')} From c287340f845e9429fc5534dc38af6257b4d75826 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 2 Jan 2025 14:17:19 +0530 Subject: [PATCH 053/111] feat: shadowDOM support for table and non table list scraping --- maxun-core/src/browserSide/scraper.js | 347 +++++++++++++++++++++----- 1 file changed, 281 insertions(+), 66 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index caa783c8..ff5a1938 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -347,33 +347,29 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { - const scrapedData = []; - - // Helper function to query through Shadow DOM + // Shadow DOM query functions remain unchanged const queryShadowDOM = (rootElement, selector) => { - // Split the selector by Shadow DOM delimiter + if (!selector.includes('>>')) { + return rootElement.querySelector(selector); + } + const parts = selector.split('>>').map(part => part.trim()); let currentElement = rootElement; - // Traverse through each part of the selector for (let i = 0; i < parts.length; i++) { if (!currentElement) return null; - // If we're at the document level (first part) if (!currentElement.querySelector && !currentElement.shadowRoot) { currentElement = document.querySelector(parts[i]); continue; } - // Try to find element in regular DOM first let nextElement = currentElement.querySelector(parts[i]); - // If not found, check shadow DOM if (!nextElement && currentElement.shadowRoot) { nextElement = currentElement.shadowRoot.querySelector(parts[i]); } - // If still not found, try to find in shadow DOM of all child elements if (!nextElement) { const allChildren = Array.from(currentElement.children || []); for (const child of allChildren) { @@ -390,8 +386,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return currentElement; }; - // Helper function to query all elements through Shadow DOM const queryShadowDOMAll = (rootElement, selector) => { + if (!selector.includes('>>')) { + return rootElement.querySelectorAll(selector); + } + const parts = selector.split('>>').map(part => part.trim()); let currentElements = [rootElement]; @@ -399,17 +398,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const nextElements = []; for (const element of currentElements) { - // Check regular DOM if (element.querySelectorAll) { nextElements.push(...element.querySelectorAll(part)); } - // Check shadow DOM if (element.shadowRoot) { nextElements.push(...element.shadowRoot.querySelectorAll(part)); } - // Check shadow DOM of children const children = Array.from(element.children || []); for (const child of children) { if (child.shadowRoot) { @@ -424,76 +420,295 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return currentElements; }; - while (scrapedData.length < limit) { - // Use our shadow DOM query function to get parent elements - let parentElements = queryShadowDOMAll(document, listSelector); - parentElements = Array.from(parentElements); + // Enhanced table processing helper functions with shadow DOM support + function extractValue(element, attribute) { + if (!element) return null; + + // Check for shadow root first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent && shadowContent.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === 'innerText') { + return element.innerText.trim(); + } else if (attribute === 'innerHTML') { + return element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + const attrValue = element.getAttribute(attribute); + return attrValue ? new URL(attrValue, window.location.origin).href : null; + } + return element.getAttribute(attribute); + } - // Handle the case when we don't find enough elements - if (limit > 1 && parentElements.length <= 1) { - const [containerSelector, ...rest] = listSelector.split('>>').map(s => s.trim()); - const container = queryShadowDOM(document, containerSelector); + function findTableAncestor(element) { + let currentElement = element; + const MAX_DEPTH = 5; + let depth = 0; + + while (currentElement && depth < MAX_DEPTH) { + // Check if current element is in shadow DOM + if (currentElement.getRootNode() instanceof ShadowRoot) { + currentElement = currentElement.getRootNode().host; + continue; + } - if (container) { - const allChildren = Array.from(container.children || []); - const firstMatch = queryShadowDOM(document, listSelector); - - if (firstMatch) { - const firstMatchClasses = Array.from(firstMatch.classList || []); + if (currentElement.tagName === 'TD') { + return { type: 'TD', element: currentElement }; + } else if (currentElement.tagName === 'TR') { + return { type: 'TR', element: currentElement }; + } + currentElement = currentElement.parentElement; + depth++; + } + return null; + } + + function getCellIndex(td) { + let index = 0; + let sibling = td; + + // Handle shadow DOM case + if (td.getRootNode() instanceof ShadowRoot) { + const shadowRoot = td.getRootNode(); + const allCells = Array.from(shadowRoot.querySelectorAll('td')); + return allCells.indexOf(td); + } + + while (sibling = sibling.previousElementSibling) { + index++; + } + return index; + } + + function hasThElement(row, tableFields) { + for (const [label, { selector }] of Object.entries(tableFields)) { + const element = queryShadowDOM(row, selector); + if (element) { + let current = element; + while (current && current !== row) { + // Check if we're in shadow DOM + if (current.getRootNode() instanceof ShadowRoot) { + current = current.getRootNode().host; + continue; + } - parentElements = allChildren.filter(element => { - const elementClasses = Array.from(element.classList || []); - const commonClasses = firstMatchClasses.filter(cls => - elementClasses.includes(cls)); - return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7); - }); + if (current.tagName === 'TH') { + return true; + } + current = current.parentElement; } } } + return false; + } - // Process each parent element - for (const parent of parentElements) { - if (scrapedData.length >= limit) break; - const record = {}; + function filterRowsBasedOnTag(rows, tableFields) { + for (const row of rows) { + if (hasThElement(row, tableFields)) { + return rows; + } + } + // Include shadow DOM in TH search + return rows.filter(row => { + const directTH = row.getElementsByTagName('TH').length === 0; + const shadowTH = row.shadowRoot ? + row.shadowRoot.querySelector('th') === null : true; + return directTH && shadowTH; + }); + } - // Process each field using shadow DOM querying - for (const [label, { selector, attribute }] of Object.entries(fields)) { - // Use relative selector from parent - const relativeSelector = selector.split('>>').slice(-1)[0]; - const fieldElement = queryShadowDOM(parent, relativeSelector); + // Class similarity functions remain unchanged + function calculateClassSimilarity(classList1, classList2) { + const set1 = new Set(classList1); + const set2 = new Set(classList2); + const intersection = new Set([...set1].filter(x => set2.has(x))); + const union = new Set([...set1, ...set2]); + return intersection.size / union.size; + } - if (fieldElement) { - switch (attribute) { - case 'innerText': - record[label] = fieldElement.innerText?.trim() || ''; - break; - case 'innerHTML': - record[label] = fieldElement.innerHTML?.trim() || ''; - break; - case 'src': - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, window.location.origin).href : null; - break; - case 'href': - const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, window.location.origin).href : null; - break; - default: - record[label] = fieldElement.getAttribute(attribute); + function findSimilarElements(baseElement, similarityThreshold = 0.7) { + const baseClasses = Array.from(baseElement.classList); + if (baseClasses.length === 0) return []; + const potentialElements = document.getElementsByTagName(baseElement.tagName); + return Array.from(potentialElements).filter(element => { + if (element === baseElement) return false; + const similarity = calculateClassSimilarity( + baseClasses, + Array.from(element.classList) + ); + return similarity >= similarityThreshold; + }); + } + + // Main scraping logic with shadow DOM support + let containers = queryShadowDOMAll(document, listSelector); + containers = Array.from(containers); + + if (containers.length === 0) return []; + + if (limit > 1 && containers.length === 1) { + const baseContainer = containers[0]; + const similarContainers = findSimilarElements(baseContainer); + + if (similarContainers.length > 0) { + const newContainers = similarContainers.filter(container => + !container.matches(listSelector) + ); + containers = [...containers, ...newContainers]; + } + } + + const containerFields = containers.map(() => ({ + tableFields: {}, + nonTableFields: {} + })); + + // Classify fields + containers.forEach((container, containerIndex) => { + for (const [label, field] of Object.entries(fields)) { + const sampleElement = queryShadowDOM(container, field.selector); + + if (sampleElement) { + const ancestor = findTableAncestor(sampleElement); + if (ancestor) { + containerFields[containerIndex].tableFields[label] = { + ...field, + tableContext: ancestor.type, + cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 + }; + } else { + containerFields[containerIndex].nonTableFields[label] = field; + } + } else { + containerFields[containerIndex].nonTableFields[label] = field; + } + } + }); + + const tableData = []; + const nonTableData = []; + + // Process table data with shadow DOM support + for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + const container = containers[containerIndex]; + const { tableFields } = containerFields[containerIndex]; + + if (Object.keys(tableFields).length > 0) { + const firstField = Object.values(tableFields)[0]; + const firstElement = queryShadowDOM(container, firstField.selector); + let tableContext = firstElement; + + // Find table context including shadow DOM + while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { + if (tableContext.getRootNode() instanceof ShadowRoot) { + tableContext = tableContext.getRootNode().host; + } else { + tableContext = tableContext.parentElement; + } + } + + if (tableContext) { + // Get rows from both regular DOM and shadow DOM + const rows = []; + if (tableContext.shadowRoot) { + rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); + } + rows.push(...tableContext.getElementsByTagName('TR')); + + const processedRows = filterRowsBasedOnTag(rows, tableFields); + + for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { + const record = {}; + const currentRow = processedRows[rowIndex]; + + for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { + let element = null; + + if (cellIndex >= 0) { + let td = currentRow.children[cellIndex]; + + // Check shadow DOM for td + if (!td && currentRow.shadowRoot) { + const shadowCells = currentRow.shadowRoot.children; + if (shadowCells && shadowCells.length > cellIndex) { + td = shadowCells[cellIndex]; + } + } + + if (td) { + element = queryShadowDOM(td, selector); + + if (!element && selector.split(">").pop().includes('td:nth-child')) { + element = td; + } + + if (!element) { + const tagOnlySelector = selector.split('.')[0]; + element = queryShadowDOM(td, tagOnlySelector); + } + + if (!element) { + let currentElement = td; + while (currentElement && currentElement.children.length > 0) { + let foundContentChild = false; + for (const child of currentElement.children) { + if (extractValue(child, attribute)) { + currentElement = child; + foundContentChild = true; + break; + } + } + if (!foundContentChild) break; + } + element = currentElement; + } + } + } else { + element = queryShadowDOM(currentRow, selector); + } + + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + tableData.push(record); } } } - - if (Object.keys(record).length > 0) { - scrapedData.push(record); - } } + } - if (parentElements.length === 0 || scrapedData.length >= parentElements.length) { - break; + // Non-table data scraping remains unchanged + for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + if (nonTableData.length >= limit) break; + + const container = containers[containerIndex]; + const { nonTableFields } = containerFields[containerIndex]; + + if (Object.keys(nonTableFields).length > 0) { + const record = {}; + + for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { + const relativeSelector = selector.split('>>').slice(-1)[0]; + const element = queryShadowDOM(container, relativeSelector); + + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + nonTableData.push(record); + } } } + const scrapedData = [...tableData, ...nonTableData]; return scrapedData; }; From ec0bc75097c287a9ffce1b0fcc47600a96f781c8 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Thu, 2 Jan 2025 18:12:10 +0530 Subject: [PATCH 054/111] docs: update website to maxun.dev --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cebcedd2..47e170b5 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

- Website | + Website | Discord | Twitter | Join Maxun Cloud | From b6faf5cf17736dcc99ffa0b146031f23ccc55f80 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 2 Jan 2025 19:35:03 +0530 Subject: [PATCH 055/111] feat: add iframeSelector generation logic for capture text --- server/src/workflow-management/selector.ts | 471 +++++++++++---------- 1 file changed, 242 insertions(+), 229 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index dd869f3d..6ed6a997 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,247 +23,110 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Helper function to get element info - const getElementInfo = (element: HTMLElement) => { - let info: { - tagName: string; - hasOnlyText?: boolean; - innerText?: string; - url?: string; - imageUrl?: string; - attributes?: Record; - innerHTML?: string; - outerHTML?: string; - fromIframe?: boolean; - iframePath?: string[]; - } = { - tagName: element?.tagName ?? '', - }; + // Helper function to find elements within iframes, handling nested cases + const getElementFromIframePoint = ( + x: number, + y: number, + context: Document = document, + iframePath: string[] = [] + ): { element: HTMLElement | null; iframePath: string[] } => { + // First try to get element at the given coordinates + let element = context.elementFromPoint(x, y) as HTMLElement; + if (!element) return { element: null, iframePath }; - if (element) { - info.attributes = Array.from(element.attributes).reduce( - (acc, attr) => { - acc[attr.name] = attr.value; - return acc; - }, - {} as Record - ); - } - - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else if (element?.tagName === 'SELECT') { - const selectElement = element as HTMLSelectElement; - info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; - info.attributes = { - ...info.attributes, - selectedValue: selectElement.value, - }; - } else if (element?.tagName === 'INPUT' && - ((element as HTMLInputElement).type === 'time' || - (element as HTMLInputElement).type === 'date')) { - info.innerText = (element as HTMLInputElement).value; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; - return info; - }; - - // Helper function to search in iframe - const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, - relativeY: number, - iframePath: string[] - ) => { - try { - if (!iframe.contentDocument) return null; - - const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; - if (!el) return null; - - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; - - const info = getElementInfo(element); - info.fromIframe = true; - info.iframePath = iframePath; - - return info; - } catch (e) { - console.warn('Cannot access iframe content:', e); - return null; - } - }; - - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - // Check if the element is an iframe - if (el.tagName === 'IFRAME') { - const iframe = el as HTMLIFrameElement; - const rect = iframe.getBoundingClientRect(); - const relativeX = x - rect.left; - const relativeY = y - rect.top; - - const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, - [iframe.id || 'unnamed-iframe'] - ); - if (iframeResult) return iframeResult; - } - - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; - return getElementInfo(element); - } - return null; - }, - { x: coordinates.x, y: coordinates.y } - ); - return elementInfo; - } else { - const elementInfo = await page.evaluate( - async ({ x, y }) => { - // Helper function to get element info (same as above) - const getElementInfo = (element: HTMLElement) => { - let info: { - tagName: string; - hasOnlyText?: boolean; - innerText?: string; - url?: string; - imageUrl?: string; - attributes?: Record; - innerHTML?: string; - outerHTML?: string; - fromIframe?: boolean; - iframePath?: string[]; - } = { - tagName: element?.tagName ?? '', - }; - - if (element) { - info.attributes = Array.from(element.attributes).reduce( - (acc, attr) => { - acc[attr.name] = attr.value; - return acc; - }, - {} as Record - ); - } - - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; - return info; - }; - - // Helper function to search in iframe (same as above) - const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, - relativeY: number, - iframePath: string[] - ) => { - try { - if (!iframe.contentDocument) return null; - - const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; - if (!el) return null; - - let element = el; - while (element.parentElement) { - const parentRect = element.parentElement.getBoundingClientRect(); - const childRect = element.getBoundingClientRect(); - - const fullyContained = - parentRect.left <= childRect.left && - parentRect.right >= childRect.right && - parentRect.top <= childRect.top && - parentRect.bottom >= childRect.bottom; - - const significantOverlap = - (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; - - if (fullyContained && significantOverlap) { - element = element.parentElement; - } else { - break; + // Check if we found an iframe + if (element.tagName === 'IFRAME') { + const iframe = element as HTMLIFrameElement; + try { + // Make sure we can access the iframe's content + if (!iframe.contentDocument) { + return { element, iframePath }; } + + // Transform coordinates to iframe's space + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + // Add this iframe to the path + const updatedPath = [...iframePath, iframe.id || 'unnamed-iframe']; + + // Recursively search within the iframe + const iframeResult = getElementFromIframePoint( + relativeX, + relativeY, + iframe.contentDocument, + updatedPath + ); + + // If we found an element in the iframe, return it + if (iframeResult.element) { + return iframeResult; + } + } catch (e) { + console.warn('Cannot access iframe content:', e); } - - const info = getElementInfo(element); - info.fromIframe = true; - info.iframePath = iframePath; - - return info; - } catch (e) { - console.warn('Cannot access iframe content:', e); - return null; } + + // Return the element we found (either in main document or iframe) + return { element, iframePath }; }; - const originalEl = document.elementFromPoint(x, y) as HTMLElement; - if (originalEl) { - // Check if the element is an iframe - if (originalEl.tagName === 'IFRAME') { - const iframe = originalEl as HTMLIFrameElement; - const rect = iframe.getBoundingClientRect(); - const relativeX = x - rect.left; - const relativeY = y - rect.top; + // Get the element and its iframe path + const { element: el, iframePath } = getElementFromIframePoint(x, y); + + if (el) { + // Handle potential anchor parent + const { parentElement } = el; + const targetElement = parentElement?.tagName === 'A' ? parentElement : el; - const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, - [iframe.id || 'unnamed-iframe'] + // Build the element information object + let info: { + tagName: string; + hasOnlyText?: boolean; + innerText?: string; + url?: string; + imageUrl?: string; + attributes?: Record; + innerHTML?: string; + outerHTML?: string; + fromIframe?: boolean; + iframePath?: string[]; + } = { + tagName: targetElement?.tagName ?? '', + fromIframe: iframePath.length > 0, + iframePath: iframePath.length > 0 ? iframePath : undefined + }; + + // Collect element attributes and properties + if (targetElement) { + // Get all attributes + info.attributes = Array.from(targetElement.attributes).reduce( + (acc, attr) => { + acc[attr.name] = attr.value; + return acc; + }, + {} as Record ); - if (iframeResult) return iframeResult; - } - let element = originalEl; - while (element.parentElement) { - const parentRect = element.parentElement.getBoundingClientRect(); - const childRect = element.getBoundingClientRect(); - - const fullyContained = - parentRect.left <= childRect.left && - parentRect.right >= childRect.right && - parentRect.top <= childRect.top && - parentRect.bottom >= childRect.bottom; - - const significantOverlap = - (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; - - if (fullyContained && significantOverlap) { - element = element.parentElement; + // Handle specific element types + if (targetElement.tagName === 'A') { + info.url = (targetElement as HTMLAnchorElement).href; + info.innerText = targetElement.textContent ?? ''; + } else if (targetElement.tagName === 'IMG') { + info.imageUrl = (targetElement as HTMLImageElement).src; } else { - break; + info.hasOnlyText = targetElement.children.length === 0 && + (targetElement.textContent !== null && + targetElement.textContent.trim().length > 0); + info.innerText = targetElement.textContent ?? ''; } + + info.innerHTML = targetElement.innerHTML; + info.outerHTML = targetElement.outerHTML; } - return getElementInfo(element); + return info; } return null; }, @@ -271,6 +134,7 @@ export const getElementInformation = async ( ); return elementInfo; } + // ... rest of the code remains same } catch (error) { const { message, stack } = error as Error; console.error('Error while retrieving selector:', message); @@ -984,6 +848,148 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } return output; } + + const getIframeOffset = (iframe: HTMLIFrameElement): { x: number; y: number } => { + const rect = iframe.getBoundingClientRect(); + return { + x: rect.left, + y: rect.top + }; + }; + + const isAccessibleIframe = (iframe: HTMLIFrameElement): boolean => { + try { + return !!iframe.contentDocument; + } catch (e) { + return false; + } + }; + + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Get the initial element at the specified coordinates + let currentElement = document.elementFromPoint(x, y) as HTMLElement; + if (!currentElement) return null; + + let deepestElement = currentElement; + let current = currentElement; + let currentX = x; + let currentY = y; + let depth = 0; + const MAX_DEPTH = 20; // Prevent infinite loops with deeply nested iframes + + // Continue traversing while we find nested iframes + while (current && depth < MAX_DEPTH) { + // Check if the current element is an iframe and if we can access it + if (current instanceof HTMLIFrameElement && isAccessibleIframe(current)) { + // Calculate the offset of the iframe + const iframeOffset = getIframeOffset(current); + + // Transform coordinates to be relative to the iframe's content window + const relativeX = currentX - iframeOffset.x; + const relativeY = currentY - iframeOffset.y; + + // Find the element at these coordinates within the iframe + const iframeElement = current.contentDocument?.elementFromPoint(relativeX, relativeY) as HTMLElement; + + // If we don't find an element or we get the same element, stop traversing + if (!iframeElement || iframeElement === current) break; + + // Update our tracking variables + deepestElement = iframeElement; + current = iframeElement; + currentX = relativeX; + currentY = relativeY; + depth++; + } else { + // If the current element is not an iframe, we're done traversing + break; + } + } + + return deepestElement; + }; + + interface IframeContext { + frame: HTMLIFrameElement; + document: Document; + element: HTMLElement; + } + + const genSelectorForIframe = (element: HTMLElement) => { + // Helper function to check if we can access an iframe's content + const isAccessibleIframe = (iframe: HTMLIFrameElement): boolean => { + try { + return !!iframe.contentDocument; + } catch (e) { + return false; + } + }; + + // Get complete path up through nested iframes to document root + const getIframePath = (el: HTMLElement) => { + const path: IframeContext[] = []; + let current = el; + let currentDoc = el.ownerDocument; + let depth = 0; + const MAX_DEPTH = 20; // Limit depth to prevent infinite loops + + while (current && depth < MAX_DEPTH) { + // If we're in an iframe, get its parent document + const frameElement = currentDoc.defaultView?.frameElement as HTMLIFrameElement; + if (frameElement && isAccessibleIframe(frameElement)) { + path.unshift({ + frame: frameElement, + document: currentDoc, + element: current + }); + current = frameElement; + currentDoc = frameElement.ownerDocument; + depth++; + } else { + break; + } + } + return path; + }; + + // Get the iframe path for our target element + const iframePath = getIframePath(element); + if (iframePath.length === 0) return null; + + try { + const selectorParts: string[] = []; + + // Generate selector for each iframe boundary + iframePath.forEach((context, index) => { + // Get selector for the iframe element in its parent document + const frameSelector = finder(context.frame, { + root: index === 0 ? document.body : (iframePath[index - 1].document.body as Element) + }); + + // For the last context, get selector for target element + if (index === iframePath.length - 1) { + const elementSelector = finder(element, { + root: context.document.body as Element + }); + // Use :>> for iframe traversal in the selector + selectorParts.push(`${frameSelector} :>> ${elementSelector}`); + } else { + selectorParts.push(frameSelector); + } + }); + + return { + // Join all parts with :>> to indicate iframe traversal + fullSelector: selectorParts.join(' :>> '), + // Include additional metadata about the frames if needed + frameCount: iframePath.length, + isAccessible: true + }; + } catch (e) { + console.warn('Error generating iframe selector:', e); + return null; + } + }; const genSelectors = (element: HTMLElement | null) => { if (element == null) { @@ -1004,6 +1010,8 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } catch (e) { } + const iframeSelector = genSelectorForIframe(element); + const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, [ 'name', @@ -1050,6 +1058,11 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, + iframeSelector: iframeSelector ? { + full: iframeSelector.fullSelector, + frame: iframeSelector.frameCount, + accesible: iframeSelector.isAccessible + } : null }; } @@ -1092,7 +1105,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return char.length === 1 && char.match(/[0-9]/); } - const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; + const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement; if ( hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null From 8323593bb09d0b9a869afb825aa58be3944199b9 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 2 Jan 2025 21:18:49 +0530 Subject: [PATCH 056/111] chore: format --- src/components/organisms/BrowserWindow.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 442b7e50..2a5f7758 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -9,7 +9,6 @@ import { useBrowserSteps, TextStep } from '../../context/browserSteps'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useTranslation } from 'react-i18next'; - interface ElementInfo { tagName: string; hasOnlyText?: boolean; From e91a3916a0513af15d306adc71fdd68bb9250e7d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 2 Jan 2025 21:19:36 +0530 Subject: [PATCH 057/111] chore: cleanup console logs --- src/components/organisms/BrowserWindow.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 2f66e906..ad58a309 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -119,9 +119,6 @@ export const BrowserWindow = () => { const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => { if (getList === true) { if (listSelector) { - console.log("LIST SELEECTORRRRR: ", listSelector); - console.log("DATA SELEECTORRRRR: ", data.selector); - console.log("CHILDREEENN SELECORRRR: ", data.childSelectors); socket?.emit('listSelector', { selector: listSelector }); const hasValidChildSelectors = Array.isArray(data.childSelectors) && data.childSelectors.length > 0; From edfcd8f869f194f7525744d050e30ba81a8bafef Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 2 Jan 2025 23:15:03 +0530 Subject: [PATCH 058/111] fix: format --- src/components/organisms/BrowserWindow.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index b69a0921..421bb680 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -326,7 +326,6 @@ export const BrowserWindow = () => { } }, [paginationMode, resetPaginationSelector]); - return (

{ From af237ba1b0d9973e94f3595fdcc8610c8eac03d5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 3 Jan 2025 20:06:13 +0530 Subject: [PATCH 059/111] fix: update custom limit if value >= 1 --- src/components/organisms/RightSidePanel.tsx | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 8211a64a..d4670d4f 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -529,7 +529,22 @@ export const RightSidePanel: React.FC = ({ onFinishCapture updateCustomLimit(e.target.value)} + onChange={(e: React.ChangeEvent) => { + const value = parseInt(e.target.value); + // Only update if the value is greater than or equal to 1 or if the field is empty + if (e.target.value === '' || value >= 1) { + updateCustomLimit(e.target.value); + } + }} + inputProps={{ + min: 1, + onKeyPress: (e: React.KeyboardEvent) => { + const value = (e.target as HTMLInputElement).value + e.key; + if (parseInt(value) < 1) { + e.preventDefault(); + } + } + }} placeholder={t('right_panel.limit.enter_number')} sx={{ marginLeft: '10px', From 35a44bb39fcff9381417918dd37fc43d43ee3519 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 3 Jan 2025 20:18:03 +0530 Subject: [PATCH 060/111] fix: rm translation for alt maxun_logo --- src/components/molecules/ActionDescriptionBox.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/ActionDescriptionBox.tsx b/src/components/molecules/ActionDescriptionBox.tsx index 190c5838..45ec1641 100644 --- a/src/components/molecules/ActionDescriptionBox.tsx +++ b/src/components/molecules/ActionDescriptionBox.tsx @@ -113,7 +113,7 @@ const ActionDescriptionBox = () => { return ( - + {renderActionDescription()} From 1874e71e0f5ec86a1ba2cad4891dfdce8f1f19b7 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 3 Jan 2025 20:22:30 +0530 Subject: [PATCH 061/111] fix: add translation for same name robot creation --- src/components/molecules/SaveRecording.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/SaveRecording.tsx b/src/components/molecules/SaveRecording.tsx index 8e1eb462..cc51f238 100644 --- a/src/components/molecules/SaveRecording.tsx +++ b/src/components/molecules/SaveRecording.tsx @@ -101,7 +101,7 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { - {t('save_recording.warnings.robot_exists')} + {t('save_recording.errors.exists_warning')} ) :