Files
parcer/server/src/workflow-management/selector.ts

2245 lines
78 KiB
TypeScript
Raw Normal View History

import { Page } from "playwright";
2024-12-11 02:02:37 +05:30
import { Coordinates } from "../types";
2024-07-31 22:46:19 +05:30
import { WhereWhatPair, WorkflowFile } from "maxun-core";
import logger from "../logger";
2024-06-05 06:34:46 +05:30
// Alias for the type of the `workflow` property of a maxun-core `WorkflowFile`.
type Workflow = WorkflowFile["workflow"];
/**
 * Collects basic information about the element rendered at the given page
 * coordinates.
 *
 * The lookup runs inside the browser through `page.evaluate`. Starting from
 * `document.elementFromPoint`, it descends through iframes whose document is
 * reachable and through `shadowRoot`s to find the innermost element under the
 * point. Coordinates are converted into each frame's local space on the way
 * down, so nested iframes and shadow roots inside iframes are probed at the
 * correct position.
 *
 * Which element is finally described depends on the capture mode:
 * - `getList` is true and `listSelector` is empty: the hit element is widened
 *   to its enclosing container. A `TD`/`TH` resolves to its closest `table`;
 *   any other element climbs to its parent for as long as the parent fully
 *   contains it and the element covers more than half of the parent's area,
 *   never climbing onto `body`/`html`.
 * - otherwise: the hit element is used, or its direct parent when that parent
 *   is an `<a>`. `SELECT` elements additionally report the selected option.
 *
 * @param page The page instance.
 * @param coordinates Viewport coordinates of the element.
 * @param listSelector Selector of the list being captured; an empty string
 * means none has been chosen yet.
 * @param getList Whether a list is currently being captured.
 * @category WorkflowManagement-Selectors
 * @returns The element description, `null` when no element is found at the
 * point, or `undefined` when the evaluation throws (the error is logged).
 */
export const getElementInformation = async (
  page: Page,
  coordinates: Coordinates,
  listSelector: string,
  getList: boolean
) => {
  try {
    // Only widen to the enclosing container while a list is being captured
    // and no list selector has been fixed yet.
    const expandToContainer = getList && listSelector === '';

    const elementInfo = await page.evaluate(
      // NOTE: this callback is serialized and executed in the browser, so every
      // helper it needs must be declared inside it.
      ({ x, y, expandToContainer }) => {
        type ElementInfo = {
          tagName: string;
          hasOnlyText?: boolean;
          innerText?: string;
          url?: string;
          imageUrl?: string;
          attributes?: Record<string, string>;
          innerHTML?: string;
          outerHTML?: string;
          isIframeContent?: boolean;
          iframeURL?: string;
          iframeIndex?: number;
          frameHierarchy?: string[];
          isShadowRoot?: boolean;
          shadowRootMode?: string;
          shadowRootContent?: string;
        };

        // Follows nested shadow roots below `host`, probing each one at the
        // given coordinates (local to the document that contains `host`).
        const traverseShadowDOM = (
          host: HTMLElement,
          localX: number,
          localY: number
        ): HTMLElement => {
          let current = host;
          let deepest = host;
          let shadowRoot = current.shadowRoot;
          while (shadowRoot) {
            const inner = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
            if (!inner || inner === current) break;
            deepest = inner;
            current = inner;
            shadowRoot = current.shadowRoot;
          }
          return deepest;
        };

        // Returns the innermost element under the point, descending through
        // iframes and shadow roots.
        const getDeepestElementFromPoint = (
          pointX: number,
          pointY: number
        ): HTMLElement | null => {
          const hit = document.elementFromPoint(pointX, pointY) as HTMLElement;
          if (!hit) return null;
          if (hit.tagName !== 'IFRAME') {
            return traverseShadowDOM(hit, pointX, pointY);
          }

          let deepest: HTMLElement = hit;
          let currentIframe = hit as HTMLIFrameElement;
          // Coordinates local to the document that contains `currentIframe`.
          let localX = pointX;
          let localY = pointY;
          while (currentIframe) {
            try {
              // An iframe's rect is relative to its parent document's
              // viewport, so the offsets accumulate level by level.
              const iframeRect = currentIframe.getBoundingClientRect();
              localX -= iframeRect.left;
              localY -= iframeRect.top;

              const iframeDocument =
                currentIframe.contentDocument || currentIframe.contentWindow?.document;
              if (!iframeDocument) break;

              const iframeElement = iframeDocument.elementFromPoint(localX, localY) as HTMLElement;
              if (!iframeElement) break;

              deepest = traverseShadowDOM(iframeElement, localX, localY);
              if (iframeElement.tagName !== 'IFRAME') break;
              currentIframe = iframeElement as HTMLIFrameElement;
            } catch (error) {
              console.warn('Cannot access iframe content:', error);
              break;
            }
          }
          return deepest;
        };

        // Widens a hit element to the container that should represent it in
        // list-capture mode.
        const expandToEnclosingContainer = (start: HTMLElement): HTMLElement => {
          let element = start;
          if (element.tagName === 'TD' || element.tagName === 'TH') {
            const tableParent = element.closest('table');
            if (tableParent) {
              element = tableParent;
            }
          }
          if (element.tagName === 'TABLE') return element;

          for (let parent = element.parentElement; parent; parent = element.parentElement) {
            const tag = element.tagName.toLowerCase();
            if (tag === 'body' || tag === 'html') break;

            const parentRect = parent.getBoundingClientRect();
            const childRect = element.getBoundingClientRect();
            const fullyContained =
              parentRect.left <= childRect.left &&
              parentRect.right >= childRect.right &&
              parentRect.top <= childRect.top &&
              parentRect.bottom >= childRect.bottom;
            const significantOverlap =
              (childRect.width * childRect.height) /
              (parentRect.width * parentRect.height) > 0.5;
            if (!fullyContained || !significantOverlap) break;

            const parentTag = parent.tagName.toLowerCase();
            if (parentTag === 'body' || parentTag === 'html') break;
            element = parent;
          }
          return element;
        };

        const hitElement = getDeepestElementFromPoint(x, y);
        if (!hitElement) return null;

        let target: HTMLElement = hitElement;
        if (expandToContainer) {
          target = expandToEnclosingContainer(hitElement);
        } else {
          // Prefer a directly enclosing anchor so the link itself is described.
          const parent = hitElement.parentElement;
          if (parent?.tagName === 'A') {
            target = parent;
          }
        }

        // Containing frame / shadow root of the described element.
        const frameElement = target.ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
        const isIframeContent = Boolean(frameElement);
        const containingShadowRoot = target.getRootNode() as ShadowRoot;
        const isShadowRoot = containingShadowRoot instanceof ShadowRoot;

        const info: ElementInfo = {
          tagName: target.tagName ?? '',
          isIframeContent,
          isShadowRoot,
        };

        if (isIframeContent) {
          info.iframeURL = frameElement.src;
          // Walk up the frame chain, outermost frame first in the result.
          const frameHierarchy: string[] = [];
          let currentFrame = frameElement;
          let frameIndex = 0;
          while (currentFrame) {
            frameHierarchy.unshift(
              currentFrame.id || currentFrame.src || `iframe[${frameIndex}]`
            );
            currentFrame = currentFrame.ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
            frameIndex++;
          }
          info.frameHierarchy = frameHierarchy;
          info.iframeIndex = frameIndex - 1; // Adjust for 0-based index
        }

        if (isShadowRoot) {
          info.shadowRootMode = containingShadowRoot.mode;
          info.shadowRootContent = containingShadowRoot.innerHTML;
        }

        const attributes: Record<string, string> = {};
        for (const attr of Array.from(target.attributes)) {
          attributes[attr.name] = attr.value;
        }
        info.attributes = attributes;

        if (target.tagName === 'A') {
          info.url = (target as HTMLAnchorElement).href;
          info.innerText = target.textContent ?? '';
        } else if (target.tagName === 'IMG') {
          info.imageUrl = (target as HTMLImageElement).src;
        } else if (!expandToContainer && target.tagName === 'SELECT') {
          // SELECT details are only reported outside container-widening mode.
          const selectElement = target as HTMLSelectElement;
          info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? '';
          info.attributes = {
            ...info.attributes,
            selectedValue: selectElement.value,
          };
        } else {
          info.hasOnlyText =
            target.children.length === 0 &&
            target.textContent !== null &&
            target.textContent.trim().length > 0;
          info.innerText = target.textContent ?? '';
        }

        info.innerHTML = target.innerHTML;
        info.outerHTML = target.outerHTML;
        return info;
      },
      { x: coordinates.x, y: coordinates.y, expandToContainer }
    );
    return elementInfo;
  } catch (error) {
    const { message, stack } = error as Error;
    console.error('Error while retrieving selector:', message);
    console.error('Stack:', stack);
  }
};
/**
 * Returns the bounding rectangle, in top-level viewport coordinates, of the
 * element rendered at the given page coordinates.
 *
 * The lookup runs inside the browser through `page.evaluate`. It descends
 * through reachable iframes and through `shadowRoot`s to find the innermost
 * element under the point, converting the coordinates into each frame's local
 * space on the way down so nested iframes are probed at the correct position.
 *
 * Which element is measured depends on the capture mode:
 * - `getList` is true and `listSelector` is empty: the hit element is widened
 *   to its enclosing container. A `TD`/`TH` resolves to its closest `table`;
 *   any other element climbs to its parent for as long as the parent fully
 *   contains it and the element covers more than half of the parent's area,
 *   never climbing onto `body`/`html`.
 * - otherwise: the hit element is used, or its direct parent when that parent
 *   is an `<a>`.
 *
 * For elements inside iframes the rectangle is shifted by the position of
 * every enclosing frame so it is expressed relative to the top window.
 *
 * @param page The page instance.
 * @param coordinates Viewport coordinates of the element.
 * @param listSelector Selector of the list being captured; an empty string
 * means none has been chosen yet.
 * @param getList Whether a list is currently being captured.
 * @returns The rectangle, `null` when no element is found at the point, or
 * `undefined` when the evaluation throws (the error is logged).
 */
export const getRect = async (page: Page, coordinates: Coordinates, listSelector: string, getList: boolean) => {
  try {
    // Only widen to the enclosing container while a list is being captured
    // and no list selector has been fixed yet.
    const expandToContainer = getList && listSelector === '';

    const rect = await page.evaluate(
      // NOTE: this callback is serialized and executed in the browser, so every
      // helper it needs must be declared inside it.
      ({ x, y, expandToContainer }) => {
        // Follows nested shadow roots below `host`, probing each one at the
        // given coordinates (local to the document that contains `host`).
        const traverseShadowDOM = (
          host: HTMLElement,
          localX: number,
          localY: number
        ): HTMLElement => {
          let current = host;
          let deepest = host;
          let shadowRoot = current.shadowRoot;
          while (shadowRoot) {
            const inner = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
            if (!inner || inner === current) break;
            deepest = inner;
            current = inner;
            shadowRoot = current.shadowRoot;
          }
          return deepest;
        };

        // Returns the innermost element under the point, descending through
        // iframes and shadow roots.
        const getDeepestElementFromPoint = (
          pointX: number,
          pointY: number
        ): HTMLElement | null => {
          const hit = document.elementFromPoint(pointX, pointY) as HTMLElement;
          if (!hit) return null;
          if (hit.tagName !== 'IFRAME') {
            return traverseShadowDOM(hit, pointX, pointY);
          }

          let deepest: HTMLElement = hit;
          let currentIframe = hit as HTMLIFrameElement;
          // Coordinates local to the document that contains `currentIframe`.
          let localX = pointX;
          let localY = pointY;
          while (currentIframe) {
            try {
              // An iframe's rect is relative to its parent document's
              // viewport, so the offsets accumulate level by level.
              const iframeRect = currentIframe.getBoundingClientRect();
              localX -= iframeRect.left;
              localY -= iframeRect.top;

              const iframeDocument =
                currentIframe.contentDocument || currentIframe.contentWindow?.document;
              if (!iframeDocument) break;

              const iframeElement = iframeDocument.elementFromPoint(localX, localY) as HTMLElement;
              if (!iframeElement) break;

              deepest = traverseShadowDOM(iframeElement, localX, localY);
              if (iframeElement.tagName !== 'IFRAME') break;
              currentIframe = iframeElement as HTMLIFrameElement;
            } catch (error) {
              console.warn('Cannot access iframe content:', error);
              break;
            }
          }
          return deepest;
        };

        // Widens a hit element to the container that should represent it in
        // list-capture mode.
        const expandToEnclosingContainer = (start: HTMLElement): HTMLElement => {
          let element = start;
          if (element.tagName === 'TD' || element.tagName === 'TH') {
            const tableParent = element.closest('table');
            if (tableParent) {
              element = tableParent;
            }
          }
          if (element.tagName === 'TABLE') return element;

          for (let parent = element.parentElement; parent; parent = element.parentElement) {
            const tag = element.tagName.toLowerCase();
            if (tag === 'body' || tag === 'html') break;

            const parentRect = parent.getBoundingClientRect();
            const childRect = element.getBoundingClientRect();
            const fullyContained =
              parentRect.left <= childRect.left &&
              parentRect.right >= childRect.right &&
              parentRect.top <= childRect.top &&
              parentRect.bottom >= childRect.bottom;
            const significantOverlap =
              (childRect.width * childRect.height) /
              (parentRect.width * parentRect.height) > 0.5;
            if (!fullyContained || !significantOverlap) break;

            const parentTag = parent.tagName.toLowerCase();
            if (parentTag === 'body' || parentTag === 'html') break;
            element = parent;
          }
          return element;
        };

        // Copies the rectangle fields into a plain object.
        const createRectObject = (rect: DOMRect) => ({
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
          top: rect.top,
          right: rect.right,
          bottom: rect.bottom,
          left: rect.left,
          toJSON() {
            return {
              x: this.x,
              y: this.y,
              width: this.width,
              height: this.height,
              top: this.top,
              right: this.right,
              bottom: this.bottom,
              left: this.left
            };
          }
        });

        const hitElement = getDeepestElementFromPoint(x, y);
        if (!hitElement) return null;

        let element: HTMLElement = hitElement;
        if (expandToContainer) {
          element = expandToEnclosingContainer(hitElement);
        } else {
          // Prefer a directly enclosing anchor so the whole link is outlined.
          const parent = hitElement.parentElement;
          if (parent?.tagName === 'A') {
            element = parent;
          }
        }

        const rectangle = element?.getBoundingClientRect();
        if (!rectangle) return null;

        // For elements inside iframes, shift the rectangle by every enclosing
        // frame's position so it is relative to the top window.
        let adjustedRect = createRectObject(rectangle);
        let currentWindow = element.ownerDocument.defaultView;
        while (currentWindow !== window.top) {
          const frameElement = currentWindow?.frameElement as HTMLIFrameElement;
          if (!frameElement) break;
          const frameRect = frameElement.getBoundingClientRect();
          adjustedRect = createRectObject({
            x: adjustedRect.x + frameRect.x,
            y: adjustedRect.y + frameRect.y,
            width: adjustedRect.width,
            height: adjustedRect.height,
            top: adjustedRect.top + frameRect.top,
            right: adjustedRect.right + frameRect.left,
            bottom: adjustedRect.bottom + frameRect.top,
            left: adjustedRect.left + frameRect.left,
          } as DOMRect);
          currentWindow = frameElement.ownerDocument.defaultView;
        }
        return adjustedRect;
      },
      { x: coordinates.x, y: coordinates.y, expandToContainer }
    );
    return rect;
  } catch (error) {
    const { message, stack } = error as Error;
    console.error('Error while retrieving selector:', message);
    console.error('Stack:', stack);
  }
};
2024-12-09 05:43:51 +05:30
2024-06-05 10:34:17 +05:30
/**
* Returns the best and unique css {@link Selectors} for the element on the page.
* Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts
* available as an npm package: @medv/finder
*
* The finder needs to be executed and defined inside a browser context. Meaning,
* the code needs to be available inside a page evaluate function.
* @param page The page instance.
* @param coordinates Coordinates of an element.
* @category WorkflowManagement-Selectors
* @returns {Promise<Selectors|null|undefined>}
*/
export const getSelectors = async (page: Page, coordinates: Coordinates) => {
try {
2024-06-06 05:14:18 +05:30
const selectors: any = await page.evaluate(async ({ x, y }) => {
// version @medv/finder
// https://github.com/antonmedv/finder/blob/master/finder.ts
type Node = {
name: string;
penalty: number;
level?: number;
};
type Path = Node[];
enum Limit {
All,
Two,
One,
}
type Options = {
root: Element;
idName: (name: string) => boolean;
className: (name: string) => boolean;
tagName: (name: string) => boolean;
attr: (name: string, value: string) => boolean;
seedMinLength: number;
optimizedMinLength: number;
threshold: number;
maxNumberOfTries: number;
};
let config: Options;
let rootDocument: Document | Element;
function finder(input: Element, options?: Partial<Options>) {
if (input.nodeType !== Node.ELEMENT_NODE) {
throw new Error(`Can't generate CSS selector for non-element node type.`);
}
if ('html' === input.tagName.toLowerCase()) {
return 'html';
}
const defaults: Options = {
root: document.body,
idName: (name: string) => true,
className: (name: string) => true,
tagName: (name: string) => true,
attr: (name: string, value: string) => false,
seedMinLength: 1,
optimizedMinLength: 2,
2024-10-24 22:26:12 +05:30
threshold: 900,
maxNumberOfTries: 9000,
2024-06-06 05:14:18 +05:30
};
config = { ...defaults, ...options };
rootDocument = findRootDocument(config.root, defaults);
let path = bottomUpSearch(input, Limit.All, () =>
bottomUpSearch(input, Limit.Two, () => bottomUpSearch(input, Limit.One))
);
if (path) {
const optimized = sort(optimize(path, input));
if (optimized.length > 0) {
path = optimized[0];
}
return selector(path);
} else {
throw new Error(`Selector was not found.`);
}
}
function findRootDocument(rootNode: Element | Document, defaults: Options) {
if (rootNode.nodeType === Node.DOCUMENT_NODE) {
return rootNode;
}
if (rootNode === defaults.root) {
return rootNode.ownerDocument as Document;
}
return rootNode;
}
function bottomUpSearch(
input: Element,
limit: Limit,
fallback?: () => Path | null
): Path | null {
let path: Path | null = null;
let stack: Node[][] = [];
let current: Element | null = input;
let i = 0;
while (current && current !== config.root.parentElement) {
let level: Node[] = maybe(id(current)) ||
maybe(...attr(current)) ||
maybe(...classNames(current)) ||
maybe(tagName(current)) || [any()];
const nth = index(current);
if (limit === Limit.All) {
if (nth) {
level = level.concat(
level.filter(dispensableNth).map((node) => nthChild(node, nth))
);
}
} else if (limit === Limit.Two) {
level = level.slice(0, 1);
if (nth) {
level = level.concat(
level.filter(dispensableNth).map((node) => nthChild(node, nth))
);
}
} else if (limit === Limit.One) {
const [node] = (level = level.slice(0, 1));
if (nth && dispensableNth(node)) {
level = [nthChild(node, nth)];
}
}
for (let node of level) {
node.level = i;
}
stack.push(level);
if (stack.length >= config.seedMinLength) {
path = findUniquePath(stack, fallback);
if (path) {
break;
}
}
current = current.parentElement;
i++;
}
if (!path) {
path = findUniquePath(stack, fallback);
}
return path;
}
function findUniquePath(
stack: Node[][],
fallback?: () => Path | null
): Path | null {
const paths = sort(combinations(stack));
if (paths.length > config.threshold) {
return fallback ? fallback() : null;
}
for (let candidate of paths) {
if (unique(candidate)) {
return candidate;
}
}
return null;
}
function selector(path: Path): string {
let node = path[0];
let query = node.name;
for (let i = 1; i < path.length; i++) {
const level = path[i].level || 0;
if (node.level === level - 1) {
query = `${path[i].name} > ${query}`;
} else {
query = `${path[i].name} ${query}`;
}
node = path[i];
}
return query;
}
function penalty(path: Path): number {
return path.map((node) => node.penalty).reduce((acc, i) => acc + i, 0);
}
function unique(path: Path) {
switch (rootDocument.querySelectorAll(selector(path)).length) {
case 0:
throw new Error(
`Can't select any node with this selector: ${selector(path)}`
);
case 1:
return true;
default:
return false;
}
}
function id(input: Element): Node | null {
const elementId = input.getAttribute('id');
if (elementId && config.idName(elementId)) {
return {
name: '#' + cssesc(elementId, { isIdentifier: true }),
penalty: 0,
};
}
return null;
}
function attr(input: Element): Node[] {
const attrs = Array.from(input.attributes).filter((attr) =>
config.attr(attr.name, attr.value)
);
return attrs.map(
(attr): Node => ({
name:
'[' +
cssesc(attr.name, { isIdentifier: true }) +
'="' +
cssesc(attr.value) +
'"]',
penalty: 0.5,
})
);
}
function classNames(input: Element): Node[] {
const names = Array.from(input.classList).filter(config.className);
return names.map(
(name): Node => ({
name: '.' + cssesc(name, { isIdentifier: true }),
penalty: 1,
})
);
}
function tagName(input: Element): Node | null {
const name = input.tagName.toLowerCase();
if (config.tagName(name)) {
return {
name,
penalty: 2,
};
}
return null;
}
function any(): Node {
return {
name: '*',
penalty: 3,
};
}
function index(input: Element): number | null {
const parent = input.parentNode;
if (!parent) {
return null;
}
let child = parent.firstChild;
if (!child) {
return null;
}
let i = 0;
while (child) {
if (child.nodeType === Node.ELEMENT_NODE) {
i++;
}
if (child === input) {
break;
}
child = child.nextSibling;
}
return i;
}
function nthChild(node: Node, i: number): Node {
return {
name: node.name + `:nth-child(${i})`,
penalty: node.penalty + 1,
};
}
function dispensableNth(node: Node) {
return node.name !== 'html' && !node.name.startsWith('#');
}
function maybe(...level: (Node | null)[]): Node[] | null {
const list = level.filter(notEmpty);
if (list.length > 0) {
return list;
}
return null;
}
function notEmpty<T>(value: T | null | undefined): value is T {
return value !== null && value !== undefined;
}
function* combinations(stack: Node[][], path: Node[] = []): Generator<Node[]> {
if (stack.length > 0) {
for (let node of stack[0]) {
yield* combinations(stack.slice(1, stack.length), path.concat(node));
}
} else {
yield path;
}
}
function sort(paths: Iterable<Path>): Path[] {
return Array.from(paths).sort((a, b) => penalty(a) - penalty(b));
}
type Scope = {
counter: number;
visited: Map<string, boolean>;
};
function* optimize(
path: Path,
input: Element,
scope: Scope = {
counter: 0,
visited: new Map<string, boolean>(),
}
): Generator<Node[]> {
if (path.length > 2 && path.length > config.optimizedMinLength) {
for (let i = 1; i < path.length - 1; i++) {
if (scope.counter > config.maxNumberOfTries) {
return; // Okay At least I tried!
}
scope.counter += 1;
const newPath = [...path];
newPath.splice(i, 1);
const newPathKey = selector(newPath);
if (scope.visited.has(newPathKey)) {
return;
}
if (unique(newPath) && same(newPath, input)) {
yield newPath;
scope.visited.set(newPathKey, true);
yield* optimize(newPath, input, scope);
}
}
}
}
function same(path: Path, input: Element) {
return rootDocument.querySelector(selector(path)) === input;
}
// Printable ASCII symbols that can be escaped with a single backslash when
// `escapeEverything` is set.
const regexAnySingleEscape = /[ -,\.\/:-@\[-\^`\{-~]/;
// Printable ASCII symbols that must be backslash-escaped inside an identifier.
const regexSingleEscape = /[ -,\.\/:-@\[\]\^`\{-~]/;
// A `\HEX` escape followed by a space that is not needed as a terminator
// (the next character is neither a hex digit nor another space).
const regexExcessiveSpaces =
  /(^|\\+)?(\\[A-F0-9]{1,6})\x20(?![a-fA-F0-9\x20])/g;
const defaultOptions = {
  escapeEverything: false,
  isIdentifier: false,
  quotes: 'single',
  wrap: false,
};

/**
 * Escapes a string for use in CSS, either as an identifier
 * (`isIdentifier: true`) or as string content.
 *
 * Characters outside printable ASCII are written as `\HEX ` escapes. A UTF-16
 * surrogate pair is combined into its single code point first, so characters
 * beyond the Basic Multilingual Plane (e.g. emoji) produce one valid escape.
 *
 * @param string The text to escape.
 * @param opt Overrides for `defaultOptions`; any `quotes` value other than
 * `'single'`/`'double'` falls back to `'single'`.
 * @returns The escaped text, wrapped in quotes only when `wrap` is set and the
 * text is not an identifier.
 */
function cssesc(string: string, opt: Partial<typeof defaultOptions> = {}) {
  const options = { ...defaultOptions, ...opt };
  if (options.quotes != 'single' && options.quotes != 'double') {
    options.quotes = 'single';
  }
  const quote = options.quotes == 'double' ? '"' : "'";
  const isIdentifier = options.isIdentifier;
  const firstChar = string.charAt(0);
  let output = '';
  let counter = 0;
  const length = string.length;
  while (counter < length) {
    const character = string.charAt(counter++);
    let codePoint = character.charCodeAt(0);
    let value: string | undefined = void 0;
    // If it's not a printable ASCII character…
    if (codePoint < 0x20 || codePoint > 0x7e) {
      // High surrogates span U+D800..U+DBFF.
      if (codePoint >= 0xd800 && codePoint <= 0xdbff && counter < length) {
        // It's a high surrogate, and there is a next character.
        const extra = string.charCodeAt(counter++);
        if ((extra & 0xfc00) == 0xdc00) {
          // Next character is a low surrogate: combine the pair into the
          // supplementary-plane code point (planes start at U+10000).
          codePoint = ((codePoint & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
        } else {
          // It's an unmatched surrogate; only append this code unit, in case
          // the next code unit is the high surrogate of a surrogate pair.
          counter--;
        }
      }
      value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
    } else {
      if (options.escapeEverything) {
        if (regexAnySingleEscape.test(character)) {
          value = '\\' + character;
        } else {
          value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
        }
      } else if (/[\t\n\f\r\x0B]/.test(character)) {
        value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
      } else if (
        character == '\\' ||
        (!isIdentifier &&
          ((character == '"' && quote == character) ||
            (character == "'" && quote == character))) ||
        (isIdentifier && regexSingleEscape.test(character))
      ) {
        value = '\\' + character;
      } else {
        value = character;
      }
    }
    output += value;
  }
  if (isIdentifier) {
    // Identifiers may not start with `--`/`-digit` or a digit unescaped.
    if (/^-[-\d]/.test(output)) {
      output = '\\-' + output.slice(1);
    } else if (/\d/.test(firstChar)) {
      output = '\\3' + firstChar + ' ' + output.slice(1);
    }
  }
  // Remove spaces after `\HEX` escapes that are not followed by a hex digit,
  // since they're redundant. Note that this is only possible if the escape
  // sequence isn't preceded by an odd number of backslashes.
  output = output.replace(regexExcessiveSpaces, function ($0, $1, $2) {
    if ($1 && $1.length % 2) {
      // It's not safe to remove the space, so don't.
      return $0;
    }
    // Strip the space.
    return ($1 || '') + $2;
  });
  if (!isIdentifier && options.wrap) {
    return quote + output + quote;
  }
  return output;
}
// Resolves the innermost element under the viewport point (x, y), descending
// through shadow roots and through iframes whose document is reachable.
// Coordinates are converted into each frame's local space on the way down, so
// nested iframes and shadow roots inside iframes are probed at the correct
// position. Returns null when nothing is rendered at the point.
const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
  // Upper bounds on how many nested shadow roots / iframes are followed.
  const MAX_SHADOW_DEPTH = 4;
  const MAX_IFRAME_DEPTH = 4;

  // Follows nested shadow roots below `element`, probing each one at the given
  // coordinates (local to the document that contains `element`).
  const traverseShadowDOM = (element: HTMLElement, localX: number, localY: number): HTMLElement => {
    let current = element;
    let deepest = current;
    let shadowDepth = 0;

    while (current && shadowDepth < MAX_SHADOW_DEPTH) {
      const shadowRoot = current.shadowRoot;
      if (!shadowRoot) break;

      const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
      if (!shadowElement || shadowElement === current) break;

      deepest = shadowElement;
      current = shadowElement;
      shadowDepth++;
    }
    return deepest;
  };

  // Start with the element at the specified coordinates
  const element = document.elementFromPoint(x, y) as HTMLElement;
  if (!element) return null;

  // First check if the initial element has a shadow root
  let deepestElement = traverseShadowDOM(element, x, y);

  // If it's an iframe, traverse through the iframe hierarchy
  if (deepestElement.tagName === 'IFRAME') {
    let currentIframe = deepestElement as HTMLIFrameElement;
    let depth = 0;
    // Coordinates local to the document that contains `currentIframe`.
    let localX = x;
    let localY = y;

    while (currentIframe && depth < MAX_IFRAME_DEPTH) {
      try {
        // An iframe's rect is relative to its parent document's viewport, so
        // the offsets accumulate level by level.
        const iframeRect = currentIframe.getBoundingClientRect();
        localX -= iframeRect.left;
        localY -= iframeRect.top;

        // Access iframe's document
        const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document;
        if (!iframeDoc) break;

        // Get element at transformed coordinates in iframe
        const iframeElement = iframeDoc.elementFromPoint(localX, localY) as HTMLElement;
        if (!iframeElement) break;

        // Check for shadow DOM within iframe
        const shadowResult = traverseShadowDOM(iframeElement, localX, localY);
        deepestElement = shadowResult;

        // If we found another iframe, continue traversing
        if (shadowResult.tagName === 'IFRAME') {
          currentIframe = shadowResult as HTMLIFrameElement;
          depth++;
        } else {
          break;
        }
      } catch (error) {
        console.warn('Cannot access iframe content:', error);
        break;
      }
    }
  }
  return deepestElement;
};
/**
 * Builds a selector that reaches `element` through every iframe containing it.
 * Each frame hop and the final element are joined with ' :>> '.
 *
 * @param element Element to describe.
 * @returns `{ fullSelector, isFrameContent: true }`, or null when the element
 *          is not inside an iframe or selector generation throws.
 */
const genSelectorForIframe = (element: HTMLElement) => {
  const MAX_FRAME_HOPS = 4;

  // Chain of enclosing frames, outermost first.
  const frameChain: Array<{ frame: HTMLIFrameElement; document: Document; element: HTMLElement }> = [];
  let node = element;
  for (let hops = 0; node && hops < MAX_FRAME_HOPS; hops++) {
    const doc = node.ownerDocument;
    // A document rendered inside an iframe exposes that iframe as frameElement
    const hostFrame = doc?.defaultView?.frameElement as HTMLIFrameElement;
    if (!hostFrame) break;
    frameChain.unshift({ frame: hostFrame, document: doc, element: node });
    // Continue from the iframe element, which lives in the parent document
    node = hostFrame;
  }
  if (frameChain.length === 0) return null;

  try {
    // One selector per iframe, each scoped to the body of the document that hosts it
    const segments: string[] = frameChain.map((hop, position) => {
      const scope = position === 0
        ? document.body
        : (frameChain[position - 1].document.body as Element);
      return finder(hop.frame, { root: scope });
    });

    // Finally the target itself, scoped to the innermost frame's document
    const innermost = frameChain[frameChain.length - 1];
    segments.push(finder(element, { root: innermost.document.body as Element }));

    return {
      fullSelector: segments.join(' :>> '),
      isFrameContent: true
    };
  } catch (e) {
    console.warn('Error generating iframe selector:', e);
    return null;
  }
};
2024-06-06 05:14:18 +05:30
/**
 * Builds a selector that reaches `element` through every shadow root containing it.
 * Each shadow host and the final element are joined with ' >> '.
 *
 * @param element Element to describe.
 * @returns `{ fullSelector, mode }` where `mode` is the mode of the innermost
 *          shadow root, or null when the element is not inside a shadow root
 *          or selector generation throws.
 */
const genSelectorForShadowDOM = (element: HTMLElement) => {
  const MAX_SHADOW_HOPS = 4;

  // Chain of enclosing shadow roots, outermost first.
  const shadowChain: Array<{ host: HTMLElement; root: ShadowRoot; element: HTMLElement }> = [];
  let node = element;
  for (let hops = 0; node && hops < MAX_SHADOW_HOPS; hops++) {
    const root = node.getRootNode();
    if (!(root instanceof ShadowRoot)) break;
    const host = root.host as HTMLElement;
    shadowChain.unshift({ host, root, element: node });
    // Continue from the host, which lives in the enclosing tree
    node = host;
  }
  if (shadowChain.length === 0) return null;

  try {
    // One selector per host, each scoped to the tree that contains that host
    const segments: string[] = shadowChain.map((hop, position) => {
      const scope = position === 0
        ? document.body
        : (shadowChain[position - 1].root as unknown as Element);
      return finder(hop.host, { root: scope });
    });

    // Finally the target itself, scoped to the innermost shadow root
    const innermost = shadowChain[shadowChain.length - 1];
    segments.push(finder(element, { root: innermost.root as unknown as Element }));

    return {
      fullSelector: segments.join(' >> '),
      mode: innermost.root.mode
    };
  } catch (e) {
    console.warn('Error generating shadow DOM selector:', e);
    return null;
  }
};
2024-06-06 05:14:18 +05:30
/**
 * Generates every selector flavour this module knows about for one element.
 * Each flavour is best-effort: a failing generator yields null for that entry
 * instead of aborting the whole result.
 *
 * @param element Element to describe; null yields null.
 * @returns An object holding the individual selectors plus the element's
 *          text and href, or null when no element was given.
 */
const genSelectors = (element: HTMLElement | null) => {
  if (element == null) {
    return null;
  }
  const target: HTMLElement = element;

  // Runs a generator and maps any thrown error to null.
  const attempt = <T>(generate: () => T): T | null => {
    try {
      return generate();
    } catch (e) {
      return null;
    }
  };

  const href = target.getAttribute('href');
  const generalSelector = attempt(() => finder(target));
  const attrSelector = attempt(() => finder(target, { attr: () => true }));

  const iframeSelector = genSelectorForIframe(target);
  const shadowSelector = genSelectorForShadowDOM(target);

  const hrefSelector = genSelectorForAttributes(target, ['href']);
  const formSelector = genSelectorForAttributes(target, ['name', 'placeholder', 'for']);
  const accessibilitySelector = genSelectorForAttributes(target, ['aria-label', 'alt', 'title']);
  const testIdSelector = genSelectorForAttributes(target, [
    'data-testid',
    'data-test-id',
    'data-testing',
    'data-test',
    'data-qa',
    'data-cy',
  ]);

  // An id selector is skipped when the id is missing or starts with a digit.
  // Certain apps don't have unique ids (ex. youtube), hence finder is still used.
  const idSelector = attempt(() => {
    if (!isAttributesDefined(target, ['id']) || isCharacterNumber(target.id?.[0])) {
      return null;
    }
    return finder(target, { attr: (name: string) => name === 'id' });
  });

  const iframeInfo = iframeSelector
    ? { full: iframeSelector.fullSelector, isIframe: iframeSelector.isFrameContent }
    : null;
  const shadowInfo = shadowSelector
    ? { full: shadowSelector.fullSelector, mode: shadowSelector.mode }
    : null;

  return {
    id: idSelector,
    generalSelector,
    attrSelector,
    testIdSelector,
    text: target.innerText,
    href,
    // null unless the element carries a non-empty href attribute
    hrefSelector,
    accessibilitySelector,
    formSelector,
    iframeSelector: iframeInfo,
    shadowSelector: shadowInfo
  };
}
2024-06-06 05:14:18 +05:30
/**
 * Collects which of the requested attribute names are present on the element
 * with a non-empty value.
 *
 * @param element Element to inspect.
 * @param attributes Attribute names to look for.
 * @returns Set of the names whose value is neither null nor an empty string.
 */
function genAttributeSet(element: HTMLElement, attributes: string[]) {
  const present = new Set<string>();
  for (const name of attributes) {
    const value = element.getAttribute(name);
    if (value == null || value.length === 0) continue;
    present.add(name);
  }
  return present;
}
/**
 * Tells whether the element has a non-empty value for at least one of the
 * given attribute names.
 */
function isAttributesDefined(element: HTMLElement, attributes: string[]) {
  const defined = genAttributeSet(element, attributes);
  return defined.size !== 0;
}
/**
 * Creates a predicate that accepts only those of the given attribute names
 * that the element carries with a non-empty value.
 */
function genValidAttributeFilter(element: HTMLElement, attributes: string[]) {
  const allowed = genAttributeSet(element, attributes);
  return function (name: string) {
    return allowed.has(name);
  };
}
/**
 * Generates a selector for the element that relies only on the given
 * attributes (never on the id).
 *
 * @returns The selector, or null when none of the attributes has a non-empty
 *          value on the element or when generation throws.
 */
function genSelectorForAttributes(element: HTMLElement, attributes: string[]) {
  try {
    if (!isAttributesDefined(element, attributes)) {
      return null;
    }
    return finder(element, {
      idName: () => false, // Don't use the id to generate a selector
      attr: genValidAttributeFilter(element, attributes),
    });
  } catch (e) {
    // Best-effort: a selector that cannot be generated is reported as null
    return null;
  }
}
/**
 * Tells whether the given value is exactly one decimal digit character.
 *
 * @param char Character to test; undefined (e.g. the first character of an
 *             empty string) is treated as "not a digit".
 * @returns true only for a single character in the range 0-9.
 */
function isCharacterNumber(char: string | undefined): boolean {
  return char !== undefined && char.length === 1 && /[0-9]/.test(char);
}
const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement;

// Skip elements that live inside `#overlay-controls`.
// The previous condition (`!closest(...) != null`) compared a boolean with
// null and was therefore always true, so the exclusion never applied.
if (
  hoveredElement != null &&
  hoveredElement.closest('#overlay-controls') == null
) {
  const { parentElement } = hoveredElement;
  // Match the logic in recorder.ts for link clicks
  const element = parentElement?.tagName === 'A' ? parentElement : hoveredElement;
  const generatedSelectors = genSelectors(element);
  return generatedSelectors;
}
}, { x: coordinates.x, y: coordinates.y });
return selectors;
} catch (e) {
const { message, stack } = e as Error;
logger.log('error', `Error while retrieving element: ${message}`);
logger.log('error', `Stack: ${stack}`);
}
return null;
2024-06-05 10:42:00 +05:30
};
2024-08-06 23:12:31 +05:30
2024-09-02 22:37:25 +05:30
/** Shape of the value resolved by {@link getNonUniqueSelectors}. */
interface SelectorResult {
  // Generated non-unique selector; an empty string is used when no element
  // was found at the coordinates or when generation failed.
  generalSelector: string;
}
2024-08-06 23:12:31 +05:30
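/**
 * Returns a non-unique CSS selector for the element at the given coordinates,
 * descending into shadow roots and accessible iframes.
 * @param page The page instance.
 * @param coordinates Coordinates of an element.
 * @param listSelector Already chosen list selector. When it is empty, the
 * selector is generated for an enclosing container of the hovered element
 * (a table for table cells, otherwise the outermost parent that tightly wraps
 * it); when it is set, the hovered element itself is used.
 * @category WorkflowManagement-Selectors
 * @returns {Promise<SelectorResult>} The generated selector; `generalSelector`
 * is an empty string when no element is found or an error occurs.
 */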
2024-08-07 00:28:45 +05:30
2024-12-17 02:48:14 +05:30
export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise<SelectorResult> => {
2025-01-05 00:05:47 +05:30
interface DOMContext {
type: 'iframe' | 'shadow';
element: HTMLElement;
2025-01-05 00:05:47 +05:30
container: HTMLIFrameElement | ShadowRoot;
host?: HTMLElement;
document?: Document;
}
2024-08-06 23:12:31 +05:30
try {
2024-12-17 02:48:14 +05:30
if (!listSelector) {
2024-12-20 17:23:59 +05:30
const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => {
/**
 * Resolves the innermost element rendered at a viewport point. When the point
 * hits an iframe, the iframe hierarchy is descended (with shadow roots checked
 * inside each frame); otherwise only shadow roots are descended.
 *
 * Coordinates are re-based every time an iframe boundary is crossed, so nested
 * iframes and shadow roots inside iframes are hit-tested in the coordinate
 * space of the document that owns them.
 *
 * @param x Horizontal coordinate relative to the top-level viewport.
 * @param y Vertical coordinate relative to the top-level viewport.
 * @returns The deepest reachable element, or null when nothing is at the point.
 */
const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
  const MAX_DEPTH = 4;

  // Walks down nested shadow roots using coordinates local to the document
  // that contains the shadow host.
  const traverseShadowDOM = (element: HTMLElement, localX: number, localY: number): HTMLElement => {
    let current = element;
    let deepest = current;
    let shadowRoot = current.shadowRoot;
    while (shadowRoot) {
      const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
      if (!shadowElement || shadowElement === current) break;
      deepest = shadowElement;
      current = shadowElement;
      shadowRoot = current.shadowRoot;
    }
    return deepest;
  };

  // Start with the element at coordinates
  const element = document.elementFromPoint(x, y) as HTMLElement;
  if (!element) return null;

  // If not an iframe, only shadow DOM needs to be checked
  if (element.tagName !== 'IFRAME') {
    return traverseShadowDOM(element, x, y);
  }

  // Handle iframe traversal
  let deepestElement = element;
  let currentIframe = element as HTMLIFrameElement;
  // Coordinates expressed in the space of the document currently being probed
  let localX = x;
  let localY = y;
  let depth = 0;

  while (currentIframe && depth < MAX_DEPTH) {
    try {
      // The rect is relative to the viewport of the document hosting this
      // iframe, so subtract it from that document's coordinates (not from the
      // original top-level ones).
      const iframeRect = currentIframe.getBoundingClientRect();
      localX -= iframeRect.left;
      localY -= iframeRect.top;

      const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document;
      if (!iframeDoc) break;

      const iframeElement = iframeDoc.elementFromPoint(localX, localY) as HTMLElement;
      if (!iframeElement) break;

      // Check for shadow DOM within iframe, using iframe-local coordinates
      deepestElement = traverseShadowDOM(iframeElement, localX, localY);

      if (deepestElement.tagName !== 'IFRAME') break;
      currentIframe = deepestElement as HTMLIFrameElement;
      depth++;
    } catch (error) {
      console.warn('Cannot access iframe content:', error);
      break;
    }
  }
  return deepestElement;
};
2025-01-05 00:05:47 +05:30
// Basic selector generation
2024-12-20 17:23:59 +05:30
/**
 * Builds a non-unique selector from an element's tag name and classes.
 *
 * - `td` elements with a parent are addressed by position (`td:nth-child(n)`).
 * - Classes starting with `!` or containing `:` are left out; the remaining
 *   ones are escaped with `CSS.escape`.
 * - Elements whose `className` is not a string (SVG elements expose an
 *   SVGAnimatedString there) are described by tag name only instead of
 *   throwing.
 *
 * @param element Element to describe.
 * @returns The selector, e.g. `div.card.item`.
 */
function getNonUniqueSelector(element: HTMLElement): string {
  const tag = element.tagName.toLowerCase();

  if (tag === 'td' && element.parentElement) {
    // Find position among the siblings of the cell
    const siblings = Array.from(element.parentElement.children);
    const position = siblings.indexOf(element) + 1;
    return `${tag}:nth-child(${position})`;
  }

  const className = typeof element.className === 'string' ? element.className : '';
  const validClasses = className
    .split(/\s+/)
    .filter((cls: string) => cls.length > 0 && !cls.startsWith('!') && !cls.includes(':'));

  if (validClasses.length === 0) {
    return tag;
  }
  return tag + '.' + validClasses.map(cls => CSS.escape(cls)).join('.');
}
2025-01-05 00:05:47 +05:30
/**
 * Lists the shadow-root and iframe boundaries that enclose an element,
 * outermost first, looking at most four levels up.
 *
 * @param element Element whose enclosing contexts are wanted.
 * @returns One entry per boundary; empty when the element sits directly in
 *          the top-level document.
 */
function getContextPath(element: HTMLElement): DOMContext[] {
  const MAX_DEPTH = 4;
  const contexts: DOMContext[] = [];
  let node = element;

  for (let level = 0; node && level < MAX_DEPTH; level++) {
    // A shadow boundary takes precedence over an iframe boundary
    const root = node.getRootNode();
    if (root instanceof ShadowRoot) {
      const host = root.host as HTMLElement;
      contexts.unshift({ type: 'shadow', element: node, container: root, host });
      node = host;
      continue;
    }

    // A document rendered inside an iframe exposes that iframe as frameElement
    const doc = node.ownerDocument;
    const frame = doc?.defaultView?.frameElement as HTMLIFrameElement;
    if (!frame) break;
    contexts.unshift({ type: 'iframe', element: node, container: frame, document: doc });
    node = frame;
  }

  return contexts;
}
2024-12-20 17:23:59 +05:30
/**
 * Produces the non-unique selector path for an element.
 *
 * Inside shadow roots or iframes the path is
 * `container <delim> container <delim> ... element`, where every delimiter
 * reflects the kind of boundary crossed at that step (' >> ' for a shadow
 * root, ' :>> ' for an iframe). Previously all but the last delimiter were
 * taken from the outermost boundary, which mislabelled mixed nestings of
 * three or more levels; one- and two-level paths are unchanged.
 *
 * In the regular DOM the path consists of the element and at most one
 * ancestor, joined with ' > '.
 *
 * @param element Element to describe; null yields an empty string.
 */
function getSelectorPath(element: HTMLElement | null): string {
  if (!element) return '';

  // Get the complete context path
  const contextPath = getContextPath(element);
  if (contextPath.length > 0) {
    let selector = '';
    for (const context of contextPath) {
      const isShadow = context.type === 'shadow';
      const container = isShadow ? context.host! : context.container as HTMLElement;
      // Delimiter describes the boundary between this container and what follows
      selector += getNonUniqueSelector(container) + (isShadow ? ' >> ' : ' :>> ');
    }
    return selector + getNonUniqueSelector(element);
  }

  // Regular DOM path generation
  const MAX_DEPTH = 2;
  const path: string[] = [];
  let currentElement = element;
  let depth = 0;

  while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) {
    path.unshift(getNonUniqueSelector(currentElement));
    if (!currentElement.parentElement) break;
    currentElement = currentElement.parentElement;
    depth++;
  }

  return path.join(' > ');
}
2024-08-06 23:12:31 +05:30
// Main logic to get element and generate selector
// Resolve the hovered element, then widen the selection to a suitable container.
const hitElement = getDeepestElementFromPoint(x, y);
if (!hitElement) return null;

const isPageRoot = (node: Element): boolean => {
  const tag = node.tagName.toLowerCase();
  return tag === 'body' || tag === 'html';
};

// Table cells are represented by their enclosing table, when there is one.
const isTableCell = hitElement.tagName === 'TD' || hitElement.tagName === 'TH';
let container: HTMLElement = (isTableCell && hitElement.closest('table')) || hitElement;

if (container.tagName !== 'TABLE') {
  // Climb while the parent fully contains the current element and the element
  // covers more than half of the parent's area; never climb onto body/html.
  for (let parent = container.parentElement; parent; parent = container.parentElement) {
    if (isPageRoot(container)) break;

    const outer = parent.getBoundingClientRect();
    const inner = container.getBoundingClientRect();

    const fullyContained =
      outer.left <= inner.left &&
      outer.right >= inner.right &&
      outer.top <= inner.top &&
      outer.bottom >= inner.bottom;
    const areaRatio = (inner.width * inner.height) / (outer.width * outer.height);

    if (!fullyContained || !(areaRatio > 0.5)) break;
    if (isPageRoot(parent)) break;

    container = parent;
  }
}

return { generalSelector: getSelectorPath(container) };
2024-12-20 17:23:59 +05:30
}, coordinates);
2024-12-20 17:23:59 +05:30
return selectors || { generalSelector: '' };
} else {
// When we have a list selector, we need special handling while maintaining shadow DOM support
2024-12-20 17:23:59 +05:30
const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => {
/**
 * Resolves the innermost element rendered at a viewport point, descending
 * through open shadow roots and accessible iframes.
 *
 * Coordinates are re-based every time an iframe boundary is crossed, so that
 * nested iframes and shadow roots inside iframes are hit-tested in the
 * coordinate space of the document that actually owns them.
 *
 * @param x Horizontal coordinate relative to the top-level viewport.
 * @param y Vertical coordinate relative to the top-level viewport.
 * @returns The deepest reachable element, or null when nothing is at the point.
 */
const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
  const MAX_SHADOW_DEPTH = 4;
  const MAX_IFRAME_DEPTH = 4;

  // Walks down nested shadow roots, querying each root with coordinates that
  // are local to the document containing the shadow host.
  const traverseShadowDOM = (element: HTMLElement, localX: number, localY: number): HTMLElement => {
    let current = element;
    let deepest = current;
    let depth = 0;
    while (current && depth < MAX_SHADOW_DEPTH) {
      const shadowRoot = current.shadowRoot;
      if (!shadowRoot) break;
      const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
      if (!shadowElement || shadowElement === current) break;
      deepest = shadowElement;
      current = shadowElement;
      depth++;
    }
    return deepest;
  };

  // Start with the element at the specified coordinates
  const element = document.elementFromPoint(x, y) as HTMLElement;
  if (!element) return null;

  // First check if the initial element has a shadow root
  let deepestElement = traverseShadowDOM(element, x, y);

  // Coordinates expressed in the space of the document currently being probed.
  let localX = x;
  let localY = y;
  let depth = 0;
  let currentIframe: HTMLIFrameElement | null =
    deepestElement.tagName === 'IFRAME' ? (deepestElement as HTMLIFrameElement) : null;

  while (currentIframe && depth < MAX_IFRAME_DEPTH) {
    try {
      // The iframe rect is relative to the viewport of the document hosting
      // the iframe, so subtract it from the coordinates of that same document
      // (not from the original top-level coordinates).
      const iframeRect = currentIframe.getBoundingClientRect();
      localX -= iframeRect.left;
      localY -= iframeRect.top;

      const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document;
      if (!iframeDoc) break;

      const iframeElement = iframeDoc.elementFromPoint(localX, localY) as HTMLElement;
      if (!iframeElement) break;

      // Check for shadow DOM within the iframe, using iframe-local coordinates
      deepestElement = traverseShadowDOM(iframeElement, localX, localY);

      // Keep descending only while we keep landing on another iframe
      if (deepestElement.tagName !== 'IFRAME') break;
      currentIframe = deepestElement as HTMLIFrameElement;
      depth++;
    } catch (error) {
      console.warn('Cannot access iframe content:', error);
      break;
    }
  }
  return deepestElement;
};
// Generate basic selector from element's tag and classes
2024-12-20 17:23:59 +05:30
/**
 * Builds a non-unique selector from an element's tag name and classes.
 *
 * - `td` elements with a parent are addressed by position (`td:nth-child(n)`).
 * - Classes starting with `!` or containing `:` are left out; the remaining
 *   ones are escaped with `CSS.escape`.
 * - Elements whose `className` is not a string (SVG elements expose an
 *   SVGAnimatedString there) are described by tag name only instead of
 *   throwing.
 *
 * @param element Element to describe.
 * @returns The selector, e.g. `div.card.item`.
 */
function getNonUniqueSelector(element: HTMLElement): string {
  const tag = element.tagName.toLowerCase();

  if (tag === 'td' && element.parentElement) {
    // Find position among the siblings of the cell
    const siblings = Array.from(element.parentElement.children);
    const position = siblings.indexOf(element) + 1;
    return `${tag}:nth-child(${position})`;
  }

  const className = typeof element.className === 'string' ? element.className : '';
  const validClasses = className
    .split(/\s+/)
    .filter((cls: string) => cls.length > 0 && !cls.startsWith('!') && !cls.includes(':'));

  if (validClasses.length === 0) {
    return tag;
  }
  return tag + '.' + validClasses.map(cls => CSS.escape(cls)).join('.');
}
2025-01-05 00:05:47 +05:30
/**
 * Lists the shadow-root and iframe boundaries that enclose an element,
 * outermost first, looking at most four levels up.
 *
 * @param element Element whose enclosing contexts are wanted.
 * @returns One entry per boundary; empty when the element sits directly in
 *          the top-level document.
 */
function getContextPath(element: HTMLElement): DOMContext[] {
  const MAX_DEPTH = 4;
  const contexts: DOMContext[] = [];
  let node = element;

  for (let level = 0; node && level < MAX_DEPTH; level++) {
    // A shadow boundary takes precedence over an iframe boundary
    const root = node.getRootNode();
    if (root instanceof ShadowRoot) {
      const host = root.host as HTMLElement;
      contexts.unshift({ type: 'shadow', element: node, container: root, host });
      node = host;
      continue;
    }

    // A document rendered inside an iframe exposes that iframe as frameElement
    const doc = node.ownerDocument;
    const frame = doc?.defaultView?.frameElement as HTMLIFrameElement;
    if (!frame) break;
    contexts.unshift({ type: 'iframe', element: node, container: frame, document: doc });
    node = frame;
  }

  return contexts;
}
2024-12-20 17:23:59 +05:30
/**
 * Produces the non-unique selector path for an element.
 *
 * Inside shadow roots or iframes the path is
 * `container <delim> container <delim> ... element`, where every delimiter
 * reflects the kind of boundary crossed at that step (' >> ' for a shadow
 * root, ' :>> ' for an iframe). Previously all but the last delimiter were
 * taken from the outermost boundary, which mislabelled mixed nestings of
 * three or more levels; one- and two-level paths are unchanged.
 *
 * In the regular DOM the path consists of the element and at most one
 * ancestor, joined with ' > '.
 *
 * @param element Element to describe; null yields an empty string.
 */
function getSelectorPath(element: HTMLElement | null): string {
  if (!element) return '';

  // Get the complete context path
  const contextPath = getContextPath(element);
  if (contextPath.length > 0) {
    let selector = '';
    for (const context of contextPath) {
      const isShadow = context.type === 'shadow';
      const container = isShadow ? context.host! : context.container as HTMLElement;
      // Delimiter describes the boundary between this container and what follows
      selector += getNonUniqueSelector(container) + (isShadow ? ' >> ' : ' :>> ');
    }
    return selector + getNonUniqueSelector(element);
  }

  // Regular DOM path generation
  const MAX_DEPTH = 2;
  const path: string[] = [];
  let currentElement = element;
  let depth = 0;

  while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) {
    path.unshift(getNonUniqueSelector(currentElement));
    if (!currentElement.parentElement) break;
    currentElement = currentElement.parentElement;
    depth++;
  }

  return path.join(' > ');
}
// In this branch the element under the pointer is used as-is (no climbing to
// a container); a miss is reported as an empty selector.
const hitElement = getDeepestElementFromPoint(x, y);
return { generalSelector: hitElement ? getSelectorPath(hitElement) : '' };
}, coordinates);
2024-12-17 02:48:14 +05:30
return selectors || { generalSelector: '' };
}
2024-08-07 00:28:45 +05:30
} catch (error) {
console.error('Error in getNonUniqueSelectors:', error);
2024-09-02 22:37:57 +05:30
return { generalSelector: '' };
2024-08-06 23:12:31 +05:30
}
};
2024-11-20 03:46:33 +05:30
export const getChildSelectors = async (page: Page, parentSelector: string): Promise<string[]> => {
2024-09-02 20:46:30 +05:30
try {
2024-11-20 03:46:33 +05:30
const childSelectors = await page.evaluate((parentSelector: string) => {
/**
 * Builds a non-unique selector from the element's tag name and, when present,
 * its classes. `td` elements with a parent are addressed by position instead.
 * Classes starting with `!` or containing `:` are ignored, and a `className`
 * that is not a string is treated as having no classes.
 */
function getNonUniqueSelector(element: HTMLElement): string {
  const tag = element.tagName.toLowerCase();

  if (tag === 'td' && element.parentElement) {
    const index = Array.from(element.parentElement.children).indexOf(element);
    return `${tag}:nth-child(${index + 1})`;
  }

  if (typeof element.className !== 'string') {
    return tag;
  }

  const usable = element.className
    .split(/\s+/)
    .filter((token: string) => token !== '' && !token.startsWith('!') && !token.includes(':'));
  if (usable.length === 0) {
    return tag;
  }
  return `${tag}.${usable.map(token => CSS.escape(token)).join('.')}`;
}
2024-11-20 03:46:33 +05:30
/**
 * Describes an element relative to its immediate context:
 * `host >> element` inside a shadow root, `iframe :>> element` inside an
 * iframe document, and `parent > element` in the regular DOM.
 *
 * @returns The selector path, or an empty string when the element is missing
 *          or has no parent element.
 */
function getSelectorPath(element: HTMLElement): string {
  // NOTE(review): this guard runs before the shadow/iframe checks, so nodes
  // without a parent element (e.g. direct children of a shadow root) yield ''
  // - confirm that this is intended.
  const parent = element?.parentElement;
  if (!parent) return '';

  const ownSelector = getNonUniqueSelector(element);

  // Shadow DOM context: anchor on the shadow host
  const root = element.getRootNode();
  if (root instanceof ShadowRoot) {
    return [getNonUniqueSelector(root.host as HTMLElement), ownSelector].join(' >> ');
  }

  // Iframe context: anchor on the iframe element hosting the document
  const hostFrame = element.ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
  if (hostFrame) {
    return [getNonUniqueSelector(hostFrame), ownSelector].join(' :>> ');
  }

  // Regular DOM context: anchor on the direct parent
  return [getNonUniqueSelector(parent), ownSelector].join(' > ');
}
2025-01-05 00:05:47 +05:30
/**
 * Collects the elements reachable from `element` only through a special
 * context: every element inside its own shadow root, followed by every
 * element inside each accessible iframe found among its descendants.
 * Iframes whose content cannot be accessed are skipped with a warning.
 */
function getSpecialContextChildren(element: HTMLElement): HTMLElement[] {
  const collected: HTMLElement[] = [];

  // Elements inside the element's own shadow root
  const shadow = element.shadowRoot;
  if (shadow) {
    for (const node of Array.from(shadow.querySelectorAll('*')) as HTMLElement[]) {
      collected.push(node);
    }
  }

  // Elements inside each descendant iframe
  for (const frame of Array.from(element.querySelectorAll('iframe')) as HTMLIFrameElement[]) {
    try {
      const frameDoc = frame.contentDocument || frame.contentWindow?.document;
      if (!frameDoc) continue;
      for (const node of Array.from(frameDoc.querySelectorAll('*')) as HTMLElement[]) {
        collected.push(node);
      }
    } catch (error) {
      console.warn('Cannot access iframe content:', error);
    }
  }

  return collected;
}
2025-01-05 00:05:47 +05:30
// Function to recursively get all descendant selectors including shadow DOM and iframes
2024-11-20 03:46:33 +05:30
/**
 * Recursively collects selector paths for everything below `element`:
 * regular DOM children first, then the elements reachable through the
 * element's shadow root and descendant iframes.
 *
 * A node for which no selector path can be produced is skipped together with
 * its subtree. The special-context children of each regular child are handled
 * by the recursive call for that child; the former additional pass over them
 * only produced duplicates (in the same order) and multiplied the work at
 * every nesting level, so it has been removed. The de-duplicated result is
 * unchanged.
 *
 * @param element Root of the subtree to describe (not included itself).
 * @returns Selector paths in discovery order; may contain duplicates.
 */
function getAllDescendantSelectors(element: HTMLElement): string[] {
  const selectors: string[] = [];

  // Records one node and everything below it.
  const visit = (node: HTMLElement): void => {
    const path = getSelectorPath(node);
    if (!path) return;
    selectors.push(path);
    for (const nested of getAllDescendantSelectors(node)) {
      selectors.push(nested);
    }
  };

  // Handle regular DOM children
  for (const child of Array.from(element.children) as HTMLElement[]) {
    visit(child);
  }

  // Handle special context children (shadow DOM and iframes)
  for (const specialChild of getSpecialContextChildren(element)) {
    visit(specialChild);
  }

  return selectors;
}
2025-01-05 00:05:47 +05:30
// Handle both shadow DOM and iframe parent selectors
// Resolve the parent elements, following shadow-DOM ('>>') and iframe (':>>')
// hops in the parent selector when present.
let parentElements: HTMLElement[];

// ':>>' contains '>>', so a single check covers both delimiters
if (!parentSelector.includes('>>')) {
  // Regular DOM selector
  parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[];
} else {
  const [firstPart, ...nestedParts] = parentSelector.split(/(?:>>|:>>)/).map(part => part.trim());

  // Start in the top-level document, then step through each nested part
  parentElements = Array.from(document.querySelectorAll(firstPart)) as HTMLElement[];

  for (const part of nestedParts) {
    const matches: HTMLElement[] = [];

    for (const container of parentElements) {
      // Look inside the container's shadow root, if it has one
      if (container.shadowRoot) {
        matches.push(...(Array.from(container.shadowRoot.querySelectorAll(part)) as HTMLElement[]));
      }

      // Look inside the container's document, if it is an iframe
      if (container.tagName !== 'IFRAME') continue;
      try {
        const frame = container as HTMLIFrameElement;
        const frameDoc = frame.contentDocument || frame.contentWindow?.document;
        if (frameDoc) {
          matches.push(...(Array.from(frameDoc.querySelectorAll(part)) as HTMLElement[]));
        }
      } catch (error) {
        console.warn('Cannot access iframe content during traversal:', error);
      }
    }

    parentElements = matches;
  }
}

// Gather the descendant selectors of every parent; the set removes duplicates
// while keeping first-seen order.
const allChildSelectors = new Set<string>();
for (const parentElement of parentElements) {
  for (const selector of getAllDescendantSelectors(parentElement)) {
    allChildSelectors.add(selector);
  }
}
return Array.from(allChildSelectors);
2024-11-20 03:46:33 +05:30
}, parentSelector);
2024-09-02 20:46:30 +05:30
return childSelectors || [];
} catch (error) {
2024-11-20 03:46:33 +05:30
console.error('Error in getChildSelectors:', error);
2024-09-02 20:46:30 +05:30
return [];
}
};
2024-11-20 03:46:33 +05:30
2024-06-06 05:13:19 +05:30
/**
 * Finds the first pair in the workflow whose where condition lists the given
 * selector as its one and only selector.
 * @param selector The selector to find.
 * @param workflow The workflow to search in.
 * @category WorkflowManagement
 * @returns The matching pair, or undefined when there is none.
 */
export const selectorAlreadyInWorkflow = (selector: string, workflow: Workflow) => {
  for (const pair of workflow) {
    const selectors = pair.where.selectors;
    // Exactly one selector, and it is the one we are looking for
    if (selectors?.length === 1 && selectors[0] === selector) {
      return pair;
    }
  }
  return undefined;
};
/**
 * Checks whether the given selectors are visible on the page at the same time.
 * @param selectors The selectors to check.
 * @param page The page to use for the validation.
 * @category WorkflowManagement
 * @returns true only when every selector matches at least one element and all
 * of its matches are reported visible; true as well for an empty selector list.
 */
export const isRuleOvershadowing = async (selectors: string[], page: Page): Promise<boolean> => {
  for (const selector of selectors) {
    const visibilityFlags = await page.$$eval(selector, (elems) => {
      // Has client rects and is not hidden through the `visibility` style.
      const isRendered = (elem: HTMLElement | SVGElement) =>
        elem.getClientRects().length
        && window.getComputedStyle(elem).visibility !== "hidden";

      // NOTE(review): for HTML elements a non-zero offset size alone counts as
      // visible; the `visibility` style is only consulted when both offsets
      // are zero - confirm this precedence is intended.
      const isVisible = (elem: HTMLElement | SVGElement): boolean =>
        elem instanceof HTMLElement
          ? !!(elem.offsetWidth || elem.offsetHeight || isRendered(elem))
          : !!isRendered(elem);

      return elems.map((el) => isVisible(el));
    });

    // No match at all, or at least one hidden match: the rule cannot apply
    if (visibilityFlags.length === 0 || visibilityFlags.includes(false)) {
      return false;
    }
  }
  return true;
}