Merge pull request #675 from getmaxun/smart-list
feat: better, faster, smarter capture list
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -243,7 +243,7 @@ export class RemoteBrowser {
|
||||
scripts: [] as Array<{ src: string; content: string; type?: string }>,
|
||||
media: [] as Array<{ src: string; dataUrl: string; type: string }>,
|
||||
};
|
||||
|
||||
|
||||
const viewport = (await this.currentPage?.viewportSize()) || {
|
||||
width: 1280,
|
||||
height: 720,
|
||||
@@ -617,7 +617,7 @@ export class RemoteBrowser {
|
||||
);
|
||||
|
||||
await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });
|
||||
|
||||
|
||||
this.currentPage = await this.context.newPage();
|
||||
|
||||
await this.setupPageEventListeners(this.currentPage);
|
||||
@@ -1286,129 +1286,181 @@ export class RemoteBrowser {
|
||||
* @returns void
|
||||
*/
|
||||
public registerEditorEvents = (): void => {
|
||||
// For each event, include userId to make sure events are handled for the correct browser
|
||||
logger.log('debug', `Registering editor events for user: ${this.userId}`);
|
||||
// For each event, include userId to make sure events are handled for the correct browser
|
||||
logger.log("debug", `Registering editor events for user: ${this.userId}`);
|
||||
|
||||
this.socket.on(`captureDirectScreenshot:${this.userId}`, async (settings) => {
|
||||
logger.debug(`Direct screenshot capture requested for user ${this.userId}`);
|
||||
this.socket.on(
|
||||
`captureDirectScreenshot:${this.userId}`,
|
||||
async (settings) => {
|
||||
logger.debug(
|
||||
`Direct screenshot capture requested for user ${this.userId}`
|
||||
);
|
||||
await this.captureDirectScreenshot(settings);
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
// For backward compatibility
|
||||
this.socket.on('captureDirectScreenshot', async (settings) => {
|
||||
await this.captureDirectScreenshot(settings);
|
||||
});
|
||||
|
||||
// Listen for specific events for this user
|
||||
this.socket.on(`rerender:${this.userId}`, async () => {
|
||||
logger.debug(`Rerender event received for user ${this.userId}`);
|
||||
await this.makeAndEmitScreenshot();
|
||||
});
|
||||
|
||||
// For backward compatibility, also listen to the general event
|
||||
this.socket.on('rerender', async () => {
|
||||
logger.debug(`General rerender event received, checking if for user ${this.userId}`);
|
||||
await this.makeAndEmitScreenshot();
|
||||
});
|
||||
|
||||
this.socket.on(`settings:${this.userId}`, (settings) => {
|
||||
this.interpreterSettings = settings;
|
||||
logger.debug(`Settings updated for user ${this.userId}`);
|
||||
});
|
||||
|
||||
this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => {
|
||||
logger.debug(`Tab change to ${tabIndex} requested for user ${this.userId}`);
|
||||
await this.changeTab(tabIndex);
|
||||
});
|
||||
|
||||
this.socket.on(`addTab:${this.userId}`, async () => {
|
||||
logger.debug(`New tab requested for user ${this.userId}`);
|
||||
await this.currentPage?.context().newPage();
|
||||
const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0;
|
||||
await this.changeTab(lastTabIndex);
|
||||
});
|
||||
|
||||
this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => {
|
||||
logger.debug(`Close tab ${tabInfo.index} requested for user ${this.userId}`);
|
||||
const page = this.currentPage?.context().pages()[tabInfo.index];
|
||||
if (page) {
|
||||
if (tabInfo.isCurrent) {
|
||||
if (this.currentPage?.context().pages()[tabInfo.index + 1]) {
|
||||
// next tab
|
||||
await this.changeTab(tabInfo.index + 1);
|
||||
} else {
|
||||
//previous tab
|
||||
await this.changeTab(tabInfo.index - 1);
|
||||
}
|
||||
}
|
||||
await page.close();
|
||||
logger.log(
|
||||
'debug',
|
||||
`Tab ${tabInfo.index} was closed for user ${this.userId}, new tab count: ${this.currentPage?.context().pages().length}`
|
||||
);
|
||||
// For backward compatibility
|
||||
this.socket.on("captureDirectScreenshot", async (settings) => {
|
||||
await this.captureDirectScreenshot(settings);
|
||||
});
|
||||
|
||||
// Listen for specific events for this user
|
||||
this.socket.on(`rerender:${this.userId}`, async () => {
|
||||
logger.debug(`Rerender event received for user ${this.userId}`);
|
||||
if (this.renderingMode === "dom") {
|
||||
await this.makeAndEmitDOMSnapshot();
|
||||
} else {
|
||||
await this.makeAndEmitScreenshot();
|
||||
}
|
||||
});
|
||||
|
||||
this.socket.on("rerender", async () => {
|
||||
logger.debug(
|
||||
`General rerender event received, checking if for user ${this.userId}`
|
||||
);
|
||||
if (this.renderingMode === "dom") {
|
||||
await this.makeAndEmitDOMSnapshot();
|
||||
} else {
|
||||
await this.makeAndEmitScreenshot();
|
||||
}
|
||||
});
|
||||
|
||||
this.socket.on(`settings:${this.userId}`, (settings) => {
|
||||
this.interpreterSettings = settings;
|
||||
logger.debug(`Settings updated for user ${this.userId}`);
|
||||
});
|
||||
|
||||
this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => {
|
||||
logger.debug(
|
||||
`Tab change to ${tabIndex} requested for user ${this.userId}`
|
||||
);
|
||||
await this.changeTab(tabIndex);
|
||||
});
|
||||
|
||||
this.socket.on(`addTab:${this.userId}`, async () => {
|
||||
logger.debug(`New tab requested for user ${this.userId}`);
|
||||
await this.currentPage?.context().newPage();
|
||||
const lastTabIndex = this.currentPage
|
||||
? this.currentPage.context().pages().length - 1
|
||||
: 0;
|
||||
await this.changeTab(lastTabIndex);
|
||||
});
|
||||
|
||||
this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => {
|
||||
logger.debug(
|
||||
`Close tab ${tabInfo.index} requested for user ${this.userId}`
|
||||
);
|
||||
const page = this.currentPage?.context().pages()[tabInfo.index];
|
||||
if (page) {
|
||||
if (tabInfo.isCurrent) {
|
||||
if (this.currentPage?.context().pages()[tabInfo.index + 1]) {
|
||||
// next tab
|
||||
await this.changeTab(tabInfo.index + 1);
|
||||
} else {
|
||||
logger.log('error', `Tab index ${tabInfo.index} out of range for user ${this.userId}`);
|
||||
//previous tab
|
||||
await this.changeTab(tabInfo.index - 1);
|
||||
}
|
||||
});
|
||||
|
||||
this.socket.on(`setViewportSize:${this.userId}`, async (data: { width: number, height: number }) => {
|
||||
const { width, height } = data;
|
||||
logger.log('debug', `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`);
|
||||
}
|
||||
await page.close();
|
||||
logger.log(
|
||||
"debug",
|
||||
`Tab ${tabInfo.index} was closed for user ${
|
||||
this.userId
|
||||
}, new tab count: ${this.currentPage?.context().pages().length}`
|
||||
);
|
||||
} else {
|
||||
logger.log(
|
||||
"error",
|
||||
`Tab index ${tabInfo.index} out of range for user ${this.userId}`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// Update the browser context's viewport dynamically
|
||||
if (this.context && this.browser) {
|
||||
this.context = await this.browser.newContext({ viewport: { width, height } });
|
||||
logger.log('debug', `Viewport size updated to width=${width}, height=${height} for user ${this.userId}`);
|
||||
}
|
||||
});
|
||||
|
||||
// For backward compatibility, also register the standard events
|
||||
this.socket.on('settings', (settings) => this.interpreterSettings = settings);
|
||||
this.socket.on('changeTab', async (tabIndex) => await this.changeTab(tabIndex));
|
||||
this.socket.on('addTab', async () => {
|
||||
await this.currentPage?.context().newPage();
|
||||
const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0;
|
||||
await this.changeTab(lastTabIndex);
|
||||
});
|
||||
this.socket.on('closeTab', async (tabInfo) => {
|
||||
const page = this.currentPage?.context().pages()[tabInfo.index];
|
||||
if (page) {
|
||||
if (tabInfo.isCurrent) {
|
||||
if (this.currentPage?.context().pages()[tabInfo.index + 1]) {
|
||||
await this.changeTab(tabInfo.index + 1);
|
||||
} else {
|
||||
await this.changeTab(tabInfo.index - 1);
|
||||
}
|
||||
}
|
||||
await page.close();
|
||||
}
|
||||
});
|
||||
this.socket.on('setViewportSize', async (data: { width: number, height: number }) => {
|
||||
const { width, height } = data;
|
||||
if (this.context && this.browser) {
|
||||
this.context = await this.browser.newContext({ viewport: { width, height } });
|
||||
}
|
||||
});
|
||||
this.socket.on(
|
||||
`setViewportSize:${this.userId}`,
|
||||
async (data: { width: number; height: number }) => {
|
||||
const { width, height } = data;
|
||||
logger.log(
|
||||
"debug",
|
||||
`Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`
|
||||
);
|
||||
|
||||
this.socket.on('extractListData', async (data: {
|
||||
listSelector: string,
|
||||
fields: Record<string, any>,
|
||||
currentListId: number,
|
||||
pagination: any
|
||||
// Update the browser context's viewport dynamically
|
||||
if (this.context && this.browser) {
|
||||
this.context = await this.browser.newContext({
|
||||
viewport: { width, height },
|
||||
});
|
||||
logger.log(
|
||||
"debug",
|
||||
`Viewport size updated to width=${width}, height=${height} for user ${this.userId}`
|
||||
);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// For backward compatibility, also register the standard events
|
||||
this.socket.on(
|
||||
"settings",
|
||||
(settings) => (this.interpreterSettings = settings)
|
||||
);
|
||||
this.socket.on(
|
||||
"changeTab",
|
||||
async (tabIndex) => await this.changeTab(tabIndex)
|
||||
);
|
||||
this.socket.on("addTab", async () => {
|
||||
await this.currentPage?.context().newPage();
|
||||
const lastTabIndex = this.currentPage
|
||||
? this.currentPage.context().pages().length - 1
|
||||
: 0;
|
||||
await this.changeTab(lastTabIndex);
|
||||
});
|
||||
this.socket.on("closeTab", async (tabInfo) => {
|
||||
const page = this.currentPage?.context().pages()[tabInfo.index];
|
||||
if (page) {
|
||||
if (tabInfo.isCurrent) {
|
||||
if (this.currentPage?.context().pages()[tabInfo.index + 1]) {
|
||||
await this.changeTab(tabInfo.index + 1);
|
||||
} else {
|
||||
await this.changeTab(tabInfo.index - 1);
|
||||
}
|
||||
}
|
||||
await page.close();
|
||||
}
|
||||
});
|
||||
this.socket.on(
|
||||
"setViewportSize",
|
||||
async (data: { width: number; height: number }) => {
|
||||
const { width, height } = data;
|
||||
if (this.context && this.browser) {
|
||||
this.context = await this.browser.newContext({
|
||||
viewport: { width, height },
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
this.socket.on(
|
||||
"extractListData",
|
||||
async (data: {
|
||||
listSelector: string;
|
||||
fields: Record<string, any>;
|
||||
currentListId: number;
|
||||
pagination: any;
|
||||
}) => {
|
||||
if (this.currentPage) {
|
||||
const extractedData = await this.extractListData(
|
||||
this.currentPage,
|
||||
data.listSelector,
|
||||
data.fields
|
||||
);
|
||||
|
||||
this.socket.emit('listDataExtracted', {
|
||||
currentListId: data.currentListId,
|
||||
data: extractedData
|
||||
});
|
||||
}
|
||||
});
|
||||
if (this.currentPage) {
|
||||
const extractedData = await this.extractListData(
|
||||
this.currentPage,
|
||||
data.listSelector,
|
||||
data.fields
|
||||
);
|
||||
|
||||
this.socket.emit("listDataExtracted", {
|
||||
currentListId: data.currentListId,
|
||||
data: extractedData,
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
};
|
||||
/**
|
||||
* Subscribes the remote browser for a screencast session
|
||||
@@ -1476,15 +1528,12 @@ export class RemoteBrowser {
|
||||
this.isDOMStreamingActive = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* CDP-based DOM snapshot creation using captured network resources
|
||||
*/
|
||||
public async makeAndEmitDOMSnapshot(): Promise<void> {
|
||||
if (
|
||||
!this.currentPage ||
|
||||
!this.isDOMStreamingActive
|
||||
) {
|
||||
if (!this.currentPage || !this.isDOMStreamingActive) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1537,10 +1586,11 @@ export class RemoteBrowser {
|
||||
if (typeof window.rrwebSnapshot === "undefined") {
|
||||
throw new Error("rrweb-snapshot library not available");
|
||||
}
|
||||
return window.rrwebSnapshot.snapshot(document, {
|
||||
inlineImages: true,
|
||||
collectFonts: true,
|
||||
});
|
||||
|
||||
return window.rrwebSnapshot.snapshot(document, {
|
||||
inlineImages: true,
|
||||
collectFonts: true,
|
||||
});
|
||||
});
|
||||
|
||||
// Process the snapshot to proxy resources
|
||||
@@ -1557,10 +1607,12 @@ export class RemoteBrowser {
|
||||
this.emitRRWebSnapshot(enhancedSnapshot);
|
||||
} catch (error) {
|
||||
// Handle navigation context destruction gracefully
|
||||
if (error instanceof Error &&
|
||||
(error.message.includes("Execution context was destroyed") ||
|
||||
if (
|
||||
error instanceof Error &&
|
||||
(error.message.includes("Execution context was destroyed") ||
|
||||
error.message.includes("most likely because of a navigation") ||
|
||||
error.message.includes("Target closed"))) {
|
||||
error.message.includes("Target closed"))
|
||||
) {
|
||||
logger.debug("DOM snapshot skipped due to page navigation or closure");
|
||||
return; // Don't emit error for navigation - this is expected
|
||||
}
|
||||
@@ -1622,35 +1674,35 @@ export class RemoteBrowser {
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
public async switchOff(): Promise<void> {
|
||||
try {
|
||||
this.isScreencastActive = false;
|
||||
this.isDOMStreamingActive = false;
|
||||
try {
|
||||
this.isScreencastActive = false;
|
||||
this.isDOMStreamingActive = false;
|
||||
|
||||
await this.interpreter.stopInterpretation();
|
||||
await this.interpreter.stopInterpretation();
|
||||
|
||||
if (this.screencastInterval) {
|
||||
clearInterval(this.screencastInterval);
|
||||
}
|
||||
|
||||
if (this.domUpdateInterval) {
|
||||
clearInterval(this.domUpdateInterval);
|
||||
}
|
||||
|
||||
if (this.client) {
|
||||
await this.stopScreencast();
|
||||
await this.stopDOM();
|
||||
}
|
||||
|
||||
if (this.browser) {
|
||||
await this.browser.close();
|
||||
}
|
||||
|
||||
this.screenshotQueue = [];
|
||||
//this.performanceMonitor.reset();
|
||||
|
||||
} catch (error) {
|
||||
logger.error('Error during browser shutdown:', error);
|
||||
if (this.screencastInterval) {
|
||||
clearInterval(this.screencastInterval);
|
||||
}
|
||||
|
||||
if (this.domUpdateInterval) {
|
||||
clearInterval(this.domUpdateInterval);
|
||||
}
|
||||
|
||||
if (this.client) {
|
||||
await this.stopScreencast();
|
||||
await this.stopDOM();
|
||||
}
|
||||
|
||||
if (this.browser) {
|
||||
await this.browser.close();
|
||||
}
|
||||
|
||||
this.screenshotQueue = [];
|
||||
//this.performanceMonitor.reset();
|
||||
|
||||
} catch (error) {
|
||||
logger.error('Error during browser shutdown:', error);
|
||||
}
|
||||
}
|
||||
|
||||
private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
|
||||
@@ -1772,6 +1824,7 @@ export class RemoteBrowser {
|
||||
const page = this.currentPage?.context().pages()[tabIndex];
|
||||
if (page) {
|
||||
await this.stopScreencast();
|
||||
await this.stopDOM();
|
||||
this.currentPage = page;
|
||||
|
||||
await this.setupPageEventListeners(this.currentPage);
|
||||
@@ -1783,8 +1836,13 @@ export class RemoteBrowser {
|
||||
url: this.currentPage.url(),
|
||||
userId: this.userId
|
||||
});
|
||||
await this.makeAndEmitScreenshot();
|
||||
await this.subscribeToScreencast();
|
||||
if (this.isDOMStreamingActive) {
|
||||
await this.makeAndEmitDOMSnapshot();
|
||||
await this.subscribeToDOM();
|
||||
} else {
|
||||
await this.makeAndEmitScreenshot();
|
||||
await this.subscribeToScreencast();
|
||||
}
|
||||
} else {
|
||||
logger.log('error', `${tabIndex} index out of range of pages`)
|
||||
}
|
||||
|
||||
@@ -464,7 +464,6 @@ export class WorkflowGenerator {
|
||||
public onClick = async (coordinates: Coordinates, page: Page) => {
|
||||
let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
|
||||
const selector = await this.generateSelector(page, coordinates, ActionType.Click);
|
||||
console.log("COOORDINATES: ", coordinates);
|
||||
logger.log('debug', `Element's selector: ${selector}`);
|
||||
|
||||
const elementInfo = await getElementInformation(page, coordinates, '', false);
|
||||
@@ -999,6 +998,7 @@ export class WorkflowGenerator {
|
||||
rect,
|
||||
selector: displaySelector,
|
||||
elementInfo,
|
||||
isDOMMode: this.isDOMMode,
|
||||
// Include shadow DOM specific information
|
||||
shadowInfo: elementInfo?.isShadowRoot ? {
|
||||
mode: elementInfo.shadowRootMode,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -98,6 +98,7 @@ interface RRWebDOMBrowserRendererProps {
|
||||
getList?: boolean;
|
||||
getText?: boolean;
|
||||
listSelector?: string | null;
|
||||
cachedChildSelectors?: string[];
|
||||
paginationMode?: boolean;
|
||||
paginationType?: string;
|
||||
limitMode?: boolean;
|
||||
@@ -106,12 +107,14 @@ interface RRWebDOMBrowserRendererProps {
|
||||
selector: string;
|
||||
elementInfo: ElementInfo | null;
|
||||
childSelectors?: string[];
|
||||
groupInfo?: any;
|
||||
}) => void;
|
||||
onElementSelect?: (data: {
|
||||
rect: DOMRect;
|
||||
selector: string;
|
||||
elementInfo: ElementInfo | null;
|
||||
childSelectors?: string[];
|
||||
groupInfo?: any;
|
||||
}) => void;
|
||||
onShowDatePicker?: (info: {
|
||||
coordinates: { x: number; y: number };
|
||||
@@ -144,6 +147,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
getList = false,
|
||||
getText = false,
|
||||
listSelector = null,
|
||||
cachedChildSelectors = [],
|
||||
paginationMode = false,
|
||||
paginationType = "",
|
||||
limitMode = false,
|
||||
@@ -205,11 +209,24 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
const handleDOMHighlighting = useCallback(
|
||||
(x: number, y: number, iframeDoc: Document) => {
|
||||
try {
|
||||
if (!getText && !getList) {
|
||||
setCurrentHighlight(null);
|
||||
if (onHighlight) {
|
||||
onHighlight({
|
||||
rect: new DOMRect(0, 0, 0, 0),
|
||||
selector: "",
|
||||
elementInfo: null,
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const highlighterData =
|
||||
clientSelectorGenerator.generateDataForHighlighter(
|
||||
{ x, y },
|
||||
iframeDoc,
|
||||
true
|
||||
true,
|
||||
cachedChildSelectors
|
||||
);
|
||||
|
||||
if (!highlighterData) {
|
||||
@@ -224,70 +241,40 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
return;
|
||||
}
|
||||
|
||||
const { rect, selector, elementInfo, childSelectors } = highlighterData;
|
||||
const { rect, selector, elementInfo, childSelectors, groupInfo } =
|
||||
highlighterData;
|
||||
|
||||
let shouldHighlight = false;
|
||||
|
||||
if (getList) {
|
||||
if (listSelector) {
|
||||
const hasValidChildSelectors =
|
||||
Array.isArray(childSelectors) && childSelectors.length > 0;
|
||||
|
||||
// First phase: Allow any group to be highlighted for selection
|
||||
if (!listSelector && groupInfo?.isGroupElement) {
|
||||
shouldHighlight = true;
|
||||
}
|
||||
// Second phase: Show valid children within selected group
|
||||
else if (listSelector) {
|
||||
if (limitMode) {
|
||||
shouldHighlight = false;
|
||||
} else if (paginationMode) {
|
||||
if (
|
||||
paginationType !== "" &&
|
||||
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
||||
) {
|
||||
shouldHighlight = true;
|
||||
} else {
|
||||
shouldHighlight = false;
|
||||
}
|
||||
} else if (childSelectors && childSelectors.includes(selector)) {
|
||||
} else if (
|
||||
paginationMode &&
|
||||
paginationType !== "" &&
|
||||
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
||||
) {
|
||||
shouldHighlight = true;
|
||||
} else if (childSelectors && childSelectors.length > 0) {
|
||||
console.log("✅ Child selectors present, highlighting enabled");
|
||||
shouldHighlight = true;
|
||||
} else if (elementInfo?.isIframeContent && childSelectors) {
|
||||
const isIframeChild = childSelectors.some(
|
||||
(childSelector: string) =>
|
||||
selector.includes(":>>") &&
|
||||
childSelector
|
||||
.split(":>>")
|
||||
.some((part) => selector.includes(part.trim()))
|
||||
);
|
||||
shouldHighlight = isIframeChild;
|
||||
} else if (selector.includes(":>>") && hasValidChildSelectors) {
|
||||
const selectorParts = selector
|
||||
.split(":>>")
|
||||
.map((part: string) => part.trim());
|
||||
const isValidMixedSelector = selectorParts.some((part: any) =>
|
||||
childSelectors!.some((childSelector) =>
|
||||
childSelector.includes(part)
|
||||
)
|
||||
);
|
||||
} else if (elementInfo?.isShadowRoot && childSelectors) {
|
||||
const isShadowChild = childSelectors.some(
|
||||
(childSelector: string) =>
|
||||
selector.includes(">>") &&
|
||||
childSelector
|
||||
.split(">>")
|
||||
.some((part) => selector.includes(part.trim()))
|
||||
);
|
||||
} else if (selector.includes(">>") && hasValidChildSelectors) {
|
||||
const selectorParts = selector
|
||||
.split(">>")
|
||||
.map((part: string) => part.trim());
|
||||
const isValidMixedSelector = selectorParts.some((part: any) =>
|
||||
childSelectors!.some((childSelector) =>
|
||||
childSelector.includes(part)
|
||||
)
|
||||
);
|
||||
} else {
|
||||
console.log("❌ No child selectors available");
|
||||
shouldHighlight = false;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
// No list selector - show regular highlighting
|
||||
else {
|
||||
shouldHighlight = true;
|
||||
}
|
||||
} else {
|
||||
// getText mode - always highlight
|
||||
shouldHighlight = true;
|
||||
}
|
||||
|
||||
@@ -316,6 +303,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
},
|
||||
selector,
|
||||
childSelectors,
|
||||
groupInfo,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -335,9 +323,11 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
}
|
||||
},
|
||||
[
|
||||
getText,
|
||||
getList,
|
||||
listSelector,
|
||||
paginationMode,
|
||||
cachedChildSelectors,
|
||||
paginationType,
|
||||
limitMode,
|
||||
onHighlight,
|
||||
@@ -363,6 +353,10 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isInCaptureMode) {
|
||||
return;
|
||||
}
|
||||
|
||||
const now = performance.now();
|
||||
if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) {
|
||||
return;
|
||||
@@ -401,11 +395,24 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
e.stopPropagation();
|
||||
|
||||
if (currentHighlight && onElementSelect) {
|
||||
// Get the group info for the current highlight
|
||||
const highlighterData =
|
||||
clientSelectorGenerator.generateDataForHighlighter(
|
||||
{ x: iframeX, y: iframeY },
|
||||
iframeDoc,
|
||||
true,
|
||||
cachedChildSelectors
|
||||
);
|
||||
|
||||
onElementSelect({
|
||||
rect: currentHighlight.rect,
|
||||
selector: currentHighlight.selector,
|
||||
elementInfo: currentHighlight.elementInfo,
|
||||
childSelectors: currentHighlight.childSelectors || [],
|
||||
childSelectors:
|
||||
cachedChildSelectors.length > 0
|
||||
? cachedChildSelectors
|
||||
: highlighterData?.childSelectors || [],
|
||||
groupInfo: highlighterData?.groupInfo,
|
||||
});
|
||||
}
|
||||
notifyLastAction("select element");
|
||||
@@ -790,12 +797,41 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
|
||||
rebuiltHTML = "<!DOCTYPE html>\n" + rebuiltHTML;
|
||||
|
||||
const additionalCSS = [];
|
||||
|
||||
if (snapshotData.resources.fonts?.length > 0) {
|
||||
const fontCSS = snapshotData.resources.fonts
|
||||
.map((font) => {
|
||||
const format = font.format || "woff2";
|
||||
return `
|
||||
@font-face {
|
||||
font-family: 'ProxiedFont-${
|
||||
font.url.split("/").pop()?.split(".")[0] || "unknown"
|
||||
}';
|
||||
src: url("${font.dataUrl}") format("${format}");
|
||||
font-display: swap;
|
||||
}
|
||||
`;
|
||||
})
|
||||
.join("\n");
|
||||
additionalCSS.push(fontCSS);
|
||||
}
|
||||
|
||||
if (snapshotData.resources.stylesheets?.length > 0) {
|
||||
const externalCSS = snapshotData.resources.stylesheets
|
||||
.map((stylesheet) => stylesheet.content)
|
||||
.join("\n\n");
|
||||
additionalCSS.push(externalCSS);
|
||||
}
|
||||
|
||||
const enhancedCSS = `
|
||||
/* rrweb rebuilt content styles */
|
||||
html, body {
|
||||
margin: 0 !important;
|
||||
padding: 8px !important;
|
||||
overflow-x: hidden !important;
|
||||
margin: 0 !important;
|
||||
padding: 8px !important;
|
||||
font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
|
||||
background: white !important;
|
||||
overflow-x: hidden !important;
|
||||
}
|
||||
|
||||
html::-webkit-scrollbar,
|
||||
@@ -818,12 +854,22 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
scrollbar-width: none !important; /* Firefox */
|
||||
-ms-overflow-style: none !important; /* Internet Explorer 10+ */
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 100% !important;
|
||||
height: auto !important;
|
||||
}
|
||||
|
||||
|
||||
/* Make everything interactive */
|
||||
* {
|
||||
cursor: "pointer" !important;
|
||||
}
|
||||
`;
|
||||
|
||||
/* Additional CSS from resources */
|
||||
${additionalCSS.join("\n\n")}
|
||||
`;
|
||||
|
||||
|
||||
const headTagRegex = /<head[^>]*>/i;
|
||||
const cssInjection = `
|
||||
|
||||
@@ -22,6 +22,7 @@ import { useThemeMode } from '../../context/theme-provider';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
|
||||
import { clientListExtractor } from '../../helpers/clientListExtractor';
|
||||
import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator';
|
||||
|
||||
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
|
||||
getActiveWorkflow(id).then(
|
||||
@@ -52,10 +53,8 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false);
|
||||
const [isCaptureListConfirmed, setIsCaptureListConfirmed] = useState(false);
|
||||
const { panelHeight } = useBrowserDimensionsStore();
|
||||
const [isDOMMode, setIsDOMMode] = useState(false);
|
||||
const [currentSnapshot, setCurrentSnapshot] = useState<any>(null);
|
||||
|
||||
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId } = useGlobalInfoStore();
|
||||
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, updateDOMMode, currentSnapshot, isDOMMode } = useGlobalInfoStore();
|
||||
const {
|
||||
getText, startGetText, stopGetText,
|
||||
getList, startGetList, stopGetList,
|
||||
@@ -86,22 +85,20 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
if (socket) {
|
||||
const domModeHandler = (data: any) => {
|
||||
if (!data.userId || data.userId === id) {
|
||||
setIsDOMMode(true);
|
||||
updateDOMMode(true);
|
||||
}
|
||||
};
|
||||
|
||||
const screenshotModeHandler = (data: any) => {
|
||||
if (!data.userId || data.userId === id) {
|
||||
setIsDOMMode(false);
|
||||
setCurrentSnapshot(null);
|
||||
updateDOMMode(false);
|
||||
}
|
||||
};
|
||||
|
||||
const domcastHandler = (data: any) => {
|
||||
if (!data.userId || data.userId === id) {
|
||||
if (data.snapshotData && data.snapshotData.snapshot) {
|
||||
setCurrentSnapshot(data.snapshotData);
|
||||
setIsDOMMode(true);
|
||||
updateDOMMode(true, data.snapshotData);
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -116,7 +113,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
socket.off("domcast", domcastHandler);
|
||||
};
|
||||
}
|
||||
}, [socket, id]);
|
||||
}, [socket, id, updateDOMMode]);
|
||||
|
||||
useEffect(() => {
|
||||
if (socket) {
|
||||
@@ -214,7 +211,6 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
) => {
|
||||
if (isDOMMode && currentSnapshot) {
|
||||
try {
|
||||
// Find the DOM iframe element
|
||||
let iframeElement = document.querySelector(
|
||||
"#dom-browser-iframe"
|
||||
) as HTMLIFrameElement;
|
||||
@@ -247,22 +243,42 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
return;
|
||||
}
|
||||
|
||||
// Use client-side extraction
|
||||
Object.entries(fields).forEach(([key, field]) => {
|
||||
if (field.selectorObj?.selector) {
|
||||
const isFieldXPath =
|
||||
field.selectorObj.selector.startsWith("//") ||
|
||||
field.selectorObj.selector.startsWith("/");
|
||||
console.log(
|
||||
`Field "${key}" selector:`,
|
||||
field.selectorObj.selector,
|
||||
`(XPath: ${isFieldXPath})`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
const extractedData = clientListExtractor.extractListData(
|
||||
iframeDoc,
|
||||
listSelector,
|
||||
fields,
|
||||
5 // limit for preview
|
||||
5
|
||||
);
|
||||
|
||||
updateListStepData(currentListId, extractedData);
|
||||
console.log("✅ UI extraction completed:");
|
||||
|
||||
if (extractedData.length === 0) {
|
||||
console.warn(
|
||||
"⚠️ No data extracted - this might indicate selector issues"
|
||||
);
|
||||
notify(
|
||||
"warning",
|
||||
"No data was extracted. Please verify your selections."
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error in client-side data extraction:", error);
|
||||
notify("error", "Failed to extract data client-side");
|
||||
}
|
||||
} else {
|
||||
// Fallback to socket-based extraction for screenshot mode
|
||||
if (!socket) {
|
||||
console.error("Socket not available for backend extraction");
|
||||
return;
|
||||
@@ -275,8 +291,6 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
currentListId,
|
||||
pagination: { type: "", selector: "" },
|
||||
});
|
||||
|
||||
console.log("📤 Sent extraction request to server");
|
||||
} catch (error) {
|
||||
console.error("Error in backend data extraction:", error);
|
||||
}
|
||||
@@ -443,6 +457,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
resetInterpretationLog();
|
||||
finishAction('text');
|
||||
onFinishCapture();
|
||||
clientSelectorGenerator.cleanup();
|
||||
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog, finishAction, notify, onFinishCapture, t]);
|
||||
|
||||
const getListSettingsObject = useCallback(() => {
|
||||
@@ -494,6 +509,8 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
|
||||
const stopCaptureAndEmitGetListSettings = useCallback(() => {
|
||||
const settings = getListSettingsObject();
|
||||
|
||||
console.log("rrwebSnapshotHandler", settings);
|
||||
|
||||
const latestListStep = getLatestListStep(browserSteps);
|
||||
if (latestListStep && settings) {
|
||||
@@ -509,6 +526,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
resetInterpretationLog();
|
||||
finishAction('list');
|
||||
onFinishCapture();
|
||||
clientSelectorGenerator.cleanup();
|
||||
}, [getListSettingsObject, socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide]);
|
||||
|
||||
const hasUnconfirmedListTextFields = browserSteps.some(step =>
|
||||
@@ -638,6 +656,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
|
||||
setCurrentTextActionId('');
|
||||
setIsCaptureTextConfirmed(false);
|
||||
clientSelectorGenerator.cleanup();
|
||||
notify('error', t('right_panel.errors.capture_text_discarded'));
|
||||
}, [currentTextActionId, browserSteps, stopGetText, deleteStepsByActionId, notify, t]);
|
||||
|
||||
@@ -668,6 +687,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
setCaptureStage('initial');
|
||||
setCurrentListActionId('');
|
||||
setIsCaptureListConfirmed(false);
|
||||
clientSelectorGenerator.cleanup();
|
||||
notify('error', t('right_panel.errors.capture_list_discarded'));
|
||||
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t]);
|
||||
|
||||
@@ -686,6 +706,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
stopGetScreenshot();
|
||||
resetInterpretationLog();
|
||||
finishAction('screenshot');
|
||||
clientSelectorGenerator.cleanup();
|
||||
onFinishCapture();
|
||||
};
|
||||
|
||||
|
||||
@@ -27,6 +27,41 @@ interface ScheduleConfig {
|
||||
cronExpression?: string;
|
||||
}
|
||||
|
||||
/**
 * Shape of a processed page snapshot plus the resources collected with it.
 *
 * This is the type of `currentSnapshot` in the global info context.
 * NOTE(review): `snapshot` and `processingStats` are untyped (`any`) here;
 * presumably they carry the serialized DOM and capture metrics — confirm
 * against the producer before relying on their structure.
 */
interface ProcessedSnapshot {
  snapshot: any;
  /** Resources grouped by kind; each entry pairs a source reference with its captured payload. */
  resources: {
    stylesheets: { href: string; content: string; media?: string }[];
    images: { src: string; dataUrl: string; alt?: string }[];
    fonts: { url: string; dataUrl: string; format?: string }[];
    scripts: { src: string; content: string; type?: string }[];
    media: { src: string; dataUrl: string; type: string }[];
  };
  baseUrl: string;
  viewport: { width: number; height: number };
  timestamp: number;
  processingStats: any;
}
|
||||
|
||||
export interface RobotSettings {
|
||||
id: string;
|
||||
userId?: number;
|
||||
@@ -86,6 +121,11 @@ interface GlobalInfo {
|
||||
setCurrentListActionId: (actionId: string) => void;
|
||||
currentScreenshotActionId: string;
|
||||
setCurrentScreenshotActionId: (actionId: string) => void;
|
||||
isDOMMode: boolean;
|
||||
setIsDOMMode: (isDOMMode: boolean) => void;
|
||||
currentSnapshot: ProcessedSnapshot | null;
|
||||
setCurrentSnapshot: (snapshot: ProcessedSnapshot | null) => void;
|
||||
updateDOMMode: (isDOMMode: boolean, snapshot?: ProcessedSnapshot | null) => void;
|
||||
};
|
||||
|
||||
class GlobalInfoStore implements Partial<GlobalInfo> {
|
||||
@@ -115,6 +155,8 @@ class GlobalInfoStore implements Partial<GlobalInfo> {
|
||||
currentTextActionId = '';
|
||||
currentListActionId = '';
|
||||
currentScreenshotActionId = '';
|
||||
isDOMMode = false;
|
||||
currentSnapshot = null;
|
||||
};
|
||||
|
||||
const globalInfoStore = new GlobalInfoStore();
|
||||
@@ -141,6 +183,8 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => {
|
||||
const [currentTextActionId, setCurrentTextActionId] = useState<string>('');
|
||||
const [currentListActionId, setCurrentListActionId] = useState<string>('');
|
||||
const [currentScreenshotActionId, setCurrentScreenshotActionId] = useState<string>('');
|
||||
const [isDOMMode, setIsDOMMode] = useState<boolean>(globalInfoStore.isDOMMode);
|
||||
const [currentSnapshot, setCurrentSnapshot] = useState<ProcessedSnapshot | null>(globalInfoStore.currentSnapshot);
|
||||
|
||||
const notify = (severity: 'error' | 'warning' | 'info' | 'success', message: string) => {
|
||||
setNotification({ severity, message, isOpen: true });
|
||||
@@ -165,6 +209,18 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => {
|
||||
}, 100);
|
||||
}
|
||||
|
||||
/**
 * Switches DOM mode on or off and keeps the stored snapshot in step with it.
 *
 * @param mode - new value for the DOM-mode flag.
 * @param snapshot - when passed (including `null`), replaces the current
 *   snapshot; when omitted, the current snapshot is left untouched.
 */
const updateDOMMode = (mode: boolean, snapshot?: ProcessedSnapshot | null) => {
  setIsDOMMode(mode);

  const snapshotWasProvided = snapshot !== undefined;
  if (snapshotWasProvided) {
    setCurrentSnapshot(snapshot);
  }

  // Turning DOM mode off always clears the snapshot, even if one was just passed in.
  if (mode === false) {
    setCurrentSnapshot(null);
  }
}
|
||||
|
||||
return (
|
||||
<globalInfoContext.Provider
|
||||
value={{
|
||||
@@ -205,6 +261,11 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => {
|
||||
setCurrentListActionId,
|
||||
currentScreenshotActionId,
|
||||
setCurrentScreenshotActionId,
|
||||
isDOMMode,
|
||||
setIsDOMMode,
|
||||
currentSnapshot,
|
||||
setCurrentSnapshot,
|
||||
updateDOMMode,
|
||||
}}
|
||||
>
|
||||
{children}
|
||||
|
||||
@@ -15,30 +15,89 @@ interface ExtractedListData {
|
||||
[key: string]: string;
|
||||
}
|
||||
|
||||
interface TableField {
|
||||
interface Field {
|
||||
selector: string;
|
||||
attribute: string;
|
||||
tableContext?: string;
|
||||
cellIndex?: number;
|
||||
}
|
||||
|
||||
/** A list field described only by the selector that locates it and the attribute to read from it. */
interface NonTableField {
  /** Selector used to locate the field's element. */
  selector: string;
  /** Name of the attribute (or pseudo-attribute such as "innerText") to extract. */
  attribute: string;
}
|
||||
|
||||
/** Per-container split of the requested fields, each group keyed by field label. */
interface ContainerFields {
  /** Fields classified as table fields. */
  tableFields: Record<string, TableField>;
  /** All remaining fields. */
  nonTableFields: Record<string, NonTableField>;
}
|
||||
|
||||
class ClientListExtractor {
|
||||
/**
 * Evaluates an XPath expression and returns the first matching element.
 *
 * The expression is evaluated by the document that owns `rootElement`
 * (or by `rootElement` itself when it is a document), with `rootElement`
 * as the context node.
 *
 * @param rootElement - context node for the evaluation.
 * @param xpath - XPath expression to evaluate.
 * @returns the first match in document order if it is an element; `null`
 *   when nothing matches, the match is not an element, no owning document
 *   is available, or evaluation throws.
 */
private evaluateXPath = (
  rootElement: Element | Document,
  xpath: string
): Element | null => {
  try {
    const ownerDoc =
      rootElement.nodeType === Node.DOCUMENT_NODE
        ? (rootElement as Document)
        : rootElement.ownerDocument;

    if (!ownerDoc) return null;

    const result = ownerDoc.evaluate(
      xpath,
      rootElement,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );

    // An expression such as ".../text()" or ".../@href" yields a non-element
    // node. Only hand back real elements, as evaluateXPathAll does.
    const node = result.singleNodeValue;
    if (node === null || node.nodeType !== Node.ELEMENT_NODE) {
      return null;
    }
    return node as Element;
  } catch (error) {
    // Malformed expressions throw; treat them as "no match".
    console.warn("XPath evaluation failed:", xpath, error);
    return null;
  }
};
|
||||
|
||||
/**
 * Evaluates an XPath expression and returns every matching element.
 *
 * @param rootElement - context node for the evaluation; its owning document
 *   (or itself, when it is a document) performs the evaluation.
 * @param xpath - XPath expression to evaluate.
 * @returns matching elements in snapshot order; non-element nodes are
 *   dropped. Returns an empty array when no owning document is available
 *   or evaluation throws.
 */
private evaluateXPathAll = (
  rootElement: Element | Document,
  xpath: string
): Element[] => {
  try {
    const isDocumentNode = rootElement.nodeType === Node.DOCUMENT_NODE;
    const doc = isDocumentNode
      ? (rootElement as Document)
      : rootElement.ownerDocument;

    if (!doc) {
      return [];
    }

    const snapshot = doc.evaluate(
      xpath,
      rootElement,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );

    const matched: Element[] = [];
    let position = 0;
    while (position < snapshot.snapshotLength) {
      const candidate = snapshot.snapshotItem(position);
      // Keep element nodes only (skip text, attribute, comment nodes).
      if (candidate && candidate.nodeType === Node.ELEMENT_NODE) {
        matched.push(candidate as Element);
      }
      position += 1;
    }

    return matched;
  } catch (error) {
    console.warn("XPath evaluation failed:", xpath, error);
    return [];
  }
};
|
||||
|
||||
private queryElement = (
|
||||
rootElement: Element | Document,
|
||||
selector: string
|
||||
): Element | null => {
|
||||
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
||||
return rootElement.querySelector(selector);
|
||||
// Check if it's an XPath selector (starts with // or / or ./)
|
||||
if (
|
||||
selector.startsWith("//") ||
|
||||
selector.startsWith("/") ||
|
||||
selector.startsWith("./")
|
||||
) {
|
||||
return this.evaluateXPath(rootElement, selector);
|
||||
} else {
|
||||
return rootElement.querySelector(selector);
|
||||
}
|
||||
}
|
||||
|
||||
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
||||
@@ -59,7 +118,17 @@ class ClientListExtractor {
|
||||
frameElement.contentDocument ||
|
||||
frameElement.contentWindow?.document;
|
||||
if (!frameDoc) return null;
|
||||
currentElement = frameDoc.querySelector(parts[i]);
|
||||
|
||||
// Handle XPath in iframe context
|
||||
if (
|
||||
parts[i].startsWith("//") ||
|
||||
parts[i].startsWith("/") ||
|
||||
parts[i].startsWith("./")
|
||||
) {
|
||||
currentElement = this.evaluateXPath(frameDoc, parts[i]);
|
||||
} else {
|
||||
currentElement = frameDoc.querySelector(parts[i]);
|
||||
}
|
||||
continue;
|
||||
} catch (e) {
|
||||
console.warn(
|
||||
@@ -75,7 +144,16 @@ class ClientListExtractor {
|
||||
let nextElement: Element | null = null;
|
||||
|
||||
if ("querySelector" in currentElement) {
|
||||
nextElement = currentElement.querySelector(parts[i]);
|
||||
// Handle XPath vs CSS selector
|
||||
if (
|
||||
parts[i].startsWith("//") ||
|
||||
parts[i].startsWith("/") ||
|
||||
parts[i].startsWith("./")
|
||||
) {
|
||||
nextElement = this.evaluateXPath(currentElement, parts[i]);
|
||||
} else {
|
||||
nextElement = currentElement.querySelector(parts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
@@ -83,9 +161,20 @@ class ClientListExtractor {
|
||||
"shadowRoot" in currentElement &&
|
||||
(currentElement as Element).shadowRoot
|
||||
) {
|
||||
nextElement = (currentElement as Element).shadowRoot!.querySelector(
|
||||
parts[i]
|
||||
);
|
||||
if (
|
||||
parts[i].startsWith("//") ||
|
||||
parts[i].startsWith("/") ||
|
||||
parts[i].startsWith("./")
|
||||
) {
|
||||
nextElement = this.evaluateXPath(
|
||||
(currentElement as Element).shadowRoot as unknown as Document,
|
||||
parts[i]
|
||||
);
|
||||
} else {
|
||||
nextElement = (currentElement as Element).shadowRoot!.querySelector(
|
||||
parts[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nextElement && "children" in currentElement) {
|
||||
@@ -94,7 +183,18 @@ class ClientListExtractor {
|
||||
);
|
||||
for (const child of children) {
|
||||
if (child.shadowRoot) {
|
||||
nextElement = child.shadowRoot.querySelector(parts[i]);
|
||||
if (
|
||||
parts[i].startsWith("//") ||
|
||||
parts[i].startsWith("/") ||
|
||||
parts[i].startsWith("./")
|
||||
) {
|
||||
nextElement = this.evaluateXPath(
|
||||
child.shadowRoot as unknown as Document,
|
||||
parts[i]
|
||||
);
|
||||
} else {
|
||||
nextElement = child.shadowRoot.querySelector(parts[i]);
|
||||
}
|
||||
if (nextElement) break;
|
||||
}
|
||||
}
|
||||
@@ -111,7 +211,12 @@ class ClientListExtractor {
|
||||
selector: string
|
||||
): Element[] => {
|
||||
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
||||
return Array.from(rootElement.querySelectorAll(selector));
|
||||
// Check if it's an XPath selector (starts with // or /)
|
||||
if (selector.startsWith("//") || selector.startsWith("/")) {
|
||||
return this.evaluateXPathAll(rootElement, selector);
|
||||
} else {
|
||||
return Array.from(rootElement.querySelectorAll(selector));
|
||||
}
|
||||
}
|
||||
|
||||
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
||||
@@ -133,7 +238,14 @@ class ClientListExtractor {
|
||||
frameElement.contentDocument ||
|
||||
frameElement.contentWindow?.document;
|
||||
if (frameDoc) {
|
||||
nextElements.push(...Array.from(frameDoc.querySelectorAll(part)));
|
||||
// Handle XPath in iframe context
|
||||
if (part.startsWith("//") || part.startsWith("/")) {
|
||||
nextElements.push(...this.evaluateXPathAll(frameDoc, part));
|
||||
} else {
|
||||
nextElements.push(
|
||||
...Array.from(frameDoc.querySelectorAll(part))
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(
|
||||
@@ -146,24 +258,47 @@ class ClientListExtractor {
|
||||
}
|
||||
} else {
|
||||
if ("querySelectorAll" in element) {
|
||||
nextElements.push(...Array.from(element.querySelectorAll(part)));
|
||||
// Handle XPath vs CSS selector
|
||||
if (part.startsWith("//") || part.startsWith("/")) {
|
||||
nextElements.push(...this.evaluateXPathAll(element, part));
|
||||
} else {
|
||||
nextElements.push(...Array.from(element.querySelectorAll(part)));
|
||||
}
|
||||
}
|
||||
|
||||
if ("shadowRoot" in element && (element as Element).shadowRoot) {
|
||||
nextElements.push(
|
||||
...Array.from(
|
||||
(element as Element).shadowRoot!.querySelectorAll(part)
|
||||
)
|
||||
);
|
||||
if (part.startsWith("//") || part.startsWith("/")) {
|
||||
nextElements.push(
|
||||
...this.evaluateXPathAll(
|
||||
(element as Element).shadowRoot as unknown as Document,
|
||||
part
|
||||
)
|
||||
);
|
||||
} else {
|
||||
nextElements.push(
|
||||
...Array.from(
|
||||
(element as Element).shadowRoot!.querySelectorAll(part)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if ("children" in element) {
|
||||
const children = Array.from((element as Element).children || []);
|
||||
for (const child of children) {
|
||||
if (child.shadowRoot) {
|
||||
nextElements.push(
|
||||
...Array.from(child.shadowRoot.querySelectorAll(part))
|
||||
);
|
||||
if (part.startsWith("//") || part.startsWith("/")) {
|
||||
nextElements.push(
|
||||
...this.evaluateXPathAll(
|
||||
child.shadowRoot as unknown as Document,
|
||||
part
|
||||
)
|
||||
);
|
||||
} else {
|
||||
nextElements.push(
|
||||
...Array.from(child.shadowRoot.querySelectorAll(part))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,35 +328,66 @@ class ClientListExtractor {
|
||||
}
|
||||
|
||||
if (attribute === "innerText") {
|
||||
return (element as HTMLElement).innerText?.trim() || null;
|
||||
} else if (attribute === "innerHTML") {
|
||||
return element.innerHTML?.trim() || null;
|
||||
} else if (attribute === "src" || attribute === "href") {
|
||||
if (attribute === "href" && element.tagName !== "A") {
|
||||
const parentElement = element.parentElement;
|
||||
if (parentElement && parentElement.tagName === "A") {
|
||||
const parentHref = parentElement.getAttribute("href");
|
||||
if (parentHref) {
|
||||
try {
|
||||
return new URL(parentHref, baseURL).href;
|
||||
} catch (e) {
|
||||
return parentHref;
|
||||
}
|
||||
// First try standard innerText/textContent
|
||||
let textContent =
|
||||
(element as HTMLElement).innerText?.trim() ||
|
||||
(element as HTMLElement).textContent?.trim();
|
||||
|
||||
// If empty, check for common data attributes that might contain the text
|
||||
if (!textContent) {
|
||||
// Check for data-* attributes that commonly contain text values
|
||||
const dataAttributes = [
|
||||
"data-600",
|
||||
"data-text",
|
||||
"data-label",
|
||||
"data-value",
|
||||
"data-content",
|
||||
];
|
||||
for (const attr of dataAttributes) {
|
||||
const dataValue = element.getAttribute(attr);
|
||||
if (dataValue && dataValue.trim()) {
|
||||
textContent = dataValue.trim();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return textContent || null;
|
||||
} else if (attribute === "innerHTML") {
|
||||
return element.innerHTML?.trim() || null;
|
||||
} else if (attribute === "href") {
|
||||
// For href, we need to find the anchor tag if the current element isn't one
|
||||
let anchorElement = element;
|
||||
|
||||
// If current element is not an anchor, look for parent anchor
|
||||
if (element.tagName !== "A") {
|
||||
anchorElement =
|
||||
element.closest("a") ||
|
||||
element.parentElement?.closest("a") ||
|
||||
element;
|
||||
}
|
||||
|
||||
const hrefValue = anchorElement.getAttribute("href");
|
||||
if (!hrefValue || hrefValue.trim() === "") {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return new URL(hrefValue, baseURL).href;
|
||||
} catch (e) {
|
||||
console.warn("Error creating URL from", hrefValue, e);
|
||||
return hrefValue;
|
||||
}
|
||||
} else if (attribute === "src") {
|
||||
const attrValue = element.getAttribute(attribute);
|
||||
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
||||
|
||||
if (!dataAttr || dataAttr.trim() === "") {
|
||||
if (attribute === "src") {
|
||||
const style = window.getComputedStyle(element as HTMLElement);
|
||||
const bgImage = style.backgroundImage;
|
||||
if (bgImage && bgImage !== "none") {
|
||||
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
||||
return matches ? new URL(matches[1], baseURL).href : null;
|
||||
}
|
||||
const style = window.getComputedStyle(element as HTMLElement);
|
||||
const bgImage = style.backgroundImage;
|
||||
if (bgImage && bgImage !== "none") {
|
||||
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
||||
return matches ? new URL(matches[1], baseURL).href : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@@ -236,187 +402,8 @@ class ClientListExtractor {
|
||||
return element.getAttribute(attribute);
|
||||
};
|
||||
|
||||
/**
 * Walks upward from `element` looking for the nearest table cell or row.
 *
 * The walk starts at `element` itself and gives up after five counted steps.
 * A node whose root is a ShadowRoot is replaced by the shadow host before
 * its tag is inspected, and that hop does not count as a step. An
 * IFRAME/FRAME node is replaced by the body of its content document.
 *
 * @param element - starting element.
 * @returns `{ type: "TD" | "TR", element }` for the first TD or TR found,
 *   or `null` if none is found within the step budget or a frame's content
 *   cannot be read.
 */
private findTableAncestor = (
  element: Element
): { type: string; element: Element } | null => {
  const MAX_DEPTH = 5;
  let node: Element | null = element;
  let steps = 0;

  while (node && steps < MAX_DEPTH) {
    const root = node.getRootNode();
    if (root instanceof ShadowRoot) {
      // Leave the shadow tree via its host; not counted against MAX_DEPTH.
      node = root.host;
      continue;
    }

    const tag = node.tagName;
    if (tag === "TD") {
      return { type: "TD", element: node };
    }
    if (tag === "TR") {
      return { type: "TR", element: node };
    }

    if (tag === "IFRAME" || tag === "FRAME") {
      try {
        const frame = node as HTMLIFrameElement | HTMLFrameElement;
        node = frame.contentDocument?.body || null;
      } catch (e) {
        // Frame content not accessible: stop searching.
        return null;
      }
    } else {
      node = node.parentElement;
    }
    steps += 1;
  }

  return null;
};
|
||||
|
||||
/**
 * Computes the positional index of a table cell.
 *
 * Inside a shadow tree the index is the cell's position among all `td`
 * elements under that shadow root (`-1` if it is not one of them).
 * Otherwise it is the number of element siblings preceding the cell.
 *
 * @param td - the cell element.
 * @returns zero-based index as described above.
 */
private getCellIndex = (td: Element): number => {
  const root = td.getRootNode();
  if (root instanceof ShadowRoot) {
    const shadowCells = Array.from(root.querySelectorAll("td"));
    return shadowCells.indexOf(td as HTMLTableCellElement);
  }

  let precedingSiblings = 0;
  for (
    let previous = td.previousElementSibling;
    previous;
    previous = previous.previousElementSibling
  ) {
    precedingSiblings += 1;
  }
  return precedingSiblings;
};
|
||||
|
||||
/**
 * Tells whether any table field resolves, within `row`, to an element that
 * is a TH or has a TH on its path up towards `row`.
 *
 * For each field the selector is resolved relative to `row`; the walk then
 * moves upward until it reaches `row` or runs out of ancestors. Nodes inside
 * a shadow tree hop to the shadow host; IFRAME/FRAME nodes hop to the body
 * of their content document.
 *
 * @param row - row element the field selectors are resolved against.
 * @param tableFields - fields keyed by label; only `selector` is used.
 * @returns `true` as soon as a TH is met, otherwise `false`.
 */
private hasThElement = (
  row: Element,
  tableFields: Record<string, TableField>
): boolean => {
  for (const { selector } of Object.values(tableFields)) {
    const match = this.queryElement(row, selector);
    if (!match) {
      continue;
    }

    let node: Element | ShadowRoot | Document | null = match;
    while (node && node !== row) {
      const root = node.getRootNode();
      if (root instanceof ShadowRoot) {
        node = root.host;
        continue;
      }

      const tag = (node as Element).tagName;
      if (tag === "TH") {
        return true;
      }

      if (tag === "IFRAME" || tag === "FRAME") {
        try {
          const frame = node as HTMLIFrameElement | HTMLFrameElement;
          node = frame.contentDocument?.body || null;
        } catch (e) {
          // Frame content not accessible: abandon this field's walk.
          break;
        }
      } else {
        node = (node as Element).parentElement;
      }
    }
  }

  return false;
};
|
||||
|
||||
/**
 * Drops header rows unless the selected fields themselves target header cells.
 *
 * If any row has a field that resolves to (or under) a TH, the rows are
 * returned untouched. Otherwise every row containing a TH — among its
 * descendants or directly in its own shadow root — is removed.
 *
 * @param rows - candidate row elements.
 * @param tableFields - fields used to decide whether TH cells are wanted.
 * @returns the original array, or a filtered copy without TH-bearing rows.
 */
private filterRowsBasedOnTag = (
  rows: Element[],
  tableFields: Record<string, TableField>
): Element[] => {
  const fieldsTargetHeaders = rows.some((row) =>
    this.hasThElement(row, tableFields)
  );
  if (fieldsTargetHeaders) {
    return rows;
  }

  return rows.filter((row) => {
    const noLightDomTh = row.getElementsByTagName("TH").length === 0;
    const noShadowTh =
      !row.shadowRoot || row.shadowRoot.querySelector("th") === null;
    return noLightDomTh && noShadowTh;
  });
};
|
||||
|
||||
/**
 * Jaccard similarity of two class-name lists: the number of distinct names
 * present in both, divided by the number of distinct names present in either.
 *
 * @param classList1 - first list of class names (duplicates are ignored).
 * @param classList2 - second list of class names (duplicates are ignored).
 * @returns a value in [0, 1]. Two empty lists yield 0 rather than NaN, so
 *   threshold comparisons by callers stay well-defined.
 */
private calculateClassSimilarity = (
  classList1: string[],
  classList2: string[]
): number => {
  const set1 = new Set(classList1);
  const set2 = new Set(classList2);
  const union = new Set([...set1, ...set2]);

  // 0 / 0 would be NaN; with no class names at all there is nothing in common.
  if (union.size === 0) {
    return 0;
  }

  let sharedCount = 0;
  for (const className of set1) {
    if (set2.has(className)) {
      sharedCount += 1;
    }
  }

  return sharedCount / union.size;
};
|
||||
|
||||
/**
 * Collects elements with the same tag as `baseElement` whose class lists
 * are sufficiently similar to it.
 *
 * Candidates are gathered from `document`, from the shadow host's
 * descendants when `baseElement` lives in a shadow tree, and from every
 * readable iframe/frame document. The same element may appear more than
 * once if it is reachable through several of these sources.
 *
 * @param baseElement - reference element; it is excluded from the result.
 * @param document - document to search.
 * @param similarityThreshold - minimum class similarity (inclusive) for a
 *   candidate to be kept; defaults to 0.7.
 * @returns matching candidates, or an empty array when `baseElement` has
 *   no classes.
 */
private findSimilarElements = (
  baseElement: Element,
  document: Document,
  similarityThreshold: number = 0.7
): Element[] => {
  const referenceClasses = Array.from(baseElement.classList);
  if (referenceClasses.length === 0) {
    return [];
  }

  const tag = baseElement.tagName;
  const candidates: Element[] = Array.from(document.getElementsByTagName(tag));

  const root = baseElement.getRootNode();
  if (root instanceof ShadowRoot) {
    candidates.push(...Array.from(root.host.getElementsByTagName(tag)));
  }

  const frames = [
    ...Array.from(document.getElementsByTagName("iframe")),
    ...Array.from(document.getElementsByTagName("frame")),
  ];

  for (const frame of frames) {
    try {
      const frameElement = frame as HTMLIFrameElement | HTMLFrameElement;
      const frameDoc =
        frameElement.contentDocument || frameElement.contentWindow?.document;
      if (frameDoc) {
        candidates.push(...Array.from(frameDoc.getElementsByTagName(tag)));
      }
    } catch (e) {
      // Frame content may be inaccessible; skip it and keep going.
      console.warn(
        `Cannot access ${frame.tagName.toLowerCase()} content:`,
        e
      );
    }
  }

  return candidates.filter((candidate) => {
    if (candidate === baseElement) {
      return false;
    }
    const score = this.calculateClassSimilarity(
      referenceClasses,
      Array.from(candidate.classList)
    );
    return score >= similarityThreshold;
  });
};
|
||||
|
||||
private convertFields = (
|
||||
fields: any
|
||||
): Record<string, { selector: string; attribute: string }> => {
|
||||
const convertedFields: Record<
|
||||
string,
|
||||
{ selector: string; attribute: string }
|
||||
> = {};
|
||||
private convertFields = (fields: any): Record<string, Field> => {
|
||||
const convertedFields: Record<string, Field> = {};
|
||||
|
||||
for (const [key, field] of Object.entries(fields)) {
|
||||
const typedField = field as TextStep;
|
||||
@@ -439,285 +426,134 @@ class ClientListExtractor {
|
||||
// Convert fields to the format expected by the extraction logic
|
||||
const convertedFields = this.convertFields(fields);
|
||||
|
||||
// Get all container elements matching the list selector
|
||||
let containers = this.queryElementAll(iframeDocument, listSelector);
|
||||
// Step 1: Get all container elements matching the list selector
|
||||
const containers = this.queryElementAll(iframeDocument, listSelector);
|
||||
|
||||
if (containers.length === 0) {
|
||||
console.warn("No containers found for listSelector:", listSelector);
|
||||
console.warn("❌ No containers found for listSelector:", listSelector);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Enhanced container discovery: find similar elements if we need more containers
|
||||
if (limit > 1 && containers.length === 1) {
|
||||
const baseContainer = containers[0];
|
||||
const similarContainers = this.findSimilarElements(
|
||||
baseContainer,
|
||||
iframeDocument,
|
||||
0.7
|
||||
);
|
||||
// Step 2: Extract data from each container up to the limit
|
||||
const extractedData: ExtractedListData[] = [];
|
||||
const containersToProcess = Math.min(containers.length, limit);
|
||||
|
||||
if (similarContainers.length > 0) {
|
||||
const newContainers = similarContainers.filter(
|
||||
(container) => !container.matches(listSelector)
|
||||
);
|
||||
containers = [...containers, ...newContainers];
|
||||
}
|
||||
}
|
||||
for (
|
||||
let containerIndex = 0;
|
||||
containerIndex < containersToProcess;
|
||||
containerIndex++
|
||||
) {
|
||||
const container = containers[containerIndex];
|
||||
const record: ExtractedListData = {};
|
||||
|
||||
// Analyze fields for table vs non-table context
|
||||
const containerFields: ContainerFields[] = containers.map(() => ({
|
||||
tableFields: {},
|
||||
nonTableFields: {},
|
||||
}));
|
||||
// Step 3: For each field, extract data from the current container
|
||||
for (const [label, { selector, attribute }] of Object.entries(
|
||||
convertedFields
|
||||
)) {
|
||||
let element: Element | null = null;
|
||||
|
||||
containers.forEach((container, containerIndex) => {
|
||||
for (const [label, field] of Object.entries(convertedFields)) {
|
||||
const sampleElement = this.queryElement(container, field.selector);
|
||||
// CORRECT APPROACH: Create indexed absolute XPath
|
||||
if (selector.startsWith("//")) {
|
||||
// Convert the absolute selector to target the specific container instance
|
||||
const indexedSelector = this.createIndexedXPath(
|
||||
selector,
|
||||
listSelector,
|
||||
containerIndex + 1
|
||||
);
|
||||
|
||||
if (sampleElement) {
|
||||
const ancestor = this.findTableAncestor(sampleElement);
|
||||
if (ancestor) {
|
||||
containerFields[containerIndex].tableFields[label] = {
|
||||
...field,
|
||||
tableContext: ancestor.type,
|
||||
cellIndex:
|
||||
ancestor.type === "TD"
|
||||
? this.getCellIndex(ancestor.element)
|
||||
: -1,
|
||||
};
|
||||
} else {
|
||||
containerFields[containerIndex].nonTableFields[label] = field;
|
||||
}
|
||||
element = this.evaluateXPathSingle(iframeDocument, indexedSelector);
|
||||
} else {
|
||||
containerFields[containerIndex].nonTableFields[label] = field;
|
||||
// Fallback for non-XPath selectors
|
||||
element = this.queryElement(container, selector);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Extract table data
|
||||
const tableData: ExtractedListData[] = [];
|
||||
for (
|
||||
let containerIndex = 0;
|
||||
containerIndex < containers.length;
|
||||
containerIndex++
|
||||
) {
|
||||
const container = containers[containerIndex];
|
||||
const { tableFields } = containerFields[containerIndex];
|
||||
|
||||
if (Object.keys(tableFields).length > 0) {
|
||||
const firstField = Object.values(tableFields)[0];
|
||||
const firstElement = this.queryElement(
|
||||
container,
|
||||
firstField.selector
|
||||
);
|
||||
let tableContext: Element | null = firstElement;
|
||||
|
||||
// Find the table context
|
||||
while (
|
||||
tableContext &&
|
||||
tableContext.tagName !== "TABLE" &&
|
||||
tableContext !== container
|
||||
) {
|
||||
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
||||
tableContext = (tableContext.getRootNode() as ShadowRoot).host;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
tableContext.tagName === "IFRAME" ||
|
||||
tableContext.tagName === "FRAME"
|
||||
) {
|
||||
try {
|
||||
const frameElement = tableContext as
|
||||
| HTMLIFrameElement
|
||||
| HTMLFrameElement;
|
||||
tableContext = frameElement.contentDocument?.body || null;
|
||||
} catch (e) {
|
||||
break;
|
||||
}
|
||||
// Step 4: Extract the value from the found element
|
||||
if (element) {
|
||||
const value = this.extractValue(element, attribute);
|
||||
if (value !== null && value !== "") {
|
||||
record[label] = value;
|
||||
} else {
|
||||
tableContext = tableContext.parentElement;
|
||||
}
|
||||
}
|
||||
|
||||
if (tableContext) {
|
||||
const rows: Element[] = [];
|
||||
rows.push(...Array.from(tableContext.getElementsByTagName("TR")));
|
||||
|
||||
if (
|
||||
tableContext.tagName === "IFRAME" ||
|
||||
tableContext.tagName === "FRAME"
|
||||
) {
|
||||
try {
|
||||
const frameElement = tableContext as
|
||||
| HTMLIFrameElement
|
||||
| HTMLFrameElement;
|
||||
const frameDoc =
|
||||
frameElement.contentDocument ||
|
||||
frameElement.contentWindow?.document;
|
||||
if (frameDoc) {
|
||||
rows.push(...Array.from(frameDoc.getElementsByTagName("TR")));
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(
|
||||
`Cannot access ${tableContext.tagName.toLowerCase()} rows:`,
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const processedRows = this.filterRowsBasedOnTag(rows, tableFields);
|
||||
|
||||
for (
|
||||
let rowIndex = 0;
|
||||
rowIndex < Math.min(processedRows.length, limit);
|
||||
rowIndex++
|
||||
) {
|
||||
const record: ExtractedListData = {};
|
||||
const currentRow = processedRows[rowIndex];
|
||||
|
||||
for (const [
|
||||
label,
|
||||
{ selector, attribute, cellIndex },
|
||||
] of Object.entries(tableFields)) {
|
||||
let element: Element | null = null;
|
||||
|
||||
if (cellIndex !== undefined && cellIndex >= 0) {
|
||||
let td: Element | null =
|
||||
currentRow.children[cellIndex] || null;
|
||||
|
||||
if (!td && currentRow.shadowRoot) {
|
||||
const shadowCells = currentRow.shadowRoot.children;
|
||||
if (shadowCells && shadowCells.length > cellIndex) {
|
||||
td = shadowCells[cellIndex];
|
||||
}
|
||||
}
|
||||
|
||||
if (td) {
|
||||
element = this.queryElement(td, selector);
|
||||
|
||||
if (
|
||||
!element &&
|
||||
selector
|
||||
.split(/(?:>>|:>>)/)
|
||||
.pop()
|
||||
?.includes("td:nth-child")
|
||||
) {
|
||||
element = td;
|
||||
}
|
||||
|
||||
if (!element) {
|
||||
const tagOnlySelector = selector.split(".")[0];
|
||||
element = this.queryElement(td, tagOnlySelector);
|
||||
}
|
||||
|
||||
if (!element) {
|
||||
let currentElement: Element | null = td;
|
||||
while (
|
||||
currentElement &&
|
||||
currentElement.children.length > 0
|
||||
) {
|
||||
let foundContentChild = false;
|
||||
for (const child of Array.from(
|
||||
currentElement.children
|
||||
)) {
|
||||
if (this.extractValue(child, attribute)) {
|
||||
currentElement = child;
|
||||
foundContentChild = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!foundContentChild) break;
|
||||
}
|
||||
element = currentElement;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
element = this.queryElement(currentRow, selector);
|
||||
}
|
||||
|
||||
if (element) {
|
||||
const value = this.extractValue(element, attribute);
|
||||
if (value !== null && value !== "") {
|
||||
record[label] = value;
|
||||
} else {
|
||||
console.warn(
|
||||
`❌ No value for ${label} in row ${rowIndex + 1}`
|
||||
);
|
||||
record[label] = "";
|
||||
}
|
||||
} else {
|
||||
console.warn(
|
||||
`❌ Element not found for ${label} with selector:`,
|
||||
selector
|
||||
);
|
||||
record[label] = "";
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.values(record).some((value) => value !== "")) {
|
||||
tableData.push(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract non-table data
|
||||
const nonTableData: ExtractedListData[] = [];
|
||||
for (
|
||||
let containerIndex = 0;
|
||||
containerIndex < containers.length;
|
||||
containerIndex++
|
||||
) {
|
||||
if (nonTableData.length >= limit) break;
|
||||
|
||||
const container = containers[containerIndex];
|
||||
const { nonTableFields } = containerFields[containerIndex];
|
||||
|
||||
if (Object.keys(nonTableFields).length > 0) {
|
||||
const record: ExtractedListData = {};
|
||||
|
||||
for (const [label, { selector, attribute }] of Object.entries(
|
||||
nonTableFields
|
||||
)) {
|
||||
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
||||
const element = this.queryElement(container, relativeSelector);
|
||||
|
||||
if (element) {
|
||||
const value = this.extractValue(element, attribute);
|
||||
if (value !== null && value !== "") {
|
||||
record[label] = value;
|
||||
} else {
|
||||
console.warn(
|
||||
`❌ No value for ${label} in container ${containerIndex + 1}`
|
||||
);
|
||||
record[label] = "";
|
||||
}
|
||||
} else {
|
||||
console.warn(
|
||||
`❌ Element not found for ${label} with selector:`,
|
||||
selector
|
||||
);
|
||||
console.warn(` ⚠️ Empty value for "${label}"`);
|
||||
record[label] = "";
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.values(record).some((value) => value !== "")) {
|
||||
nonTableData.push(record);
|
||||
} else {
|
||||
console.warn(` ❌ Element not found for "${label}"`);
|
||||
record[label] = "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Combine and limit results
|
||||
const extractedData = [...tableData, ...nonTableData].slice(0, limit);
|
||||
// Step 5: Add record if it has any non-empty values
|
||||
if (Object.values(record).some((value) => value !== "")) {
|
||||
extractedData.push(record);
|
||||
} else {
|
||||
console.warn(
|
||||
` ⚠️ Skipping empty record for container ${containerIndex + 1}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return extractedData;
|
||||
} catch (error) {
|
||||
console.error("Error in client-side extractListData:", error);
|
||||
console.error("💥 Error in client-side extractListData:", error);
|
||||
return [];
|
||||
}
|
||||
};
|
||||
|
||||
// Create indexed XPath for specific container instance
|
||||
/**
 * Builds an XPath that targets one specific container instance of a list.
 *
 * If `childSelector` contains the list selector (looked up as `//` followed
 * by `listSelector` with its first `//` removed), that portion is swapped
 * for `(<listSelector>)[<containerIndex>]`. Otherwise a fallback path is
 * built by appending the child selector (with its first `//` turned into
 * `/`) to the indexed list selector.
 *
 * @param childSelector - XPath of the field element.
 * @param listSelector - XPath that matches the list containers.
 * @param containerIndex - Number placed verbatim inside the `[...]`
 *   position predicate. NOTE(review): XPath positions are 1-based, so
 *   callers presumably pass a 1-based index — confirm at the call site.
 * @returns The indexed XPath string.
 */
private createIndexedXPath(
  childSelector: string,
  listSelector: string,
  containerIndex: number
): string {
  const indexedListSelector = `(${listSelector})[${containerIndex}]`;

  // The exact text that gets replaced inside the child selector. The guard
  // below tests for this same text, so a passing guard always results in an
  // indexed selector instead of silently returning the child unchanged.
  const listPattern = `//${listSelector.replace("//", "")}`;

  if (childSelector.includes(listPattern)) {
    // A replacer function inserts the text literally; a plain replacement
    // string would expand `$&`, `$'`, `$$`, ... that may occur in an XPath.
    return childSelector.replace(listPattern, () => indexedListSelector);
  }

  // If the pattern doesn't match, create a more generic indexed selector.
  console.warn(` ⚠️ Pattern doesn't match, using fallback approach`);
  return `${indexedListSelector}${childSelector.replace("//", "/")}`;
}
|
||||
|
||||
// Helper method for single XPath evaluation
|
||||
/**
 * Evaluates an XPath expression and returns the first node it matches.
 *
 * The supplied `document` is used both to run the evaluation and as the
 * context node. A warning is logged when nothing matches; any exception
 * thrown during evaluation is logged and reported as `null`.
 *
 * @param document - Document to evaluate the expression against.
 * @param xpath - XPath expression to evaluate.
 * @returns The first matching node (typed as `Element`), or `null` when
 *   there is no match or evaluation throws.
 */
private evaluateXPathSingle = (
  document: Document,
  xpath: string
): Element | null => {
  try {
    const { singleNodeValue } = document.evaluate(
      xpath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    const firstMatch = singleNodeValue as Element | null;

    if (firstMatch) {
      return firstMatch;
    }

    console.warn(`❌ XPath found no element for: ${xpath}`);
    return firstMatch;
  } catch (evaluationError) {
    console.error("❌ XPath evaluation failed:", xpath, evaluationError);
    return null;
  }
};
|
||||
}
|
||||
|
||||
// Module-level ClientListExtractor instance, constructed with no arguments
// when this module is evaluated and exported for importers to use.
export const clientListExtractor = new ClientListExtractor();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user