support cross domain css sheet parse (#2535)

This commit is contained in:
Shuchang Zheng
2025-05-29 18:51:59 -07:00
committed by GitHub
parent ad9daae283
commit 47709dc0d8
2 changed files with 103 additions and 61 deletions

View File

@@ -1432,7 +1432,7 @@ async function buildElementTree(
) { ) {
// Generate hover styles map at the start // Generate hover styles map at the start
if (hoverStylesMap === undefined) { if (hoverStylesMap === undefined) {
hoverStylesMap = getHoverStylesMap(); hoverStylesMap = await getHoverStylesMap();
} }
var elements = []; var elements = [];
@@ -2163,72 +2163,114 @@ function scrollToElementTop(element) {
* https://stackoverflow.com/questions/7013559/is-there-a-way-to-get-element-hover-style-while-the-element-not-in-hover-state * https://stackoverflow.com/questions/7013559/is-there-a-way-to-get-element-hover-style-while-the-element-not-in-hover-state
* https://stackoverflow.com/questions/17226676/how-to-simulate-a-mouseover-in-pure-javascript-that-activates-the-css-hover * https://stackoverflow.com/questions/17226676/how-to-simulate-a-mouseover-in-pure-javascript-that-activates-the-css-hover
*/ */
function getHoverStylesMap() { async function getHoverStylesMap() {
const hoverMap = new Map(); const hoverMap = new Map();
const sheets = document.styleSheets; const sheets = [...document.styleSheets];
/**
 * Collect the `:hover` rules of a single stylesheet into the enclosing `hoverMap`.
 *
 * Only top-level style rules (`rule.type === 1`) that have a selector text are
 * inspected. For every comma-separated selector containing `:hover`, the
 * compound that carries `:hover` is taken, `:hover` is stripped from it, and the
 * result is used as the key in `hoverMap`. An entry is written only when the
 * accumulated styles for that key contain a `cursor` property.
 *
 * Selectors that use a `>`, `+` or `~` combinator are additionally recorded
 * under the `__nested__` array of the entry, together with the full selector
 * and a snapshot of the rule's declarations.
 *
 * Exceptions raised while reading the sheet's rules are not caught here; they
 * propagate to the caller.
 *
 * @param sheet stylesheet object exposing `cssRules` (or the legacy `rules`)
 */
const parseCssSheet = (sheet) => {
  const rules = sheet.cssRules || sheet.rules;
  for (const rule of rules) {
    if (rule.type !== 1 || !rule.selectorText) {
      continue;
    }
    // Cheap pre-check so declarations are only read for rules that matter.
    if (!rule.selectorText.includes(":hover")) {
      continue;
    }

    // Snapshot the declarations once per rule. getPropertyValue is used instead
    // of rule.style[prop] because custom properties (e.g. "--accent") are not
    // exposed as attributes on the declaration object and would read as undefined.
    const declared = Object.fromEntries(
      [...rule.style].map((prop) => [prop, rule.style.getPropertyValue(prop)]),
    );

    // Split multiple selectors (e.g., "a:hover, button:hover")
    const selectors = rule.selectorText.split(",").map((s) => s.trim());
    for (const selector of selectors) {
      if (!selector.includes(":hover")) {
        continue;
      }

      // Break the selector on child/sibling combinators and pick the part
      // that actually carries :hover.
      const parts = selector.split(/\s*[>+~]\s*/);
      const hoverPart = parts.find((part) => part.includes(":hover"));
      if (!hoverPart) {
        continue;
      }

      // Base selector without :hover is the lookup key.
      const baseSelector = hoverPart.replace(/:hover/g, "").trim();
      if (!isValidCSSSelector(baseSelector)) {
        continue;
      }

      // Merge into whatever was already collected for this base selector.
      const styles = hoverMap.get(baseSelector) || {};
      Object.assign(styles, declared);

      // Nested selector (like ":hover > .something"): keep the full selector
      // and its own copy of the declarations.
      if (parts.length > 1) {
        styles["__nested__"] = styles["__nested__"] || [];
        styles["__nested__"].push({
          selector: selector,
          styles: { ...declared },
        });
      }

      // Only entries that define a cursor are of interest.
      if (!("cursor" in styles)) {
        continue;
      }
      hoverMap.set(baseSelector, styles);
    }
  }
};
try { try {
for (const sheet of sheets) { await Promise.all(
try { sheets.map(async (sheet) => {
const rules = sheet.cssRules || sheet.rules; try {
for (const rule of rules) { parseCssSheet(sheet);
if (rule.type === 1 && rule.selectorText) { } catch (e) {
// Split multiple selectors (e.g., "a:hover, button:hover") _jsConsoleWarn("Could not access stylesheet:", e);
const selectors = rule.selectorText.split(",").map((s) => s.trim());
for (const selector of selectors) { if ((e.name !== "SecurityError" && e.code !== 18) || !sheet.href) {
// Check if this is a hover rule return;
if (selector.includes(":hover")) { }
// Get all parts of the selector
const parts = selector.split(/\s*[>+~]\s*/);
// Get the main hoverable element (the one with :hover) let newLink = null;
const hoverPart = parts.find((part) => part.includes(":hover")); try {
if (!hoverPart) continue; _jsConsoleLog("recreating the link element: ", sheet.href);
const oldLink = document.querySelector(
`link[href="${sheet.href}"]`,
);
newLink = document.createElement("link");
newLink.rel = "stylesheet";
newLink.href = oldLink.href + "?v=" + Date.now(); // to void cache
newLink.crossOrigin = "anonymous";
// keep the old link in place until the new one has loaded and been parsed; it is removed afterwards
document.head.append(newLink);
// Get base selector without :hover // wait for a while until the sheet is fully loaded
const baseSelector = hoverPart.replace(/:hover/g, "").trim(); await asyncSleepFor(1500);
const newSheets = [...document.styleSheets];
// Skip invalid selectors const refreshedSheet = newSheets.find(
if (!isValidCSSSelector(baseSelector)) { (s) => s.href === newLink.href,
continue; );
} if (!refreshedSheet) {
newLink.remove();
// Get or create styles object for this selector return;
let styles = hoverMap.get(baseSelector) || {}; }
_jsConsoleLog("parsing recreated the link element: ", newLink.href);
// Add all style properties parseCssSheet(refreshedSheet);
for (const prop of rule.style) { oldLink.remove();
styles[prop] = rule.style[prop]; } catch (e) {
} _jsConsoleWarn("Error recreating the link element:", e);
if (newLink) {
// If this is a nested selector (like :hover > .something) newLink.remove();
// store it in a special format
if (parts.length > 1) {
const fullSelector = selector;
styles["__nested__"] = styles["__nested__"] || [];
styles["__nested__"].push({
selector: fullSelector,
styles: Object.fromEntries(
[...rule.style].map((prop) => [prop, rule.style[prop]]),
),
});
}
// only need the style which includes the cursor attribute.
if (!("cursor" in styles)) {
continue;
}
hoverMap.set(baseSelector, styles);
}
} }
} }
} }
} catch (e) { }),
_jsConsoleWarn("Could not access stylesheet:", e); );
continue;
}
}
} catch (e) { } catch (e) {
_jsConsoleError("Error processing stylesheets:", e); _jsConsoleError("Error processing stylesheets:", e);
} }
@@ -2468,11 +2510,11 @@ if (window.globalObserverForDOMIncrement === undefined) {
}); });
} }
function startGlobalIncrementalObserver(element = null) { async function startGlobalIncrementalObserver(element = null) {
window.globalListnerFlag = true; window.globalListnerFlag = true;
window.globalDomDepthMap = new Map(); window.globalDomDepthMap = new Map();
window.globalOneTimeIncrementElements = []; window.globalOneTimeIncrementElements = [];
window.globalHoverStylesMap = getHoverStylesMap(); window.globalHoverStylesMap = await getHoverStylesMap();
window.globalParsedElementCounter = new SafeCounter(); window.globalParsedElementCounter = new SafeCounter();
window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data
window.globalObserverForDOMIncrement.observe(document.body, { window.globalObserverForDOMIncrement.observe(document.body, {

View File

@@ -725,7 +725,7 @@ class IncrementalScrapePage:
return self.element_tree_trimmed return self.element_tree_trimmed
async def start_listen_dom_increment(self, element: ElementHandle | None = None) -> None: async def start_listen_dom_increment(self, element: ElementHandle | None = None) -> None:
js_script = "(element) => startGlobalIncrementalObserver(element)" js_script = "async (element) => await startGlobalIncrementalObserver(element)"
await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script, arg=element) await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script, arg=element)
async def stop_listen_dom_increment(self) -> None: async def stop_listen_dom_increment(self) -> None: