support cross domain css sheet parse (#2535)
This commit is contained in:
@@ -1432,7 +1432,7 @@ async function buildElementTree(
|
||||
) {
|
||||
// Generate hover styles map at the start
|
||||
if (hoverStylesMap === undefined) {
|
||||
hoverStylesMap = getHoverStylesMap();
|
||||
hoverStylesMap = await getHoverStylesMap();
|
||||
}
|
||||
|
||||
var elements = [];
|
||||
@@ -2163,72 +2163,114 @@ function scrollToElementTop(element) {
|
||||
* https://stackoverflow.com/questions/7013559/is-there-a-way-to-get-element-hover-style-while-the-element-not-in-hover-state
|
||||
* https://stackoverflow.com/questions/17226676/how-to-simulate-a-mouseover-in-pure-javascript-that-activates-the-css-hover
|
||||
*/
|
||||
function getHoverStylesMap() {
|
||||
async function getHoverStylesMap() {
|
||||
const hoverMap = new Map();
|
||||
const sheets = document.styleSheets;
|
||||
const sheets = [...document.styleSheets];
|
||||
|
||||
const parseCssSheet = (sheet) => {
|
||||
const rules = sheet.cssRules || sheet.rules;
|
||||
for (const rule of rules) {
|
||||
if (rule.type === 1 && rule.selectorText) {
|
||||
// Split multiple selectors (e.g., "a:hover, button:hover")
|
||||
const selectors = rule.selectorText.split(",").map((s) => s.trim());
|
||||
|
||||
for (const selector of selectors) {
|
||||
// Check if this is a hover rule
|
||||
if (selector.includes(":hover")) {
|
||||
// Get all parts of the selector
|
||||
const parts = selector.split(/\s*[>+~]\s*/);
|
||||
|
||||
// Get the main hoverable element (the one with :hover)
|
||||
const hoverPart = parts.find((part) => part.includes(":hover"));
|
||||
if (!hoverPart) continue;
|
||||
|
||||
// Get base selector without :hover
|
||||
const baseSelector = hoverPart.replace(/:hover/g, "").trim();
|
||||
|
||||
// Skip invalid selectors
|
||||
if (!isValidCSSSelector(baseSelector)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get or create styles object for this selector
|
||||
let styles = hoverMap.get(baseSelector) || {};
|
||||
|
||||
// Add all style properties
|
||||
for (const prop of rule.style) {
|
||||
styles[prop] = rule.style[prop];
|
||||
}
|
||||
|
||||
// If this is a nested selector (like :hover > .something)
|
||||
// store it in a special format
|
||||
if (parts.length > 1) {
|
||||
const fullSelector = selector;
|
||||
styles["__nested__"] = styles["__nested__"] || [];
|
||||
styles["__nested__"].push({
|
||||
selector: fullSelector,
|
||||
styles: Object.fromEntries(
|
||||
[...rule.style].map((prop) => [prop, rule.style[prop]]),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// only need the style which includes the cursor attribute.
|
||||
if (!("cursor" in styles)) {
|
||||
continue;
|
||||
}
|
||||
hoverMap.set(baseSelector, styles);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
for (const sheet of sheets) {
|
||||
try {
|
||||
const rules = sheet.cssRules || sheet.rules;
|
||||
for (const rule of rules) {
|
||||
if (rule.type === 1 && rule.selectorText) {
|
||||
// Split multiple selectors (e.g., "a:hover, button:hover")
|
||||
const selectors = rule.selectorText.split(",").map((s) => s.trim());
|
||||
await Promise.all(
|
||||
sheets.map(async (sheet) => {
|
||||
try {
|
||||
parseCssSheet(sheet);
|
||||
} catch (e) {
|
||||
_jsConsoleWarn("Could not access stylesheet:", e);
|
||||
|
||||
for (const selector of selectors) {
|
||||
// Check if this is a hover rule
|
||||
if (selector.includes(":hover")) {
|
||||
// Get all parts of the selector
|
||||
const parts = selector.split(/\s*[>+~]\s*/);
|
||||
if ((e.name !== "SecurityError" && e.code !== 18) || !sheet.href) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the main hoverable element (the one with :hover)
|
||||
const hoverPart = parts.find((part) => part.includes(":hover"));
|
||||
if (!hoverPart) continue;
|
||||
let newLink = null;
|
||||
try {
|
||||
_jsConsoleLog("recreating the link element: ", sheet.href);
|
||||
const oldLink = document.querySelector(
|
||||
`link[href="${sheet.href}"]`,
|
||||
);
|
||||
newLink = document.createElement("link");
|
||||
newLink.rel = "stylesheet";
|
||||
newLink.href = oldLink.href + "?v=" + Date.now(); // to void cache
|
||||
newLink.crossOrigin = "anonymous";
|
||||
// until the new link loaded, removing the old one
|
||||
document.head.append(newLink);
|
||||
|
||||
// Get base selector without :hover
|
||||
const baseSelector = hoverPart.replace(/:hover/g, "").trim();
|
||||
|
||||
// Skip invalid selectors
|
||||
if (!isValidCSSSelector(baseSelector)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get or create styles object for this selector
|
||||
let styles = hoverMap.get(baseSelector) || {};
|
||||
|
||||
// Add all style properties
|
||||
for (const prop of rule.style) {
|
||||
styles[prop] = rule.style[prop];
|
||||
}
|
||||
|
||||
// If this is a nested selector (like :hover > .something)
|
||||
// store it in a special format
|
||||
if (parts.length > 1) {
|
||||
const fullSelector = selector;
|
||||
styles["__nested__"] = styles["__nested__"] || [];
|
||||
styles["__nested__"].push({
|
||||
selector: fullSelector,
|
||||
styles: Object.fromEntries(
|
||||
[...rule.style].map((prop) => [prop, rule.style[prop]]),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// only need the style which includes the cursor attribute.
|
||||
if (!("cursor" in styles)) {
|
||||
continue;
|
||||
}
|
||||
hoverMap.set(baseSelector, styles);
|
||||
}
|
||||
// wait for a while until the sheet is fully loaded
|
||||
await asyncSleepFor(1500);
|
||||
const newSheets = [...document.styleSheets];
|
||||
const refreshedSheet = newSheets.find(
|
||||
(s) => s.href === newLink.href,
|
||||
);
|
||||
if (!refreshedSheet) {
|
||||
newLink.remove();
|
||||
return;
|
||||
}
|
||||
_jsConsoleLog("parsing recreated the link element: ", newLink.href);
|
||||
parseCssSheet(refreshedSheet);
|
||||
oldLink.remove();
|
||||
} catch (e) {
|
||||
_jsConsoleWarn("Error recreating the link element:", e);
|
||||
if (newLink) {
|
||||
newLink.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
_jsConsoleWarn("Could not access stylesheet:", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
} catch (e) {
|
||||
_jsConsoleError("Error processing stylesheets:", e);
|
||||
}
|
||||
@@ -2468,11 +2510,11 @@ if (window.globalObserverForDOMIncrement === undefined) {
|
||||
});
|
||||
}
|
||||
|
||||
function startGlobalIncrementalObserver(element = null) {
|
||||
async function startGlobalIncrementalObserver(element = null) {
|
||||
window.globalListnerFlag = true;
|
||||
window.globalDomDepthMap = new Map();
|
||||
window.globalOneTimeIncrementElements = [];
|
||||
window.globalHoverStylesMap = getHoverStylesMap();
|
||||
window.globalHoverStylesMap = await getHoverStylesMap();
|
||||
window.globalParsedElementCounter = new SafeCounter();
|
||||
window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data
|
||||
window.globalObserverForDOMIncrement.observe(document.body, {
|
||||
|
||||
@@ -725,7 +725,7 @@ class IncrementalScrapePage:
|
||||
return self.element_tree_trimmed
|
||||
|
||||
async def start_listen_dom_increment(self, element: ElementHandle | None = None) -> None:
|
||||
js_script = "(element) => startGlobalIncrementalObserver(element)"
|
||||
js_script = "async (element) => await startGlobalIncrementalObserver(element)"
|
||||
await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script, arg=element)
|
||||
|
||||
async def stop_listen_dom_increment(self) -> None:
|
||||
|
||||
Reference in New Issue
Block a user