From c04fafd726d2189d19db84f4f26fafb8458f1d1c Mon Sep 17 00:00:00 2001
From: karishmas6 <carishmashukla28@gmail.com>
Date: Tue, 16 Jul 2024 00:25:47 +0530
Subject: [PATCH] feat: scrapable heuristics

---
 mx-interpreter/browserSide/scraper.js | 40 +++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js
index c64c4808..d9e5a0d9 100644
--- a/mx-interpreter/browserSide/scraper.js
+++ b/mx-interpreter/browserSide/scraper.js
@@ -11,3 +11,43 @@ function getBiggestElement(selector) {
   );
   return biggest;
 }
+
+/**
+ * Builds a structural CSS selector for an element: the tag names on the path
+ * from BODY (or from the topmost ancestor when no BODY is on that path) down to
+ * the element, joined with the child combinator, e.g. `BODY > DIV > UL > LI`.
+ * **The selector is intentionally not unique:** same tag path, same selector.
+ * @param {HTMLElement} element Element being described.
+ * @returns {string} The `tagName` values along that path, joined by ` > `.
+ */
+function GetSelectorStructural(element) {
+  // Base cases: BODY anchors the selector; a parentless element (below) also ends the recursion.
+  if (element.tagName === 'BODY') {
+    return 'BODY';
+  }
+  const selector = element.tagName;
+  if (element.parentElement) {
+    return `${GetSelectorStructural(element.parentElement)} > ${selector}`;
+  }
+
+  return selector;
+}
+
+/**
+ * Heuristic method to find collections of "interesting" items on the page.
+ * @returns {Array<HTMLElement>} A collection of interesting DOM nodes
+ *  (online store products, plane tickets, list items... and many more?)
+ */
+function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') {
+  const restoreScroll = (() => {
+    const { scrollX, scrollY } = window;
+    return () => {
+      window.scrollTo(scrollX, scrollY);
+    };
+  })();
+
+  /**
+* @typedef {Array<{x: number, y: number}>} Grid
+*/
+
+ 
\ No newline at end of file