diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py index 44d3040b5..7d2837745 100644 --- a/crawl4ai/async_configs.py +++ b/crawl4ai/async_configs.py @@ -937,23 +937,33 @@ def __init__( scroll_count: int = 10, scroll_by: Union[str, int] = "container_height", wait_after_scroll: float = 0.5, + max_no_change: int = 5, + max_captured_elements: int = 10000, ): """ Initialize virtual scroll configuration. - + Args: container_selector: CSS selector for the scrollable container scroll_count: Maximum number of scrolls to perform scroll_by: Amount to scroll - can be: - "container_height": scroll by container's height - - "page_height": scroll by viewport height + - "page_height": scroll by viewport height - int: fixed pixel amount wait_after_scroll: Seconds to wait after each scroll for content to load + max_no_change: Stop scrolling after this many consecutive scrolls with no + new content detected. Prevents wasting time at the end of a feed. + Set to 0 to disable early termination. + max_captured_elements: Maximum number of unique elements to accumulate + before stopping. Prevents browser OOM on very large feeds. + Set to 0 to disable the cap. """ self.container_selector = container_selector self.scroll_count = scroll_count self.scroll_by = scroll_by self.wait_after_scroll = wait_after_scroll + self.max_no_change = max_no_change + self.max_captured_elements = max_captured_elements def to_dict(self) -> dict: """Convert to dictionary for serialization.""" @@ -962,12 +972,18 @@ def to_dict(self) -> dict: "scroll_count": self.scroll_count, "scroll_by": self.scroll_by, "wait_after_scroll": self.wait_after_scroll, + "max_no_change": self.max_no_change, + "max_captured_elements": self.max_captured_elements, } @classmethod def from_dict(cls, data: dict) -> "VirtualScrollConfig": - """Create instance from dictionary.""" - return cls(**data) + """Create instance from dictionary. 
Unknown keys are ignored for + forward-compatibility with newer config versions.""" + known = {"container_selector", "scroll_count", "scroll_by", + "wait_after_scroll", "max_no_change", "max_captured_elements"} + filtered = {k: v for k, v in data.items() if k in known} + return cls(**filtered) class LinkPreviewConfig: """Configuration for link head extraction and scoring.""" diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index b9de25f6b..42ce5be26 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -1200,88 +1200,935 @@ async def get_delayed_content(delay: float = 5.0) -> str: except Exception: pass - # async def _handle_full_page_scan(self, page: Page, scroll_delay: float = 0.1): async def _handle_full_page_scan(self, page: Page, scroll_delay: float = 0.1, max_scroll_steps: Optional[int] = None): """ - Helper method to handle full page scanning. - - How it works: - 1. Get the viewport height. - 2. Scroll to the bottom of the page. - 3. Get the total height of the page. - 4. Scroll back to the top of the page. - 5. Scroll to the bottom of the page again. - 6. Continue scrolling until the bottom of the page is reached. + Progressive full-page scan with automatic DOM recycling detection. + + Five phases: + 1. Setup — viewport, timeout, helpers (fingerprint, cleanOuterHTML, + expandCollapsed). + 2. Detect recycling — fingerprint candidate containers before/after + a probe scroll; also run a MutationObserver for innerHTML-wipe + detection. Collects all recycling containers. + 3. Scroll + capture recycling containers — for each detected + recycling container: deterministic scroll with fingerprint-based + dedup, nested inner-container scrolling, then inject merged HTML. + Supports vertical, horizontal, and 2D zigzag scroll. + 4. Container-scroll pass — scans for overflow-y/x scrollable + containers not handled in phase 3 and scrolls them. + 5. 
Fallback — normal scroll-to-bottom for append-based lazy loading. + + For pages that do NOT recycle DOM nodes the behaviour is identical to + the previous implementation — scroll top to bottom. Args: page (Page): The Playwright page object scroll_delay (float): The delay between page scrolls - max_scroll_steps (Optional[int]): Maximum number of scroll steps to perform. Defaults to 10 to prevent infinite scroll hangs. - + max_scroll_steps (Optional[int]): Maximum number of scroll steps. + If None, scrolls until the bottom is reached. """ - # Default to 10 steps to prevent infinite scroll on dynamic pages - if max_scroll_steps is None: - max_scroll_steps = 10 - + prev_timeout = page._timeout_settings._timeout if hasattr(page, '_timeout_settings') else None try: viewport_size = page.viewport_size if viewport_size is None: await page.set_viewport_size( {"width": self.browser_config.viewport_width, "height": self.browser_config.viewport_height} ) - viewport_size = page.viewport_size - viewport_height = viewport_size.get( - "height", self.browser_config.viewport_height - ) - current_position = viewport_height + # Virtual-scroll pages may need many scroll steps, easily + # exceeding Playwright's default 30s evaluate timeout. + # Temporarily raise it for this call. + page.set_default_timeout(300_000) # 5 minutes + + result = await page.evaluate( + """async (cfg) => { + // ── Config ────────────────────────────────────────────────────────── + const scrollDelay = cfg.scrollDelay; + const maxSteps = cfg.maxSteps; + const memCap = cfg.memCap || 50000; + + // ── Helpers ───────────────────────────────────────────────────────── + const sleep = ms => new Promise(r => setTimeout(r, ms)); + + // Deterministic fingerprint. 
+ // Priority: href on element or child a[href] + // → data-id / data-key / data-index / data-testid / id + // → full text djb2 hash (no truncation, no outerHTML fallback) + function fingerprint(el) { + try { + if (!el || el.nodeType !== 1) return null; + const tag = el.tagName.toLowerCase(); + if (['script','style','link','meta','noscript','br','hr'].indexOf(tag) !== -1) return null; + + // 1. href + const href = el.getAttribute('href') + || (el.querySelector ? (el.querySelector('a[href]') || {getAttribute: ()=>null}).getAttribute('href') : null); + if (href && href.length > 1 && href !== '#' && href !== '/') return 'u:' + href; + + // 2. stable data attributes + const aid = el.getAttribute('data-id') + || el.getAttribute('data-key') + || el.getAttribute('data-index') + || el.getAttribute('data-testid') + || el.id; + if (aid) return 'a:' + aid; + + // 3. full-text djb2 hash — no truncation + const txt = (el.textContent || '').trim(); + if (txt.length > 0) { + let h = 5381; + for (let i = 0; i < txt.length; i++) { + h = ((h << 5) + h + txt.charCodeAt(i)) | 0; + } + return 't:' + h + ':' + txt.length; + } + return null; + } catch(e) { return null; } + } + + // Strip volatile positioning from inline style before capturing. + // Uses cloneNode to avoid mutating live DOM (which could trigger + // framework re-renders or race with MutationObservers). 
+ function cleanOuterHTML(el) { + try { + const clone = el.cloneNode(true); + const s = clone.style; + if (s) { + for (const prop of ['transform', 'translate', 'top', 'left']) { + if (s[prop]) s[prop] = ''; + } + } + return clone.outerHTML; + } catch(e) { + return el.outerHTML; + } + } - # await page.evaluate(f"window.scrollTo(0, {current_position})") - await self.safe_scroll(page, 0, current_position, delay=scroll_delay) - # await self.csp_scroll_to(page, 0, current_position) - # await asyncio.sleep(scroll_delay) + // ── expandCollapsed — preserved exactly ───────────────────────────── + const EXPAND_RE = /^\+\d+\s*more|show\s*(all|more)|load\s*more|see\s*(all|more)|expand/i; + const expandedSet = new WeakSet(); + async function expandCollapsed() { + let count = 0; + for (const el of document.querySelectorAll('[role="button"], button')) { + if (expandedSet.has(el)) continue; + const txt = (el.textContent || '').trim(); + if (EXPAND_RE.test(txt) && !el.closest('nav') && !el.closest('header') && !el.closest('footer')) { + expandedSet.add(el); + try { el.click(); count++; } catch(e) {} + } + } + for (const d of document.querySelectorAll('details:not([open])')) { + if (expandedSet.has(d)) continue; + expandedSet.add(d); + d.setAttribute('open', ''); + count++; + } + for (const el of document.querySelectorAll('[aria-expanded="false"]')) { + if (expandedSet.has(el)) continue; + const txt = (el.textContent || '').trim(); + if (txt.length < 200 && !el.closest('nav') && !el.closest('header')) { + expandedSet.add(el); + try { el.click(); count++; } catch(e) {} + } + } + if (count > 0) await sleep(scrollDelay * 1000); + return count; + } - # total_height = await page.evaluate("document.documentElement.scrollHeight") - dimensions = await self.get_page_dimensions(page) - total_height = dimensions["height"] - - scroll_step_count = 0 - while current_position < total_height: - #### - # NEW FEATURE: Check if we've reached the maximum allowed scroll steps - # This prevents infinite 
scrolling on very long pages or infinite scroll scenarios - # If max_scroll_steps is None, this check is skipped (unlimited scrolling - original behavior) - #### - if max_scroll_steps is not None and scroll_step_count >= max_scroll_steps: - break - current_position = min(current_position + viewport_height, total_height) - await self.safe_scroll(page, 0, current_position, delay=scroll_delay) - - # Increment the step counter for max_scroll_steps tracking - scroll_step_count += 1 - - # await page.evaluate(f"window.scrollTo(0, {current_position})") - # await asyncio.sleep(scroll_delay) + // ── Phase 1: Setup ───────────────────────────────────────────── + let steps = 0; + const limit = (maxSteps && maxSteps > 0) ? maxSteps : 1000000; + let totalExpanded = 0; + let removedCount = 0; + + // ── Phase 2: Detect recycling via content comparison ──────── + // Three recycling patterns exist: + // a) Transform-based: DOM nodes stay, style.transform changes content + // b) innerHTML-wipe: container cleared and rebuilt on scroll + // c) Node swap: individual nodes removed and new ones added + // We detect ALL patterns by: + // 1. Fingerprinting all children of candidate containers before scroll + // 2. Scrolling down + // 3. Fingerprinting again — if fingerprints changed but child count + // didn't grow proportionally, it's recycling + // Also run MutationObserver for innerHTML-wipe detection. 
+ + let recyclingContainers = []; + + // Find candidate containers: elements with 3+ children in the viewport area + function findCandidates() { + const candidates = []; + const elems = document.body.querySelectorAll('*'); + const max = Math.min(elems.length, 5000); + for (let i = 0; i < max; i++) { + const el = elems[i]; + if (el.children.length < 3) continue; + // Check first child tag — list containers have same-tag children + const firstTag = el.children[0].tagName; + let sameTag = 0; + for (const c of el.children) { if (c.tagName === firstTag) sameTag++; } + if (sameTag >= 3) candidates.push(el); + } + return candidates; + } + + // Snapshot: collect fingerprints of all children + function snapshotFps(el) { + const fps = new Set(); + for (const child of el.children) { + if (child.nodeType !== 1) continue; + const f = fingerprint(child); + if (f) fps.add(f); + } + return fps; + } + + // MutationObserver for innerHTML-wipe detection + let hadChildListRecycling = null; // element or null + const recycleParents = new Map(); + const probeObserver = new MutationObserver(mutations => { + for (const mut of mutations) { + if (mut.type !== 'childList') continue; + if (!recycleParents.has(mut.target)) { + recycleParents.set(mut.target, {adds: 0, removes: 0}); + } + const rec = recycleParents.get(mut.target); + if (mut.addedNodes.length > 0) rec.adds += mut.addedNodes.length; + if (mut.removedNodes.length > 0) { + rec.removes += mut.removedNodes.length; + for (const node of mut.removedNodes) { + if (node.nodeType === 1) removedCount++; + } + } + } + }); + probeObserver.observe(document.body, {childList: true, subtree: true}); + + // Snapshot candidates before scroll + const candidates = findCandidates(); + const beforeMap = new Map(); // element → Set + for (const c of candidates) { + beforeMap.set(c, snapshotFps(c)); + } + const beforeChildCounts = new Map(); + for (const c of candidates) { + beforeChildCounts.set(c, c.children.length); + } + + // Helper: find nearest 
scrollable ancestor of el (excluding window). + // Checks both vertical (overflowY) and horizontal (overflowX). + function scrollableAncestor(el) { + let p = el.parentElement; + while (p && p !== document.body && p !== document.documentElement) { + const cs = window.getComputedStyle(p); + const oy = cs.overflowY; + if ((oy === 'scroll' || oy === 'auto') && p.scrollHeight > p.clientHeight) { + return p; + } + const ox = cs.overflowX; + if ((ox === 'scroll' || ox === 'auto') && p.scrollWidth > p.clientWidth) { + return p; + } + p = p.parentElement; + } + return null; + } + + // Scroll down by one viewport height (window-level). + // Also probe each candidate container's own scrollTop/scrollLeft + // for container-level virtual scroll (overflow-y/x: scroll). + // If a candidate is not itself scrollable, scroll its nearest + // scrollable ancestor (e.g. #inner inside #scroller). + const probeDistance = window.innerHeight; + window.scrollBy(0, probeDistance); + const probedAncestors = new Set(); + for (const c of candidates) { + const cs = window.getComputedStyle(c); + const oy = cs.overflowY; + const ox = cs.overflowX; + if ((oy === 'scroll' || oy === 'auto') && c.scrollHeight > c.clientHeight) { + c.scrollTop += c.clientHeight; + } else if ((ox === 'scroll' || ox === 'auto') && c.scrollWidth > c.clientWidth) { + c.scrollLeft += c.clientWidth; + } else { + // Candidate itself doesn't scroll — try scrollable parent + const anc = scrollableAncestor(c); + if (anc && !probedAncestors.has(anc)) { + probedAncestors.add(anc); + const ancCs = window.getComputedStyle(anc); + const ancOx = ancCs.overflowX; + if ((ancOx === 'scroll' || ancOx === 'auto') && anc.scrollWidth > anc.clientWidth) { + anc.scrollLeft += anc.clientWidth; + } else { + anc.scrollTop += anc.clientHeight; + } + } + } + } + await sleep(Math.max(scrollDelay * 1000, 300)); + + // Compare: which containers had their content change? 
+ for (const c of candidates) { + const before = beforeMap.get(c); + const after = snapshotFps(c); + const beforeCount = beforeChildCounts.get(c); + const afterCount = c.children.length; + + // Content changed? + let changed = 0; + for (const f of before) { + if (!after.has(f)) changed++; + } + let added = 0; + for (const f of after) { + if (!before.has(f)) added++; + } + + // Recycling = content changed significantly but child count + // didn't grow (or grew very little). If child count doubled, + // it's append-based, not recycling. + if (changed >= 2 && afterCount <= beforeCount * 1.5) { + recyclingContainers.push(c); + } + } + + // Also check MutationObserver results for childList recycling. + // Require significant mutation count (>= 4) to avoid false positives + // from loading spinners or minor DOM updates (1 add + 1 remove). + // Also check MutationObserver for containers not already detected + // via fingerprint comparison above. + { + const alreadyFound = new Set(recyclingContainers); + for (const [el, counts] of recycleParents) { + if (alreadyFound.has(el)) continue; + if (counts.adds >= 2 && counts.removes >= 2) { + recyclingContainers.push(el); + alreadyFound.add(el); + } + } + } + + probeObserver.disconnect(); + + // Scroll back to top/left (window + all probed containers + their ancestors) + window.scrollTo(0, 0); + for (const c of candidates) { + if (c.scrollTop > 0) c.scrollTop = 0; + if (c.scrollLeft > 0) c.scrollLeft = 0; + } + for (const anc of probedAncestors) { + anc.scrollTop = 0; + anc.scrollLeft = 0; + } + await sleep(scrollDelay * 1000); + + // ── Phase 3: Scroll + capture all recycling containers ──── + let phase2TotalMerged = 0; + let phase2CapReached = false; + + for (let rci = 0; rci < recyclingContainers.length && steps < limit && !phase2CapReached; rci++) { + const recyclingContainer = recyclingContainers[rci]; + const children = recyclingContainer.children; + if (children.length < 2) { + continue; // can't measure, skip + } else { + 
// Measure item dimension — try vertical first, then horizontal. + let itemHeight = Math.abs( + children[1].getBoundingClientRect().top - + children[0].getBoundingClientRect().top + ); + if (itemHeight <= 0) { + // Vertical offset is zero — try horizontal offset + // (items may be laid out horizontally with translateX) + itemHeight = Math.abs( + children[1].getBoundingClientRect().left - + children[0].getBoundingClientRect().left + ); + } + if (itemHeight <= 0) { + itemHeight = children[0].getBoundingClientRect().height; + } + if (itemHeight <= 0) { + itemHeight = children[0].getBoundingClientRect().width; + } + if (itemHeight <= 0) { + continue; // can't determine step size, skip this container + } else { + // Determine total virtual size (height or width). + // Check container's explicit style.height/width first (set by + // virtual-scroll frameworks for scrollbar sizing). + // Fall back to scrollHeight/scrollWidth. + // For window-level scroll, use documentElement dimensions. + let totalHeight = 0; + if (recyclingContainer.style && recyclingContainer.style.width) { + const pw = parseFloat(recyclingContainer.style.width) || 0; + if (pw > 0) totalHeight = pw; + } + if (!totalHeight && recyclingContainer.style && recyclingContainer.style.height) { + totalHeight = parseFloat(recyclingContainer.style.height) || 0; + } + if (!totalHeight || totalHeight <= 0) { + totalHeight = Math.max( + document.documentElement.scrollHeight, + document.documentElement.scrollWidth + ); + } + const totalItems = Math.round(totalHeight / itemHeight); + const cappedTotal = Math.min(totalItems, memCap); + + // ── Phase 3a: Deterministic scroll + capture ──────────── + // captured: fingerprint → cleanOuterHTML (first-seen wins) + const captured = new Map(); + + function captureVisible() { + for (const child of recyclingContainer.children) { + if (child.nodeType !== 1) continue; + const key = fingerprint(child); + if (key && !captured.has(key)) { + captured.set(key, cleanOuterHTML(child)); + 
if (captured.size >= memCap) return; + } + // Also capture children inside nested scrollable + // containers within each recycled child. + try { + const subs = child.querySelectorAll('*'); + for (const isc of subs) { + const ics = window.getComputedStyle(isc); + const iox = ics.overflowX; + const ioy = ics.overflowY; + const hS = (iox === 'scroll' || iox === 'auto') && isc.scrollWidth > isc.clientWidth; + const vS = (ioy === 'scroll' || ioy === 'auto') && isc.scrollHeight > isc.clientHeight; + if (!hS && !vS) continue; + for (const ic of isc.children) { + if (ic.nodeType !== 1) continue; + const ik = fingerprint(ic); + if (ik && !captured.has(ik)) { + captured.set(ik, cleanOuterHTML(ic)); + if (captured.size >= memCap) return; + } + } + } + } catch(e) {} + } + } + + // Scroll nested scrollable containers within visible + // recycled children to reveal nested recycled content. + async function scrollInnerContainers() { + for (const child of recyclingContainer.children) { + if (child.nodeType !== 1) continue; + if (captured.size >= memCap) return; + try { + const subs = child.querySelectorAll('*'); + for (const isc of subs) { + if (captured.size >= memCap) return; + const ics = window.getComputedStyle(isc); + const iox = ics.overflowX; + const ioy = ics.overflowY; + const hS = (iox === 'scroll' || iox === 'auto') && isc.scrollWidth > isc.clientWidth; + const vS = (ioy === 'scroll' || ioy === 'auto') && isc.scrollHeight > isc.clientHeight; + if (!hS && !vS) continue; + let iDim = hS ? isc.clientWidth : isc.clientHeight; + if (isc.children.length >= 2) { + const d = hS + ? 
Math.abs(isc.children[1].getBoundingClientRect().left - isc.children[0].getBoundingClientRect().left) + : Math.abs(isc.children[1].getBoundingClientRect().top - isc.children[0].getBoundingClientRect().top); + if (d > 0) iDim = d; + } + const iStep = Math.max(iDim, iDim * 2); + for (let si = 0; si < 200; si++) { + if (hS) isc.scrollLeft += iStep; + else isc.scrollTop += iStep; + await sleep(Math.min(scrollDelay * 500, 100)); + for (const ic of isc.children) { + if (ic.nodeType !== 1) continue; + const ik = fingerprint(ic); + if (ik && !captured.has(ik)) { + captured.set(ik, cleanOuterHTML(ic)); + } + } + if (captured.size >= memCap) break; + if (hS) { + if (isc.scrollLeft + isc.clientWidth >= isc.scrollWidth - iDim) break; + } else { + if (isc.scrollTop + isc.clientHeight >= isc.scrollHeight - iDim) break; + } + } + if (hS) isc.scrollLeft = 0; + else isc.scrollTop = 0; + } + } catch(e) {} + } + } + + // Second observer: capture items at the moment of removal + // (innerHTML-wipe fallback — ensures we don't miss items + // whose container is wiped rather than recycled). + const wipeObserver = new MutationObserver(mutations => { + for (const mut of mutations) { + if (mut.type !== 'childList') continue; + for (const node of mut.removedNodes) { + if (node.nodeType !== 1) continue; + const key = fingerprint(node); + if (key && !captured.has(key)) { + try { captured.set(key, cleanOuterHTML(node)); } catch(e) {} + } + } + } + }); + wipeObserver.observe(recyclingContainer, {childList: true, subtree: false}); + let capReached = false; + try { + + // Determine the element to actually scroll. + // Priority: + // 1. recyclingContainer itself, if it has overflow-y/x scroll/auto + // and is actually scrollable. + // 2. Its nearest scrollable ancestor (e.g. #scroller wrapping #inner). + // 3. Window (fallback). + // This handles the pattern where the DATA container (#inner) is not + // scrollable but its PARENT (#scroller) is. 
+ let scrollTarget = null; // null → use window + let isHorizontal = false; // true if the scroll axis is horizontal + if ( + recyclingContainer !== document.documentElement && + recyclingContainer !== document.body + ) { + const rcs = window.getComputedStyle(recyclingContainer); + const oy = rcs.overflowY; + const ox = rcs.overflowX; + if ((oy === 'scroll' || oy === 'auto') && + recyclingContainer.scrollHeight > recyclingContainer.clientHeight) { + scrollTarget = recyclingContainer; + isHorizontal = false; + } else if ((ox === 'scroll' || ox === 'auto') && + recyclingContainer.scrollWidth > recyclingContainer.clientWidth) { + scrollTarget = recyclingContainer; + isHorizontal = true; + } else { + // Not scrollable itself — look for a scrollable ancestor + const anc = scrollableAncestor(recyclingContainer); + if (anc) { + scrollTarget = anc; + const ancCs = window.getComputedStyle(anc); + const ancOx = ancCs.overflowX; + if ((ancOx === 'scroll' || ancOx === 'auto') && + anc.scrollWidth > anc.clientWidth && + !(anc.scrollHeight > anc.clientHeight)) { + isHorizontal = true; + } + } + } + } + const useWindowScroll = (scrollTarget === null); + + // Detect 2D scrollable containers (both horizontal + vertical) + let is2D = false; + if (!useWindowScroll) { + const st = scrollTarget; + const stCs = window.getComputedStyle(st); + const stOx = stCs.overflowX; + const stOy = stCs.overflowY; + const hasH = (stOx === 'scroll' || stOx === 'auto') && st.scrollWidth > st.clientWidth; + const hasV = (stOy === 'scroll' || stOy === 'auto') && st.scrollHeight > st.clientHeight; + if (hasH && hasV) is2D = true; + } + + // Measure item width (separate from height) for 2D / horizontal + let itemWidth = itemHeight; + if (recyclingContainer.children.length >= 2) { + const iw = Math.abs( + recyclingContainer.children[1].getBoundingClientRect().left - + recyclingContainer.children[0].getBoundingClientRect().left + ); + if (iw > 0) itemWidth = iw; + } + // For pure horizontal scroll, reuse 
itemHeight variable for width + if (isHorizontal && !is2D) { + itemHeight = itemWidth; + } + + // Capture at position 0 + captureVisible(); + await scrollInnerContainers(); + totalExpanded += await expandCollapsed(); + + let consecutiveEmpty = 0; + + if (is2D) { + // ── 2D zigzag scroll ────────────────────────────────── + // For containers scrollable in both X and Y (e.g. 2D grids), + // sweep horizontally at each vertical band, then step down. + // Use single-item steps to ensure the pool (which may be + // smaller than the visible cell count) renders every cell + // at least once across the sweep. + const vpW = scrollTarget.clientWidth; + const vpH2 = scrollTarget.clientHeight; + const hStep = itemWidth; // one column at a time + const vStep = itemHeight; // one row at a time + + let yPos = 0; + let atBottomRow = false; + while (!atBottomRow && steps < limit && !capReached) { + // Set vertical position and reset horizontal + scrollTarget.scrollTop = yPos; + scrollTarget.scrollLeft = 0; + await sleep(scrollDelay * 1000); + captureVisible(); + steps++; + if (captured.size >= memCap) { capReached = true; break; } + + // Sweep all the way right + while (steps < limit && !capReached) { + scrollTarget.scrollLeft += hStep; + await sleep(scrollDelay * 1000); + captureVisible(); + steps++; + if (captured.size >= memCap) { capReached = true; break; } + if (scrollTarget.scrollLeft + vpW >= scrollTarget.scrollWidth - itemWidth) break; + } + + // Check if at bottom + if (yPos + vpH2 >= scrollTarget.scrollHeight - itemHeight) { + atBottomRow = true; + } else { + yPos += vStep; + } + } + } else { + // ── 1D scroll (original logic) ──────────────────────── + const vpH = useWindowScroll + ? (isHorizontal ? window.innerWidth : window.innerHeight) + : (isHorizontal ? 
scrollTarget.clientWidth : scrollTarget.clientHeight); + const scrollStep = Math.max( + itemHeight, + Math.floor(vpH / itemHeight - 1) * itemHeight + ); + + while (steps < limit && !capReached) { + if (useWindowScroll) { + if (isHorizontal) { + window.scrollBy(scrollStep, 0); + } else { + window.scrollBy(0, scrollStep); + } + } else { + if (isHorizontal) { + scrollTarget.scrollLeft += scrollStep; + } else { + scrollTarget.scrollTop += scrollStep; + } + } + await sleep(scrollDelay * 1000); + totalExpanded += await expandCollapsed(); + + const beforeSize = captured.size; + captureVisible(); + await scrollInnerContainers(); + const newItems = captured.size - beforeSize; + + if (newItems === 0) { + consecutiveEmpty++; + let atEnd; + if (useWindowScroll) { + if (isHorizontal) { + atEnd = (window.scrollX + window.innerWidth >= document.documentElement.scrollWidth - itemHeight * 2); + } else { + atEnd = (window.scrollY + window.innerHeight >= document.documentElement.scrollHeight - itemHeight * 2); + } + } else { + if (isHorizontal) { + atEnd = (scrollTarget.scrollLeft + scrollTarget.clientWidth >= scrollTarget.scrollWidth - itemHeight * 2); + } else { + atEnd = (scrollTarget.scrollTop + scrollTarget.clientHeight >= scrollTarget.scrollHeight - itemHeight * 2); + } + } + if (consecutiveEmpty >= 3 && atEnd) break; + } else { + consecutiveEmpty = 0; + } + + steps++; + + if (captured.size >= memCap) { capReached = true; break; } + + if (useWindowScroll) { + if (isHorizontal) { + const scrolled = window.scrollX || window.pageXOffset || 0; + const docWidth = document.documentElement.scrollWidth; + if (scrolled + window.innerWidth >= docWidth - itemHeight) break; + } else { + const scrolled = window.scrollY || window.pageYOffset || 0; + const docHeight = document.documentElement.scrollHeight; + if (scrolled + window.innerHeight >= docHeight - itemHeight) break; + } + } else { + if (isHorizontal) { + if (scrollTarget.scrollLeft + scrollTarget.clientWidth >= + 
scrollTarget.scrollWidth - itemHeight) break; + } else { + if (scrollTarget.scrollTop + scrollTarget.clientHeight >= + scrollTarget.scrollHeight - itemHeight) break; + } + } + } + } + + // Final capture at scroll bottom + captureVisible(); + await scrollInnerContainers(); + + } finally { wipeObserver.disconnect(); } + + // ── Phase 3b: Inject results for this container ───────── + if (captured.size > recyclingContainer.children.length) { + recyclingContainer.style.display = 'none'; + const mergedDiv = document.createElement('div'); + mergedDiv.id = 'crawl4ai-merged-' + rci; + mergedDiv.innerHTML = Array.from(captured.values()).join('\\n'); + recyclingContainer.parentElement.insertBefore(mergedDiv, recyclingContainer); + } + phase2TotalMerged += captured.size; + if (capReached) phase2CapReached = true; + } + } + } // end for each recyclingContainer + + // ── Phase 3 complete — store result, fall through to Phase 4 ── + // Even if Phase 3 handled recycling containers, there may be + // OTHER scrollable containers (nested scroll, overflow-y/x) + // that Phase 4 should also scroll. + let phase2Result = null; + if (phase2TotalMerged > 0) { + window.scrollTo(0, 0); + phase2Result = { + recyclingDetected: true, + removedCount: removedCount, + totalMerged: phase2TotalMerged, + scrollSteps: steps, + capReached: phase2CapReached, + expandedGroups: totalExpanded, + phase: 3, + phase2Containers: recyclingContainers + }; + } + + // ── Phase 4: Container-scroll (overflow-y/x: scroll/auto) ── + // Handles scrollable containers not caught by Phase 3's + // recycling detection. Uses the same fingerprint + Map + // approach. Detects both vertical and horizontal containers. 
+ const allEls3 = document.querySelectorAll('*'); + const scanLimit = Math.min(allEls3.length, 5000); + const scrollContainers = []; + for (let i = 0; i < scanLimit; i++) { + try { + const el = allEls3[i]; + const cs3 = window.getComputedStyle(el); + const oy = cs3.overflowY; + const ox = cs3.overflowX; + if ((oy === 'auto' || oy === 'scroll') && + el.scrollHeight > el.clientHeight * 2 && + el.clientHeight > 50 && + el.children.length > 0) { + scrollContainers.push({el: el, horizontal: false}); + } else if ((ox === 'auto' || ox === 'scroll') && + el.scrollWidth > el.clientWidth * 2 && + el.clientWidth > 50 && + el.children.length > 0) { + scrollContainers.push({el: el, horizontal: true}); + } + } catch(e) { continue; } + } + + let phase3Merged = 0; + let phase3CapReached = false; + + // Collect containers already handled by Phase 3 to skip them + const phase3Handled = new Set(); + if (phase2Result && phase2Result.phase2Containers) { + for (const rc of phase2Result.phase2Containers) { + phase3Handled.add(rc); + } + } + + for (let ci = 0; ci < scrollContainers.length && steps < limit && !phase3CapReached; ci++) { + try { + const ct = scrollContainers[ci].el; + const ctHoriz = scrollContainers[ci].horizontal; + if (ctHoriz ? ct.clientWidth === 0 : ct.clientHeight === 0) continue; + // Skip containers already handled by Phase 3 + if (phase3Handled.has(ct)) continue; + // Skip containers that are display:none (hidden by Phase 3) + if (ct.style.display === 'none') continue; + // Skip containers inside a display:none parent (merged by Phase 3) + if (ct.closest('[style*="display: none"]') || ct.closest('[style*="display:none"]')) continue; + + // Measure item dimension from first two children + let ctItemDim = ctHoriz ? ct.clientWidth : ct.clientHeight; // fallback + if (ct.children.length >= 2) { + const ih = ctHoriz + ? 
Math.abs(ct.children[1].getBoundingClientRect().left - ct.children[0].getBoundingClientRect().left) + : Math.abs(ct.children[1].getBoundingClientRect().top - ct.children[0].getBoundingClientRect().top); + if (ih > 0) ctItemDim = ih; + } else if (ct.children.length === 1) { + const ih = ctHoriz + ? ct.children[0].getBoundingClientRect().width + : ct.children[0].getBoundingClientRect().height; + if (ih > 0) ctItemDim = ih; + } - # new_height = await page.evaluate("document.documentElement.scrollHeight") - dimensions = await self.get_page_dimensions(page) - new_height = dimensions["height"] + const ctCaptured = new Map(); - if new_height > total_height: - total_height = new_height + function ctCaptureVisible() { + for (const child of ct.children) { + if (child.nodeType !== 1) continue; + const key = fingerprint(child); + if (key && !ctCaptured.has(key)) { + ctCaptured.set(key, cleanOuterHTML(child)); + if (ctCaptured.size >= memCap) return; + } + } + } + + ctCaptureVisible(); + let ctConsecutiveEmpty = 0; + + for (let si = 0; si < 1000 && ctConsecutiveEmpty < 5 && steps < limit; si++) { + if (ctHoriz) { + ct.scrollLeft += ctItemDim; + } else { + ct.scrollTop += ctItemDim; + } + await sleep(scrollDelay * 1000); + steps++; + + const beforeSize = ctCaptured.size; + ctCaptureVisible(); + const newItems = ctCaptured.size - beforeSize; + + if (newItems === 0) { + ctConsecutiveEmpty++; + } else { + ctConsecutiveEmpty = 0; + } + + if (ctCaptured.size >= memCap) { phase3CapReached = true; break; } + if (ctHoriz) { + if (ct.scrollLeft + ct.clientWidth >= ct.scrollWidth - ctItemDim) break; + } else { + if (ct.scrollTop + ct.clientHeight >= ct.scrollHeight - ctItemDim) break; + } + } + + // Only inject if we found more items than currently visible + if (ctCaptured.size > ct.children.length) { + ct.innerHTML = Array.from(ctCaptured.values()).join('\\n'); + phase3Merged += ctCaptured.size; + } + } catch(e) { continue; } + } + + if (phase3Merged > 0 || phase2Result) { + 
window.scrollTo(0, document.documentElement.scrollHeight); + const totalMerged = (phase2Result ? phase2Result.totalMerged : 0) + phase3Merged; + return { + recyclingDetected: true, + removedCount: totalMerged, + totalMerged: totalMerged, + scrollSteps: steps, + capReached: phase3CapReached || (phase2Result && phase2Result.capReached), + expandedGroups: totalExpanded, + phase: phase2Result ? 3 : 4 + }; + } - # await page.evaluate("window.scrollTo(0, 0)") - await self.safe_scroll(page, 0, 0) + // No recycling and no container-scroll found. + // Do a normal scroll-to-bottom for append-based infinite scroll + // (items are lazy-loaded as user scrolls down). + let totalHeight = document.documentElement.scrollHeight; + const vpHeight = window.innerHeight; + let pos = 0; + while (pos < totalHeight && steps < limit) { + pos = Math.min(pos + vpHeight, totalHeight); + window.scrollTo(0, pos); + await sleep(scrollDelay * 1000); + totalExpanded += await expandCollapsed(); + const nh = document.documentElement.scrollHeight; + if (nh > totalHeight) totalHeight = nh; + steps++; + } + window.scrollTo(0, document.documentElement.scrollHeight); + return { + recyclingDetected: false, + scrollSteps: steps, + removedButNotRecycled: removedCount + }; + }""", + { + "scrollDelay": scroll_delay, + "maxSteps": max_scroll_steps if max_scroll_steps else 0, + "memCap": 50000, + }, + ) + if result and result.get("recyclingDetected"): + if result.get("capReached"): + self.logger.warning( + message="DOM recycling detected — recovered {removed} elements, " + "{total} total after merge ({steps} scroll steps) " + "[MEMORY CAP REACHED — results may be incomplete]", + tag="PAGE_SCAN", + params={ + "removed": result.get("removedCount", 0), + "total": result.get("totalMerged", 0), + "steps": result.get("scrollSteps", 0), + }, + ) + else: + expanded = result.get("expandedGroups", 0) + extra = f", expanded {expanded} collapsed groups" if expanded else "" + self.logger.success( + message="DOM recycling 
detected — recovered {removed} elements, " + "{total} total after merge ({steps} scroll steps)" + + extra, + tag="PAGE_SCAN", + params={ + "removed": result.get("removedCount", 0), + "total": result.get("totalMerged", 0), + "steps": result.get("scrollSteps", 0), + }, + ) + else: + removed_not_recycled = result.get("removedButNotRecycled", 0) if result else 0 + if removed_not_recycled > 0: + self.logger.warning( + message="Full page scan completed but {count} DOM removals " + "were not confirmed as recycling. Content may be incomplete.", + tag="PAGE_SCAN", + params={"count": removed_not_recycled}, + ) + else: + self.logger.info( + message="Full page scan completed in {steps} scroll steps", + tag="PAGE_SCAN", + params={"steps": result.get("scrollSteps", 0) if result else 0}, + ) + + except (asyncio.CancelledError, KeyboardInterrupt): + raise except Exception as e: - self.logger.warning( - message="Failed to perform full page scan: {error}", + self.logger.error( + message="Full page scan failed: {error}. 
HTML will contain only " + "the first viewport of content.", tag="PAGE_SCAN", params={"error": str(e)}, ) - else: - # await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") - await self.safe_scroll(page, 0, total_height) + finally: + # Always restore the previous timeout, even on error + if prev_timeout is not None: + page.set_default_timeout(prev_timeout) + else: + page.set_default_timeout(30_000) async def _handle_virtual_scroll(self, page: Page, config: "VirtualScrollConfig"): """ @@ -1316,113 +2163,158 @@ async def _handle_virtual_scroll(self, page: Page, config: "VirtualScrollConfig" params={"selector": config.container_selector} ) - # JavaScript function to handle virtual scroll capture virtual_scroll_js = """ async (config) => { const container = document.querySelector(config.container_selector); if (!container) { throw new Error(`Container not found: ${config.container_selector}`); } - - // List to store HTML chunks when content is replaced + const htmlChunks = []; let previousHTML = container.innerHTML; + let previousChildCount = container.children.length; let scrollCount = 0; - + let consecutiveNoChange = 0; + const maxNoChange = config.max_no_change || 5; + const maxCaptured = config.max_captured_elements || 0; + let totalCapturedCount = 0; + let capReached = false; + // Determine scroll amount let scrollAmount; if (typeof config.scroll_by === 'number') { scrollAmount = config.scroll_by; } else if (config.scroll_by === 'page_height') { scrollAmount = window.innerHeight; - } else { // container_height + } else { scrollAmount = container.offsetHeight; } - - // Perform scrolling - while (scrollCount < config.scroll_count) { - // Scroll the container - container.scrollTop += scrollAmount; - - // Wait for content to potentially load + + let useWindowScroll = false; + const prevScrollTop = container.scrollTop; + container.scrollTop += 1; + if (container.scrollTop === prevScrollTop) { + useWindowScroll = true; + + } else { + container.scrollTop = 
prevScrollTop; + } + + function doScroll() { + if (useWindowScroll) { + window.scrollBy(0, scrollAmount); + } else { + container.scrollTop += scrollAmount; + } + } + + function isAtEnd() { + if (useWindowScroll) { + return window.scrollY + window.innerHeight >= document.documentElement.scrollHeight - 10; + } + return container.scrollTop + container.clientHeight >= container.scrollHeight - 10; + } + + function getElementFingerprint(el) { + try { + const attrId = el.getAttribute('data-id') + || el.getAttribute('data-index') + || el.getAttribute('data-key') + || el.getAttribute('data-testid') + || el.id; + if (attrId) return 'attr:' + attrId; + const txt = (el.innerText || '').trim(); + if (txt.length > 10) + return 'text:' + txt.toLowerCase().replace(/[\\s\\W]/g, '').substring(0, 200); + if (el.outerHTML && el.outerHTML.length > 0) + return 'html:' + el.outerHTML.length + ':' + el.outerHTML.substring(0, 120); + return null; + } catch(e) { return null; } + } + + function isContentAppended(prevHTML, currHTML, prevCount) { + try { + const currCount = container.children.length; + if (currCount <= prevCount) return false; + if (prevCount > 0 && currCount > prevCount && container.children[0]) { + const firstChild = container.children[0]; + const prefix = firstChild.outerHTML.substring(0, Math.min(100, firstChild.outerHTML.length)); + return prevHTML.startsWith(prefix); + } + return false; + } catch(e) { return false; } + } + + while (scrollCount < config.scroll_count && !capReached) { + doScroll(); await new Promise(resolve => setTimeout(resolve, config.wait_after_scroll * 1000)); - - // Get current HTML + const currentHTML = container.innerHTML; - - // Determine what changed + if (currentHTML === previousHTML) { - // Case 0: No change - continue scrolling - console.log(`Scroll ${scrollCount + 1}: No change in content`); - } else if (currentHTML.startsWith(previousHTML)) { - // Case 1: New items appended - content already in page - console.log(`Scroll ${scrollCount + 1}: 
New items appended`); + consecutiveNoChange++; + if (maxNoChange > 0 && consecutiveNoChange >= maxNoChange) { + break; + } + } else if (isContentAppended(previousHTML, currentHTML, previousChildCount)) { + consecutiveNoChange = 0; } else { - // Case 2: Items replaced - capture the previous HTML - console.log(`Scroll ${scrollCount + 1}: Content replaced, capturing chunk`); htmlChunks.push(previousHTML); + totalCapturedCount += container.children.length; + consecutiveNoChange = 0; + if (maxCaptured > 0 && totalCapturedCount >= maxCaptured) { + capReached = true; + } } - - // Update previous HTML for next iteration + previousHTML = currentHTML; + previousChildCount = container.children.length; scrollCount++; - - // Check if we've reached the end - if (container.scrollTop + container.clientHeight >= container.scrollHeight - 10) { - console.log(`Reached end of scrollable content at scroll ${scrollCount}`); - // Capture final chunk if content was replaced - if (htmlChunks.length > 0) { - htmlChunks.push(currentHTML); - } - break; - } + + if (isAtEnd()) { break; } } - - // If we have chunks (case 2 occurred), merge them + + if (htmlChunks.length > 0) { + htmlChunks.push(previousHTML); + } + if (htmlChunks.length > 0) { - console.log(`Merging ${htmlChunks.length} HTML chunks`); - - // Parse all chunks to extract unique elements const tempDiv = document.createElement('div'); - const seenTexts = new Set(); + const seenFingerprints = new Set(); const uniqueElements = []; - - // Process each chunk + for (const chunk of htmlChunks) { tempDiv.innerHTML = chunk; - const elements = tempDiv.children; - - for (let i = 0; i < elements.length; i++) { - const element = elements[i]; - // Normalize text for deduplication - const normalizedText = element.innerText - .toLowerCase() - .replace(/[\\s\\W]/g, ''); // Remove spaces and symbols - - if (!seenTexts.has(normalizedText)) { - seenTexts.add(normalizedText); + const elements = Array.from(tempDiv.children); + for (const element of 
elements) { + const fp = getElementFingerprint(element); + if (fp && !seenFingerprints.has(fp)) { + seenFingerprints.add(fp); + uniqueElements.push(element.outerHTML); + } else if (!fp) { uniqueElements.push(element.outerHTML); } } } - - // Replace container content with merged unique elements + container.innerHTML = uniqueElements.join('\\n'); - console.log(`Merged ${uniqueElements.length} unique elements from ${htmlChunks.length} chunks`); - + return { success: true, chunksCount: htmlChunks.length, uniqueCount: uniqueElements.length, - replaced: true + replaced: true, + usedWindowScroll: useWindowScroll, + capReached: capReached }; } else { - console.log('No content replacement detected, all content remains in page'); return { success: true, chunksCount: 0, uniqueCount: 0, - replaced: false + replaced: false, + usedWindowScroll: useWindowScroll, + capReached: false }; } } @@ -1432,12 +2324,18 @@ async def _handle_virtual_scroll(self, page: Page, config: "VirtualScrollConfig" result = await self.adapter.evaluate(page, virtual_scroll_js, config.to_dict()) if result.get("replaced", False): + extra = "" + if result.get("usedWindowScroll"): + extra += " (window scroll fallback)" + if result.get("capReached"): + extra += " [memory cap reached]" self.logger.success( - message="Virtual scroll completed. Merged {unique} unique elements from {chunks} chunks", + message="Virtual scroll completed. 
Merged {unique} unique elements from {chunks} chunks{extra}", tag="VSCROLL", params={ "unique": result.get("uniqueCount", 0), - "chunks": result.get("chunksCount", 0) + "chunks": result.get("chunksCount", 0), + "extra": extra, } ) else: diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py index ade19aa11..2ddcc2b1b 100644 --- a/crawl4ai/content_scraping_strategy.py +++ b/crawl4ai/content_scraping_strategy.py @@ -562,6 +562,15 @@ def remove_empty_elements_fast(self, root, word_count_threshold=5): ): parent = el.getparent() if parent is not None: + # Preserve tail text before removal — tail belongs to + # the parent's content flow, not the removed element + tail = el.tail + if tail: + prev = el.getprevious() + if prev is not None: + prev.tail = (prev.tail or "") + tail + else: + parent.text = (parent.text or "") + tail parent.remove(el) return root diff --git a/test_virtual_scroll_compat.py b/test_virtual_scroll_compat.py new file mode 100644 index 000000000..9412bcdf2 --- /dev/null +++ b/test_virtual_scroll_compat.py @@ -0,0 +1,2117 @@ +""" +Comprehensive virtual scroll compatibility test suite. 
+ +Covers 13 distinct scroll/virtualisation patterns: + + Test 1 — Transform-based virtual scroll (50 items, translateY) + Test 2 — innerHTML-wipe virtual scroll (50 items, PR #1853 exact pattern) + Test 3 — Append-based infinite scroll (100 quotes, no DOM recycling) + Test 4 — Container-level virtual scroll (200 rows, overflow-y: scroll) + Test 5 — Transform-based stress test (1000 items) + Test 6 — Real site: quotes.toscrape.com/scroll + Test 7 — Variable row heights (80 items, non-uniform heights) + Test 8 — Horizontal virtual scroll (60 items, translateX) + Test 9 — 2D grid virtualisation (10x10 = 100 cells) + Test 10 — Multiple virtual containers on same page (40 + 30 items) + Test 11 — Nested virtual scroll (5 categories x 10 items) + Test 12 — Async/setTimeout-loaded items (50 items) + Test 13 — Small virtual section in large static page (60 items) + +Each local test is served from a self-contained HTML file via HTTPServer +on a unique port. All tests use JsonCssExtractionStrategy + scan_full_page=True. +""" + +from __future__ import annotations + +import asyncio +import json +import os +import tempfile +import threading +from http.server import HTTPServer, SimpleHTTPRequestHandler + +from crawl4ai import AsyncWebCrawler, BrowserConfig, CacheMode, CrawlerRunConfig +from crawl4ai.extraction_strategy import JsonCssExtractionStrategy + +# --------------------------------------------------------------------------- +# HTML fixtures +# --------------------------------------------------------------------------- + +# Test 1 — Transform-based virtual scroll +# Items are positioned with CSS transform: translateY(Npx). +# Container has an explicit style.height = TOTAL * ITEM_HEIGHT. +# Only ~10 items exist in the DOM at any time. +# On scroll the transform of each live node is updated (moved into / out of +# the visible band) — this mirrors how React/Next.js virtual lists work +# (e.g. skills.sh, Twitter feed, Tanstack Virtual). 
+TRANSFORM_SCROLL_HTML = """ + + + + + + +
+ + + +""" + +# Test 2 — innerHTML-wipe virtual scroll (PR #1853 exact pattern) +# Container.innerHTML = '' then new items are appended on every scroll. +# No transforms, no explicit height on the container itself. +# Body height is set to TOTAL * ITEM_HEIGHT to allow the window to scroll. +INNERHTML_WIPE_HTML = """ + + + + + + +
+ + + +""" + +# Test 3 — Append-based infinite scroll (no DOM recycling) +# Items are only ever appended; nothing is ever removed. +APPEND_SCROLL_HTML = """ + + + + + + +
+ + + +""" + +# Test 4 — Container-level virtual scroll (overflow-y: scroll on a div) +# The container element itself scrolls (not the window). +# Items inside use position: absolute + top offset — recycled on container scroll. +CONTAINER_SCROLL_HTML = """ + + + + + + +

Container Scroll (200 rows)

+
+
+
+ + + +""" + +# Test 8 — Horizontal virtual scroll (60 items, translateX) +# Container scrolls horizontally via overflow-x: scroll. +# Items are positioned with transform: translateX(Npx) and recycled +# from a pool of ~8 DOM nodes — same pattern as vertical virtual scroll +# but on the X axis. +HORIZONTAL_SCROLL_HTML = """ + + + + + + +

Horizontal Virtual Scroll (60 items)

+
+
+
+ + + +""" + +# Test 5 — Transform-based, 1000 items (stress test) +# Identical logic to Test 1 but scaled to 1000 items. +TRANSFORM_SCROLL_1000_HTML = TRANSFORM_SCROLL_HTML.replace( + "var TOTAL = 50;", + "var TOTAL = 1000;", +) + +# Test 7 — Variable Row Heights virtual scroll +# 80 items where each item N has height 40 + (N % 5) * 20 px (40-120px). +# Uses transform-based recycling (translateY) with a pool of ~15 DOM nodes. +# Container style.height is the SUM of all item heights. +# The scroll render uses cumulative-sum lookup to find visible items. +VARIABLE_ROW_HEIGHTS_HTML = """ + + + + + + +
+ + + +""" + +# Test 8 — 2D Grid Virtualisation +# A 10x10 grid (100 cells total) where only ~20 DOM nodes exist at any time. +# Both horizontal AND vertical scrolling is needed to reveal all cells. +# Each cell is position: absolute with left/top computed from col/row. +# On scroll, the pool is recycled for the visible 2D viewport region. +GRID_2D_SCROLL_HTML = """ + + + + + + +

2D Grid Virtual Scroll (10x10 = 100 cells)

+
+
+
+ + + +""" + +# Test 9 — WebSocket/Async-Loaded Items (setTimeout simulating async fetch) +# Items are appended in batches of 10 (50 total) via setTimeout when the +# user scrolls near the bottom. A loading spinner appears during the 300ms +# delay. Items are never recycled — this is an append pattern, but with an +# async gap that can trip up crawlers that check "at bottom" before the new +# content has arrived. +ASYNC_LOADED_HTML = """ + + + + + + +
+
Loading...
+ + + +""" + +# Test 10 — Nested Virtual Scroll (outer vertical + inner horizontal) +# OUTER: 5 categories recycled vertically via window scroll (each 200px tall). +# INNER: Each visible category contains a HORIZONTAL scrollable list of 10 items, +# recycled horizontally via overflow-x: scroll. +# Total: 5 category links + 50 inner item links = 55 unique links. +NESTED_VIRTUAL_SCROLL_HTML = """ + + + + + + +
+ + + +""" + + +# --------------------------------------------------------------------------- +# Server helper +# --------------------------------------------------------------------------- + +def start_server(html_dir: str, port: int) -> HTTPServer: + """Start a simple HTTP server in a daemon thread.""" + + class _Handler(SimpleHTTPRequestHandler): + def __init__(self, *args, **kwargs): + super().__init__(*args, directory=html_dir, **kwargs) + + def log_message(self, fmt, *args): # silence access log + pass + + server = HTTPServer(("127.0.0.1", port), _Handler) + t = threading.Thread(target=server.serve_forever, daemon=True) + t.start() + return server + + +# --------------------------------------------------------------------------- +# Individual test coroutines +# --------------------------------------------------------------------------- + +async def test_transform_virtual_scroll() -> bool: + """ + Test 1: Transform-based virtual scroll — 50 items. + + Items use CSS transform: translateY(Npx). The container has an explicit + style.height. Only ~10 DOM nodes exist at a time; on scroll the pool is + recycled by updating each node's transform. This is how React/Next.js + virtual lists work (skills.sh, Twitter, Tanstack Virtual). + + Fingerprint: each item has a unique link. + Expected: capture all 50 items. 
+ """ + print("=" * 70) + print("TEST 1: Transform-based virtual scroll — 50 items (translateY)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(TRANSFORM_SCROLL_HTML) + server = start_server(tmpdir, 9741) + try: + schema = { + "name": "Items", + "baseSelector": ".item", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "link", "selector": ".meta a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.3, + ) + result = await crawler.arun(url="http://127.0.0.1:9741/index.html", config=cfg) + + data = json.loads(result.extracted_content) + # Deduplicate by unique href (/item/1 … /item/50) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/item/{i}" for i in range(1, 51)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/50") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 45 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_innerhtml_wipe_virtual_scroll() -> bool: + """ + Test 2: innerHTML-wipe virtual scroll — 50 items (PR #1853 exact pattern). + + On every scroll event the container's innerHTML is cleared and freshly + rendered items are appended. No transforms, no explicit container height. + Body height is set to TOTAL * ITEM_HEIGHT so the window can scroll. + + Fingerprint: each item has a unique link. + Expected: capture all 50 items. 
+ """ + print("\n" + "=" * 70) + print("TEST 2: innerHTML-wipe virtual scroll — 50 items (PR #1853 pattern)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(INNERHTML_WIPE_HTML) + server = start_server(tmpdir, 9742) + try: + schema = { + "name": "Users", + "baseSelector": "[data-testid='UserCell']", + "fields": [ + {"name": "name", "selector": ".name", "type": "text"}, + {"name": "handle", "selector": ".handle", "type": "text"}, + {"name": "link", "selector": ".handle a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.2, + ) + result = await crawler.arun(url="http://127.0.0.1:9742/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/profile/{i}" for i in range(1, 51)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/50") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 45 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_append_infinite_scroll() -> bool: + """ + Test 3: Append-based infinite scroll — 100 quotes. + + Items are only ever appended to the DOM; nothing is ever removed. + This is the classic infinite scroll pattern (no virtualisation at all). + This test must not regress — crawl4ai has always handled this correctly. + + Fingerprint: each quote has a unique link. + Expected: capture all 100 items. 
+ """ + print("\n" + "=" * 70) + print("TEST 3: Append-based infinite scroll — 100 quotes (regression guard)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(APPEND_SCROLL_HTML) + server = start_server(tmpdir, 9743) + try: + schema = { + "name": "Quotes", + "baseSelector": ".quote", + "fields": [ + {"name": "text", "selector": ".text", "type": "text"}, + {"name": "author", "selector": ".author", "type": "text"}, + {"name": "link", "selector": ".author a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.2, + ) + result = await crawler.arun(url="http://127.0.0.1:9743/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/author/{i}" for i in range(1, 101)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/100") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 90 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_container_scroll() -> bool: + """ + Test 4: Container-level virtual scroll — 200 rows. + + The scrolling happens on a fixed-height div (overflow-y: scroll), not on + the window. Inside the container a tall inner wrapper provides scroll + height; rows use position: absolute + top offset and are recycled on + container scroll events. + + Fingerprint: each row has a unique link. + Expected: capture all 200 rows. 
+ """ + print("\n" + "=" * 70) + print("TEST 4: Container-level virtual scroll — 200 rows (overflow-y: scroll)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(CONTAINER_SCROLL_HTML) + server = start_server(tmpdir, 9744) + try: + schema = { + "name": "Rows", + "baseSelector": ".row", + "fields": [ + {"name": "label", "selector": "", "type": "text"}, + {"name": "link", "selector": "a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.1, + ) + result = await crawler.arun(url="http://127.0.0.1:9744/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/row/{i}" for i in range(1, 201)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/200") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 180 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_transform_stress_1000() -> bool: + """ + Test 5: Transform-based virtual scroll — 1000 items (stress test). + + Same DOM-recycling / translateY mechanism as Test 1 but scaled to 1000 + items. Validates that the crawler's snapshot-and-deduplicate strategy + holds up under a large item count without running out of memory or + missing large swathes of the list. + + Fingerprint: each item has a unique link. + Expected: capture all 1000 items. 
+ """ + print("\n" + "=" * 70) + print("TEST 5: Transform-based virtual scroll — 1000 items (stress test)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(TRANSFORM_SCROLL_1000_HTML) + server = start_server(tmpdir, 9745) + try: + schema = { + "name": "Items", + "baseSelector": ".item", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "link", "selector": ".meta a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.05, + ) + result = await crawler.arun(url="http://127.0.0.1:9745/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/item/{i}" for i in range(1, 1001)} + missing_count = len(expected - unique_links) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/1000") + print(f" Missing : {missing_count}") + passed = len(unique_links) >= 950 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_real_site_quotes() -> bool: + """ + Test 6: Real site — quotes.toscrape.com/scroll. + + Append-based infinite scroll on a live public site. This validates that + real-world behaviour matches what the synthetic Test 3 exercises. + + Expected: capture all 100 quotes (or >=90 to allow for network variance). 
+ """ + print("\n" + "=" * 70) + print("TEST 6: Real site — quotes.toscrape.com/scroll") + print("=" * 70) + + schema = { + "name": "Quotes", + "baseSelector": ".quote", + "fields": [ + {"name": "text", "selector": ".text", "type": "text"}, + {"name": "author", "selector": ".author", "type": "text"}, + ], + } + + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.5, + ) + result = await crawler.arun(url="https://quotes.toscrape.com/scroll", config=cfg) + + data = json.loads(result.extracted_content) + unique = {d["text"]: d for d in data if d.get("text")} + + print(f" Raw extracted : {len(data)}") + print(f" Unique quotes : {len(unique)}/100") + passed = len(unique) >= 90 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + + +async def test_variable_row_heights() -> bool: + """ + Test 7: Variable Row Heights virtual scroll -- 80 items. + + Items use CSS transform: translateY(Npx) but each item has a DIFFERENT + height (40 + (N % 5) * 20 px, ranging from 40px to 120px). Container + style.height is the sum of all item heights. The scroll render uses a + cumulative-sum / binary-search approach to find visible items. Pool of + ~15 DOM nodes recycled on window scroll. + + This tests whether _handle_full_page_scan works when itemHeight is NOT + uniform -- the Phase 4 scroll step calculation must not skip items. + + Fingerprint: each item has a unique link. + Expected: capture >=72 of 80 items (90%). 
+ """ + print("\n" + "=" * 70) + print("TEST 7: Variable Row Heights virtual scroll -- 80 items") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(VARIABLE_ROW_HEIGHTS_HTML) + server = start_server(tmpdir, 9751) + try: + schema = { + "name": "Items", + "baseSelector": ".item", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "link", "selector": ".meta a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.3, + ) + result = await crawler.arun(url="http://127.0.0.1:9751/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/varh/{i}" for i in range(1, 81)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/80") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 72 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_horizontal_virtual_scroll() -> bool: + """ + Test 8: Horizontal virtual scroll — 60 items (translateX). + + Container scrolls horizontally (overflow-x: scroll) with items positioned + via transform: translateX(Npx). Pool of ~8 DOM nodes recycled on + horizontal scroll. This tests that _handle_full_page_scan detects and + scrolls horizontal virtual scroll containers, not just vertical ones. + + Fingerprint: each item has a unique link. + Expected: capture >=54 of 60 items (90%). 
+ """ + print("\n" + "=" * 70) + print("TEST 8: Horizontal virtual scroll — 60 items (translateX)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(HORIZONTAL_SCROLL_HTML) + server = start_server(tmpdir, 9752) + try: + schema = { + "name": "Cards", + "baseSelector": ".hitem", + "fields": [ + {"name": "title", "selector": ".card-title", "type": "text"}, + {"name": "link", "selector": ".card-link a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.3, + ) + result = await crawler.arun(url="http://127.0.0.1:9752/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/hscroll/{i}" for i in range(1, 61)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/60") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 54 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_2d_grid_virtual_scroll() -> bool: + """ + Test 9: 2D Grid Virtualisation — 10x10 = 100 cells. + + A grid container scrolls both horizontally AND vertically. Only ~20 + DOM nodes exist at any time; they are recycled as the user scrolls in + either direction. Each cell uses position: absolute with left/top + calculated from its column/row. 
+ + This tests whether _handle_full_page_scan can handle containers where + scrollWidth > clientWidth AND scrollHeight > clientHeight — it needs + to scroll in a zigzag pattern to visit all 2D regions. + + Fingerprint: each cell has a unique link. + Expected: capture >=90 of 100 cells (90%). + """ + print("\n" + "=" * 70) + print("TEST 9: 2D Grid Virtualisation — 10x10 = 100 cells") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(GRID_2D_SCROLL_HTML) + server = start_server(tmpdir, 9753) + try: + schema = { + "name": "Cells", + "baseSelector": ".cell", + "fields": [ + {"name": "label", "selector": "strong", "type": "text"}, + {"name": "link", "selector": "a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.3, + ) + result = await crawler.arun(url="http://127.0.0.1:9753/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/cell/{r}-{c}" for r in range(10) for c in range(10)} + missing = sorted(expected - unique_links) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/100") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 90 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_async_loaded_items() -> bool: + """ + Test 12: WebSocket/Async-Loaded Items — 50 items. + + Simulates async-loaded content (like a chat feed or real-time dashboard). + 50 items total, loaded in batches of 10 via setTimeout with a 300ms delay.
+ On scroll near the bottom, a loading spinner appears, then after the delay + new items are appended. Items are NOT recycled — they accumulate. + + The async delay is the key challenge: the crawler may detect "at bottom" + before the new batch has been appended by setTimeout, causing early exit. + + Fingerprint: each item has a unique link. + Expected: capture >=45 of 50 items (90%). + """ + print("\n" + "=" * 70) + print("TEST 12: WebSocket/Async-Loaded Items — 50 items (setTimeout)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(ASYNC_LOADED_HTML) + server = start_server(tmpdir, 9756) + try: + schema = { + "name": "FeedItems", + "baseSelector": ".feed-item", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "link", "selector": ".link a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.5, + ) + result = await crawler.arun(url="http://127.0.0.1:9756/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/async/{i}" for i in range(1, 51)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/50") + if missing: + show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links) >= 45 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +async def test_nested_virtual_scroll() -> bool: + """ + Test 11: Nested Virtual Scroll — 5 categories x 10 items = 55 links.
+ + OUTER: 5 categories recycled vertically via window scroll (translateY). + Each category div has a link . + INNER: Each visible category contains a HORIZONTAL scrollable list of + 10 items, recycled horizontally via overflow-x: scroll with position + absolute + left offset. Each item has . + + Total: 5 category links + 50 inner item links = 55 unique links. + This tests nested scroll-within-scroll: the outer vertical scroll + recycles categories, while each category's inner horizontal scroll + recycles items. + + Expected: capture >=45 of 55 total unique links. + """ + print("\n" + "=" * 70) + print("TEST 11: Nested Virtual Scroll — 5 cats x 10 items = 55 links") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(NESTED_VIRTUAL_SCROLL_HTML) + server = start_server(tmpdir, 9755) + try: + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + scan_full_page=True, + scroll_delay=0.3, + ) + result = await crawler.arun(url="http://127.0.0.1:9755/index.html", config=cfg) + + # Extract links from result.links (internal) and also + # scan the raw HTML for /cat/ hrefs as a fallback. 
+ import re + unique_links = set() + # From result.links + if hasattr(result, "links") and result.links: + for link in result.links.get("internal", []): + href = link.get("href", "") + # Normalise: strip origin, keep path + if "/cat/" in href: + path = "/" + href.split("/cat/", 1)[1] + unique_links.add("/cat/" + path.lstrip("/")) + # Also scan raw HTML for any /cat/ hrefs + for m in re.findall(r'href="(/cat/[^"]+)"', result.html or ""): + if m.startswith("/cat/"): + unique_links.add(m) + + # Expected links + cat_links = {f"/cat/{i}" for i in range(1, 6)} + item_links = {f"/cat/{c}/item/{m}" for c in range(1, 6) for m in range(1, 11)} + expected = cat_links | item_links # 55 total + + found_cats = cat_links & unique_links + found_items = item_links & unique_links + missing = sorted(expected - unique_links) + + print(f" Unique links : {len(unique_links & expected)}/55") + print(f" Cat links : {len(found_cats)}/5") + print(f" Item links : {len(found_items)}/50") + if missing: + show = missing[:15] + tail = "..." if len(missing) > 15 else "" + print(f" Missing : {show}{tail}") + passed = len(unique_links & expected) >= 45 + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +# Test 10 — Multiple virtual containers on same page +# TWO independent pool-based virtual scroll containers side-by-side. +# Container A (#scroller-a): 40 items, height 400px, overflow-y: scroll +# Container B (#scroller-b): 30 items, height 300px, overflow-y: scroll +# Both use position: absolute + pool recycling independently. +MULTIPLE_CONTAINERS_HTML = """ + + + + + + +

Multiple Virtual Containers

+
+
+

List A (40 items)

+

Product catalog items

+
+
+
+
+ +
+

Dashboard

+

This is static content between two virtual scroll lists.

+

Both lists scroll independently with their own recycling pools.

+
+ +
+

List B (30 items)

+

Recent activity feed

+
+
+
+
+
+ + + +""" + + +async def test_multiple_virtual_containers() -> bool: + """ + Test 9: Multiple virtual containers on same page. + + TWO independent pool-based virtual scroll containers side-by-side. + Container A (#scroller-a): 40 items, height 400px, overflow-y: scroll + Container B (#scroller-b): 30 items, height 300px, overflow-y: scroll + Both use position: absolute + pool recycling independently. + + Fingerprint: /list-a/N links in container A, /list-b/N links in container B. + Expected: capture >=36 items from list-a AND >=27 items from list-b (90% each). + """ + print("\n" + "=" * 70) + print("TEST 9: Multiple virtual containers -- 40 + 30 items (side by side)") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(MULTIPLE_CONTAINERS_HTML) + server = start_server(tmpdir, 9754) + try: + schema = { + "name": "Entries", + "baseSelector": ".entry", + "fields": [ + {"name": "label", "selector": "", "type": "text"}, + {"name": "link", "selector": "a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.1, + ) + result = await crawler.arun(url="http://127.0.0.1:9754/index.html", config=cfg) + + data = json.loads(result.extracted_content) + + # Separate links by container + links_a = {d["link"] for d in data if d.get("link") and d["link"].startswith("/list-a/")} + links_b = {d["link"] for d in data if d.get("link") and d["link"].startswith("/list-b/")} + expected_a = {f"/list-a/{i}" for i in range(1, 41)} + expected_b = {f"/list-b/{i}" for i in range(1, 31)} + missing_a = sorted(expected_a - links_a, key=lambda s: int(s.split("/")[-1])) + missing_b = sorted(expected_b - links_b, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : 
{len(data)}") + print(f" List A by href : {len(links_a)}/40") + if missing_a: + show = missing_a[:10] + tail = "..." if len(missing_a) > 10 else "" + print(f" List A missing : {show}{tail}") + print(f" List B by href : {len(links_b)}/30") + if missing_b: + show = missing_b[:10] + tail = "..." if len(missing_b) > 10 else "" + print(f" List B missing : {show}{tail}") + + passed_a = len(links_a) >= 36 + passed_b = len(links_b) >= 27 + passed = passed_a and passed_b + print(f" List A : {'PASS' if passed_a else 'FAIL'} (>=36)") + print(f" List B : {'PASS' if passed_b else 'FAIL'} (>=27)") + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +# Test — Large page with small virtual scroll section in the middle +# A ~2000px+ page of static content with a 400px overflow-y:scroll container +# embedded in the middle. The container has 60 virtual items (pool of ~12 DOM +# nodes, position: absolute, recycled on container scroll). +SMALL_VIRTUAL_IN_LARGE_PAGE_HTML = """ + + + + + + + +
+ +
+

Welcome to Our Platform

+

This is a large page with a virtual scroll widget embedded in the + middle. The scanner must handle both the static content and the virtual + scroll container to capture everything.

+
+ +
+

This is static content that appears ABOVE the virtual scroll + section. It contains important information that must be captured by the crawler. + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor + incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis + nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

+
+ +
+

Data Feed

+
+
+
+
+ + + +
+

Final section of static content at the very bottom of the page. + This must also be captured by the scanner. The page total height exceeds 2000px + with the virtual scroll widget in the middle.

+
+ + + + +""" + + +async def test_small_virtual_in_large_page() -> bool: + """ + Test: Large page with small virtual scroll section in the middle. + + A 2000px+ page of static content (header, hero, text, footer) with a + 400px overflow-y:scroll container embedded in the middle. The container + holds 60 virtual items (pool of ~12 DOM nodes, position: absolute, + recycled on container scroll). + + The scanner must: + 1. Scroll the page to reach the container (it is ~1000px down) + 2. Scroll the container to capture all 60 items + 3. Continue scrolling the page to capture static content below + + Fingerprint: each item has a unique link. + Expected: capture >=54 of 60 items (90%) from the virtual section. + Also verify that static content above and below is in result.html. + """ + print("\n" + "=" * 70) + print("TEST 13: Large page with small virtual scroll in the middle — 60 items") + print("=" * 70) + + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "index.html"), "w") as fh: + fh.write(SMALL_VIRTUAL_IN_LARGE_PAGE_HTML) + server = start_server(tmpdir, 9757) + try: + schema = { + "name": "MidItems", + "baseSelector": ".vitem", + "fields": [ + {"name": "label", "selector": "", "type": "text"}, + {"name": "link", "selector": "a", "type": "attribute", "attribute": "href"}, + ], + } + async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler: + cfg = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + extraction_strategy=JsonCssExtractionStrategy(schema), + scan_full_page=True, + scroll_delay=0.2, + ) + result = await crawler.arun(url="http://127.0.0.1:9757/index.html", config=cfg) + + data = json.loads(result.extracted_content) + unique_links = {d["link"] for d in data if d.get("link")} + expected = {f"/mid/{i}" for i in range(1, 61)} + missing = sorted(expected - unique_links, key=lambda s: int(s.split("/")[-1])) + + print(f" Raw extracted : {len(data)}") + print(f" Unique by href: {len(unique_links)}/60") + if missing: + 
show = missing[:10] + tail = "..." if len(missing) > 10 else "" + print(f" Missing : {show}{tail}") + + # Check static content above/below is present in result.html + html = result.html or "" + has_hero = "hero-title" in html or "Welcome to Our Platform" in html + has_above = "above-text" in html or "ABOVE the virtual scroll" in html + has_footer = "footer-text" in html or "BELOW the virtual scroll" in html + has_below = "below-text" in html or "very bottom of the page" in html + has_nav = "nav-home" in html or "/nav/home" in html + + print(f" Static content checks:") + print(f" Navigation : {'OK' if has_nav else 'MISSING'}") + print(f" Hero : {'OK' if has_hero else 'MISSING'}") + print(f" Above text : {'OK' if has_above else 'MISSING'}") + print(f" Footer : {'OK' if has_footer else 'MISSING'}") + print(f" Below text : {'OK' if has_below else 'MISSING'}") + + static_ok = has_hero and has_above and has_footer and has_below and has_nav + items_ok = len(unique_links) >= 54 # 90% of 60 + passed = items_ok and static_ok + + if not items_ok: + print(f" FAIL: Only captured {len(unique_links)}/60 items (need >=54)") + if not static_ok: + print(f" FAIL: Some static content is missing from result.html") + + print(f" Result : {'PASS' if passed else 'FAIL'}") + return passed + finally: + server.shutdown() + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +async def main() -> None: + results: dict[str, bool] = {} + + results["Test 1 — Transform virtual scroll (50 items)"] = await test_transform_virtual_scroll() + results["Test 2 — innerHTML-wipe virtual scroll (50 items)"]= await test_innerhtml_wipe_virtual_scroll() + results["Test 3 — Append infinite scroll (100 quotes)"] = await test_append_infinite_scroll() + results["Test 4 — Container scroll (200 rows)"] = await test_container_scroll() + results["Test 5 — Transform stress test (1000 items)"] = 
await test_transform_stress_1000() + results["Test 6 — Real site: quotes.toscrape.com"] = await test_real_site_quotes() + results["Test 7 — Variable Row Heights (80 items)"] = await test_variable_row_heights() + results["Test 8 — Horizontal virtual scroll (60 items)"] = await test_horizontal_virtual_scroll() + results["Test 9 — 2D grid virtualisation (100 cells)"] = await test_2d_grid_virtual_scroll() + results["Test 10 — Multiple virtual containers (40+30)"] = await test_multiple_virtual_containers() + results["Test 11 — Nested virtual scroll (55 links)"] = await test_nested_virtual_scroll() + results["Test 12 — Async-loaded items (50 items)"] = await test_async_loaded_items() + results["Test 13 — Small virtual in large page (60 items)"] = await test_small_virtual_in_large_page() + + print("\n" + "=" * 70) + print("SUMMARY") + print("=" * 70) + for name, passed in results.items(): + tag = "PASS" if passed else "FAIL" + print(f" [{tag}] {name}") + print("=" * 70) + + total = sum(results.values()) + print(f"\n {total}/{len(results)} tests passed\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/test_virtual_scroll.py b/tests/test_virtual_scroll.py index 1e7a7890e..31e3d88d7 100644 --- a/tests/test_virtual_scroll.py +++ b/tests/test_virtual_scroll.py @@ -1,197 +1,513 @@ """ -Test virtual scroll implementation according to the design: -- Create a page with virtual scroll that replaces content -- Verify all 1000 items are captured +Tests for virtual scroll and progressive full-page scan. 
+ +Covers: +- VirtualScrollConfig: container DOM recycling, dedup, early termination, memory cap +- scan_full_page: window-level DOM recycling (issue #731), lazy-load backward compat +- VirtualScrollConfig window.scrollBy fallback +- Config serialization, from_dict forward-compat, error handling """ import asyncio +import re +import socket +import tempfile +import threading +from functools import partial +import http.server import os -from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, VirtualScrollConfig, CacheMode, BrowserConfig - -async def test_virtual_scroll(): - """Test virtual scroll with content replacement (true virtual scroll)""" - - # Create test HTML with true virtual scroll that replaces content - test_html = ''' - - - - - -

Virtual Scroll Test - 1000 Items

-
- - - - ''' - - # Save test HTML to a file - import tempfile - - with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f: - f.write(test_html) - test_file_path = f.name - - httpd = None - old_cwd = os.getcwd() - + +import pytest + +from crawl4ai import ( + AsyncWebCrawler, + BrowserConfig, + CacheMode, + CrawlerRunConfig, + VirtualScrollConfig, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +def _find_free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + return s.getsockname()[1] + + +@pytest.fixture(scope="module") +def browser_config(): + return BrowserConfig(headless=True) + + +class _TestServer: + """Lightweight HTTP server. Uses directory= to avoid os.chdir.""" + + def __init__(self, html: str): + self._tmpdir = tempfile.mkdtemp() + self._filepath = os.path.join(self._tmpdir, "page.html") + with open(self._filepath, "w") as f: + f.write(html) + self.port = _find_free_port() + handler = partial(http.server.SimpleHTTPRequestHandler, directory=self._tmpdir) + self._httpd = http.server.HTTPServer(("127.0.0.1", self.port), handler) + self._thread = threading.Thread(target=self._httpd.serve_forever, daemon=True) + self._thread.start() + self.url = f"http://127.0.0.1:{self.port}/page.html" + + def shutdown(self): + self._httpd.shutdown() + os.unlink(self._filepath) + os.rmdir(self._tmpdir) + + +# --------------------------------------------------------------------------- +# HTML templates — container-level virtual scroll (uses .format()) +# --------------------------------------------------------------------------- + +CONTAINER_VSCROLL_HTML = """ + +
+ +""" + +# --------------------------------------------------------------------------- +# HTML templates — static strings (no .format()) +# --------------------------------------------------------------------------- + +NO_ATTR_VSCROLL_HTML = """ + +
+ +""" + +SAME_TEXT_VSCROLL_HTML = """ + +
+ +""" + +STATIC_HTML = """ + +
+
Static item 1
+
Static item 2
+
Static item 3
+
+ +""" + +WINDOW_RECYCLE_HTML = """ + +

Feed

+
+ +""" + +LAZY_LOAD_HTML = """ + +
+ +""" + + +# --------------------------------------------------------------------------- +# VirtualScrollConfig tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_vscroll_captures_all_items(browser_config): + """100 items, 10 per page, DOM recycling — all must be captured.""" + server = _TestServer(CONTAINER_VSCROLL_HTML.format(total=100, per_page=10)) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#container", + scroll_count=15, + scroll_by="container_height", + wait_after_scroll=0.15, + ), + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert indices == set(range(100)), f"Missing: {set(range(100)) - indices}" + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def test_vscroll_final_chunk_not_lost(browser_config): + """scroll_count exhausted before bottom — last chunk must still be captured.""" + server = _TestServer(CONTAINER_VSCROLL_HTML.format(total=200, per_page=10)) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#container", + scroll_count=5, + scroll_by="container_height", + wait_after_scroll=0.15, + max_no_change=0, + ), + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = sorted(set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html))) + assert len(indices) > 10, f"Only {len(indices)} items — final chunk likely lost" + assert set(range(max(indices) + 1)) == set(indices), "Gaps in captured range" + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def 
test_vscroll_early_termination(browser_config): + """Static content with high scroll_count — must stop early via max_no_change.""" + server = _TestServer(STATIC_HTML) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#container", + scroll_count=50, + scroll_by=100, + wait_after_scroll=0.05, + max_no_change=3, + ), + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert indices == {0, 1, 2} + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def test_vscroll_text_dedup_no_attributes(browser_config): + """Elements with no data-id/id — text-based dedup must capture unique profiles.""" + server = _TestServer(NO_ATTR_VSCROLL_HTML) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#container", + scroll_count=15, + scroll_by="container_height", + wait_after_scroll=0.15, + ), + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + profiles = set(int(p) for p in re.findall(r"Profile (\d+) joined", result.html)) + assert len(profiles) >= 30, f"Only {len(profiles)}/50 profiles captured" + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def test_vscroll_attr_dedup_same_text(browser_config): + """Items with identical text but different data-index — all must survive.""" + server = _TestServer(SAME_TEXT_VSCROLL_HTML) try: - # Start a simple HTTP server - import http.server - import socketserver - import threading - import random - - # Find available port - for _ in range(10): - PORT = random.randint(8000, 9999) - try: - Handler = http.server.SimpleHTTPRequestHandler - 
os.chdir(os.path.dirname(test_file_path)) - httpd = socketserver.TCPServer(("", PORT), Handler) - break - except OSError: - continue - - if httpd is None: - raise RuntimeError("Could not find available port") - - server_thread = threading.Thread(target=httpd.serve_forever) - server_thread.daemon = True - server_thread.start() - - # Give server time to start - await asyncio.sleep(0.5) - - # Configure virtual scroll - # With 10 items per page and 1000 total, we need 100 pages - # Let's do 120 scrolls to ensure we get everything - virtual_config = VirtualScrollConfig( - container_selector="#container", - scroll_count=120, - scroll_by="container_height", # Scroll by container height - wait_after_scroll=0.1 # Quick wait for test + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#container", + scroll_count=10, + scroll_by="container_height", + wait_after_scroll=0.15, + ), + cache_mode=CacheMode.BYPASS, ) - + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert len(indices) >= 18, f"Only {len(indices)}/20 survived dedup" + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def test_vscroll_window_fallback(browser_config): + """Container scrollTop has no effect — must fall back to window.scrollBy.""" + server = _TestServer(WINDOW_RECYCLE_HTML) + try: + await asyncio.sleep(0.3) config = CrawlerRunConfig( - virtual_scroll_config=virtual_config, + virtual_scroll_config=VirtualScrollConfig( + container_selector="#feed", + scroll_count=20, + scroll_by="page_height", + wait_after_scroll=0.15, + ), cache_mode=CacheMode.BYPASS, - verbose=True ) - - browserConfig = BrowserConfig( - headless= False + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = 
set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert len(indices) >= 80, f"Only {len(indices)}/100 — window fallback may have failed" + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def test_vscroll_memory_cap(browser_config): + """max_captured_elements prevents unbounded accumulation.""" + server = _TestServer(CONTAINER_VSCROLL_HTML.format(total=500, per_page=10)) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#container", + scroll_count=60, + scroll_by="container_height", + wait_after_scroll=0.1, + max_captured_elements=50, + ), + cache_mode=CacheMode.BYPASS, ) - - async with AsyncWebCrawler(verbose=True, config=browserConfig) as crawler: - result = await crawler.arun( - url=f"http://localhost:{PORT}/{os.path.basename(test_file_path)}", - config=config - ) - - # Count all items in the result - import re - items = re.findall(r'data-index="(\d+)"', result.html) - unique_indices = sorted(set(int(idx) for idx in items)) - - print(f"\n{'='*60}") - print(f"TEST RESULTS:") - print(f"HTML Length: {len(result.html)}") - print(f"Total items found: {len(items)}") - print(f"Unique items: {len(unique_indices)}") - - if unique_indices: - print(f"Item indices: {min(unique_indices)} to {max(unique_indices)}") - print(f"Expected: 0 to 999") - - # Check for gaps - expected = set(range(1000)) - actual = set(unique_indices) - missing = expected - actual - - if missing: - print(f"\n❌ FAILED! Missing {len(missing)} items") - print(f"Missing indices: {sorted(missing)[:10]}{'...' if len(missing) > 10 else ''}") - else: - print(f"\n✅ SUCCESS! All 1000 items captured!") - - # Show some sample items - print(f"\nSample items from result:") - sample_items = re.findall(r'
]*>([^<]+)
', result.html)[:5] - for item in sample_items: - print(f" - {item}") - - print(f"{'='*60}\n") - + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert 20 <= len(indices) <= 150, f"Cap didn't work: {len(indices)} items" + finally: + server.shutdown() + + +# --------------------------------------------------------------------------- +# scan_full_page tests (issue #731) +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_scan_full_page_window_recycling(browser_config): + """Issue #731: scan_full_page=True on window-level DOM recycling page.""" + server = _TestServer(WINDOW_RECYCLE_HTML) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + scan_full_page=True, + scroll_delay=0.15, + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert len(indices) >= 90, f"Only {len(indices)}/100 captured with scan_full_page" + finally: + server.shutdown() + + +@pytest.mark.asyncio +async def test_scan_full_page_lazy_load(browser_config): + """Backward compat: lazy-load page (no recycling) still works.""" + server = _TestServer(LAZY_LOAD_HTML) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + scan_full_page=True, + scroll_delay=0.2, + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + indices = set(int(m) for m in re.findall(r'data-index="(\d+)"', result.html)) + assert len(indices) >= 40, f"Only {len(indices)}/50 — lazy load regression" + finally: + server.shutdown() + + +# 
--------------------------------------------------------------------------- +# Config unit tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_config_serialization(): + """VirtualScrollConfig round-trips through to_dict/from_dict.""" + cfg = VirtualScrollConfig( + container_selector="#feed", + scroll_count=20, + scroll_by=300, + wait_after_scroll=0.8, + max_no_change=7, + max_captured_elements=5000, + ) + d = cfg.to_dict() + assert d["max_no_change"] == 7 + assert d["max_captured_elements"] == 5000 + + restored = VirtualScrollConfig.from_dict(d) + assert restored.max_no_change == 7 + assert restored.scroll_by == 300 + + +@pytest.mark.asyncio +async def test_config_from_dict_ignores_unknown_keys(): + """from_dict must not crash on keys from a newer config version.""" + d = { + "container_selector": "#x", + "scroll_count": 5, + "unknown_future_field": 42, + "another_new_thing": True, + } + cfg = VirtualScrollConfig.from_dict(d) + assert cfg.container_selector == "#x" + assert cfg.scroll_count == 5 + assert cfg.max_no_change == 5 # default + + +@pytest.mark.asyncio +async def test_vscroll_container_not_found(browser_config): + """Wrong container selector — crawl must complete without crashing.""" + server = _TestServer(STATIC_HTML) + try: + await asyncio.sleep(0.3) + config = CrawlerRunConfig( + virtual_scroll_config=VirtualScrollConfig( + container_selector="#nonexistent", + scroll_count=3, + scroll_by=100, + wait_after_scroll=0.05, + ), + cache_mode=CacheMode.BYPASS, + ) + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun(url=server.url, config=config) + + assert result.html is not None, "Crawl should return HTML even if vscroll fails" + assert len(result.html) > 0 finally: - # Clean up - if httpd: - httpd.shutdown() - os.chdir(old_cwd) - os.unlink(test_file_path) - -if __name__ == "__main__": - asyncio.run(test_virtual_scroll()) \ No newline at end of 
file + server.shutdown()