From bd099a4a1ffaf343f43489bd0bd17506e8fee999 Mon Sep 17 00:00:00 2001 From: hafezparast Date: Tue, 24 Mar 2026 00:45:19 +0800 Subject: [PATCH] fix: arun_many respects CrawlerRunConfig.semaphore_count (#1818) arun_many() was creating MemoryAdaptiveDispatcher with the default max_session_permit=20, ignoring config.semaphore_count entirely. This caused deep crawl to launch up to 20 concurrent browser pages against a single site, overwhelming servers that can't handle that load and causing Page.goto timeouts on all subpages. Now passes config.semaphore_count as max_session_permit to the dispatcher. The default (5) is already more conservative than the previous hardcoded 20. Co-Authored-By: Claude Opus 4.6 (1M context) --- crawl4ai/async_webcrawler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 36b999fd1..85b6b671b 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -1029,7 +1029,9 @@ async def _deep_crawl_stream(): primary_cfg = config[0] if isinstance(config, list) else config mean_delay = getattr(primary_cfg, "mean_delay", 0.1) max_range = getattr(primary_cfg, "max_range", 0.3) + semaphore_count = getattr(primary_cfg, "semaphore_count", 5) dispatcher = MemoryAdaptiveDispatcher( + max_session_permit=semaphore_count, rate_limiter=RateLimiter( base_delay=(mean_delay, mean_delay + max_range), max_delay=60.0,