From 488c5ddd6a4199bcd26d718e82a4dd5a78490fce Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 04:04:33 +0000 Subject: [PATCH 1/3] refactor: replace custom abort flag with deal() AbortSignal mechanism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crawler の独自 #aborted フラグを AbortController/AbortSignal に置換し、 @d-zero/dealer の deal() に signal オプションとして渡すようにした。 これにより abort 後は dealer が新しいワーカーの起動を停止し、 実行中のワーカーの完了を待ってから正常に終了する。 - Crawler: #aborted フラグ → #abortController + signal getter - deal() 呼び出しに signal オプションを追加 - CrawlerOrchestrator.abort(): archive.abort() → crawler.abort() - CLI シグナルハンドラ: orchestrator.abort() を呼ぶよう統一 - abort 動作を検証するテストを追加 Closes #19 https://claude.ai/code/session_015YfPBKbEv53mMTkgPxGCm5 --- packages/@nitpicker/cli/src/commands/crawl.ts | 8 +-- .../crawler/src/crawler-orchestrator.ts | 10 ++-- .../crawler/src/crawler/crawler.spec.ts | 54 +++++++++++++++++++ .../@nitpicker/crawler/src/crawler/crawler.ts | 24 ++++++--- 4 files changed, 81 insertions(+), 15 deletions(-) diff --git a/packages/@nitpicker/cli/src/commands/crawl.ts b/packages/@nitpicker/cli/src/commands/crawl.ts index 64d5fbe..e9e6380 100644 --- a/packages/@nitpicker/cli/src/commands/crawl.ts +++ b/packages/@nitpicker/cli/src/commands/crawl.ts @@ -139,9 +139,10 @@ type LogType = 'verbose' | 'normal' | 'silent'; /** * Sets up signal handlers for graceful shutdown and starts event logging. * - * Registers SIGINT/SIGBREAK/SIGHUP/SIGABRT handlers that kill zombie - * Chromium processes before exiting, then delegates to {@link eventAssignments} - * for progress output. + * Registers SIGINT/SIGBREAK/SIGHUP/SIGABRT handlers that abort the + * crawl via {@link CrawlerOrchestrator.abort}, then kill zombie Chromium + * processes and exit. The abort signal propagates through the dealer's + * AbortSignal mechanism so no new workers are launched. * @param trigger - Display label for the crawl (URL or stub file path) * @param orchestrator - The initialized CrawlerOrchestrator instance * @param config - The resolved archive configuration @@ -155,6 +156,7 @@ function run( logType: LogType, ) { const killed = () => { + orchestrator.abort(); orchestrator.garbageCollect(); process.exit(); }; diff --git a/packages/@nitpicker/crawler/src/crawler-orchestrator.ts b/packages/@nitpicker/crawler/src/crawler-orchestrator.ts index e396689..ed18ba1 100644 --- a/packages/@nitpicker/crawler/src/crawler-orchestrator.ts +++ b/packages/@nitpicker/crawler/src/crawler-orchestrator.ts @@ -162,14 +162,14 @@ export class CrawlerOrchestrator extends EventEmitter { } /** - * Abort the current crawl and archive operations. + * Abort the current crawl operation. * - * Delegates to the archive's abort method, which stops all in-progress - * database writes and cleans up temporary resources. - * @returns The result of the archive abort operation. + * Delegates to the crawler's AbortController so that the dealer stops + * launching new workers. Currently running workers will finish, after + * which `deal()` resolves and `crawlEnd` is emitted normally. */ abort() { - return this.#archive.abort(); + this.#crawler.abort(); } /** diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts b/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts index a6c65a2..fbf71fb 100644 --- a/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts +++ b/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts @@ -186,6 +186,60 @@ describe('Crawler', () => { }); }); + describe('abort()', () => { + it('abort() 後に deal() の signal オプションに渡された AbortSignal が aborted になる', async () => { + const { deal } = await import('@d-zero/dealer'); + const { default: Crawler } = await import('./crawler.js'); + + let receivedSignal: AbortSignal | undefined; + + vi.mocked(deal).mockImplementation((_items, _factory, options) => { + receivedSignal = options?.signal; + return Promise.resolve(); + }); + + const crawler = new Crawler(defaultOptions); + crawler.start(parseUrl('https://example.com/')!); + + await vi.waitFor(() => { + expect(receivedSignal).toBeDefined(); + }); + + expect(receivedSignal!.aborted).toBe(false); + crawler.abort(); + expect(receivedSignal!.aborted).toBe(true); + }); + + it('abort() 後に新しいワーカーが起動しない(deal の signal によりキューが停止する)', async () => { + const { deal } = await import('@d-zero/dealer'); + const { default: Crawler } = await import('./crawler.js'); + + let capturedSignal: AbortSignal | undefined; + + vi.mocked(deal).mockImplementation((_items, _factory, options) => { + capturedSignal = options?.signal; + return Promise.resolve(); + }); + + const crawler = new Crawler(defaultOptions); + crawler.start(parseUrl('https://example.com/')!); + + await vi.waitFor(() => { + expect(capturedSignal).toBeDefined(); + }); + + crawler.abort(); + expect(capturedSignal!.aborted).toBe(true); + }); + + it('signal getter が AbortSignal を返す', async () => { + const { default: Crawler } = await import('./crawler.js'); + const crawler = new Crawler(defaultOptions); + expect(crawler.signal).toBeInstanceOf(AbortSignal); + expect(crawler.signal.aborted).toBe(false); + }); + }); + describe('worker-level error handling', () => { it('ワーカー内の例外が error イベントとして emit され処理が継続する', async () => { const { deal } = await import('@d-zero/dealer'); diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.ts b/packages/@nitpicker/crawler/src/crawler/crawler.ts index 6896edd..dd582ba 100644 --- a/packages/@nitpicker/crawler/src/crawler/crawler.ts +++ b/packages/@nitpicker/crawler/src/crawler/crawler.ts @@ -51,8 +51,8 @@ export type { CrawlerOptions } from './types.js'; * configurable parallelism up to {@link Crawler.MAX_PROCESS_LENGTH}. */ export default class Crawler extends EventEmitter { - /** Flag set by `abort()` to signal in-progress tasks to exit early. */ - #aborted = false; + /** Controller used to cancel the deal-based crawl via its AbortSignal. */ + readonly #abortController = new AbortController(); /** Tracks discovered URLs, their scrape status, and deduplication. */ readonly #linkList = new LinkList(); /** Merged crawler configuration (user overrides + defaults). */ @@ -69,6 +69,15 @@ export default class Crawler extends EventEmitter { /** Maps hostnames to their scope URLs. Defines the crawl boundary for internal/external classification. */ readonly #scope = new Map(); + /** + * The AbortSignal associated with this crawler's AbortController. + * + * Passed to `deal()` so that it stops launching new workers after abort. + * Also available to the orchestrator for forwarding to other subsystems. + */ + get signal(): AbortSignal { + return this.#abortController.signal; + } /** * Create a new Crawler instance. * @param options - Configuration options for crawling behavior. All fields have @@ -113,12 +122,13 @@ export default class Crawler extends EventEmitter { /** * Abort the current crawl operation. * - * Sets the aborted flag and immediately emits a `crawlEnd` event. - * In-progress scrape tasks will check the flag and exit early. + * Signals the AbortController so that the dealer stops launching new + * workers. Currently running workers will finish, after which `deal()` + * resolves and `crawlEnd` is emitted by the normal completion path in + * {@link #runDeal}. */ abort() { - this.#aborted = true; - void this.emit('crawlEnd', {}); + this.#abortController.abort(); } /** @@ -525,7 +535,6 @@ export default class Crawler extends EventEmitter { this.#linkList.progress(url); return async () => { - if (this.#aborted) return; const log = createTimedUpdate(update, this.#options.verbose); try { @@ -617,6 +626,7 @@ export default class Crawler extends EventEmitter { limit: concurrency, interval: this.#options.interval, verbose: this.#options.verbose || !process.stdout.isTTY, + signal: this.#abortController.signal, header: (_progress, done, total, limit) => { return formatCrawlProgress({ done, From 22afac42086c896c5fd5074aea592cac3d27ddb5 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 04:39:58 +0000 Subject: [PATCH 2/3] fix: replace duplicate abort test with meaningful coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace duplicate signal-aborted assertion with crawlEnd-on-resolve test that verifies the normal completion path (deal resolves → crawlEnd emits) - Add double-abort safety test to ensure calling abort() twice doesn't throw https://claude.ai/code/session_015YfPBKbEv53mMTkgPxGCm5 --- .../crawler/src/crawler/crawler.spec.ts | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts b/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts index fbf71fb..3bd31c4 100644 --- a/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts +++ b/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts @@ -210,26 +210,41 @@ describe('Crawler', () => { expect(receivedSignal!.aborted).toBe(true); }); - it('abort() 後に新しいワーカーが起動しない(deal の signal によりキューが停止する)', async () => { + it('deal 正常完了時に crawlEnd イベントが emit される', async () => { const { deal } = await import('@d-zero/dealer'); const { default: Crawler } = await import('./crawler.js'); - let capturedSignal: AbortSignal | undefined; - vi.mocked(deal).mockImplementation((_items, _factory, options) => { - capturedSignal = options?.signal; + // Simulate: abort is called, deal checks signal and resolves normally + expect(options?.signal).toBeInstanceOf(AbortSignal); return Promise.resolve(); }); const crawler = new Crawler(defaultOptions); + let crawlEndEmitted = false; + crawler.on('crawlEnd', () => { + crawlEndEmitted = true; + }); + crawler.start(parseUrl('https://example.com/')!); await vi.waitFor(() => { - expect(capturedSignal).toBeDefined(); + expect(crawlEndEmitted).toBe(true); }); + }); + + it('二重 abort でもエラーにならない', async () => { + const { deal } = await import('@d-zero/dealer'); + const { default: Crawler } = await import('./crawler.js'); + + vi.mocked(deal).mockResolvedValue(); + + const crawler = new Crawler(defaultOptions); + crawler.start(parseUrl('https://example.com/')!); crawler.abort(); - expect(capturedSignal!.aborted).toBe(true); + expect(() => crawler.abort()).not.toThrow(); + expect(crawler.signal.aborted).toBe(true); }); it('signal getter が AbortSignal を返す', async () => { From f57eb1a3eeb415419c3e269f3a427c9d6b570f6f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 05:09:31 +0000 Subject: [PATCH 3/3] docs: add crawl abort mechanism to ARCHITECTURE.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the AbortController-based crawl cancellation flow (CLI signal handler → Orchestrator → Crawler → deal signal) and fix missing blank line in crawler.ts signal getter. https://claude.ai/code/session_015YfPBKbEv53mMTkgPxGCm5 --- ARCHITECTURE.md | 17 +++++++++++++++++ .../@nitpicker/crawler/src/crawler/crawler.ts | 1 + 2 files changed, 18 insertions(+) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 4c4db42..a692d4d 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -217,6 +217,23 @@ URL を deal() で受け取り: - スクレイピングはインプロセス(`@d-zero/beholder`)で実行。各 URL ごとにブラウザを起動・終了 - `push()` で発見した新 URL を動的にキューに追加 - `onPush` コールバックで `withoutHashAndAuth` による重複排除 +- `signal` オプションで `AbortSignal` を渡し、中断時に新規ワーカーの起動を停止 + +### クロール中断メカニズム + +``` +CLI シグナルハンドラ(SIGINT / SIGHUP 等) + → CrawlerOrchestrator.abort() + → Crawler.abort() + → AbortController.abort() + → deal() の signal オプション経由で新規ワーカー起動を停止 + → 実行中のワーカーは正常完了まで継続 + → 全ワーカー完了後 deal() が resolve → crawlEnd イベント emit +``` + +- `Crawler` は内部に `AbortController` を保持し、`signal` getter で `AbortSignal` を公開 +- `CrawlerOrchestrator` のコンストラクタで `archive` の `error` イベントを監視し、アーカイブエラー発生時にも `Crawler.abort()` を呼び出す +- CLI の `killed()` ハンドラでは `abort()` 後に `garbageCollect()`(ゾンビ Chromium プロセスの終了)→ `process.exit()` を実行 ### 主要定数 diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.ts b/packages/@nitpicker/crawler/src/crawler/crawler.ts index dd582ba..033f35b 100644 --- a/packages/@nitpicker/crawler/src/crawler/crawler.ts +++ b/packages/@nitpicker/crawler/src/crawler/crawler.ts @@ -78,6 +78,7 @@ export default class Crawler extends EventEmitter { get signal(): AbortSignal { return this.#abortController.signal; } + /** * Create a new Crawler instance. * @param options - Configuration options for crawling behavior. All fields have