From fc859029e97f57abfd280d284fec88e128033f79 Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 10 Mar 2026 08:24:30 +0000
Subject: [PATCH 1/4] Update @d-zero/dealer to 1.7.0 and improve error handling

- Update @d-zero/dealer from 1.6.3 to 1.7.0 in all packages (crawler, core,
  cli, report-google-sheets)
- Add catch block in crawler's deal() worker callback to handle per-URL
  errors gracefully instead of letting them propagate as unhandled rejections
- Handle AggregateError from deal() in start() and startMultiple() to emit
  individual error events for each failed worker

Closes #18

https://claude.ai/code/session_01DZApYkRAury35FhWGY72xf
---
 packages/@nitpicker/cli/package.json          |  2 +-
 packages/@nitpicker/core/package.json         |  2 +-
 packages/@nitpicker/crawler/package.json      |  2 +-
 .../@nitpicker/crawler/src/crawler/crawler.ts | 64 +++++++++++++++----
 .../report-google-sheets/package.json         |  2 +-
 yarn.lock                                     | 18 +++---
 6 files changed, 63 insertions(+), 27 deletions(-)

diff --git a/packages/@nitpicker/cli/package.json b/packages/@nitpicker/cli/package.json
index 35bdb6b..3265481 100644
--- a/packages/@nitpicker/cli/package.json
+++ b/packages/@nitpicker/cli/package.json
@@ -31,7 +31,7 @@
 		"clean": "tsc --build --clean"
 	},
 	"dependencies": {
-		"@d-zero/dealer": "1.6.3",
+		"@d-zero/dealer": "1.7.0",
 		"@d-zero/readtext": "1.1.19",
 		"@d-zero/roar": "2.0.0",
 		"@d-zero/shared": "0.20.0",
diff --git a/packages/@nitpicker/core/package.json b/packages/@nitpicker/core/package.json
index d55cf45..0ec0599 100644
--- a/packages/@nitpicker/core/package.json
+++ b/packages/@nitpicker/core/package.json
@@ -27,7 +27,7 @@
 		"clean": "tsc --build --clean"
 	},
 	"dependencies": {
-		"@d-zero/dealer": "1.6.3",
+		"@d-zero/dealer": "1.7.0",
 		"@d-zero/shared": "0.20.0",
 		"@nitpicker/crawler": "0.4.4",
 		"@nitpicker/types": "0.4.4",
diff --git a/packages/@nitpicker/crawler/package.json b/packages/@nitpicker/crawler/package.json
index 0a9ef93..29ee3a8 100644
--- a/packages/@nitpicker/crawler/package.json
+++ b/packages/@nitpicker/crawler/package.json
@@ -28,7 +28,7 @@
 	},
 	"dependencies": {
 		"@d-zero/beholder": "2.0.0",
-		"@d-zero/dealer": "1.6.3",
+		"@d-zero/dealer": "1.7.0",
 		"@d-zero/fs": "0.2.2",
 		"@d-zero/shared": "0.20.0",
 		"ansi-colors": "4.1.3",
diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.ts b/packages/@nitpicker/crawler/src/crawler/crawler.ts
index 0737581..7febbcc 100644
--- a/packages/@nitpicker/crawler/src/crawler/crawler.ts
+++ b/packages/@nitpicker/crawler/src/crawler/crawler.ts
@@ -176,13 +176,26 @@ export default class Crawler extends EventEmitter {
 
 		void this.#runDeal(initialUrls, resumeOffset).catch((error) => {
 			crawlerLog('runDeal error: %O', error);
-			void this.emit('error', {
-				pid: process.pid,
-				isMainProcess: true,
-				url: url.href,
-				isExternal: false,
-				error: error instanceof Error ? error : new Error(String(error)),
-			});
+			if (error instanceof AggregateError) {
+				for (const workerError of error.errors) {
+					void this.emit('error', {
+						pid: process.pid,
+						isMainProcess: true,
+						url: url.href,
+						isExternal: false,
+						error:
+							workerError instanceof Error ? workerError : new Error(String(workerError)),
+					});
+				}
+			} else {
+				void this.emit('error', {
+					pid: process.pid,
+					isMainProcess: true,
+					url: url.href,
+					isExternal: false,
+					error: error instanceof Error ? error : new Error(String(error)),
+				});
+			}
 			void this.emit('crawlEnd', {});
 		});
 	}
@@ -218,13 +231,26 @@ export default class Crawler extends EventEmitter {
 		this.#options.fromList = true;
 		void this.#runDeal(pageList).catch((error) => {
 			crawlerLog('runDeal error: %O', error);
-			void this.emit('error', {
-				pid: process.pid,
-				isMainProcess: true,
-				url: pageList[0]!.href,
-				isExternal: false,
-				error: error instanceof Error ? error : new Error(String(error)),
-			});
+			if (error instanceof AggregateError) {
+				for (const workerError of error.errors) {
+					void this.emit('error', {
+						pid: process.pid,
+						isMainProcess: true,
+						url: pageList[0]!.href,
+						isExternal: false,
+						error:
+							workerError instanceof Error ? workerError : new Error(String(workerError)),
+					});
+				}
+			} else {
+				void this.emit('error', {
+					pid: process.pid,
+					isMainProcess: true,
+					url: pageList[0]!.href,
+					isExternal: false,
+					error: error instanceof Error ? error : new Error(String(error)),
+				});
+			}
 			void this.emit('crawlEnd', {});
 		});
 	}
@@ -572,6 +598,16 @@ export default class Crawler extends EventEmitter {
 					this.#handleResult(result, url, push, paginationState, concurrency);
 					this.#handleResources(result.resources);
 					log(formatResultSummary(result));
+				} catch (error) {
+					crawlerLog('Worker error for %s: %O', url.href, error);
+					log(c.red('Error'));
+					void this.emit('error', {
+						pid: process.pid,
+						isMainProcess: true,
+						url: url.href,
+						isExternal,
+						error: error instanceof Error ? error : new Error(String(error)),
+					});
 				} finally {
 					if (isExternal) {
 						externalDoneUrls.add(protocolAgnosticKey(url.withoutHashAndAuth));
diff --git a/packages/@nitpicker/report-google-sheets/package.json b/packages/@nitpicker/report-google-sheets/package.json
index 842e049..fd00041 100644
--- a/packages/@nitpicker/report-google-sheets/package.json
+++ b/packages/@nitpicker/report-google-sheets/package.json
@@ -27,7 +27,7 @@
 		"clean": "tsc --build --clean"
 	},
 	"dependencies": {
-		"@d-zero/dealer": "1.6.3",
+		"@d-zero/dealer": "1.7.0",
 		"@d-zero/google-auth": "0.5.6",
 		"@d-zero/google-sheets": "0.6.0",
 		"@d-zero/shared": "0.20.0",
diff --git a/yarn.lock b/yarn.lock
index 374c8c5..bada8ad 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -819,13 +819,13 @@ __metadata:
   languageName: node
   linkType: hard
 
-"@d-zero/dealer@npm:1.6.3":
-  version: 1.6.3
-  resolution: "@d-zero/dealer@npm:1.6.3"
+"@d-zero/dealer@npm:1.7.0":
+  version: 1.7.0
+  resolution: "@d-zero/dealer@npm:1.7.0"
   dependencies:
-    "@d-zero/shared": "npm:0.20.0"
+    "@d-zero/shared": "npm:0.20.1"
     ansi-colors: "npm:4.1.3"
-  checksum: 10c0/b239f33abae6bed51df3eefde7774e70f789689a8c2754fac234dfa5f828fa2c6d587c46560a6d9fbb76be1ac3e8040ae053611fb73c799e865f0b0bd9ac2548
+  checksum: 10c0/ac74b090f31272c0b22f563989ef00cb857331e7de19bc6d193a8ebc573fdaacfc82689c8e95bcaea59cad44ae2ab7a063c4db48210c534c597ccc41e86a39d6
   languageName: node
   linkType: hard
 
@@ -2354,7 +2354,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@nitpicker/cli@workspace:packages/@nitpicker/cli"
   dependencies:
-    "@d-zero/dealer": "npm:1.6.3"
+    "@d-zero/dealer": "npm:1.7.0"
    "@d-zero/readtext": "npm:1.1.19"
    "@d-zero/roar": "npm:2.0.0"
    "@d-zero/shared": "npm:0.20.0"
@@ -2380,7 +2380,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@nitpicker/core@workspace:packages/@nitpicker/core"
   dependencies:
-    "@d-zero/dealer": "npm:1.6.3"
+    "@d-zero/dealer": "npm:1.7.0"
    "@d-zero/shared": "npm:0.20.0"
    "@nitpicker/crawler": "npm:0.4.4"
    "@nitpicker/types": "npm:0.4.4"
@@ -2395,7 +2395,7 @@ __metadata:
  version: 0.0.0-use.local
  resolution: "@nitpicker/crawler@workspace:packages/@nitpicker/crawler"
"@nitpicker/crawler@workspace:packages/@nitpicker/crawler" dependencies: "@d-zero/beholder": "npm:2.0.0" - "@d-zero/dealer": "npm:1.6.3" + "@d-zero/dealer": "npm:1.7.0" "@d-zero/fs": "npm:0.2.2" "@d-zero/shared": "npm:0.20.0" "@types/debug": "npm:4.1.12" @@ -2419,7 +2419,7 @@ __metadata: version: 0.0.0-use.local resolution: "@nitpicker/report-google-sheets@workspace:packages/@nitpicker/report-google-sheets" dependencies: - "@d-zero/dealer": "npm:1.6.3" + "@d-zero/dealer": "npm:1.7.0" "@d-zero/google-auth": "npm:0.5.6" "@d-zero/google-sheets": "npm:0.6.0" "@d-zero/shared": "npm:0.20.0" From b4a035274d0b434583e4fd12b7f90f0fafe02fd1 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 10 Mar 2026 14:06:13 +0000 Subject: [PATCH 2/4] Refactor error handling: extract #emitDealErrors, add handleScrapeError to worker catch, add tests - Extract duplicate AggregateError expansion logic from start() and startMultiple() into a shared #emitDealErrors private method - Call handleScrapeError in the worker-level catch block so errored URLs are properly marked as done in the LinkList (prevents stale progress state) - Add crawler.spec.ts with 6 tests covering: - AggregateError expansion into individual error events - Non-Error values in AggregateError converted to Error instances - Regular Error emitted as single error event - crawlEnd emitted after deal failure - startMultiple() AggregateError handling - Worker-level exceptions caught and emitted as error events https://claude.ai/code/session_01DZApYkRAury35FhWGY72xf --- .../crawler/src/crawler/crawler.spec.ts | 238 ++++++++++++++++++ .../@nitpicker/crawler/src/crawler/crawler.ts | 80 +++--- 2 files changed, 277 insertions(+), 41 deletions(-) create mode 100644 packages/@nitpicker/crawler/src/crawler/crawler.spec.ts diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts b/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts new file mode 100644 index 0000000..a6c65a2 --- /dev/null +++ b/packages/@nitpicker/crawler/src/crawler/crawler.spec.ts @@ -0,0 +1,238 @@ +import type { CrawlerEventTypes } from './types.js'; + +import { tryParseUrl as parseUrl } from '@d-zero/shared/parse-url'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +vi.mock('@d-zero/dealer', () => ({ + deal: vi.fn(), +})); + +vi.mock('@d-zero/shared/retry', () => ({ + /** + * Stub retryCall that calls the function once without retries. + * @param fn - The function to call. + * @returns The result of calling fn. + */ + retryCall: (fn: () => unknown) => fn(), +})); + +vi.mock('./robots-checker.js', () => { + /** + * Stub RobotsChecker that always allows crawling. + */ + class RobotsCheckerStub { + /** + * Always returns true. + * @returns Resolved with true. + */ + isAllowed() { + return Promise.resolve(true); + } + } + return { RobotsChecker: RobotsCheckerStub }; +}); + +/** + * Default crawler options for testing. 
+ */
+const defaultOptions = {
+	interval: 0,
+	parallels: 1,
+	recursive: true,
+	scope: ['https://example.com/'],
+	excludes: [],
+	excludeKeywords: [],
+	excludeUrls: [],
+	ignoreRobots: true,
+};
+
+describe('Crawler', () => {
+	beforeEach(() => {
+		vi.resetAllMocks();
+	});
+
+	describe('#emitDealErrors via start()', () => {
+		it('AggregateError の各エラーが個別の error イベントとして emit される', async () => {
+			const { deal } = await import('@d-zero/dealer');
+			const { default: Crawler } = await import('./crawler.js');
+
+			vi.mocked(deal).mockRejectedValue(
+				new AggregateError(
+					[new Error('worker-1 failed'), new Error('worker-2 failed')],
+					'deal failed',
+				),
+			);
+
+			const crawler = new Crawler(defaultOptions);
+			const errors: CrawlerEventTypes['error'][] = [];
+			crawler.on('error', (e) => {
+				errors.push(e);
+			});
+
+			const url = parseUrl('https://example.com/')!;
+			crawler.start(url);
+
+			// deal() rejection triggers async .catch — wait for microtask queue
+			await vi.waitFor(() => {
+				expect(errors).toHaveLength(2);
+			});
+
+			expect(errors[0]!.error.message).toBe('worker-1 failed');
+			expect(errors[1]!.error.message).toBe('worker-2 failed');
+			expect(errors[0]!.url).toBe('https://example.com');
+			expect(errors[0]!.isExternal).toBe(false);
+			expect(errors[0]!.isMainProcess).toBe(true);
+		});
+
+		it('AggregateError 内の非 Error 値が Error に変換される', async () => {
+			const { deal } = await import('@d-zero/dealer');
+			const { default: Crawler } = await import('./crawler.js');
+
+			vi.mocked(deal).mockRejectedValue(
+				new AggregateError(['string error', 42], 'mixed errors'),
+			);
+
+			const crawler = new Crawler(defaultOptions);
+			const errors: CrawlerEventTypes['error'][] = [];
+			crawler.on('error', (e) => {
+				errors.push(e);
+			});
+
+			crawler.start(parseUrl('https://example.com/')!);
+
+			await vi.waitFor(() => {
+				expect(errors).toHaveLength(2);
+			});
+
+			expect(errors[0]!.error).toBeInstanceOf(Error);
+			expect(errors[0]!.error.message).toBe('string error');
+			expect(errors[1]!.error).toBeInstanceOf(Error);
+			expect(errors[1]!.error.message).toBe('42');
+		});
+
+		it('通常の Error は単一の error イベントとして emit される', async () => {
+			const { deal } = await import('@d-zero/dealer');
+			const { default: Crawler } = await import('./crawler.js');
+
+			vi.mocked(deal).mockRejectedValue(new Error('deal failed'));
+
+			const crawler = new Crawler(defaultOptions);
+			const errors: CrawlerEventTypes['error'][] = [];
+			crawler.on('error', (e) => {
+				errors.push(e);
+			});
+
+			crawler.start(parseUrl('https://example.com/')!);
+
+			await vi.waitFor(() => {
+				expect(errors).toHaveLength(1);
+			});
+
+			expect(errors[0]!.error.message).toBe('deal failed');
+		});
+
+		it('deal 失敗後に crawlEnd イベントが emit される', async () => {
+			const { deal } = await import('@d-zero/dealer');
+			const { default: Crawler } = await import('./crawler.js');
+
+			vi.mocked(deal).mockRejectedValue(new Error('fatal'));
+
+			const crawler = new Crawler(defaultOptions);
+			let crawlEndEmitted = false;
+			crawler.on('crawlEnd', () => {
+				crawlEndEmitted = true;
+			});
+
+			crawler.start(parseUrl('https://example.com/')!);
+
+			await vi.waitFor(() => {
+				expect(crawlEndEmitted).toBe(true);
+			});
+		});
+	});
+
+	describe('#emitDealErrors via startMultiple()', () => {
+		it('AggregateError の各エラーが個別に emit される', async () => {
+			const { deal } = await import('@d-zero/dealer');
+			const { default: Crawler } = await import('./crawler.js');
+
+			vi.mocked(deal).mockRejectedValue(
+				new AggregateError(
+					[new Error('err-a'), new Error('err-b'), new Error('err-c')],
+					'deal failed',
+				),
+			);
+
+			const crawler = new Crawler(defaultOptions);
+			const errors: CrawlerEventTypes['error'][] = [];
+			crawler.on('error', (e) => {
+				errors.push(e);
+			});
+
+			const urls = [
+				parseUrl('https://example.com/page1')!,
+				parseUrl('https://example.com/page2')!,
+			];
+			crawler.startMultiple(urls);
+
+			await vi.waitFor(() => {
+				expect(errors).toHaveLength(3);
+			});
+
+			expect(errors[0]!.url).toBe('https://example.com/page1');
+			expect(errors[0]!.error.message).toBe('err-a');
+			expect(errors[1]!.error.message).toBe('err-b');
+			expect(errors[2]!.error.message).toBe('err-c');
+		});
+	});
+
+	describe('worker-level error handling', () => {
+		it('ワーカー内の例外が error イベントとして emit され処理が継続する', async () => {
+			const { deal } = await import('@d-zero/dealer');
+			const { default: Crawler } = await import('./crawler.js');
+
+			const workerError = new Error('unexpected crash');
+
+			// Simulate deal: call setup function, then invoke the returned work function
+			vi.mocked(deal).mockImplementation(async (items, factory) => {
+				for (const [index, item] of (items as unknown[]).entries()) {
+					const noop = () => {};
+					const noopAsync = async () => {};
+					// eslint-disable-next-line @typescript-eslint/no-unsafe-function-type -- deal factory signature is complex; cast is intentional in test
+					const workFn = (factory as Function)(item, noop, index, noop, noopAsync) as
+						| (() => Promise<void>)
+						| undefined;
+					if (workFn) {
+						await workFn();
+					}
+				}
+			});
+
+			// Mock fetchDestination to throw — triggers the worker catch block
+			const fetchDestMod = await import('./fetch-destination.js');
+			vi.spyOn(fetchDestMod, 'fetchDestination').mockRejectedValue(workerError);
+
+			const crawler = new Crawler(defaultOptions);
+
+			const errors: CrawlerEventTypes['error'][] = [];
+			crawler.on('error', (e) => {
+				errors.push(e);
+			});
+
+			let crawlEndEmitted = false;
+			crawler.on('crawlEnd', () => {
+				crawlEndEmitted = true;
+			});
+
+			crawler.start(parseUrl('https://example.com/')!);
+
+			await vi.waitFor(() => {
+				expect(crawlEndEmitted).toBe(true);
+			});
+
+			expect(errors).toHaveLength(1);
+			expect(errors[0]!.error.message).toBe('unexpected crash');
+			expect(errors[0]!.url).toBe('https://example.com');
+		});
+	});
+});
diff --git a/packages/@nitpicker/crawler/src/crawler/crawler.ts b/packages/@nitpicker/crawler/src/crawler/crawler.ts
index 7febbcc..6896edd 100644
--- a/packages/@nitpicker/crawler/src/crawler/crawler.ts
+++ b/packages/@nitpicker/crawler/src/crawler/crawler.ts
@@ -176,26 +176,7 @@ export default class Crawler extends EventEmitter {
 
 		void this.#runDeal(initialUrls, resumeOffset).catch((error) => {
 			crawlerLog('runDeal error: %O', error);
-			if (error instanceof AggregateError) {
-				for (const workerError of error.errors) {
-					void this.emit('error', {
-						pid: process.pid,
-						isMainProcess: true,
-						url: url.href,
-						isExternal: false,
-						error:
-							workerError instanceof Error ? workerError : new Error(String(workerError)),
-					});
-				}
-			} else {
-				void this.emit('error', {
-					pid: process.pid,
-					isMainProcess: true,
-					url: url.href,
-					isExternal: false,
-					error: error instanceof Error ? error : new Error(String(error)),
-				});
-			}
+			this.#emitDealErrors(error, url.href);
 			void this.emit('crawlEnd', {});
 		});
 	}
@@ -231,30 +212,35 @@ export default class Crawler extends EventEmitter {
 		this.#options.fromList = true;
 		void this.#runDeal(pageList).catch((error) => {
 			crawlerLog('runDeal error: %O', error);
-			if (error instanceof AggregateError) {
-				for (const workerError of error.errors) {
-					void this.emit('error', {
-						pid: process.pid,
-						isMainProcess: true,
-						url: pageList[0]!.href,
-						isExternal: false,
-						error:
-							workerError instanceof Error ? workerError : new Error(String(workerError)),
-					});
-				}
-			} else {
-				void this.emit('error', {
-					pid: process.pid,
-					isMainProcess: true,
-					url: pageList[0]!.href,
-					isExternal: false,
-					error: error instanceof Error ? error : new Error(String(error)),
-				});
-			}
+			this.#emitDealErrors(error, pageList[0]!.href);
 			void this.emit('crawlEnd', {});
 		});
 	}
 
+	/**
+	 * Emits error events for a deal-level failure.
+	 *
+	 * When the dealer rejects with an `AggregateError` (e.g. multiple worker
+	 * failures), each inner error is emitted as a separate `error` event.
+	 * For any other error type, a single `error` event is emitted.
+	 * @param error - The error thrown by `#runDeal`.
+	 * @param fallbackUrl - URL string used as the error context (typically the root URL).
+	 */
+	#emitDealErrors(error: unknown, fallbackUrl: string) {
+		const errors =
+			error instanceof AggregateError ? (error.errors as unknown[]) : [error];
+
+		for (const e of errors) {
+			void this.emit('error', {
+				pid: process.pid,
+				isMainProcess: true,
+				url: fallbackUrl,
+				isExternal: false,
+				error: e instanceof Error ? e : new Error(String(e)),
+			});
+		}
+	}
+
 	/**
 	 * Processes captured sub-resources from a page scrape, deduplicates them,
 	 * and emits `response` / `responseReferrers` events for new resources.
@@ -601,12 +587,24 @@ export default class Crawler extends EventEmitter {
 				} catch (error) {
 					crawlerLog('Worker error for %s: %O', url.href, error);
 					log(c.red('Error'));
+					const workerError = error instanceof Error ? error : new Error(String(error));
+					handleScrapeError(
+						{
+							url,
+							error: workerError,
+							shutdown: false,
+							pid: process.pid,
+						},
+						this.#linkList,
+						this.#scope,
+						this.#options,
+					);
 					void this.emit('error', {
 						pid: process.pid,
 						isMainProcess: true,
 						url: url.href,
 						isExternal,
-						error: error instanceof Error ? error : new Error(String(error)),
+						error: workerError,
 					});
 				} finally {

From bf0520f370ca6952aa6556e1bb7b9a28c4ebcc19 Mon Sep 17 00:00:00 2001
From: Claude
Date: Wed, 11 Mar 2026 01:45:57 +0000
Subject: [PATCH 3/4] Update @d-zero/shared from 0.20.0 to 0.20.1 across all packages

Align direct dependency versions with the transitive dependency pulled in
by @d-zero/dealer 1.7.0 and @d-zero/beholder 2.0.0, eliminating duplicate
resolutions in yarn.lock.

https://claude.ai/code/session_01DZApYkRAury35FhWGY72xf
---
 packages/@nitpicker/analyze-search/package.json       |  2 +-
 packages/@nitpicker/cli/package.json                  |  2 +-
 packages/@nitpicker/core/package.json                 |  2 +-
 packages/@nitpicker/crawler/package.json              |  2 +-
 packages/@nitpicker/report-google-sheets/package.json |  2 +-
 yarn.lock                                             | 10 +++++-----
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/packages/@nitpicker/analyze-search/package.json b/packages/@nitpicker/analyze-search/package.json
index b637f5b..2ae89c8 100644
--- a/packages/@nitpicker/analyze-search/package.json
+++ b/packages/@nitpicker/analyze-search/package.json
@@ -27,7 +27,7 @@
 		"clean": "tsc --build --clean"
 	},
 	"dependencies": {
-		"@d-zero/shared": "0.20.0",
+		"@d-zero/shared": "0.20.1",
 		"@nitpicker/core": "0.4.4",
 		"@nitpicker/crawler": "0.4.4",
 		"@nitpicker/types": "0.4.4",
diff --git a/packages/@nitpicker/cli/package.json b/packages/@nitpicker/cli/package.json
index 3265481..5903b7b 100644
--- a/packages/@nitpicker/cli/package.json
+++ b/packages/@nitpicker/cli/package.json
@@ -34,7 +34,7 @@
 		"@d-zero/dealer": "1.7.0",
 		"@d-zero/readtext": "1.1.19",
 		"@d-zero/roar": "2.0.0",
-		"@d-zero/shared": "0.20.0",
+		"@d-zero/shared": "0.20.1",
 		"@nitpicker/analyze-axe": "0.4.4",
 		"@nitpicker/analyze-lighthouse": "0.4.4",
 		"@nitpicker/analyze-main-contents": "0.4.4",
diff --git a/packages/@nitpicker/core/package.json b/packages/@nitpicker/core/package.json
index 0ec0599..1d4fec6 100644
--- a/packages/@nitpicker/core/package.json
+++ b/packages/@nitpicker/core/package.json
@@ -28,7 +28,7 @@
 	},
 	"dependencies": {
 		"@d-zero/dealer": "1.7.0",
-		"@d-zero/shared": "0.20.0",
+		"@d-zero/shared": "0.20.1",
 		"@nitpicker/crawler": "0.4.4",
 		"@nitpicker/types": "0.4.4",
 		"ansi-colors": "4.1.3",
diff --git a/packages/@nitpicker/crawler/package.json b/packages/@nitpicker/crawler/package.json
index 29ee3a8..f61f731 100644
--- a/packages/@nitpicker/crawler/package.json
+++ b/packages/@nitpicker/crawler/package.json
@@ -30,7 +30,7 @@
 		"@d-zero/beholder": "2.0.0",
 		"@d-zero/dealer": "1.7.0",
 		"@d-zero/fs": "0.2.2",
-		"@d-zero/shared": "0.20.0",
+		"@d-zero/shared": "0.20.1",
 		"ansi-colors": "4.1.3",
 		"debug": "4.4.3",
 		"follow-redirects": "1.15.11",
diff --git a/packages/@nitpicker/report-google-sheets/package.json b/packages/@nitpicker/report-google-sheets/package.json
index fd00041..62aceb1 100644
--- a/packages/@nitpicker/report-google-sheets/package.json
+++ b/packages/@nitpicker/report-google-sheets/package.json
@@ -30,7 +30,7 @@
 		"@d-zero/dealer": "1.7.0",
 		"@d-zero/google-auth": "0.5.6",
 		"@d-zero/google-sheets": "0.6.0",
-		"@d-zero/shared": "0.20.0",
+		"@d-zero/shared": "0.20.1",
 		"@nitpicker/crawler": "0.4.4",
 		"@nitpicker/types": "0.4.4",
 		"ansi-colors": "4.1.3",
diff --git a/yarn.lock b/yarn.lock
index bada8ad..562bf53 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2301,7 +2301,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@nitpicker/analyze-search@workspace:packages/@nitpicker/analyze-search"
   dependencies:
-    "@d-zero/shared": "npm:0.20.0"
+    "@d-zero/shared": "npm:0.20.1"
    "@nitpicker/core": "npm:0.4.4"
    "@nitpicker/crawler": "npm:0.4.4"
    "@nitpicker/types": "npm:0.4.4"
@@ -2357,7 +2357,7 @@ __metadata:
    "@d-zero/dealer": "npm:1.7.0"
    "@d-zero/readtext": "npm:1.1.19"
    "@d-zero/roar": "npm:2.0.0"
-    "@d-zero/shared": "npm:0.20.0"
+    "@d-zero/shared": "npm:0.20.1"
    "@nitpicker/analyze-axe": "npm:0.4.4"
    "@nitpicker/analyze-lighthouse": "npm:0.4.4"
    "@nitpicker/analyze-main-contents": "npm:0.4.4"
@@ -2381,7 +2381,7 @@ __metadata:
  resolution: "@nitpicker/core@workspace:packages/@nitpicker/core"
"@nitpicker/core@workspace:packages/@nitpicker/core" dependencies: "@d-zero/dealer": "npm:1.7.0" - "@d-zero/shared": "npm:0.20.0" + "@d-zero/shared": "npm:0.20.1" "@nitpicker/crawler": "npm:0.4.4" "@nitpicker/types": "npm:0.4.4" ansi-colors: "npm:4.1.3" @@ -2397,7 +2397,7 @@ __metadata: "@d-zero/beholder": "npm:2.0.0" "@d-zero/dealer": "npm:1.7.0" "@d-zero/fs": "npm:0.2.2" - "@d-zero/shared": "npm:0.20.0" + "@d-zero/shared": "npm:0.20.1" "@types/debug": "npm:4.1.12" "@types/follow-redirects": "npm:1.14.4" "@types/fs-extra": "npm:11.0.4" @@ -2422,7 +2422,7 @@ __metadata: "@d-zero/dealer": "npm:1.7.0" "@d-zero/google-auth": "npm:0.5.6" "@d-zero/google-sheets": "npm:0.6.0" - "@d-zero/shared": "npm:0.20.0" + "@d-zero/shared": "npm:0.20.1" "@nitpicker/crawler": "npm:0.4.4" "@nitpicker/types": "npm:0.4.4" "@types/debug": "npm:4.1.12" From 579d40a59a361a77ec5da705c27910970ce1ff7b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 02:26:11 +0000 Subject: [PATCH 4/4] docs: fix documentation discrepancies with implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ARCHITECTURE.md: Add missing format-crawl-progress.ts to crawler module listing - ARCHITECTURE.md: Add missing statusText column to resources table summary - ARCHITECTURE.md: Clarify @d-zero/dealer usage (deal() for crawler only, Lanes for cli/core/report) - ARCHITECTURE.md: Note that cli and core also depend on @d-zero/dealer for Lanes type - CLAUDE.md: Correct core package description — uses bounded Promise pool, not deal() - CLAUDE.md: Update analyze data flow to reflect actual implementation https://claude.ai/code/session_01DZApYkRAury35FhWGY72xf --- ARCHITECTURE.md | 11 +++++++---- CLAUDE.md | 12 ++++++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 2409609..4c4db42 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -32,6 +32,8 @@ packages/ ``` > **Note**: CLI は analyze プラグインに直接依存する(`npx` 実行時のモジュール解決のため)。新規 analyze プラグイン追加時は `@nitpicker/cli/package.json` の `dependencies` にも追加すること。 +> +> **Note**: `@d-zero/dealer` は上図では crawler と report-google-sheets への接続のみ表示しているが、cli と core も `Lanes` 型のインポートのために依存している。 --- @@ -142,6 +144,7 @@ crawler/src/ │ ├── destination-cache.ts # リクエストキャッシュ │ ├── fetch-robots-txt.ts # robots.txt 取得・パース │ ├── robots-checker.ts # robots.txt 準拠チェッカー(origin 別キャッシュ) +│ ├── format-crawl-progress.ts # deal() 進捗表示のフォーマッタ │ └── ... 
 ├── crawler.ts                     # バレルエクスポート(パッケージ公開 API)
 ├── crawler-orchestrator.ts        # CrawlerOrchestrator
@@ -326,7 +329,7 @@ scrapeStart(url, page, options)
 ### その他テーブル
 
 - **images**: pageId, src, currentSrc, alt, width/height, naturalWidth/naturalHeight, isLazy, viewportWidth, sourceCode
-- **resources**: url, isExternal, status, contentType, contentLength, compress, cdn, responseHeaders
+- **resources**: url, isExternal, status, statusText, contentType, contentLength, compress, cdn, responseHeaders
 - **resources-referrers**: resourceId → resources.id, pageId → pages.id
 - **info**: 設定情報(単一レコード、`Config` 型のフィールドを JSON で保存)
 
@@ -647,7 +650,7 @@ Nitpicker は D-ZERO が公開する以下の外部パッケージに依存し
 | パッケージ              | 用途                                                                           | 検索キーワード                        |
 | ----------------------- | ----------------------------------------------------------------------------- | ------------------------------------- |
 | `@d-zero/beholder`      | Puppeteer ベースのスクレイパーエンジン。`ScrapeResult` を返す                  | `"@d-zero/beholder" changelog`        |
-| `@d-zero/dealer`        | 並列処理・スケジューリング。`deal()` 関数を提供                                | `"@d-zero/dealer" deal concurrent`    |
+| `@d-zero/dealer`        | 並列処理・スケジューリング。`deal()` 関数と `Lanes` 進捗表示を提供             | `"@d-zero/dealer" deal concurrent`    |
 | `@d-zero/shared`        | 共有ユーティリティ(サブパスエクスポート形式: `@d-zero/shared/parse-url` 等)  | `"@d-zero/shared" subpath exports`    |
 | `@d-zero/roar`          | CLI フレームワーク                                                             | `"@d-zero/roar" command`              |
 | `@d-zero/google-auth`   | OAuth2 認証(`credentials.json` → `token.json`)                               | `"@d-zero/google-auth" oauth2`        |
@@ -659,7 +662,7 @@
 
 ```
 @d-zero/beholder → crawler(Scraper, ScrapeResult)
-@d-zero/dealer → crawler, core, cli, report-google-sheets(deal() 並列制御)
+@d-zero/dealer → crawler(deal() 並列制御), core・cli・report-google-sheets(Lanes 進捗表示)
 @d-zero/shared → 全パッケージ(parseUrl, delay, isError, detectCompress, detectCDN)
 @d-zero/roar → cli(CLI コマンド定義)
 @d-zero/google-auth → report-google-sheets(OAuth2 認証)
@@ -671,5 +674,5 @@
 ### バージョン更新時の注意
 
 - **`@d-zero/beholder`**: `ScrapeResult` の型が変わると crawler 全体に影響
-- **`@d-zero/dealer`**: `deal()` の API が変わると crawler と core の並列処理に影響
+- **`@d-zero/dealer`**: `deal()` の API が変わると crawler の並列処理に影響。`Lanes` の型が変わると core・cli・report-google-sheets の進捗表示に影響
 - **`@d-zero/shared`**: サブパスエクスポートの追加・削除に注意。`@d-zero/shared/parse-url` 形式でインポートすること
diff --git a/CLAUDE.md b/CLAUDE.md
index d9be69f..733c377 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -13,7 +13,7 @@ packages/
 ├── @nitpicker/
 │   ├── cli/                  # 統合 CLI (bin: nitpicker)
 │   ├── crawler/              # クローラーエンジン(オーケストレーター + アーカイブ + ユーティリティ)
-│   ├── core/                 # 監査エンジン(Nitpicker クラス + deal() による並列処理)
+│   ├── core/                 # 監査エンジン(Nitpicker クラス + bounded Promise pool による並列処理)
 │   ├── types/                # 監査型定義(Report, ConfigJSON)
 │   ├── analyze-axe/          # アクセシビリティ監査
 │   ├── analyze-lighthouse/   # Lighthouse 監査
@@ -71,9 +71,9 @@ CrawlerOrchestrator.crawling(urls, options)
 
 Nitpicker.analyze(archivePath, plugins)
   → Archive.connect() → ArchiveAccessor
   → getPagesWithRefs() で全ページ取得
-  → deal()(@d-zero/dealer, limit: 50)で並列分析
+  → bounded Promise pool(limit: 50)で並列分析
   → 各 Page: runInWorker() で Worker スレッドでプラグイン実行
-  → deal() が進捗表示を担当(プラグイン内の console.log は不要)
+  → Lanes(@d-zero/dealer)が進捗表示を担当(プラグイン内の console.log は不要)
   → レポートファイル書き出し
 ```
 
@@ -86,10 +86,10 @@
 - `delay` — `@d-zero/shared/delay`
 - `isError` — beholder/is-error.ts に集約、crawler は re-export
 
-### deal() の利用箇所
+### deal() / 並列処理の利用箇所
 
-- **crawler**: URL スクレイピングの並列制御
-- **core(analyze)**: ページ分析の並列処理(limit: 50)
+- **crawler**: `deal()`(@d-zero/dealer)による URL スクレイピングの並列制御
+- **core(analyze)**: 独自の bounded Promise pool(limit: 50)による並列処理。`Lanes`(@d-zero/dealer)で進捗表示
 
 ## テスト
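
For reference, the error-handling contract this series converges on can be sketched outside the diffs. The TypeScript below is illustrative only, not code from the repository or the @d-zero/dealer API: `runBounded` is a hypothetical stand-in for the bounded Promise pool described in CLAUDE.md, and `expandAggregate` mirrors the fan-out that `#emitDealErrors` performs. The assumption that the dealer rejects with an `AggregateError` collecting per-worker failures is taken from the commit messages above.

```ts
// Illustrative sketch only; names are hypothetical, not repo or dealer APIs.

// Run tasks with at most `limit` in flight, collecting failures instead of
// failing fast, then reject once with an AggregateError (the shape the
// crawler's #runDeal catch handler expects per patch 1).
async function runBounded<T>(
	items: readonly T[],
	limit: number,
	work: (item: T) => Promise<void>,
): Promise<void> {
	const failures: unknown[] = [];
	let next = 0;
	// Each lane pulls the next item until the list is drained.
	const lanes = Array.from({ length: Math.min(limit, items.length) }, async () => {
		while (next < items.length) {
			const item = items[next++]!;
			try {
				await work(item);
			} catch (error) {
				failures.push(error); // keep the lane alive; report at the end
			}
		}
	});
	await Promise.all(lanes);
	if (failures.length > 0) {
		throw new AggregateError(failures, `${failures.length} task(s) failed`);
	}
}

// Expand a deal-level failure into one callback per inner error,
// normalizing non-Error values, as #emitDealErrors does.
function expandAggregate(error: unknown, onError: (e: Error) => void): void {
	const inner = error instanceof AggregateError ? error.errors : [error];
	for (const e of inner) {
		onError(e instanceof Error ? e : new Error(String(e)));
	}
}
```

Collecting failures and expanding them only at the boundary keeps one crashed worker from masking the rest, which is the behavior crawler.spec.ts pins down.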