From 9b41a7408b7c020d6181aaee926c5ee612ffe0c3 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 09:07:57 +0000 Subject: [PATCH 1/5] =?UTF-8?q?fix:=20crawl=20=E3=82=B3=E3=83=9E=E3=83=B3?= =?UTF-8?q?=E3=83=89=E3=81=AE=E5=85=A5=E5=8A=9B=E3=83=90=E3=83=AA=E3=83=87?= =?UTF-8?q?=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E5=BC=B7=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - --scope フラグのカンマ区切りで空文字列をフィルタリング - --list-file で空リストの場合にエラーメッセージを表示 - URL 引数を new URL() で検証し、無効な場合にエラーを表示 Closes #17 https://claude.ai/code/session_01JZboa1PjwHA9voNa2Gx8KZ --- .../@nitpicker/cli/src/commands/crawl.spec.ts | 25 +++++++++++++++++++ packages/@nitpicker/cli/src/commands/crawl.ts | 10 ++++++++ .../crawl/map-flags-to-crawl-config.spec.ts | 14 +++++++++++ .../src/crawl/map-flags-to-crawl-config.ts | 5 +++- 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/packages/@nitpicker/cli/src/commands/crawl.spec.ts b/packages/@nitpicker/cli/src/commands/crawl.spec.ts index f1fe2b4..6401961 100644 --- a/packages/@nitpicker/cli/src/commands/crawl.spec.ts +++ b/packages/@nitpicker/cli/src/commands/crawl.spec.ts @@ -337,6 +337,31 @@ describe('crawl', () => { ); }); + it('--list-file で空リストの場合、エラーを投げる', async () => { + mockReadList.mockResolvedValueOnce([]); + const { crawl } = await import('./crawl.js'); + + await expect(crawl([], createFlags({ listFile: '/tmp/empty.txt' }))).rejects.toThrow( + 'No URLs found in list file: /tmp/empty.txt', + ); + }); + + it('無効な URL 引数の場合、エラーを投げる', async () => { + const { crawl } = await import('./crawl.js'); + + await expect(crawl(['not-a-url'], createFlags())).rejects.toThrow( + 'Invalid URL: "not-a-url"', + ); + }); + + it('スペースを含む無効な URL 引数の場合、エラーを投げる', async () => { + const { crawl } = await import('./crawl.js'); + + await expect(crawl(['foo bar'], createFlags())).rejects.toThrow( + 'Invalid URL: "foo bar"', + ); + }); + it('引数なし・フラグなしの場合、何も呼び出さずに正常終了する', async () => { const { crawl } = await import('./crawl.js'); await crawl([], createFlags()); diff --git a/packages/@nitpicker/cli/src/commands/crawl.ts b/packages/@nitpicker/cli/src/commands/crawl.ts index 1796c45..0e19036 100644 --- a/packages/@nitpicker/cli/src/commands/crawl.ts +++ b/packages/@nitpicker/cli/src/commands/crawl.ts @@ -300,6 +300,9 @@ export async function crawl(args: string[], flags: CrawlFlags) { if (flags.listFile) { const list = await readList(path.resolve(process.cwd(), flags.listFile)); + if (list.length === 0) { + throw new Error(`No URLs found in list file: ${flags.listFile}`); + } flags.list = list; await startCrawl(list, flags); return; @@ -314,6 +317,13 @@ export async function crawl(args: string[], flags: CrawlFlags) { const siteUrl = args[0]; if (siteUrl) { + try { + new URL(siteUrl); + } catch { + throw new Error( + `Invalid URL: "${siteUrl}". Please provide a valid URL (e.g., https://example.com)`, + ); + } await startCrawl([siteUrl], flags); return; } diff --git a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts index 85b476c..a4fcdbb 100644 --- a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts +++ b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts @@ -35,6 +35,20 @@ describe('mapFlagsToCrawlConfig', () => { ]); }); + it('scope のカンマ区切りで空文字列をフィルタリングする', () => { + const result = mapFlagsToCrawlConfig({ + scope: 'a,,b', + }); + expect(result.scope).toEqual(['a', 'b']); + }); + + it('scope のカンマ区切りで空白のみの要素をフィルタリングする', () => { + const result = mapFlagsToCrawlConfig({ + scope: 'a, , ,b', + }); + expect(result.scope).toEqual(['a', 'b']); + }); + it('scope が未指定の場合 undefined を返す', () => { const result = mapFlagsToCrawlConfig({}); expect(result.scope).toBeUndefined(); diff --git a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts index 24e255b..69ab0a0 100644 --- a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts +++ b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts @@ -29,6 +29,9 @@ export function mapFlagsToCrawlConfig(flags: CrawlFlagInput) { excludes: flags.exclude, excludeKeywords: flags.excludeKeyword, excludeUrls: flags.excludeUrl, - scope: flags.scope?.split(',').map((s) => s.trim()), + scope: flags.scope + ?.split(',') + .map((s) => s.trim()) + .filter(Boolean), }; } From 3e0c1f18d933fc2d03d63e33f76a752ada354e3b Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:19:34 +0000 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20URL=20=E3=83=90=E3=83=AA=E3=83=87?= =?UTF-8?q?=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E5=85=A8=E5=85=A5?= =?UTF-8?q?=E5=8A=9B=E3=83=91=E3=82=B9=E3=81=AB=E9=81=A9=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - validateUrls ヘルパーを抽出し、--list / --list-file / 単一URL引数の全パスで適用 - --list-file 内の無効URLを検出するテストを追加 - --list 内の無効URLを検出するテストを追加 - scope 全空文字列のエッジケーステストを追加 https://claude.ai/code/session_01JZboa1PjwHA9voNa2Gx8KZ --- .../@nitpicker/cli/src/commands/crawl.spec.ts | 25 +++++++++++++++++ packages/@nitpicker/cli/src/commands/crawl.ts | 27 ++++++++++++++----- .../crawl/map-flags-to-crawl-config.spec.ts | 7 +++++ 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/packages/@nitpicker/cli/src/commands/crawl.spec.ts b/packages/@nitpicker/cli/src/commands/crawl.spec.ts index 6401961..536cdad 100644 --- a/packages/@nitpicker/cli/src/commands/crawl.spec.ts +++ b/packages/@nitpicker/cli/src/commands/crawl.spec.ts @@ -346,6 +346,31 @@ describe('crawl', () => { ); }); + it('--list-file に無効な URL が含まれる場合、エラーを投げる', async () => { + mockReadList.mockResolvedValueOnce(['https://example.com', 'not-a-url']); + const { crawl } = await import('./crawl.js'); + + await expect(crawl([], createFlags({ listFile: '/tmp/urls.txt' }))).rejects.toThrow( + 'Invalid URL: "not-a-url"', + ); + }); + + it('--list に無効な URL が含まれる場合、エラーを投げる', async () => { + const { crawl } = await import('./crawl.js'); + + await expect( + crawl([], createFlags({ list: ['https://example.com', 'bad-url'] })), + ).rejects.toThrow('Invalid URL: "bad-url"'); + }); + + it('--list と args に無効な URL が含まれる場合、エラーを投げる', async () => { + const { crawl } = await import('./crawl.js'); + + await expect( + crawl(['invalid'], createFlags({ list: ['https://example.com'] })), + ).rejects.toThrow('Invalid URL: "invalid"'); + }); + it('無効な URL 引数の場合、エラーを投げる', async () => { const { crawl } = await import('./crawl.js'); diff --git a/packages/@nitpicker/cli/src/commands/crawl.ts b/packages/@nitpicker/cli/src/commands/crawl.ts index 0e19036..0b774b4 100644 --- a/packages/@nitpicker/cli/src/commands/crawl.ts +++ b/packages/@nitpicker/cli/src/commands/crawl.ts @@ -253,6 +253,23 @@ async function resumeCrawl(stubFilePath: string, flags: CrawlFlags) { } } +/** + * Validates that all URLs in the list are parseable by the URL constructor. + * @param urls - Array of URL strings to validate + * @throws {Error} If any URL is invalid + */ +function validateUrls(urls: readonly string[]) { + for (const url of urls) { + try { + new URL(url); + } catch { + throw new Error( + `Invalid URL: "${url}". Please provide a valid URL (e.g., https://example.com)`, + ); + } + } +} + /** * Main entry point for the `crawl` CLI command. * @@ -303,6 +320,7 @@ export async function crawl(args: string[], flags: CrawlFlags) { if (list.length === 0) { throw new Error(`No URLs found in list file: ${flags.listFile}`); } + validateUrls(list); flags.list = list; await startCrawl(list, flags); return; @@ -310,6 +328,7 @@ export async function crawl(args: string[], flags: CrawlFlags) { if (flags.list && flags.list.length > 0) { const pageList = [...flags.list, ...args]; + validateUrls(pageList); await startCrawl(pageList, flags); return; } @@ -317,13 +336,7 @@ export async function crawl(args: string[], flags: CrawlFlags) { const siteUrl = args[0]; if (siteUrl) { - try { - new URL(siteUrl); - } catch { - throw new Error( - `Invalid URL: "${siteUrl}". Please provide a valid URL (e.g., https://example.com)`, - ); - } + validateUrls([siteUrl]); await startCrawl([siteUrl], flags); return; } diff --git a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts index a4fcdbb..7d1cd78 100644 --- a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts +++ b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts @@ -49,6 +49,13 @@ describe('mapFlagsToCrawlConfig', () => { expect(result.scope).toEqual(['a', 'b']); }); + it('scope が全て空文字列の場合、空配列を返す', () => { + const result = mapFlagsToCrawlConfig({ + scope: ',,,', + }); + expect(result.scope).toEqual([]); + }); + it('scope が未指定の場合 undefined を返す', () => { const result = mapFlagsToCrawlConfig({}); expect(result.scope).toBeUndefined(); From 47713a54e951f0577df43b74e968b0002fdc02d2 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:22:55 +0000 Subject: [PATCH 3/5] =?UTF-8?q?docs:=20URL=20=E3=83=90=E3=83=AA=E3=83=87?= =?UTF-8?q?=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E4=BB=95=E6=A7=98=E3=82=92?= =?UTF-8?q?=20README=20=E3=81=AB=E8=BF=BD=E8=A8=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit crawl コマンドの URL 引数にプロトコル付き完全形式が必要であることを明記。 https://claude.ai/code/session_01JZboa1PjwHA9voNa2Gx8KZ --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 37e001b..0a67180 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ $ npx @nitpicker/cli crawl https://example.com | `--silent` | なし | なし | 不可 | 実行中のログ出力を抑制 | | `--diff` | なし | なし | 不可 | 差分モード | +> **URL の形式**: URL 引数および `--list` / `--list-file` で指定する URL は、プロトコルを含む完全な形式(例: `https://example.com`)である必要があります。`example.com` のようなホスト名のみの指定はエラーになります。 + #### 例 ```sh From 19b7b4345160e734aa123a36018f0e095730380b Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:36:17 +0000 Subject: [PATCH 4/5] =?UTF-8?q?docs:=20crawl/debug.ts=20=E3=81=AE=20log=20?= =?UTF-8?q?=E5=AE=9A=E6=95=B0=E3=81=AB=20JSDoc=20=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit エクスポートされたトップレベル定数に JSDoc が欠落していたため追記。 https://claude.ai/code/session_01JZboa1PjwHA9voNa2Gx8KZ --- packages/@nitpicker/cli/src/crawl/debug.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/@nitpicker/cli/src/crawl/debug.ts b/packages/@nitpicker/cli/src/crawl/debug.ts index b5d0a4b..9343582 100644 --- a/packages/@nitpicker/cli/src/crawl/debug.ts +++ b/packages/@nitpicker/cli/src/crawl/debug.ts @@ -1,5 +1,6 @@ import debug from 'debug'; +/** Debug logger for the Nitpicker CLI, scoped to the `Nitpicker:CLI` namespace. */ export const log = debug('Nitpicker').extend('CLI'); /** From 4eb41897149e091dbfaef525e7f42a0fe5b0d9dc Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 10:39:38 +0000 Subject: [PATCH 5/5] =?UTF-8?q?docs:=20ARCHITECTURE.md=20=E3=81=AE?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=81=A8=E3=81=AE=E4=B8=8D=E4=B8=80=E8=87=B4?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - titleOnly → metadataOnly に用語を修正(実装に合わせる) - Archive.write() の説明に snapshot の zip 圧縮ステップを追記 https://claude.ai/code/session_01JZboa1PjwHA9voNa2Gx8KZ --- ARCHITECTURE.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 37a9af0..4fbe6be 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -63,7 +63,7 @@ flowchart TD Result --> Save["Archive にページデータ保存"] Crawling --> Write["CrawlerOrchestrator.write()"] - Write --> ArchiveWrite["Archive.write()
tmpDir を .nitpicker ファイルに圧縮(tar)"] + Write --> ArchiveWrite["Archive.write()
snapshot を zip 圧縮 → tmpDir を .nitpicker ファイルに tar 圧縮"] ``` --- @@ -185,10 +185,10 @@ deal() で選択 → progress(url) → progress セット 発見したアンカーについて: ├── recursive=true の場合: │ ├── isLowerLayer → LinkList.add(url) # フルスクレイプ -│ └── isExternal && fetchExternal → add(url, { titleOnly: true }) +│ └── isExternal && fetchExternal → add(url, { metadataOnly: true }) │ └── recursive=false の場合: - └── add(url, { titleOnly: true }) # HEAD のみ + └── add(url, { metadataOnly: true }) # HEAD のみ ``` ### deal() コールバック内の処理順序