diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 37a9af0..4fbe6be 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -63,7 +63,7 @@ flowchart TD Result --> Save["Archive にページデータ保存"] Crawling --> Write["CrawlerOrchestrator.write()"] - Write --> ArchiveWrite["Archive.write()
tmpDir を .nitpicker ファイルに圧縮(tar)"] + Write --> ArchiveWrite["Archive.write()
snapshot を zip 圧縮 → tmpDir を .nitpicker ファイルに tar 圧縮"] ``` --- @@ -185,10 +185,10 @@ deal() で選択 → progress(url) → progress セット 発見したアンカーについて: ├── recursive=true の場合: │ ├── isLowerLayer → LinkList.add(url) # フルスクレイプ -│ └── isExternal && fetchExternal → add(url, { titleOnly: true }) +│ └── isExternal && fetchExternal → add(url, { metadataOnly: true }) │ └── recursive=false の場合: - └── add(url, { titleOnly: true }) # HEAD のみ + └── add(url, { metadataOnly: true }) # HEAD のみ ``` ### deal() コールバック内の処理順序 diff --git a/README.md b/README.md index 37e001b..0a67180 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ $ npx @nitpicker/cli crawl https://example.com | `--silent` | なし | なし | 不可 | 実行中のログ出力を抑制 | | `--diff` | なし | なし | 不可 | 差分モード | +> **URL の形式**: URL 引数および `--list` / `--list-file` で指定する URL は、プロトコルを含む完全な形式(例: `https://example.com`)である必要があります。`example.com` のようなホスト名のみの指定はエラーになります。 + #### 例 ```sh diff --git a/packages/@nitpicker/cli/src/commands/crawl.spec.ts b/packages/@nitpicker/cli/src/commands/crawl.spec.ts index f1fe2b4..536cdad 100644 --- a/packages/@nitpicker/cli/src/commands/crawl.spec.ts +++ b/packages/@nitpicker/cli/src/commands/crawl.spec.ts @@ -337,6 +337,56 @@ describe('crawl', () => { ); }); + it('--list-file で空リストの場合、エラーを投げる', async () => { + mockReadList.mockResolvedValueOnce([]); + const { crawl } = await import('./crawl.js'); + + await expect(crawl([], createFlags({ listFile: '/tmp/empty.txt' }))).rejects.toThrow( + 'No URLs found in list file: /tmp/empty.txt', + ); + }); + + it('--list-file に無効な URL が含まれる場合、エラーを投げる', async () => { + mockReadList.mockResolvedValueOnce(['https://example.com', 'not-a-url']); + const { crawl } = await import('./crawl.js'); + + await expect(crawl([], createFlags({ listFile: '/tmp/urls.txt' }))).rejects.toThrow( + 'Invalid URL: "not-a-url"', + ); + }); + + it('--list に無効な URL が含まれる場合、エラーを投げる', async () => { + const { crawl } = await import('./crawl.js'); + + await expect( + crawl([], createFlags({ list: 
/**
 * Reports whether a string is an absolute `http:` or `https:` URL.
 *
 * Successful parsing alone is not sufficient: the URL constructor accepts
 * any `scheme:rest` string, so host-and-port input such as `localhost:3000`
 * or `example.com:8080` parses without error, with the host name taken as
 * the scheme.
 * @param url - Candidate URL string
 * @returns `true` when the string parses and its protocol is `http:` or `https:`
 */
function isHttpUrl(url: string): boolean {
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		// Not parseable at all (e.g. "not-a-url", "foo bar").
		return false;
	}
	return parsed.protocol === 'http:' || parsed.protocol === 'https:';
}

/**
 * Validates that every URL in the list is a complete web URL, i.e. it is
 * parseable by the URL constructor and uses the `http:` or `https:` protocol.
 *
 * Validation stops at the first offending entry; an empty list is accepted.
 * @param urls - Array of URL strings to validate
 * @throws {Error} If any URL is unparseable or lacks an http(s) protocol.
 *   The message quotes the offending input.
 */
function validateUrls(urls: readonly string[]): void {
	for (const url of urls) {
		if (!isHttpUrl(url)) {
			throw new Error(
				`Invalid URL: "${url}". Please provide a valid URL (e.g., https://example.com)`,
			);
		}
	}
}
* @@ -300,6 +317,10 @@ export async function crawl(args: string[], flags: CrawlFlags) { if (flags.listFile) { const list = await readList(path.resolve(process.cwd(), flags.listFile)); + if (list.length === 0) { + throw new Error(`No URLs found in list file: ${flags.listFile}`); + } + validateUrls(list); flags.list = list; await startCrawl(list, flags); return; @@ -307,6 +328,7 @@ export async function crawl(args: string[], flags: CrawlFlags) { if (flags.list && flags.list.length > 0) { const pageList = [...flags.list, ...args]; + validateUrls(pageList); await startCrawl(pageList, flags); return; } @@ -314,6 +336,7 @@ export async function crawl(args: string[], flags: CrawlFlags) { const siteUrl = args[0]; if (siteUrl) { + validateUrls([siteUrl]); await startCrawl([siteUrl], flags); return; } diff --git a/packages/@nitpicker/cli/src/crawl/debug.ts b/packages/@nitpicker/cli/src/crawl/debug.ts index b5d0a4b..9343582 100644 --- a/packages/@nitpicker/cli/src/crawl/debug.ts +++ b/packages/@nitpicker/cli/src/crawl/debug.ts @@ -1,5 +1,6 @@ import debug from 'debug'; +/** Debug logger for the Nitpicker CLI, scoped to the `Nitpicker:CLI` namespace. 
*/ export const log = debug('Nitpicker').extend('CLI'); /** diff --git a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts index 85b476c..7d1cd78 100644 --- a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts +++ b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.spec.ts @@ -35,6 +35,27 @@ describe('mapFlagsToCrawlConfig', () => { ]); }); + it('scope のカンマ区切りで空文字列をフィルタリングする', () => { + const result = mapFlagsToCrawlConfig({ + scope: 'a,,b', + }); + expect(result.scope).toEqual(['a', 'b']); + }); + + it('scope のカンマ区切りで空白のみの要素をフィルタリングする', () => { + const result = mapFlagsToCrawlConfig({ + scope: 'a, , ,b', + }); + expect(result.scope).toEqual(['a', 'b']); + }); + + it('scope が全て空文字列の場合、空配列を返す', () => { + const result = mapFlagsToCrawlConfig({ + scope: ',,,', + }); + expect(result.scope).toEqual([]); + }); + it('scope が未指定の場合 undefined を返す', () => { const result = mapFlagsToCrawlConfig({}); expect(result.scope).toBeUndefined(); diff --git a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts index 24e255b..69ab0a0 100644 --- a/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts +++ b/packages/@nitpicker/cli/src/crawl/map-flags-to-crawl-config.ts @@ -29,6 +29,9 @@ export function mapFlagsToCrawlConfig(flags: CrawlFlagInput) { excludes: flags.exclude, excludeKeywords: flags.excludeKeyword, excludeUrls: flags.excludeUrl, - scope: flags.scope?.split(',').map((s) => s.trim()), + scope: flags.scope + ?.split(',') + .map((s) => s.trim()) + .filter(Boolean), }; }