Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ flowchart TD
Result --> Save["Archive にページデータ保存"]

Crawling --> Write["CrawlerOrchestrator.write()"]
Write --> ArchiveWrite["Archive.write()<br/>tmpDir を .nitpicker ファイルに圧縮(tar"]
Write --> ArchiveWrite["Archive.write()<br/>snapshot を zip 圧縮 → tmpDir を .nitpicker ファイルに tar 圧縮"]
```

---
Expand Down Expand Up @@ -185,10 +185,10 @@ deal() で選択 → progress(url) → progress セット
発見したアンカーについて:
├── recursive=true の場合:
│ ├── isLowerLayer → LinkList.add(url) # フルスクレイプ
│ └── isExternal && fetchExternal → add(url, { titleOnly: true })
│ └── isExternal && fetchExternal → add(url, { metadataOnly: true })
└── recursive=false の場合:
└── add(url, { titleOnly: true }) # HEAD のみ
└── add(url, { metadataOnly: true }) # HEAD のみ
```

### deal() コールバック内の処理順序
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ $ npx @nitpicker/cli crawl https://example.com
| `--silent` | なし | なし | 不可 | 実行中のログ出力を抑制 |
| `--diff` | なし | なし | 不可 | 差分モード |

> **URL の形式**: URL 引数および `--list` / `--list-file` で指定する URL は、プロトコルを含む完全な形式(例: `https://example.com`)である必要があります。`example.com` のようなホスト名のみの指定はエラーになります。

#### 例

```sh
Expand Down
50 changes: 50 additions & 0 deletions packages/@nitpicker/cli/src/commands/crawl.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,56 @@ describe('crawl', () => {
);
});

it('--list-file で空リストの場合、エラーを投げる', async () => {
mockReadList.mockResolvedValueOnce([]);
const { crawl } = await import('./crawl.js');

await expect(crawl([], createFlags({ listFile: '/tmp/empty.txt' }))).rejects.toThrow(
'No URLs found in list file: /tmp/empty.txt',
);
});

it('--list-file に無効な URL が含まれる場合、エラーを投げる', async () => {
mockReadList.mockResolvedValueOnce(['https://example.com', 'not-a-url']);
const { crawl } = await import('./crawl.js');

await expect(crawl([], createFlags({ listFile: '/tmp/urls.txt' }))).rejects.toThrow(
'Invalid URL: "not-a-url"',
);
});

it('--list に無効な URL が含まれる場合、エラーを投げる', async () => {
const { crawl } = await import('./crawl.js');

await expect(
crawl([], createFlags({ list: ['https://example.com', 'bad-url'] })),
).rejects.toThrow('Invalid URL: "bad-url"');
});

it('--list と args に無効な URL が含まれる場合、エラーを投げる', async () => {
const { crawl } = await import('./crawl.js');

await expect(
crawl(['invalid'], createFlags({ list: ['https://example.com'] })),
).rejects.toThrow('Invalid URL: "invalid"');
});

it('無効な URL 引数の場合、エラーを投げる', async () => {
const { crawl } = await import('./crawl.js');

await expect(crawl(['not-a-url'], createFlags())).rejects.toThrow(
'Invalid URL: "not-a-url"',
);
});

it('スペースを含む無効な URL 引数の場合、エラーを投げる', async () => {
const { crawl } = await import('./crawl.js');

await expect(crawl(['foo bar'], createFlags())).rejects.toThrow(
'Invalid URL: "foo bar"',
);
});

it('引数なし・フラグなしの場合、何も呼び出さずに正常終了する', async () => {
const { crawl } = await import('./crawl.js');
await crawl([], createFlags());
Expand Down
23 changes: 23 additions & 0 deletions packages/@nitpicker/cli/src/commands/crawl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,23 @@ async function resumeCrawl(stubFilePath: string, flags: CrawlFlags) {
}
}

/**
* Validates that all URLs in the list are parseable by the URL constructor.
* @param urls - Array of URL strings to validate
* @throws {Error} If any URL is invalid
*/
/**
 * Ensures every entry in the given list is a syntactically valid URL.
 *
 * Validity is defined by the WHATWG `URL` constructor: an entry that the
 * constructor rejects (e.g. a bare hostname without a protocol) is invalid.
 *
 * @param urls - URL strings to check
 * @throws {Error} On the first invalid entry, with a message naming it
 */
function validateUrls(urls: readonly string[]) {
  const firstInvalid = urls.find((candidate) => {
    try {
      new URL(candidate);
      return false;
    } catch {
      return true;
    }
  });
  if (firstInvalid !== undefined) {
    throw new Error(
      `Invalid URL: "${firstInvalid}". Please provide a valid URL (e.g., https://example.com)`,
    );
  }
}

/**
* Main entry point for the `crawl` CLI command.
*
Expand Down Expand Up @@ -300,20 +317,26 @@ export async function crawl(args: string[], flags: CrawlFlags) {

if (flags.listFile) {
const list = await readList(path.resolve(process.cwd(), flags.listFile));
if (list.length === 0) {
throw new Error(`No URLs found in list file: ${flags.listFile}`);
}
validateUrls(list);
flags.list = list;
await startCrawl(list, flags);
return;
}

if (flags.list && flags.list.length > 0) {
const pageList = [...flags.list, ...args];
validateUrls(pageList);
await startCrawl(pageList, flags);
return;
}

const siteUrl = args[0];

if (siteUrl) {
validateUrls([siteUrl]);
await startCrawl([siteUrl], flags);
return;
}
Expand Down
1 change: 1 addition & 0 deletions packages/@nitpicker/cli/src/crawl/debug.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import debug from 'debug';

/** Debug logger for the Nitpicker CLI, scoped to the `Nitpicker:CLI` namespace. */
export const log = debug('Nitpicker').extend('CLI');

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,27 @@ describe('mapFlagsToCrawlConfig', () => {
]);
});

it('scope のカンマ区切りで空文字列をフィルタリングする', () => {
const result = mapFlagsToCrawlConfig({
scope: 'a,,b',
});
expect(result.scope).toEqual(['a', 'b']);
});

it('scope のカンマ区切りで空白のみの要素をフィルタリングする', () => {
const result = mapFlagsToCrawlConfig({
scope: 'a, , ,b',
});
expect(result.scope).toEqual(['a', 'b']);
});

it('scope が全て空文字列の場合、空配列を返す', () => {
const result = mapFlagsToCrawlConfig({
scope: ',,,',
});
expect(result.scope).toEqual([]);
});

it('scope が未指定の場合 undefined を返す', () => {
const result = mapFlagsToCrawlConfig({});
expect(result.scope).toBeUndefined();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ export function mapFlagsToCrawlConfig(flags: CrawlFlagInput) {
excludes: flags.exclude,
excludeKeywords: flags.excludeKeyword,
excludeUrls: flags.excludeUrl,
scope: flags.scope?.split(',').map((s) => s.trim()),
scope: flags.scope
?.split(',')
.map((s) => s.trim())
.filter(Boolean),
};
}
Loading