-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathtypes.ts
More file actions
148 lines (131 loc) · 5.6 KB
/
types.ts
File metadata and controls
148 lines (131 loc) · 5.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/**
 * Base configuration that applies to all source types.
 *
 * Every concrete source configuration in this file extends this interface and
 * narrows `type` to a single literal.
 */
export interface BaseSourceConfig {
  /** Tag identifying which kind of source this entry describes. */
  type:
    | 'website'
    | 'github'
    | 'local_directory'
    | 'zendesk'
    | 'code';
  product_name: string;
  version: string;
  /** Size limit for this source. NOTE(review): units are not stated in this file — confirm. */
  max_size: number;
  /** Database settings attached to this source. */
  database_config: DatabaseConfig;
}
/**
 * Configuration specific to local directory sources.
 */
export interface LocalDirectorySourceConfig extends BaseSourceConfig {
  type: 'local_directory';
  /** Path to the local directory. */
  path: string;
  /** File extensions to include (e.g., ['.md', '.txt', '.pdf']). */
  include_extensions?: string[];
  /** File extensions to exclude. */
  exclude_extensions?: string[];
  /** Whether to traverse subdirectories. */
  recursive?: boolean;
  /** File encoding (default: 'utf8'). */
  encoding?: BufferEncoding;
  /** Optional URL prefix to rewrite file:// URLs (e.g., 'https://mydomain.com'). */
  url_rewrite_prefix?: string;
}
/**
 * Configuration specific to website sources.
 */
export interface WebsiteSourceConfig extends BaseSourceConfig {
  type: 'website';
  url: string;
  /** Optional sitemap URL to extract additional URLs to crawl. */
  sitemap_url?: string;
  /** Enable storing generated markdown in Postgres (default: false). */
  markdown_store?: boolean;
}
/**
 * Configuration specific to GitHub repo sources.
 */
export interface GithubSourceConfig extends BaseSourceConfig {
  type: 'github';
  /** Repository identifier. NOTE(review): presumably 'owner/repo' as for code sources — confirm. */
  repo: string;
  /** Optional start date. NOTE(review): expected string format is not stated in this file — confirm. */
  start_date?: string;
}
/**
 * Configuration specific to Zendesk sources.
 */
export interface ZendeskSourceConfig extends BaseSourceConfig {
  type: 'zendesk';
  /** Zendesk subdomain, e.g., 'mycompany' for mycompany.zendesk.com. */
  zendesk_subdomain: string;
  /** Zendesk user email for authentication. */
  email: string;
  /** Zendesk API token. */
  api_token: string;
  /** Whether to fetch tickets (default: true). */
  fetch_tickets?: boolean;
  /** Whether to fetch help center articles (default: true). */
  fetch_articles?: boolean;
  /** For incremental updates (default: start of current year). */
  start_date?: string;
  /** Filter tickets by status (default: ['new', 'open', 'pending', 'hold', 'solved']). */
  ticket_status?: string[];
  /** Filter tickets by priority (default: all). */
  ticket_priority?: string[];
}
/**
 * Configuration specific to code sources (local directory or GitHub repo).
 *
 * `path` and `repo` are both optional at the type level; which one applies is
 * determined by `source`, so consumers have to check it themselves.
 */
export interface CodeSourceConfig extends BaseSourceConfig {
  type: 'code';
  /** Where the code comes from. */
  source: 'local_directory' | 'github';
  /** Path to the local directory (when source=local_directory). */
  path?: string;
  /** Repo in 'owner/repo' format (when source=github). */
  repo?: string;
  /** Optional branch to clone (github only). */
  branch?: string;
  /** File extensions to include (e.g., ['.ts', '.py']). */
  include_extensions?: string[];
  /** File extensions to exclude. */
  exclude_extensions?: string[];
  /** Whether to traverse subdirectories. */
  recursive?: boolean;
  /** File encoding (default: 'utf8'). */
  encoding?: BufferEncoding;
  /** Optional URL prefix to rewrite file:// URLs. */
  url_rewrite_prefix?: string;
  /** Optional chunk size for Chonkie. */
  chunk_size?: number;
}
/**
 * Union type for all possible source configurations.
 *
 * Each member fixes `type` to a distinct literal, so checking `type` narrows
 * a value to the matching member.
 */
export type SourceConfig =
  | WebsiteSourceConfig
  | GithubSourceConfig
  | LocalDirectorySourceConfig
  | ZendeskSourceConfig
  | CodeSourceConfig;
/**
 * Database configuration.
 *
 * NOTE(review): `type` and `params` are declared as independent unions, so the
 * type system does not tie the params shape to the selected backend; consumers
 * must pick the matching params interpretation themselves.
 */
export interface DatabaseConfig {
  /** Which database backend to use. */
  type: 'sqlite' | 'qdrant';
  /** Backend-specific parameters. */
  params:
    | SqliteDatabaseParams
    | QdrantDatabaseParams;
}
/**
 * Parameters for the SQLite database backend.
 */
export interface SqliteDatabaseParams {
  /** Optional, will use default if not provided. */
  db_path?: string;
}
/**
 * Parameters for the Qdrant database backend. All fields are optional.
 */
export interface QdrantDatabaseParams {
  /** URL of the Qdrant instance. */
  qdrant_url?: string;
  /** Port of the Qdrant instance. */
  qdrant_port?: number;
  /** Name of the Qdrant collection. */
  collection_name?: string;
}
/**
 * Embedding provider configuration.
 *
 * `provider` selects the backend; the `openai` and `azure` sub-objects carry
 * the provider-specific settings and are both optional at the type level.
 */
export interface EmbeddingConfig {
  provider: 'openai' | 'azure';
  /** Embedding vector dimension. */
  dimension?: number;
  openai?: {
    /** Can also use OPENAI_API_KEY env var. */
    api_key?: string;
    /** Default: text-embedding-3-large. */
    model?: string;
  };
  azure?: {
    /** Can also use AZURE_OPENAI_KEY env var. */
    api_key?: string;
    /** Can also use AZURE_OPENAI_ENDPOINT env var. */
    endpoint?: string;
    /** Can also use AZURE_OPENAI_DEPLOYMENT_NAME env var. */
    deployment_name?: string;
    /** Default: 2024-10-21. */
    api_version?: string;
  };
}
/**
 * Postgres markdown store configuration (top-level).
 *
 * All fields are optional at the type level: the connection may be described
 * by `connection_string` or by the individual host/port/database/user/password
 * fields.
 */
export interface MarkdownStoreConfig {
  /** Full connection string, e.g., `postgres://user:pass@host:5432/db`. */
  connection_string?: string;
  host?: string;
  port?: number;
  database?: string;
  user?: string;
  /** Can use `${PG_PASSWORD}` env var substitution. */
  password?: string;
  /** Defaults to 'markdown_pages'. */
  table_name?: string;
}
/**
 * Top-level configuration: the list of sources plus optional embedding and
 * markdown-store settings.
 */
export interface Config {
  /** Sources to process. */
  sources: SourceConfig[];
  /** Optional, defaults to OpenAI. */
  embedding?: EmbeddingConfig;
  /** Optional Postgres markdown store. */
  markdown_store?: MarkdownStoreConfig;
}
/**
 * A chunk of document content together with the metadata describing it.
 */
export interface DocumentChunk {
  /** Text content of the chunk. */
  content: string;
  metadata: {
    product_name: string;
    version: string;
    branch?: string;
    repo?: string;
    heading_hierarchy: string[];
    section: string;
    chunk_id: string;
    url: string;
    hash?: string;
    /** Position of this chunk within the page (0-based). */
    chunk_index: number;
    /** Total number of chunks for this page, allows knowing if more chunks exist. */
    total_chunks: number;
  };
}
/**
 * A broken link, recorded as a source/target pair of strings.
 */
export interface BrokenLink {
  /** Where the link was found. NOTE(review): presumably the referring page URL — confirm. */
  source: string;
  /** What the link points to. NOTE(review): presumably the unreachable URL — confirm. */
  target: string;
}
/**
 * SQLite connection handle, tagged with `type: 'sqlite'`.
 */
export interface SqliteDB {
  /** Database from better-sqlite3 (left untyped here). */
  db: any;
  /** Literal tag identifying this handle as SQLite. */
  type: 'sqlite';
}
/**
 * Qdrant connection handle, tagged with `type: 'qdrant'`.
 */
export interface QdrantDB {
  /** QdrantClient (left untyped here). */
  client: any;
  /** Name of the collection this handle refers to. */
  collectionName: string;
  /** Literal tag identifying this handle as Qdrant. */
  type: 'qdrant';
}
/**
 * Any supported database connection handle; check `type` to tell the two
 * members apart.
 */
export type DatabaseConnection =
  | SqliteDB
  | QdrantDB;