49 changes: 46 additions & 3 deletions README.md
@@ -147,14 +147,49 @@ You can follow these steps to generate a PageIndex tree from a PDF document.
pip3 install --upgrade -r requirements.txt
```

### 2. Set your OpenAI API key
### 2. Set your API key

Create a `.env` file in the root directory and add your API key:
PageIndex now supports multiple LLM providers via [LiteLLM](https://docs.litellm.ai/). Create a `.env` file in the root directory and add your API key:

**OpenAI (default):**
```bash
OPENAI_API_KEY=your_openai_key_here
# or
CHATGPT_API_KEY=your_openai_key_here
```

**Anthropic Claude:**
```bash
ANTHROPIC_API_KEY=your_anthropic_key_here
```

**Google Gemini:**
```bash
GEMINI_API_KEY=your_google_key_here
```

**Azure OpenAI:**
```bash
AZURE_API_KEY=your_azure_key_here
AZURE_API_BASE=your_azure_endpoint
AZURE_API_VERSION=2024-02-01
```

**AWS Bedrock:**
```bash
AWS_ACCESS_KEY_ID=your_access_key
AWS_SECRET_ACCESS_KEY=your_secret_key
AWS_REGION_NAME=us-east-1
```

**Groq:**
```bash
GROQ_API_KEY=your_groq_key_here
```

**Ollama (local):**
No API key is needed; just make sure the Ollama server is running locally (see the sketch below).
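
A minimal local setup might look like this (a sketch, assuming the `ollama` CLI is installed and `llama3` is the model you want):

```bash
ollama pull llama3   # download the model (one-time)
ollama serve         # start the local server if it is not already running
```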

### 3. Run PageIndex on your PDF

```bash
@@ -167,7 +202,15 @@ python3 run_pageindex.py --pdf_path /path/to/your/document.pdf
You can customize the processing with additional optional arguments:

```
--model OpenAI model to use (default: gpt-4o-2024-11-20)
--model LLM model to use (default: gpt-4o-2024-11-20)
Examples:
- OpenAI: gpt-4o, gpt-4-turbo
- Claude: claude-3-opus-20240229, claude-3-sonnet-20240229
- Gemini: gemini/gemini-pro, gemini/gemini-1.5-pro
- Azure: azure/your-deployment-name
- Bedrock: bedrock/anthropic.claude-3-opus-20240229-v1:0
- Groq: groq/llama-3.1-70b-versatile
- Ollama: ollama/llama3
--toc-check-pages Pages to check for table of contents (default: 20)
--max-pages-per-node Max pages per node (default: 10)
--max-tokens-per-node Max tokens per node (default: 20000)
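```

For example, to build the tree with Claude instead of the default OpenAI model (assuming `ANTHROPIC_API_KEY` is set in your `.env`; the model string is one of the documented examples):

```bash
python3 run_pageindex.py --pdf_path /path/to/your/document.pdf --model claude-3-sonnet-20240229
```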
35 changes: 35 additions & 0 deletions pageindex/config.yaml
@@ -1,3 +1,38 @@
# PageIndex Configuration
#
# Model Configuration:
# PageIndex now supports multiple LLM providers via LiteLLM.
# Set the model string according to your preferred provider:
#
# OpenAI (default):
# model: "gpt-4o-2024-11-20" or "gpt-4o" or "gpt-4-turbo"
# Env var: OPENAI_API_KEY or CHATGPT_API_KEY
#
# Anthropic Claude:
# model: "claude-3-opus-20240229" or "claude-3-sonnet-20240229"
# Env var: ANTHROPIC_API_KEY
#
# Google Gemini:
# model: "gemini/gemini-pro" or "gemini/gemini-1.5-pro"
# Env var: GEMINI_API_KEY
#
# Azure OpenAI:
# model: "azure/your-deployment-name"
# Env vars: AZURE_API_KEY, AZURE_API_BASE, AZURE_API_VERSION
#
# AWS Bedrock:
# model: "bedrock/anthropic.claude-3-opus-20240229-v1:0"
# Env vars: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME
#
# Groq:
# model: "groq/llama-3.1-70b-versatile"
# Env var: GROQ_API_KEY
#
# Ollama (local):
# model: "ollama/llama3"
#
# For more providers, see: https://docs.litellm.ai/docs/providers

model: "gpt-4o-2024-11-20"
toc_check_page_num: 20
max_page_num_each_node: 10
149 changes: 114 additions & 35 deletions pageindex/utils.py
@@ -1,5 +1,5 @@
import tiktoken
import openai
import litellm
import logging
import os
from datetime import datetime
@@ -17,30 +17,82 @@
from pathlib import Path
from types import SimpleNamespace as config

CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY")
# Support multiple API key environment variables for different providers
CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY") or os.getenv("OPENAI_API_KEY")

# Configure LiteLLM to use environment variables for different providers
# Users can set: OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, etc.
# See: https://docs.litellm.ai/docs/providers

def count_tokens(text, model=None):
"""
Count tokens in text using the appropriate tokenizer for the model.
Uses tiktoken for OpenAI models and LiteLLM's token counter for other providers.
"""
if not text:
return 0
enc = tiktoken.encoding_for_model(model)
tokens = enc.encode(text)
return len(tokens)

# Check if it's an OpenAI model (no prefix or openai/ prefix)
model_lower = model.lower() if model else ""
is_openai_model = (
not "/" in model or
model_lower.startswith("openai/") or
model_lower.startswith("gpt-") or
model_lower.startswith("o1-") or
model_lower.startswith("o3-")
)

if is_openai_model:
# Use tiktoken for OpenAI models
try:
# Strip openai/ prefix if present
clean_model = model.replace("openai/", "") if model else "gpt-4o"
enc = tiktoken.encoding_for_model(clean_model)
tokens = enc.encode(text)
return len(tokens)
except KeyError:
# Fallback to cl100k_base encoding for unknown OpenAI models
enc = tiktoken.get_encoding("cl100k_base")
tokens = enc.encode(text)
return len(tokens)
else:
# Use LiteLLM's token counter for other providers
try:
return litellm.token_counter(model=model, text=text)
except Exception:
# Fallback to approximate counting (4 chars per token)
return len(text) // 4
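
# Illustrative usage (a sketch; model strings are examples, and the routing
# shown is what the heuristic above selects):
#   count_tokens("hello world", "gpt-4o")                # tiktoken path
#   count_tokens("hello world", "gemini/gemini-1.5-pro") # litellm.token_counter path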

def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
def ChatGPT_API_with_finish_reason(model, prompt, api_key=None, chat_history=None):
"""
Synchronous chat completion API with finish reason tracking.
Uses LiteLLM to support multiple LLM providers.

Args:
model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
prompt: User prompt
api_key: API key (optional, uses environment variables if not provided)
chat_history: Previous conversation history

Returns:
Tuple of (response_content, finish_reason)
"""
max_retries = 10
client = openai.OpenAI(api_key=api_key)

# Build messages list
if chat_history:
messages = chat_history.copy()
messages.append({"role": "user", "content": prompt})
else:
messages = [{"role": "user", "content": prompt}]

for i in range(max_retries):
try:
if chat_history:
messages = chat_history
messages.append({"role": "user", "content": prompt})
else:
messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
response = litellm.completion(
model=model,
messages=messages,
temperature=0,
api_key=api_key,
)
if response.choices[0].finish_reason == "length":
return response.choices[0].message.content, "max_output_reached"
@@ -51,53 +103,80 @@ def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_
print('************* Retrying *************')
logging.error(f"Error: {e}")
if i < max_retries - 1:
time.sleep(1) # Wait for 1秒 before retrying
time.sleep(1) # Wait for 1s before retrying
else:
logging.error('Max retries reached for prompt: ' + prompt)
return "Error"
return "Error", "error"



def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
def ChatGPT_API(model, prompt, api_key=None, chat_history=None):
"""
Synchronous chat completion API.
Uses LiteLLM to support multiple LLM providers.

Args:
model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
prompt: User prompt
api_key: API key (optional, uses environment variables if not provided)
chat_history: Previous conversation history

Returns:
Response content string
"""
max_retries = 10
client = openai.OpenAI(api_key=api_key)

# Build messages list
if chat_history:
messages = chat_history.copy()
messages.append({"role": "user", "content": prompt})
else:
messages = [{"role": "user", "content": prompt}]

for i in range(max_retries):
try:
if chat_history:
messages = chat_history
messages.append({"role": "user", "content": prompt})
else:
messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
response = litellm.completion(
model=model,
messages=messages,
temperature=0,
api_key=api_key,
)

return response.choices[0].message.content
except Exception as e:
print('************* Retrying *************')
logging.error(f"Error: {e}")
if i < max_retries - 1:
time.sleep(1) # Wait for 1秒 before retrying
time.sleep(1) # Wait for 1s before retrying
else:
logging.error('Max retries reached for prompt: ' + prompt)
return "Error"


async def ChatGPT_API_async(model, prompt, api_key=CHATGPT_API_KEY):
async def ChatGPT_API_async(model, prompt, api_key=None):
"""
Asynchronous chat completion API.
Uses LiteLLM to support multiple LLM providers.

Args:
model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
prompt: User prompt
api_key: API key (optional, uses environment variables if not provided)

Returns:
Response content string
"""
max_retries = 10
messages = [{"role": "user", "content": prompt}]

for i in range(max_retries):
try:
async with openai.AsyncOpenAI(api_key=api_key) as client:
response = await client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
)
return response.choices[0].message.content
response = await litellm.acompletion(
model=model,
messages=messages,
temperature=0,
api_key=api_key,
)
return response.choices[0].message.content
except Exception as e:
print('************* Retrying *************')
logging.error(f"Error: {e}")
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1,4 +1,5 @@
openai==1.101.0
litellm>=1.0.0
openai>=1.0.0
pymupdf==1.26.4
PyPDF2==3.0.1
python-dotenv==1.1.0