diff --git a/pageindex/utils.py b/pageindex/utils.py index dc7acd88..c92f6bf0 100644 --- a/pageindex/utils.py +++ b/pageindex/utils.py @@ -603,9 +603,11 @@ def add_node_text_with_labels(node, pdf_pages): async def generate_node_summary(node, model=None): - prompt = f"""You are given a part of a document, your task is to generate a description of the partial document about what are main points covered in the partial document. + prompt = f"""You are given a section of a document. Your task is to generate a description of ONLY the section titled "{node['title']}". + Focus only on the content that belongs to this section, not the entire page text. - Partial Document Text: {node['text']} + Section Title: {node['title']} + Page Text: {node['text']} Directly return the description, do not include any other text. """ diff --git a/run_pageindex.py b/run_pageindex.py index 10702450..c5446b66 100644 --- a/run_pageindex.py +++ b/run_pageindex.py @@ -70,7 +70,9 @@ # Save results pdf_name = os.path.splitext(os.path.basename(args.pdf_path))[0] output_dir = './results' - output_file = f'{output_dir}/{pdf_name}_structure.json' + from datetime import datetime + current_time = datetime.now().strftime("%Y%m%d_%H%M%S") + output_file = f'{output_dir}/{pdf_name}_structure_{current_time}.json' os.makedirs(output_dir, exist_ok=True) with open(output_file, 'w', encoding='utf-8') as f: