Improve reference formatting to use actual titles and URLs instead of generic placeholders

This commit is contained in:
Steve White 2025-02-28 16:36:39 -06:00
parent 8cd2f900c1
commit 0e0d4eb9b2
1 changed file with 18 additions and 9 deletions

View File

@@ -359,8 +359,12 @@ class ReportSynthesizer:
# Prepare the context with all extracted information
context = ""
for i, chunk in enumerate(processed_chunks):
context += f"Document {i+1}: {chunk.get('title', 'Untitled')}\n"
context += f"Source: {chunk.get('url', 'Unknown')}\n"
title = chunk.get('title', 'Untitled')
url = chunk.get('url', 'Unknown')
context += f"Document {i+1}:\n"
context += f"Title: {title}\n"
context += f"URL: {url}\n"
context += f"Extracted information:\n{chunk.get('extracted_info', '')}\n\n"
# Get template modifier based on detail level and query type
@@ -371,12 +375,12 @@ class ReportSynthesizer:
reference_instructions = """
When including references, use a consistent format:
[1] Author(s). Title. Publication. Year. URL (if available)
[1] Title of the Article/Page. URL
If author information is not available, use the website or organization name.
DO NOT use generic placeholders like "Document 1" for references.
Always use the actual title and URL from the source documents.
Always ensure the References section is complete and properly formatted at the end of the report.
Do not use placeholders like "Document X" for references - provide actual titles.
Ensure all references are properly closed with brackets and there are no incomplete references.
"""
@@ -422,14 +426,19 @@ class ReportSynthesizer:
# Generate just the references section
ref_messages = [
{"role": "system", "content": "You are an expert at formatting reference lists. Create a properly formatted References section for the following documents:"},
{"role": "system", "content": """You are an expert at formatting reference lists. Create a properly formatted References section for the following documents.
IMPORTANT:
1. Use the actual title and URL from each document
2. DO NOT use generic placeholders like "Document 1"
3. Format each reference as: [1] Title of the Article/Page. URL
4. Make sure all references are complete and properly formatted
5. Number the references sequentially starting from 1"""},
{"role": "user", "content": f"""Here are the documents used in the report:
{context}
Create a complete, properly formatted References section in Markdown format.
Use the format: [1] Title. Source URL
Make sure all references are complete and properly formatted."""}
Create a complete, properly formatted References section in Markdown format."""}
]
references = await self.generate_completion(ref_messages)