Fix template retrieval in report synthesis for all detail levels and query types

This commit is contained in:
Steve White 2025-03-11 20:11:01 -05:00
parent cf7d7f6966
commit a72d4ff35f
2 changed files with 148 additions and 4 deletions

View File

@ -16,11 +16,14 @@ from litellm import completion
from config.config import get_config from config.config import get_config
from report.report_detail_levels import get_report_detail_level_manager, DetailLevel from report.report_detail_levels import get_report_detail_level_manager, DetailLevel
from report.report_templates import QueryType, DetailLevel as TemplateDetailLevel, ReportTemplateManager, ReportTemplate
# Configure logging # Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Note: ReportTemplateManager and ReportTemplate are now imported from report_templates.py
class ReportSynthesizer: class ReportSynthesizer:
""" """
Report synthesizer for the intelligent research system. Report synthesizer for the intelligent research system.
@ -48,6 +51,10 @@ class ReportSynthesizer:
# Set up LiteLLM with the appropriate provider # Set up LiteLLM with the appropriate provider
self._setup_provider() self._setup_provider()
# Initialize template manager
self.template_manager = ReportTemplateManager()
self.template_manager.initialize_default_templates()
# Flag to process <thinking> tags in model output # Flag to process <thinking> tags in model output
self.process_thinking_tags = False self.process_thinking_tags = False
@ -344,6 +351,33 @@ class ReportSynthesizer:
Analyze the reliability and significance of the information. Analyze the reliability and significance of the information.
Format your response with clearly organized sections and detailed bullet points.""" Format your response with clearly organized sections and detailed bullet points."""
def _get_template_from_strings(self, query_type_str: str, detail_level_str: str) -> Optional[ReportTemplate]:
    """
    Look up a report template from plain-string query type and detail level.

    Both strings are converted to their enum counterparts (``QueryType`` and
    ``TemplateDetailLevel``) and the resulting pair is handed to
    ``self.template_manager.get_template``.

    Args:
        query_type_str: String value of query type (factual, exploratory, comparative)
        detail_level_str: String value of detail level (brief, standard, detailed, comprehensive)

    Returns:
        The ReportTemplate returned by the template manager, or None when the
        manager has no match or a ValueError/KeyError is raised during lookup.
    """
    try:
        # Enum conversion happens first; an unrecognised string is expected to
        # surface as one of the exceptions handled below.
        lookup_key = (QueryType(query_type_str), TemplateDetailLevel(detail_level_str))
        found = self.template_manager.get_template(*lookup_key)

        if not found:
            logger.warning(f"No template found for {query_type_str} {detail_level_str}")
        else:
            logger.info(f"Found template for {query_type_str} {detail_level_str}")

        return found
    except (ValueError, KeyError) as exc:
        # Treat lookup failures as "no template" and let the caller decide
        # whether to fall back or raise.
        logger.error(f"Error getting template for {query_type_str} {detail_level_str}: {str(exc)}")
        return None
async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str: async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str:
""" """
Reduce phase: Synthesize processed chunks into a coherent report. Reduce phase: Synthesize processed chunks into a coherent report.
@ -369,9 +403,11 @@ class ReportSynthesizer:
context += f"Source URL: {url}\n" # Duplicate for emphasis context += f"Source URL: {url}\n" # Duplicate for emphasis
context += f"Extracted information:\n{chunk.get('extracted_info', '')}\n\n" context += f"Extracted information:\n{chunk.get('extracted_info', '')}\n\n"
# Get template modifier based on detail level and query type # Get template modifier based on detail level and query type using helper method
detail_level_manager = get_report_detail_level_manager() template = self._get_template_from_strings(query_type, detail_level)
template = detail_level_manager.get_template_modifier(detail_level, query_type)
if not template:
raise ValueError(f"No template found for {query_type} {detail_level}")
# Add specific instructions for references formatting # Add specific instructions for references formatting
reference_instructions = """ reference_instructions = """
@ -399,7 +435,7 @@ class ReportSynthesizer:
# Create the prompt for synthesizing the report # Create the prompt for synthesizing the report
messages = [ messages = [
{"role": "system", "content": f"""You are an expert research assistant tasked with creating comprehensive, well-structured reports. {"role": "system", "content": f"""You are an expert research assistant tasked with creating comprehensive, well-structured reports.
{template} {template.template}
Format the report in Markdown with clear headings, subheadings, and bullet points where appropriate. Format the report in Markdown with clear headings, subheadings, and bullet points where appropriate.
Make the report readable, engaging, and informative while maintaining academic rigor. Make the report readable, engaging, and informative while maintaining academic rigor.
@ -474,6 +510,13 @@ class ReportSynthesizer:
logger.warning("No document chunks provided for report synthesis.") logger.warning("No document chunks provided for report synthesis.")
return "No information found for the given query." return "No information found for the given query."
# Verify that a template exists for the given query type and detail level
template = self._get_template_from_strings(query_type, detail_level)
if not template:
logger.warning(f"No template found for {query_type} {detail_level}, falling back to standard template")
# Fall back to standard detail level if the requested one doesn't exist
detail_level = "standard"
# Get detail level configuration # Get detail level configuration
detail_level_manager = get_report_detail_level_manager() detail_level_manager = get_report_detail_level_manager()
config = detail_level_manager.get_detail_level_config(detail_level) config = detail_level_manager.get_detail_level_config(detail_level)

View File

@ -0,0 +1,101 @@
import sys
import os
import asyncio
import argparse
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from report.report_synthesis import ReportSynthesizer
from report.report_templates import QueryType, DetailLevel
async def generate_report(query_type, detail_level, query, chunks):
    """
    Synthesize one report, echo it to stdout, and return it.

    Args:
        query_type: String value of the query type; converted through QueryType.
        detail_level: String value of the detail level; converted through DetailLevel.
        query: The query text passed on to the synthesizer.
        chunks: The document chunks passed on to the synthesizer.

    Returns:
        Whatever ``ReportSynthesizer.synthesize_report`` returns for these inputs.
    """
    report_engine = ReportSynthesizer()

    # Banner so consecutive runs are easy to tell apart in the console output.
    print(f"\n{'='*80}")
    print(f"Generating {detail_level} report with {query_type} query type")
    print(f"{'='*80}")

    # Round-trip the strings through the enums before use.
    # NOTE(review): presumably this rejects unknown values early — confirm
    # against the QueryType / DetailLevel definitions.
    resolved_query_type = QueryType(query_type)
    resolved_detail_level = DetailLevel(detail_level)

    request = {
        "query_type": resolved_query_type.value,
        "detail_level": resolved_detail_level.value,
        "query": query,
        "chunks": chunks,
    }
    generated = await report_engine.synthesize_report(**request)

    print(f"\nGenerated Report:\n")
    print(generated)

    return generated
async def main():
    """
    Command-line driver: generate test reports for one query type.

    Parses ``--query-type`` (default: factual) and ``--detail-level``
    (default: all levels), picks the canned query and source chunks for the
    chosen query type, and calls ``generate_report`` once per detail level.
    """
    cli = argparse.ArgumentParser(
        description='Test report generation with different detail levels'
    )
    cli.add_argument(
        '--query-type',
        choices=['factual', 'exploratory', 'comparative'],
        default='factual',
        help='Query type to test (default: factual)',
    )
    cli.add_argument(
        '--detail-level',
        choices=['brief', 'standard', 'detailed', 'comprehensive'],
        default=None,
        help='Detail level to test (default: test all)',
    )
    options = cli.parse_args()

    # Canned query text, keyed by query type.
    sample_queries = {
        'factual': "What is the capital of France?",
        'exploratory': "How do electric vehicles impact the environment?",
        'comparative': "Compare solar and wind energy technologies.",
    }

    # Canned source chunks, keyed by query type.
    sample_chunks = {
        'factual': [
            {
                'content': 'Paris is the capital of France. It is located in the north-central part of the country.',
                'source': 'Wikipedia',
                'url': 'https://en.wikipedia.org/wiki/Paris',
            },
        ],
        'exploratory': [
            {
                'content': 'Electric vehicles produce zero direct emissions, which improves air quality in urban areas.',
                'source': 'EPA',
                'url': 'https://www.epa.gov/greenvehicles/electric-vehicles',
            },
            {
                'content': 'The environmental impact of electric vehicles depends on how the electricity is generated. Renewable sources make EVs more environmentally friendly.',
                'source': 'Energy.gov',
                'url': 'https://www.energy.gov/eere/electricvehicles/electric-vehicle-benefits',
            },
        ],
        'comparative': [
            {
                'content': 'Solar energy is generated by converting sunlight into electricity using photovoltaic cells or concentrated solar power.',
                'source': 'National Renewable Energy Laboratory',
                'url': 'https://www.nrel.gov/research/re-solar.html',
            },
            {
                'content': 'Wind energy is generated by using wind turbines to create mechanical power that can be converted into electricity.',
                'source': 'Department of Energy',
                'url': 'https://www.energy.gov/eere/wind/how-do-wind-turbines-work',
            },
            {
                'content': 'Solar energy works best in sunny areas, while wind energy is more effective in windy regions. Both have different land use requirements.',
                'source': 'Renewable Energy World',
                'url': 'https://www.renewableenergyworld.com/solar/solar-vs-wind/',
            },
        ],
    }

    selected_type = options.query_type
    selected_query = sample_queries[selected_type]
    selected_chunks = sample_chunks[selected_type]

    # No explicit --detail-level means "exercise every level in turn".
    if args_level_missing := options.detail_level is None:
        levels_to_run = ['brief', 'standard', 'detailed', 'comprehensive']
    else:
        levels_to_run = [options.detail_level]

    # Reports are generated one after another, not concurrently.
    for level in levels_to_run:
        await generate_report(selected_type, level, selected_query, selected_chunks)
if __name__ == "__main__":
    # Only start the event loop when executed as a script, so importing this
    # module does not trigger report generation.
    asyncio.run(main())