Fix template retrieval in report synthesis for all detail levels and query types

2025-03-11 20:11:01 -05:00 · 2025-03-11 20:11:01 -05:00 · a72d4ff35f
parent cf7d7f6966
commit a72d4ff35f
2 changed files with 148 additions and 4 deletions
--- a/report/report_synthesis.py
+++ b/report/report_synthesis.py
@@ -16,11 +16,14 @@ from litellm import completion

 from config.config import get_config
 from report.report_detail_levels import get_report_detail_level_manager, DetailLevel
+from report.report_templates import QueryType, DetailLevel as TemplateDetailLevel, ReportTemplateManager, ReportTemplate

 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)

+# Note: ReportTemplateManager and ReportTemplate are now imported from report_templates.py
+
 class ReportSynthesizer:
    """
    Report synthesizer for the intelligent research system.
@@ -48,6 +51,10 @@ class ReportSynthesizer:
        # Set up LiteLLM with the appropriate provider
        self._setup_provider()
        
+        # Initialize template manager
+        self.template_manager = ReportTemplateManager()
+        self.template_manager.initialize_default_templates()
+        
        # Flag to process <thinking> tags in model output
        self.process_thinking_tags = False
    
@@ -344,6 +351,33 @@ class ReportSynthesizer:
                Analyze the reliability and significance of the information.
                Format your response with clearly organized sections and detailed bullet points."""
    
+    def _get_template_from_strings(self, query_type_str: str, detail_level_str: str) -> Optional[ReportTemplate]:
+        """
+        Look up a report template via self.template_manager from string query-type and detail-level values.
+        
+        Args:
+            query_type_str: String value of query type (factual, exploratory, comparative)
+            detail_level_str: String value of detail level (brief, standard, detailed, comprehensive)
+            
+        Returns:
+            ReportTemplate from the template manager, or None when none is found or a ValueError/KeyError is caught (logged as an error)
+        """
+        try:
+            # Convert string values to enum objects (QueryType / TemplateDetailLevel) as passed to get_template below
+            query_type_enum = QueryType(query_type_str)
+            detail_level_enum = TemplateDetailLevel(detail_level_str)
+            
+            # Get template using enum objects; log at info level on a hit, warning on a miss
+            template = self.template_manager.get_template(query_type_enum, detail_level_enum)
+            if template:
+                logger.info(f"Found template for {query_type_str} {detail_level_str}")
+            else:
+                logger.warning(f"No template found for {query_type_str} {detail_level_str}")
+            return template
+        except (ValueError, KeyError) as e:
+            logger.error(f"Error getting template for {query_type_str} {detail_level_str}: {str(e)}")
+            return None
+    
    async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str:
        """
        Reduce phase: Synthesize processed chunks into a coherent report.
@@ -369,9 +403,11 @@ class ReportSynthesizer:
            context += f"Source URL: {url}\n"  # Duplicate for emphasis
            context += f"Extracted information:\n{chunk.get('extracted_info', '')}\n\n"
        
-        # Get template modifier based on detail level and query type
-        detail_level_manager = get_report_detail_level_manager()
-        template = detail_level_manager.get_template_modifier(detail_level, query_type)
+        # Get template modifier based on detail level and query type using helper method
+        template = self._get_template_from_strings(query_type, detail_level)
+        
+        if not template:
+            raise ValueError(f"No template found for {query_type} {detail_level}")
        
        # Add specific instructions for references formatting
        reference_instructions = """
@@ -399,7 +435,7 @@ class ReportSynthesizer:
        # Create the prompt for synthesizing the report
        messages = [
            {"role": "system", "content": f"""You are an expert research assistant tasked with creating comprehensive, well-structured reports. 
-            {template}
+            {template.template}
            
            Format the report in Markdown with clear headings, subheadings, and bullet points where appropriate.
            Make the report readable, engaging, and informative while maintaining academic rigor.
@@ -474,6 +510,13 @@ class ReportSynthesizer:
            logger.warning("No document chunks provided for report synthesis.")
            return "No information found for the given query."
        
+        # Verify that a template exists for the given query type and detail level
+        template = self._get_template_from_strings(query_type, detail_level)
+        if not template:
+            logger.warning(f"No template found for {query_type} {detail_level}, falling back to standard template")
+            # Fall back to standard detail level if the requested one doesn't exist
+            detail_level = "standard"
+        
        # Get detail level configuration
        detail_level_manager = get_report_detail_level_manager()
        config = detail_level_manager.get_detail_level_config(detail_level)
--- a/tests/report/test_all_detail_levels.py
+++ b/tests/report/test_all_detail_levels.py
@@ -0,0 +1,101 @@
+import sys
+import os
+import asyncio
+import argparse
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from report.report_synthesis import ReportSynthesizer
+from report.report_templates import QueryType, DetailLevel
+
+async def generate_report(query_type, detail_level, query, chunks):
+    """Create a ReportSynthesizer, synthesize one report for the given query type and detail level, print it and return it."""
+    synthesizer = ReportSynthesizer()
+    
+    print(f"\n{'='*80}")
+    print(f"Generating {detail_level} report with {query_type} query type")
+    print(f"{'='*80}")
+    
+    # Round-trip the strings through the enums; only their .value strings are passed to synthesize_report below
+    query_type_enum = QueryType(query_type)
+    detail_level_enum = DetailLevel(detail_level)
+    
+    report = await synthesizer.synthesize_report(
+        query_type=query_type_enum.value,
+        detail_level=detail_level_enum.value,
+        query=query,
+        chunks=chunks
+    )
+    
+    print(f"\nGenerated Report:\n")
+    print(report)
+    
+    return report
+
+async def main():
+    parser = argparse.ArgumentParser(description='Test report generation with different detail levels')
+    parser.add_argument('--query-type', choices=['factual', 'exploratory', 'comparative'], default='factual',
+                        help='Query type to test (default: factual)')
+    parser.add_argument('--detail-level', choices=['brief', 'standard', 'detailed', 'comprehensive'], default=None,
+                        help='Detail level to test (default: test all)')
+    args = parser.parse_args()
+    
+    # Test data: one sample query and a matching list of source chunks for each query type
+    queries = {
+        'factual': "What is the capital of France?",
+        'exploratory': "How do electric vehicles impact the environment?",
+        'comparative': "Compare solar and wind energy technologies."
+    }
+    
+    chunks = {
+        'factual': [
+            {
+                'content': 'Paris is the capital of France. It is located in the north-central part of the country.',
+                'source': 'Wikipedia',
+                'url': 'https://en.wikipedia.org/wiki/Paris'
+            }
+        ],
+        'exploratory': [
+            {
+                'content': 'Electric vehicles produce zero direct emissions, which improves air quality in urban areas.',
+                'source': 'EPA',
+                'url': 'https://www.epa.gov/greenvehicles/electric-vehicles'
+            },
+            {
+                'content': 'The environmental impact of electric vehicles depends on how the electricity is generated. Renewable sources make EVs more environmentally friendly.',
+                'source': 'Energy.gov',
+                'url': 'https://www.energy.gov/eere/electricvehicles/electric-vehicle-benefits'
+            }
+        ],
+        'comparative': [
+            {
+                'content': 'Solar energy is generated by converting sunlight into electricity using photovoltaic cells or concentrated solar power.',
+                'source': 'National Renewable Energy Laboratory',
+                'url': 'https://www.nrel.gov/research/re-solar.html'
+            },
+            {
+                'content': 'Wind energy is generated by using wind turbines to create mechanical power that can be converted into electricity.',
+                'source': 'Department of Energy',
+                'url': 'https://www.energy.gov/eere/wind/how-do-wind-turbines-work'
+            },
+            {
+                'content': 'Solar energy works best in sunny areas, while wind energy is more effective in windy regions. Both have different land use requirements.',
+                'source': 'Renewable Energy World',
+                'url': 'https://www.renewableenergyworld.com/solar/solar-vs-wind/'
+            }
+        ]
+    }
+    
+    # Select the sample query and chunks for the query type chosen on the command line
+    query_type = args.query_type
+    query = queries[query_type]
+    test_chunks = chunks[query_type]
+    
+    # Run every detail level when --detail-level is omitted (default None), otherwise only the one given
+    detail_levels = ['brief', 'standard', 'detailed', 'comprehensive'] if args.detail_level is None else [args.detail_level]
+    
+    for detail_level in detail_levels:
+        await generate_report(query_type, detail_level, query, test_chunks)
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    asyncio.run(main())