"""
Report detail levels module for the intelligent research system.

This module provides functionality to define and configure different levels of detail
for generated reports, allowing users to customize the depth and breadth of information
included in reports.
"""

import enum
from typing import Dict, Any, Optional, List, Tuple, Union


class DetailLevel(enum.Enum):
    """Enum for different report detail levels."""
    BRIEF = "brief"
    STANDARD = "standard"
    DETAILED = "detailed"
    COMPREHENSIVE = "comprehensive"


class ReportDetailLevelManager:
    """
    Manager for report detail levels.

    Holds, per detail level, a dictionary of generation parameters
    (``num_results``, ``token_budget``, ``chunk_size``, ``overlap_size``,
    ``model``, ``description``) and one prompt template per query type.
    Lookups accept either the level name as a string or a ``DetailLevel``
    member.
    """

    # Query types that have a dedicated prompt template.
    QUERY_TYPES = ("factual", "exploratory", "comparative")
    # Template used when the requested query type is not one of QUERY_TYPES.
    DEFAULT_QUERY_TYPE = "exploratory"

    def __init__(self):
        """Initialize the report detail level manager."""
        # Define default configurations for different detail levels
        self.detail_level_configs = {
            DetailLevel.BRIEF: {
                "num_results": 3,
                "token_budget": 50000,
                "chunk_size": 800,
                "overlap_size": 50,
                "model": "llama-3.1-8b-instant",
                "description": "A concise summary of key findings and conclusions."
            },
            DetailLevel.STANDARD: {
                "num_results": 7,
                "token_budget": 100000,
                "chunk_size": 1000,
                "overlap_size": 100,
                "model": "llama-3.1-8b-instant",
                "description": "A balanced report with key findings, analysis, and conclusions."
            },
            DetailLevel.DETAILED: {
                "num_results": 12,
                "token_budget": 150000,
                "chunk_size": 1200,
                "overlap_size": 150,
                "model": "llama-3.3-70b-versatile",
                "description": "A comprehensive report with in-depth analysis, methodology, and implications."
            },
            DetailLevel.COMPREHENSIVE: {
                "num_results": 20,
                "token_budget": 200000,
                "chunk_size": 1500,
                "overlap_size": 200,
                "model": "llama-3.3-70b-versatile",
                "description": "An exhaustive report with all available information, extensive analysis, and detailed references."
            }
        }

        # Define template modifiers for different detail levels
        self.template_modifiers = {
            DetailLevel.BRIEF: {
                "factual": "Create a brief factual report that directly answers the query. Focus on accuracy and clarity. Include:\n"
                           "1. A clear, direct answer to the query\n"
                           "2. Key supporting evidence and facts\n"
                           "3. Citations for information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "4. A concise references section\n\n"
                           "Keep the report concise and to the point, focusing only on the most essential information.",

                "comparative": "Create a brief comparative report that analyzes different perspectives on the query. Include:\n"
                               "1. A concise overview of the topic\n"
                               "2. Key similarities and differences between perspectives\n"
                               "3. Citations for information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "4. A concise references section\n\n"
                               "Keep the report concise and to the point, focusing only on the most essential comparisons.",

                "exploratory": "Create a brief exploratory report that investigates the query. Include:\n"
                               "1. A concise introduction to the topic\n"
                               "2. Key findings and insights\n"
                               "3. Citations for information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "4. A concise references section\n\n"
                               "Keep the report concise and to the point, focusing only on the most essential information."
            },

            DetailLevel.STANDARD: {
                "factual": "Create a standard factual report that directly answers the query. Focus on accuracy and clarity. Include:\n"
                           "1. A clear, direct answer to the query\n"
                           "2. Supporting evidence and facts from the sources\n"
                           "3. Any relevant context needed to understand the answer\n"
                           "4. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "5. A references section at the end listing all sources",

                "comparative": "Create a standard comparative report that analyzes different perspectives on the query. Include:\n"
                               "1. An overview of the topic and why it's significant\n"
                               "2. A balanced presentation of different viewpoints or approaches\n"
                               "3. Analysis of similarities and differences\n"
                               "4. Evidence supporting each perspective\n"
                               "5. A synthesis of the information that highlights key insights\n"
                               "6. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "7. A references section at the end listing all sources",

                "exploratory": "Create a standard exploratory report that investigates the query in depth. Include:\n"
                               "1. An introduction that frames the topic and its significance\n"
                               "2. Key concepts and definitions\n"
                               "3. Main findings and insights from the sources\n"
                               "4. Analysis of the information that highlights patterns and connections\n"
                               "5. Implications or applications of the findings\n"
                               "6. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "7. A references section at the end listing all sources"
            },

            DetailLevel.DETAILED: {
                "factual": "Create a detailed factual report that thoroughly answers the query. Focus on accuracy, clarity, and depth. Include:\n"
                           "1. A comprehensive answer to the query with nuanced details\n"
                           "2. Extensive supporting evidence and facts from multiple sources\n"
                           "3. Contextual information and background to fully understand the topic\n"
                           "4. Discussion of any limitations or caveats in the information\n"
                           "5. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "6. A detailed references section at the end listing all sources\n\n"
                           "Organize the report with clear sections and subsections to enhance readability.",

                "comparative": "Create a detailed comparative report that thoroughly analyzes different perspectives on the query. Include:\n"
                               "1. A comprehensive overview of the topic, its history, and significance\n"
                               "2. In-depth presentation of different viewpoints, approaches, or theories\n"
                               "3. Detailed analysis of similarities, differences, strengths, and weaknesses\n"
                               "4. Extensive evidence supporting each perspective from multiple sources\n"
                               "5. Discussion of nuances, edge cases, and contextual factors\n"
                               "6. A thorough synthesis that highlights key insights and patterns\n"
                               "7. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "8. A detailed references section at the end listing all sources\n\n"
                               "Organize the report with clear sections and subsections to enhance readability.",

                "exploratory": "Create a detailed exploratory report that thoroughly investigates the query. Include:\n"
                               "1. A comprehensive introduction that frames the topic, its history, and significance\n"
                               "2. Detailed explanation of key concepts, definitions, and theoretical frameworks\n"
                               "3. In-depth presentation of findings and insights from multiple sources\n"
                               "4. Thorough analysis that highlights patterns, connections, and contradictions\n"
                               "5. Discussion of implications, applications, and future directions\n"
                               "6. Consideration of limitations, gaps in knowledge, and areas for further research\n"
                               "7. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "8. A detailed references section at the end listing all sources\n\n"
                               "Organize the report with clear sections and subsections to enhance readability."
            },

            DetailLevel.COMPREHENSIVE: {
                "factual": "Create an exhaustive factual report that answers the query with maximum depth and breadth. Focus on accuracy, clarity, and completeness. Include:\n"
                           "1. A comprehensive answer to the query with all available details and nuances\n"
                           "2. Exhaustive supporting evidence and facts from all available sources\n"
                           "3. Complete contextual information, background, and historical development\n"
                           "4. Thorough discussion of all limitations, caveats, and alternative interpretations\n"
                           "5. Analysis of conflicting information or disagreements in the sources\n"
                           "6. Visual elements such as tables or bullet points to organize complex information\n"
                           "7. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "8. A comprehensive references section at the end listing all sources\n\n"
                           "Organize the report with clear sections, subsections, and where appropriate, sub-subsections to maximize readability.",

                "comparative": "Create an exhaustive comparative report that analyzes different perspectives on the query with maximum depth and breadth. Include:\n"
                               "1. A comprehensive overview of the topic, its complete history, significance, and context\n"
                               "2. Exhaustive presentation of all viewpoints, approaches, theories, and models\n"
                               "3. Detailed analysis of all similarities, differences, strengths, weaknesses, and trade-offs\n"
                               "4. Complete evidence supporting each perspective from all available sources\n"
                               "5. Thorough discussion of all nuances, edge cases, and contextual factors\n"
                               "6. Analysis of how different perspectives have evolved over time\n"
                               "7. Visual elements such as tables or bullet points to organize complex comparisons\n"
                               "8. A comprehensive synthesis that highlights all insights, patterns, and implications\n"
                               "9. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "10. A comprehensive references section at the end listing all sources\n\n"
                               "Organize the report with clear sections, subsections, and where appropriate, sub-subsections to maximize readability.",

                "exploratory": "Create an exhaustive exploratory report that investigates the query with maximum depth and breadth. Include:\n"
                               "1. A comprehensive introduction that frames the topic, its complete history, significance, and context\n"
                               "2. Exhaustive explanation of all key concepts, definitions, and theoretical frameworks\n"
                               "3. Complete presentation of all findings and insights from all available sources\n"
                               "4. Thorough analysis that highlights all patterns, connections, contradictions, and outliers\n"
                               "5. Comprehensive discussion of all implications, applications, and future directions\n"
                               "6. Complete consideration of all limitations, gaps in knowledge, and areas for further research\n"
                               "7. Visual elements such as tables or bullet points to organize complex information\n"
                               "8. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "9. A comprehensive references section at the end listing all sources\n\n"
                               "Organize the report with clear sections, subsections, and where appropriate, sub-subsections to maximize readability."
            }
        }

    @staticmethod
    def _invalid_level_error(detail_level: Any) -> ValueError:
        """Build the ValueError reported for an unknown detail level."""
        valid_levels = [level.value for level in DetailLevel]
        return ValueError(
            f"Invalid detail level: {detail_level}. Valid levels are: {', '.join(valid_levels)}"
        )

    @classmethod
    def _resolve_level(cls, detail_level: Union[str, DetailLevel]) -> DetailLevel:
        """
        Convert a user-supplied detail level into a ``DetailLevel`` member.

        Strings are matched case-insensitively and surrounding whitespace is
        ignored. Anything that is neither a known level name nor a
        ``DetailLevel`` member (including ``None``) raises ``ValueError``.
        """
        if isinstance(detail_level, DetailLevel):
            return detail_level
        try:
            return DetailLevel(detail_level.strip().lower())
        except (AttributeError, ValueError):
            # AttributeError: not a string at all (None, int, ...).
            # ValueError: a string that names no level.
            raise cls._invalid_level_error(detail_level) from None

    def get_detail_level_config(self, detail_level: Union[str, DetailLevel]) -> Dict[str, Any]:
        """
        Get configuration parameters for a specific detail level.

        Args:
            detail_level: Detail level as a string (brief, standard, detailed,
                comprehensive) or a DetailLevel member

        Returns:
            A new dictionary of configuration parameters for the specified
            detail level. It is a copy, so callers may modify it without
            affecting the manager (which is shared as a module-level singleton).

        Raises:
            ValueError: If the detail level is not valid
        """
        level = self._resolve_level(detail_level)
        try:
            return dict(self.detail_level_configs[level])
        except KeyError:
            # The level exists in the enum but was removed from the mapping.
            raise self._invalid_level_error(detail_level) from None

    def get_template_modifier(self, detail_level: Union[str, DetailLevel], query_type: str) -> str:
        """
        Get template modifier for a specific detail level and query type.

        Args:
            detail_level: Detail level as a string (brief, standard, detailed,
                comprehensive) or a DetailLevel member
            query_type: Query type as a string (factual, exploratory,
                comparative), matched case-insensitively. Any other value
                falls back to the exploratory template rather than raising.

        Returns:
            Template modifier as a string

        Raises:
            ValueError: If the detail level is not valid
        """
        level = self._resolve_level(detail_level)

        if isinstance(query_type, str):
            query_type = query_type.strip().lower()
        if query_type not in self.QUERY_TYPES:
            query_type = self.DEFAULT_QUERY_TYPE  # Default to exploratory if query type is not valid

        try:
            return self.template_modifiers[level][query_type]
        except KeyError:
            raise self._invalid_level_error(detail_level) from None

    def get_available_detail_levels(self) -> List[Tuple[str, str]]:
        """
        Get a list of available detail levels with descriptions.

        Returns:
            List of (level name, description) tuples, in the order the levels
            are configured
        """
        return [(level.value, config["description"])
                for level, config in self.detail_level_configs.items()]


# Create a singleton instance for global use
report_detail_level_manager = ReportDetailLevelManager()


def get_report_detail_level_manager() -> ReportDetailLevelManager:
    """
    Get the global report detail level manager instance.

    Returns:
        ReportDetailLevelManager instance
    """
    return report_detail_level_manager
+ + Args: + detail_level: Detail level (brief, standard, detailed, comprehensive) + """ + try: + # Validate detail level + config = self.detail_level_manager.get_detail_level_config(detail_level) + self.detail_level = detail_level + + # Update model if needed + model = config.get("model") + if model and model != self.model_name: + self.model_name = model + self.report_synthesizer = get_report_synthesizer(model) + + logger.info(f"Detail level set to {detail_level} with model {model}") + except ValueError as e: + logger.error(f"Error setting detail level: {e}") + raise + + def get_detail_level_config(self) -> Dict[str, Any]: + """ + Get the current detail level configuration. + + Returns: + Dictionary of configuration parameters for the current detail level + """ + return self.detail_level_manager.get_detail_level_config(self.detail_level) + + def get_available_detail_levels(self) -> List[Tuple[str, str]]: + """ + Get a list of available detail levels with descriptions. + + Returns: + List of tuples containing detail level and description + """ + return self.detail_level_manager.get_available_detail_levels() + async def process_search_results(self, search_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Process search results by scraping the URLs and storing them in the database. @@ -96,8 +141,8 @@ class ReportGenerator: async def prepare_documents_for_report(self, search_results: List[Dict[str, Any]], token_budget: Optional[int] = None, - chunk_size: int = 1000, - overlap_size: int = 100) -> List[Dict[str, Any]]: + chunk_size: Optional[int] = None, + overlap_size: Optional[int] = None) -> List[Dict[str, Any]]: """ Prepare documents for report generation by processing search results, prioritizing documents, and chunking them to fit within token budget. 
@@ -111,6 +156,20 @@ class ReportGenerator: Returns: List of selected document chunks """ + # Get configuration from detail level if not specified + config = self.get_detail_level_config() + + if token_budget is None: + token_budget = config.get("token_budget") + + if chunk_size is None: + chunk_size = config.get("chunk_size", 1000) + + if overlap_size is None: + overlap_size = config.get("overlap_size", 100) + + logger.info(f"Preparing documents with token_budget={token_budget}, chunk_size={chunk_size}, overlap_size={overlap_size}") + # Process search results to get documents and relevance scores documents, relevance_scores = await self.process_search_results(search_results) @@ -126,11 +185,12 @@ class ReportGenerator: return selected_chunks async def generate_report(self, - search_results: List[Dict[str, Any]], - query: str, - token_budget: Optional[int] = None, - chunk_size: int = 1000, - overlap_size: int = 100) -> str: + search_results: List[Dict[str, Any]], + query: str, + token_budget: Optional[int] = None, + chunk_size: Optional[int] = None, + overlap_size: Optional[int] = None, + detail_level: Optional[str] = None) -> str: """ Generate a report from search results. 
@@ -140,10 +200,15 @@ class ReportGenerator: token_budget: Maximum number of tokens to use chunk_size: Maximum number of tokens per chunk overlap_size: Number of tokens to overlap between chunks + detail_level: Level of detail for the report (brief, standard, detailed, comprehensive) Returns: Generated report as a string """ + # Set detail level if specified + if detail_level: + self.set_detail_level(detail_level) + # Prepare documents for report selected_chunks = await self.prepare_documents_for_report( search_results, @@ -153,7 +218,11 @@ class ReportGenerator: ) # Generate report using report synthesizer - report = await self.report_synthesizer.synthesize_report(selected_chunks, query) + report = await self.report_synthesizer.synthesize_report( + selected_chunks, + query, + detail_level=self.detail_level + ) return report diff --git a/report/report_synthesis.py b/report/report_synthesis.py index 0d5b64c..313ea1f 100644 --- a/report/report_synthesis.py +++ b/report/report_synthesis.py @@ -15,6 +15,7 @@ import litellm from litellm import completion from config.config import get_config +from report.report_detail_levels import get_report_detail_level_manager, DetailLevel # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') @@ -166,7 +167,7 @@ class ReportSynthesizer: return processed_chunks - async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory") -> str: + async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str: """ Reduce phase: Synthesize processed chunks into a coherent report. 
@@ -174,6 +175,7 @@ class ReportSynthesizer: processed_chunks: List of processed chunks with extracted information query: Original search query query_type: Type of query (factual, exploratory, comparative) + detail_level: Level of detail for the report (brief, standard, detailed, comprehensive) Returns: Synthesized report as a string @@ -185,32 +187,9 @@ class ReportSynthesizer: context += f"Source: {chunk.get('url', 'Unknown')}\n" context += f"Extracted information:\n{chunk.get('extracted_info', '')}\n\n" - # Create a template based on query type - if query_type == "factual": - template = """Create a comprehensive factual report that directly answers the query. Focus on accuracy and clarity. Include: - 1. A clear, direct answer to the query - 2. Supporting evidence and facts from the sources - 3. Any relevant context needed to understand the answer - 4. Citations for all information (use numbered citations in square brackets [1], [2], etc.) - 5. A references section at the end listing all sources""" - elif query_type == "comparative": - template = """Create a comprehensive comparative report that analyzes different perspectives on the query. Include: - 1. An overview of the topic and why it's significant - 2. A balanced presentation of different viewpoints or approaches - 3. Analysis of similarities and differences - 4. Evidence supporting each perspective - 5. A synthesis of the information that highlights key insights - 6. Citations for all information (use numbered citations in square brackets [1], [2], etc.) - 7. A references section at the end listing all sources""" - else: # exploratory (default) - template = """Create a comprehensive exploratory report that investigates the query in depth. Include: - 1. An introduction that frames the topic and its significance - 2. Key concepts and definitions - 3. Main findings and insights from the sources - 4. Analysis of the information that highlights patterns and connections - 5. 
Implications or applications of the findings - 6. Citations for all information (use numbered citations in square brackets [1], [2], etc.) - 7. A references section at the end listing all sources""" + # Get template modifier based on detail level and query type + detail_level_manager = get_report_detail_level_manager() + template = detail_level_manager.get_template_modifier(detail_level, query_type) # Create the prompt for synthesizing the report messages = [ @@ -224,7 +203,7 @@ class ReportSynthesizer: Information from sources: {context} - Synthesize this information into a comprehensive report that addresses the query. Use your own words to create a coherent narrative, but ensure all information is based on the provided sources. Include citations and a references section."""} + Synthesize this information into a report that addresses the query. Use your own words to create a coherent narrative, but ensure all information is based on the provided sources. Include citations and a references section."""} ] # Generate the report @@ -232,7 +211,7 @@ class ReportSynthesizer: return report - async def synthesize_report(self, chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory") -> str: + async def synthesize_report(self, chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str: """ Synthesize a report from document chunks using the map-reduce approach. 
@@ -240,12 +219,14 @@ class ReportSynthesizer: chunks: List of document chunks query: Original search query query_type: Type of query (factual, exploratory, comparative) + detail_level: Level of detail for the report (brief, standard, detailed, comprehensive) Returns: Synthesized report as a string """ logger.info(f"Synthesizing report for query: {query}") logger.info(f"Using {len(chunks)} document chunks") + logger.info(f"Detail level: {detail_level}") # Determine query type if not specified if query_type == "exploratory": @@ -264,7 +245,7 @@ class ReportSynthesizer: # Reduce phase: Synthesize processed chunks into a coherent report logger.info("Starting reduce phase: Synthesizing processed chunks into a report") - report = await self.reduce_processed_chunks(processed_chunks, query, query_type) + report = await self.reduce_processed_chunks(processed_chunks, query, query_type, detail_level) logger.info("Reduce phase complete: Report generated") return report diff --git a/scripts/query_to_report.py b/scripts/query_to_report.py index ff40278..d525a95 100755 --- a/scripts/query_to_report.py +++ b/scripts/query_to_report.py @@ -22,6 +22,7 @@ from query.query_processor import get_query_processor from execution.search_executor import SearchExecutor from ranking.jina_reranker import get_jina_reranker from report.report_generator import get_report_generator, initialize_report_generator +from report.report_detail_levels import get_report_detail_level_manager # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') @@ -34,8 +35,9 @@ async def query_to_report( search_engines: Optional[List[str]] = None, num_results: int = 10, token_budget: Optional[int] = None, - chunk_size: int = 1000, - overlap_size: int = 100, + chunk_size: Optional[int] = None, + overlap_size: Optional[int] = None, + detail_level: str = "standard", use_mock: bool = False ) -> str: """ @@ -49,12 +51,14 @@ async def query_to_report( 
token_budget: Maximum number of tokens to use for report generation chunk_size: Maximum number of tokens per chunk overlap_size: Number of tokens to overlap between chunks + detail_level: Level of detail for the report (brief, standard, detailed, comprehensive) use_mock: If True, use mock data instead of making actual API calls Returns: Path to the generated report """ logger.info(f"Processing query: {query}") + logger.info(f"Detail level: {detail_level}") # Step 1: Process the query query_processor = get_query_processor() @@ -76,6 +80,14 @@ async def query_to_report( # Step 3: Execute search search_executor = SearchExecutor() + + # If detail level is specified, adjust num_results based on the detail level + if detail_level and not num_results: + detail_level_manager = get_report_detail_level_manager() + config = detail_level_manager.get_detail_level_config(detail_level) + num_results = config.get("num_results", 10) + logger.info(f"Using {num_results} results per search engine based on detail level: {detail_level}") + search_results = search_executor.execute_search( structured_query, search_engines=search_engines, @@ -124,13 +136,14 @@ async def query_to_report( report_generator = get_report_generator() # Step 6: Generate report - logger.info(f"Generating report...") + logger.info(f"Generating report with detail level: {detail_level}...") report = await report_generator.generate_report( search_results=reranked_results, query=query, token_budget=token_budget, chunk_size=chunk_size, - overlap_size=overlap_size + overlap_size=overlap_size, + detail_level=detail_level ) logger.info(f"Report generated. 
Length: {len(report)} characters") @@ -150,12 +163,17 @@ def main(): parser.add_argument('query', help='The query to process') parser.add_argument('--output', '-o', default='report.md', help='Output file path') parser.add_argument('--search-engines', '-s', nargs='+', help='Search engines to use') - parser.add_argument('--num-results', '-n', type=int, default=10, help='Number of results per search engine') + parser.add_argument('--num-results', '-n', type=int, help='Number of results per search engine') parser.add_argument('--token-budget', '-t', type=int, help='Maximum number of tokens for report generation') - parser.add_argument('--chunk-size', '-c', type=int, default=1000, help='Maximum tokens per chunk') - parser.add_argument('--overlap-size', '-l', type=int, default=100, help='Tokens to overlap between chunks') + parser.add_argument('--chunk-size', '-c', type=int, help='Maximum tokens per chunk') + parser.add_argument('--overlap-size', '-l', type=int, help='Tokens to overlap between chunks') + parser.add_argument('--detail-level', '-d', type=str, default='standard', + choices=['brief', 'standard', 'detailed', 'comprehensive'], + help='Level of detail for the report') parser.add_argument('--use-mock', '-m', action='store_true', help='Use mock data instead of API calls') parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging') + parser.add_argument('--list-detail-levels', action='store_true', + help='List available detail levels with descriptions and exit') args = parser.parse_args() @@ -163,6 +181,15 @@ def main(): if args.verbose: logging.getLogger().setLevel(logging.DEBUG) + # List detail levels if requested + if args.list_detail_levels: + detail_level_manager = get_report_detail_level_manager() + detail_levels = detail_level_manager.get_available_detail_levels() + print("Available detail levels:") + for level, description in detail_levels: + print(f" {level}: {description}") + return + # Run the workflow 
#!/usr/bin/env python
"""
Test Detail Levels Script

This script tests the report generation with different detail levels
for the same query to demonstrate the differences.
"""

import os
import sys
import asyncio
import argparse
from datetime import datetime

# Add parent directory to path to import modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from scripts.query_to_report import query_to_report
from report.report_detail_levels import get_report_detail_level_manager, DetailLevel


async def run_detail_level_test(query: str, use_mock: bool = False):
    """
    Run a test of the query to report workflow with different detail levels.

    One report is generated per detail level and written to
    ``report_<timestamp>_<level>.md``. For each level the configuration,
    timing, file size and word count are printed. A report that cannot be
    read back is reported and the remaining levels are still processed.

    Args:
        query: The query to process
        use_mock: If True, use mock data instead of making actual API calls
    """
    # Generate timestamp for unique output files
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Get detail level manager
    detail_level_manager = get_report_detail_level_manager()

    # Get all detail levels
    detail_levels = [level.value for level in DetailLevel]

    print(f"Processing query: {query}")
    print(f"Testing {len(detail_levels)} detail levels: {', '.join(detail_levels)}")
    print("This may take several minutes to complete all detail levels...")

    # Levels whose report could not be read back after generation.
    failed_levels = []

    # Process each detail level
    for detail_level in detail_levels:
        print(f"\n{'=' * 80}")
        print(f"Processing detail level: {detail_level}")

        # Get detail level configuration
        config = detail_level_manager.get_detail_level_config(detail_level)

        # Print detail level configuration
        print("Detail level configuration:")
        print(f"  Number of results per search engine: {config.get('num_results')}")
        print(f"  Token budget: {config.get('token_budget')}")
        print(f"  Chunk size: {config.get('chunk_size')}")
        print(f"  Overlap size: {config.get('overlap_size')}")
        print(f"  Model: {config.get('model')}")

        # Set output file
        output_file = f"report_{timestamp}_{detail_level}.md"

        # Run the workflow
        start_time = datetime.now()
        print(f"Started at: {start_time.strftime('%H:%M:%S')}")

        # num_results is passed explicitly: query_to_report defaults it to 10
        # and only falls back to the detail level's value when it is falsy, so
        # omitting it would run every level with 10 results instead of the
        # value printed above.
        await query_to_report(
            query=query,
            output_file=output_file,
            num_results=config.get("num_results"),
            detail_level=detail_level,
            use_mock=use_mock
        )

        end_time = datetime.now()
        duration = end_time - start_time
        print(f"Completed at: {end_time.strftime('%H:%M:%S')}")
        print(f"Duration: {duration.total_seconds():.2f} seconds")

        # Inspect the generated report; a missing or unreadable file must not
        # abort the remaining detail levels.
        try:
            file_size = os.path.getsize(output_file)
            with open(output_file, 'r', encoding='utf-8') as f:
                word_count = len(f.read().split())
        except (OSError, UnicodeError) as e:
            print(f"Error reading report: {e}")
            failed_levels.append(detail_level)
            continue

        print(f"Report saved to: {output_file}")
        print(f"Report size: {file_size} bytes")
        print(f"Word count: {word_count}")

    print(f"\n{'=' * 80}")
    if failed_levels:
        print(f"Reports could not be read for detail levels: {', '.join(failed_levels)}")
    else:
        print("All detail levels processed successfully!")
    print(f"Reports saved with prefix: report_{timestamp}_")


def main():
    """Main function to parse arguments and run the test."""
    parser = argparse.ArgumentParser(description='Test report generation with different detail levels')
    parser.add_argument('--query', '-q', type=str,
                        default="What is the environmental and economic impact of electric vehicles compared to traditional vehicles?",
                        help='The query to process')
    parser.add_argument('--use-mock', '-m', action='store_true', help='Use mock data instead of API calls')
    parser.add_argument('--list-detail-levels', action='store_true',
                        help='List available detail levels with descriptions and exit')

    args = parser.parse_args()

    # List detail levels if requested
    if args.list_detail_levels:
        detail_level_manager = get_report_detail_level_manager()
        print("Available detail levels:")
        for level, description in detail_level_manager.get_available_detail_levels():
            print(f"  {level}: {description}")
        return

    # Run the test
    asyncio.run(run_detail_level_test(query=args.query, use_mock=args.use_mock))


if __name__ == "__main__":
    main()
Args: + detail_level: Level of detail for the report (brief, standard, detailed, comprehensive) use_mock: If True, use mock data instead of making actual API calls """ # Query about electric vehicles @@ -29,16 +31,29 @@ async def run_ev_test(use_mock: bool = False): # Generate timestamp for unique output file timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - output_file = f"ev_report_{timestamp}.md" + output_file = f"ev_report_{timestamp}_{detail_level}.md" print(f"Processing query: {query}") + print(f"Detail level: {detail_level}") print(f"This may take a few minutes depending on the number of search results and API response times...") + # Get detail level configuration + detail_level_manager = get_report_detail_level_manager() + config = detail_level_manager.get_detail_level_config(detail_level) + + # Print detail level configuration + print(f"\nDetail level configuration:") + print(f" Number of results per search engine: {config.get('num_results')}") + print(f" Token budget: {config.get('token_budget')}") + print(f" Chunk size: {config.get('chunk_size')}") + print(f" Overlap size: {config.get('overlap_size')}") + print(f" Model: {config.get('model')}") + # Run the workflow await query_to_report( query=query, output_file=output_file, - num_results=7, # Get a good number of results for a comprehensive report + detail_level=detail_level, use_mock=use_mock ) @@ -60,12 +75,26 @@ async def run_ev_test(use_mock: bool = False): def main(): """Main function to parse arguments and run the test.""" parser = argparse.ArgumentParser(description='Test the query to report workflow with EV query') + parser.add_argument('--detail-level', '-d', type=str, default='standard', + choices=['brief', 'standard', 'detailed', 'comprehensive'], + help='Level of detail for the report') parser.add_argument('--use-mock', '-m', action='store_true', help='Use mock data instead of API calls') + parser.add_argument('--list-detail-levels', action='store_true', + help='List available detail 
levels with descriptions and exit') args = parser.parse_args() + # List detail levels if requested + if args.list_detail_levels: + detail_level_manager = get_report_detail_level_manager() + detail_levels = detail_level_manager.get_available_detail_levels() + print("Available detail levels:") + for level, description in detail_levels: + print(f" {level}: {description}") + return + # Run the test - asyncio.run(run_ev_test(use_mock=args.use_mock)) + asyncio.run(run_ev_test(detail_level=args.detail_level, use_mock=args.use_mock)) if __name__ == "__main__":