Implement customizable report detail levels with four options: brief, standard, detailed, and comprehensive
This commit is contained in:
parent
4e9887f08f
commit
646922ef4a
|
@ -0,0 +1,258 @@
|
|||
"""
|
||||
Report detail levels module for the intelligent research system.
|
||||
|
||||
This module provides functionality to define and configure different levels of detail
|
||||
for generated reports, allowing users to customize the depth and breadth of information
|
||||
included in reports.
|
||||
"""
|
||||
|
||||
import enum
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
|
||||
|
||||
@enum.unique
class DetailLevel(enum.Enum):
    """Closed set of report detail levels.

    Each member's value is the lowercase string used to select that level,
    so ``DetailLevel("brief")`` resolves to ``DetailLevel.BRIEF``.
    Members are declared from least to most detailed.
    """

    BRIEF = "brief"
    STANDARD = "standard"
    DETAILED = "detailed"
    COMPREHENSIVE = "comprehensive"
|
||||
|
||||
|
||||
class ReportDetailLevelManager:
    """
    Manager for report detail levels.

    Holds two lookup tables keyed by ``DetailLevel``:

    * ``detail_level_configs`` -- numeric/model settings per level
      (``num_results``, ``token_budget``, ``chunk_size``, ``overlap_size``,
      ``model``) plus a human-readable ``description``.
    * ``template_modifiers`` -- the prompt template to use per level and per
      query type (``factual``, ``comparative``, ``exploratory``).

    Detail levels are accepted as case-insensitive strings or as
    ``DetailLevel`` members.
    """

    # Query types that have a dedicated template in ``template_modifiers``.
    _QUERY_TYPES = ("factual", "exploratory", "comparative")
    # Template used when the requested query type is not recognised.
    _DEFAULT_QUERY_TYPE = "exploratory"

    def __init__(self):
        """Initialize the report detail level manager with its default tables."""
        # Define default configurations for different detail levels
        self.detail_level_configs = {
            DetailLevel.BRIEF: {
                "num_results": 3,
                "token_budget": 50000,
                "chunk_size": 800,
                "overlap_size": 50,
                "model": "llama-3.1-8b-instant",
                "description": "A concise summary of key findings and conclusions."
            },
            DetailLevel.STANDARD: {
                "num_results": 7,
                "token_budget": 100000,
                "chunk_size": 1000,
                "overlap_size": 100,
                "model": "llama-3.1-8b-instant",
                "description": "A balanced report with key findings, analysis, and conclusions."
            },
            DetailLevel.DETAILED: {
                "num_results": 12,
                "token_budget": 150000,
                "chunk_size": 1200,
                "overlap_size": 150,
                "model": "llama-3.3-70b-versatile",
                "description": "A comprehensive report with in-depth analysis, methodology, and implications."
            },
            DetailLevel.COMPREHENSIVE: {
                "num_results": 20,
                "token_budget": 200000,
                "chunk_size": 1500,
                "overlap_size": 200,
                "model": "llama-3.3-70b-versatile",
                "description": "An exhaustive report with all available information, extensive analysis, and detailed references."
            }
        }

        # Define template modifiers for different detail levels.
        # Each value is a single string built from adjacent literals.
        self.template_modifiers = {
            DetailLevel.BRIEF: {
                "factual": "Create a brief factual report that directly answers the query. Focus on accuracy and clarity. Include:\n"
                           "1. A clear, direct answer to the query\n"
                           "2. Key supporting evidence and facts\n"
                           "3. Citations for information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "4. A concise references section\n\n"
                           "Keep the report concise and to the point, focusing only on the most essential information.",

                "comparative": "Create a brief comparative report that analyzes different perspectives on the query. Include:\n"
                               "1. A concise overview of the topic\n"
                               "2. Key similarities and differences between perspectives\n"
                               "3. Citations for information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "4. A concise references section\n\n"
                               "Keep the report concise and to the point, focusing only on the most essential comparisons.",

                "exploratory": "Create a brief exploratory report that investigates the query. Include:\n"
                               "1. A concise introduction to the topic\n"
                               "2. Key findings and insights\n"
                               "3. Citations for information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "4. A concise references section\n\n"
                               "Keep the report concise and to the point, focusing only on the most essential information."
            },

            DetailLevel.STANDARD: {
                "factual": "Create a standard factual report that directly answers the query. Focus on accuracy and clarity. Include:\n"
                           "1. A clear, direct answer to the query\n"
                           "2. Supporting evidence and facts from the sources\n"
                           "3. Any relevant context needed to understand the answer\n"
                           "4. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "5. A references section at the end listing all sources",

                "comparative": "Create a standard comparative report that analyzes different perspectives on the query. Include:\n"
                               "1. An overview of the topic and why it's significant\n"
                               "2. A balanced presentation of different viewpoints or approaches\n"
                               "3. Analysis of similarities and differences\n"
                               "4. Evidence supporting each perspective\n"
                               "5. A synthesis of the information that highlights key insights\n"
                               "6. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "7. A references section at the end listing all sources",

                "exploratory": "Create a standard exploratory report that investigates the query in depth. Include:\n"
                               "1. An introduction that frames the topic and its significance\n"
                               "2. Key concepts and definitions\n"
                               "3. Main findings and insights from the sources\n"
                               "4. Analysis of the information that highlights patterns and connections\n"
                               "5. Implications or applications of the findings\n"
                               "6. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "7. A references section at the end listing all sources"
            },

            DetailLevel.DETAILED: {
                "factual": "Create a detailed factual report that thoroughly answers the query. Focus on accuracy, clarity, and depth. Include:\n"
                           "1. A comprehensive answer to the query with nuanced details\n"
                           "2. Extensive supporting evidence and facts from multiple sources\n"
                           "3. Contextual information and background to fully understand the topic\n"
                           "4. Discussion of any limitations or caveats in the information\n"
                           "5. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "6. A detailed references section at the end listing all sources\n\n"
                           "Organize the report with clear sections and subsections to enhance readability.",

                "comparative": "Create a detailed comparative report that thoroughly analyzes different perspectives on the query. Include:\n"
                               "1. A comprehensive overview of the topic, its history, and significance\n"
                               "2. In-depth presentation of different viewpoints, approaches, or theories\n"
                               "3. Detailed analysis of similarities, differences, strengths, and weaknesses\n"
                               "4. Extensive evidence supporting each perspective from multiple sources\n"
                               "5. Discussion of nuances, edge cases, and contextual factors\n"
                               "6. A thorough synthesis that highlights key insights and patterns\n"
                               "7. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "8. A detailed references section at the end listing all sources\n\n"
                               "Organize the report with clear sections and subsections to enhance readability.",

                "exploratory": "Create a detailed exploratory report that thoroughly investigates the query. Include:\n"
                               "1. A comprehensive introduction that frames the topic, its history, and significance\n"
                               "2. Detailed explanation of key concepts, definitions, and theoretical frameworks\n"
                               "3. In-depth presentation of findings and insights from multiple sources\n"
                               "4. Thorough analysis that highlights patterns, connections, and contradictions\n"
                               "5. Discussion of implications, applications, and future directions\n"
                               "6. Consideration of limitations, gaps in knowledge, and areas for further research\n"
                               "7. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "8. A detailed references section at the end listing all sources\n\n"
                               "Organize the report with clear sections and subsections to enhance readability."
            },

            DetailLevel.COMPREHENSIVE: {
                "factual": "Create an exhaustive factual report that answers the query with maximum depth and breadth. Focus on accuracy, clarity, and completeness. Include:\n"
                           "1. A comprehensive answer to the query with all available details and nuances\n"
                           "2. Exhaustive supporting evidence and facts from all available sources\n"
                           "3. Complete contextual information, background, and historical development\n"
                           "4. Thorough discussion of all limitations, caveats, and alternative interpretations\n"
                           "5. Analysis of conflicting information or disagreements in the sources\n"
                           "6. Visual elements such as tables or bullet points to organize complex information\n"
                           "7. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                           "8. A comprehensive references section at the end listing all sources\n\n"
                           "Organize the report with clear sections, subsections, and where appropriate, sub-subsections to maximize readability.",

                "comparative": "Create an exhaustive comparative report that analyzes different perspectives on the query with maximum depth and breadth. Include:\n"
                               "1. A comprehensive overview of the topic, its complete history, significance, and context\n"
                               "2. Exhaustive presentation of all viewpoints, approaches, theories, and models\n"
                               "3. Detailed analysis of all similarities, differences, strengths, weaknesses, and trade-offs\n"
                               "4. Complete evidence supporting each perspective from all available sources\n"
                               "5. Thorough discussion of all nuances, edge cases, and contextual factors\n"
                               "6. Analysis of how different perspectives have evolved over time\n"
                               "7. Visual elements such as tables or bullet points to organize complex comparisons\n"
                               "8. A comprehensive synthesis that highlights all insights, patterns, and implications\n"
                               "9. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "10. A comprehensive references section at the end listing all sources\n\n"
                               "Organize the report with clear sections, subsections, and where appropriate, sub-subsections to maximize readability.",

                "exploratory": "Create an exhaustive exploratory report that investigates the query with maximum depth and breadth. Include:\n"
                               "1. A comprehensive introduction that frames the topic, its complete history, significance, and context\n"
                               "2. Exhaustive explanation of all key concepts, definitions, and theoretical frameworks\n"
                               "3. Complete presentation of all findings and insights from all available sources\n"
                               "4. Thorough analysis that highlights all patterns, connections, contradictions, and outliers\n"
                               "5. Comprehensive discussion of all implications, applications, and future directions\n"
                               "6. Complete consideration of all limitations, gaps in knowledge, and areas for further research\n"
                               "7. Visual elements such as tables or bullet points to organize complex information\n"
                               "8. Citations for all information (use numbered citations in square brackets [1], [2], etc.)\n"
                               "9. A comprehensive references section at the end listing all sources\n\n"
                               "Organize the report with clear sections, subsections, and where appropriate, sub-subsections to maximize readability."
            }
        }

    @staticmethod
    def _invalid_level_error(detail_level) -> ValueError:
        """Build the ValueError reported for an unusable detail level."""
        valid_levels = [level.value for level in DetailLevel]
        return ValueError(f"Invalid detail level: {detail_level}. Valid levels are: {', '.join(valid_levels)}")

    @classmethod
    def _parse_level(cls, detail_level) -> "DetailLevel":
        """Convert a detail-level string (or enum member) into a DetailLevel.

        Matching ignores case and surrounding whitespace. Anything that
        cannot be resolved -- including non-string input such as ``None`` --
        raises ValueError, so callers see one exception type.
        """
        if isinstance(detail_level, DetailLevel):
            return detail_level
        try:
            return DetailLevel(detail_level.strip().lower())
        except (AttributeError, ValueError):
            # AttributeError: input has no .strip()/.lower() (e.g. None, int).
            # ``from None`` hides the internal enum lookup from the traceback.
            raise cls._invalid_level_error(detail_level) from None

    def get_detail_level_config(self, detail_level: str) -> Dict[str, Any]:
        """
        Get configuration parameters for a specific detail level.

        Args:
            detail_level: Detail level as a string (brief, standard, detailed, comprehensive)

        Returns:
            Dictionary of configuration parameters for the specified detail level.
            A copy is returned, so changing it does not alter the manager's
            stored configuration.

        Raises:
            ValueError: If the detail level is not valid
        """
        level = self._parse_level(detail_level)
        try:
            stored_config = self.detail_level_configs[level]
        except KeyError:
            # Only reachable if the instance's table was edited to drop a level.
            raise self._invalid_level_error(detail_level) from None
        # Shallow copy is enough: every stored value is a str or an int.
        return dict(stored_config)

    def get_template_modifier(self, detail_level: str, query_type: str) -> str:
        """
        Get template modifier for a specific detail level and query type.

        Args:
            detail_level: Detail level as a string (brief, standard, detailed, comprehensive)
            query_type: Query type as a string (factual, exploratory, comparative).
                Matched case-insensitively; an unrecognised value falls back
                to the exploratory template.

        Returns:
            Template modifier as a string

        Raises:
            ValueError: If the detail level is not valid
        """
        level = self._parse_level(detail_level)

        normalized_type = query_type.strip().lower() if isinstance(query_type, str) else None
        if normalized_type not in self._QUERY_TYPES:
            normalized_type = self._DEFAULT_QUERY_TYPE  # Default to exploratory if query type is not valid

        try:
            return self.template_modifiers[level][normalized_type]
        except KeyError:
            # Only reachable if the instance's table was edited to drop an entry.
            raise self._invalid_level_error(detail_level) from None

    def get_available_detail_levels(self) -> List[Tuple[str, str]]:
        """
        Get a list of available detail levels with descriptions.

        Returns:
            List of (detail level value, description) tuples, in the order the
            levels appear in ``detail_level_configs``
        """
        return [(level.value, config["description"])
                for level, config in self.detail_level_configs.items()]
|
||||
|
||||
|
||||
# Module-level instance shared by every caller of the accessor below.
report_detail_level_manager = ReportDetailLevelManager()


def get_report_detail_level_manager() -> ReportDetailLevelManager:
    """
    Return the shared report detail level manager.

    Returns:
        The module-level ReportDetailLevelManager instance; every call
        returns the same object.
    """
    shared_manager = report_detail_level_manager
    return shared_manager
|
|
@ -15,6 +15,7 @@ from report.database.db_manager import get_db_manager, initialize_database
|
|||
from report.document_scraper import get_document_scraper
|
||||
from report.document_processor import get_document_processor
|
||||
from report.report_synthesis import get_report_synthesizer
|
||||
from report.report_detail_levels import get_report_detail_level_manager, DetailLevel
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
@ -35,12 +36,56 @@ class ReportGenerator:
|
|||
self.document_scraper = get_document_scraper()
|
||||
self.document_processor = get_document_processor()
|
||||
self.report_synthesizer = get_report_synthesizer()
|
||||
self.detail_level_manager = get_report_detail_level_manager()
|
||||
self.detail_level = "standard" # Default detail level
|
||||
self.model_name = None # Will use default model based on detail level
|
||||
|
||||
async def initialize(self):
    """
    Prepare the report generator for use.

    Awaits ``initialize_database()`` and then logs that the generator
    has been initialized.
    """
    await initialize_database()
    logger.info("Report generator initialized")
|
||||
|
||||
def set_detail_level(self, detail_level: str) -> None:
    """
    Select the detail level used for subsequent report generation.

    The level is validated by looking up its configuration. If that
    configuration names a model different from the current one, the
    model name is updated and a new report synthesizer is obtained for it.

    Args:
        detail_level: Detail level (brief, standard, detailed, comprehensive)

    Raises:
        ValueError: Logged and re-raised when raised inside this method,
            e.g. by the configuration lookup for an invalid level
    """
    try:
        # The lookup doubles as validation: it raises ValueError for unknown levels.
        level_config = self.detail_level_manager.get_detail_level_config(detail_level)
        self.detail_level = detail_level

        # Swap the synthesizer only when the level asks for a different model.
        configured_model = level_config.get("model")
        needs_new_model = bool(configured_model) and configured_model != self.model_name
        if needs_new_model:
            self.model_name = configured_model
            self.report_synthesizer = get_report_synthesizer(configured_model)

        logger.info(f"Detail level set to {detail_level} with model {configured_model}")
    except ValueError as exc:
        logger.error(f"Error setting detail level: {exc}")
        raise
|
||||
|
||||
def get_detail_level_config(self) -> Dict[str, Any]:
    """
    Look up the configuration for the currently selected detail level.

    Returns:
        Dictionary of configuration parameters that the detail level
        manager holds for ``self.detail_level``
    """
    lookup_config = self.detail_level_manager.get_detail_level_config
    return lookup_config(self.detail_level)
|
||||
|
||||
def get_available_detail_levels(self) -> List[Tuple[str, str]]:
    """
    List the detail levels that can be selected, with their descriptions.

    Returns:
        Whatever the detail level manager reports: a list of tuples
        containing detail level and description
    """
    available_levels = self.detail_level_manager.get_available_detail_levels()
    return available_levels
|
||||
|
||||
async def process_search_results(self, search_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Process search results by scraping the URLs and storing them in the database.
|
||||
|
@ -96,8 +141,8 @@ class ReportGenerator:
|
|||
async def prepare_documents_for_report(self,
|
||||
search_results: List[Dict[str, Any]],
|
||||
token_budget: Optional[int] = None,
|
||||
chunk_size: int = 1000,
|
||||
overlap_size: int = 100) -> List[Dict[str, Any]]:
|
||||
chunk_size: Optional[int] = None,
|
||||
overlap_size: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Prepare documents for report generation by processing search results,
|
||||
prioritizing documents, and chunking them to fit within token budget.
|
||||
|
@ -111,6 +156,20 @@ class ReportGenerator:
|
|||
Returns:
|
||||
List of selected document chunks
|
||||
"""
|
||||
# Get configuration from detail level if not specified
|
||||
config = self.get_detail_level_config()
|
||||
|
||||
if token_budget is None:
|
||||
token_budget = config.get("token_budget")
|
||||
|
||||
if chunk_size is None:
|
||||
chunk_size = config.get("chunk_size", 1000)
|
||||
|
||||
if overlap_size is None:
|
||||
overlap_size = config.get("overlap_size", 100)
|
||||
|
||||
logger.info(f"Preparing documents with token_budget={token_budget}, chunk_size={chunk_size}, overlap_size={overlap_size}")
|
||||
|
||||
# Process search results to get documents and relevance scores
|
||||
documents, relevance_scores = await self.process_search_results(search_results)
|
||||
|
||||
|
@ -126,11 +185,12 @@ class ReportGenerator:
|
|||
return selected_chunks
|
||||
|
||||
async def generate_report(self,
|
||||
search_results: List[Dict[str, Any]],
|
||||
query: str,
|
||||
token_budget: Optional[int] = None,
|
||||
chunk_size: int = 1000,
|
||||
overlap_size: int = 100) -> str:
|
||||
search_results: List[Dict[str, Any]],
|
||||
query: str,
|
||||
token_budget: Optional[int] = None,
|
||||
chunk_size: Optional[int] = None,
|
||||
overlap_size: Optional[int] = None,
|
||||
detail_level: Optional[str] = None) -> str:
|
||||
"""
|
||||
Generate a report from search results.
|
||||
|
||||
|
@ -140,10 +200,15 @@ class ReportGenerator:
|
|||
token_budget: Maximum number of tokens to use
|
||||
chunk_size: Maximum number of tokens per chunk
|
||||
overlap_size: Number of tokens to overlap between chunks
|
||||
detail_level: Level of detail for the report (brief, standard, detailed, comprehensive)
|
||||
|
||||
Returns:
|
||||
Generated report as a string
|
||||
"""
|
||||
# Set detail level if specified
|
||||
if detail_level:
|
||||
self.set_detail_level(detail_level)
|
||||
|
||||
# Prepare documents for report
|
||||
selected_chunks = await self.prepare_documents_for_report(
|
||||
search_results,
|
||||
|
@ -153,7 +218,11 @@ class ReportGenerator:
|
|||
)
|
||||
|
||||
# Generate report using report synthesizer
|
||||
report = await self.report_synthesizer.synthesize_report(selected_chunks, query)
|
||||
report = await self.report_synthesizer.synthesize_report(
|
||||
selected_chunks,
|
||||
query,
|
||||
detail_level=self.detail_level
|
||||
)
|
||||
|
||||
return report
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ import litellm
|
|||
from litellm import completion
|
||||
|
||||
from config.config import get_config
|
||||
from report.report_detail_levels import get_report_detail_level_manager, DetailLevel
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
@ -166,7 +167,7 @@ class ReportSynthesizer:
|
|||
|
||||
return processed_chunks
|
||||
|
||||
async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory") -> str:
|
||||
async def reduce_processed_chunks(self, processed_chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str:
|
||||
"""
|
||||
Reduce phase: Synthesize processed chunks into a coherent report.
|
||||
|
||||
|
@ -174,6 +175,7 @@ class ReportSynthesizer:
|
|||
processed_chunks: List of processed chunks with extracted information
|
||||
query: Original search query
|
||||
query_type: Type of query (factual, exploratory, comparative)
|
||||
detail_level: Level of detail for the report (brief, standard, detailed, comprehensive)
|
||||
|
||||
Returns:
|
||||
Synthesized report as a string
|
||||
|
@ -185,32 +187,9 @@ class ReportSynthesizer:
|
|||
context += f"Source: {chunk.get('url', 'Unknown')}\n"
|
||||
context += f"Extracted information:\n{chunk.get('extracted_info', '')}\n\n"
|
||||
|
||||
# Create a template based on query type
|
||||
if query_type == "factual":
|
||||
template = """Create a comprehensive factual report that directly answers the query. Focus on accuracy and clarity. Include:
|
||||
1. A clear, direct answer to the query
|
||||
2. Supporting evidence and facts from the sources
|
||||
3. Any relevant context needed to understand the answer
|
||||
4. Citations for all information (use numbered citations in square brackets [1], [2], etc.)
|
||||
5. A references section at the end listing all sources"""
|
||||
elif query_type == "comparative":
|
||||
template = """Create a comprehensive comparative report that analyzes different perspectives on the query. Include:
|
||||
1. An overview of the topic and why it's significant
|
||||
2. A balanced presentation of different viewpoints or approaches
|
||||
3. Analysis of similarities and differences
|
||||
4. Evidence supporting each perspective
|
||||
5. A synthesis of the information that highlights key insights
|
||||
6. Citations for all information (use numbered citations in square brackets [1], [2], etc.)
|
||||
7. A references section at the end listing all sources"""
|
||||
else: # exploratory (default)
|
||||
template = """Create a comprehensive exploratory report that investigates the query in depth. Include:
|
||||
1. An introduction that frames the topic and its significance
|
||||
2. Key concepts and definitions
|
||||
3. Main findings and insights from the sources
|
||||
4. Analysis of the information that highlights patterns and connections
|
||||
5. Implications or applications of the findings
|
||||
6. Citations for all information (use numbered citations in square brackets [1], [2], etc.)
|
||||
7. A references section at the end listing all sources"""
|
||||
# Get template modifier based on detail level and query type
|
||||
detail_level_manager = get_report_detail_level_manager()
|
||||
template = detail_level_manager.get_template_modifier(detail_level, query_type)
|
||||
|
||||
# Create the prompt for synthesizing the report
|
||||
messages = [
|
||||
|
@ -224,7 +203,7 @@ class ReportSynthesizer:
|
|||
Information from sources:
|
||||
{context}
|
||||
|
||||
Synthesize this information into a comprehensive report that addresses the query. Use your own words to create a coherent narrative, but ensure all information is based on the provided sources. Include citations and a references section."""}
|
||||
Synthesize this information into a report that addresses the query. Use your own words to create a coherent narrative, but ensure all information is based on the provided sources. Include citations and a references section."""}
|
||||
]
|
||||
|
||||
# Generate the report
|
||||
|
@ -232,7 +211,7 @@ class ReportSynthesizer:
|
|||
|
||||
return report
|
||||
|
||||
async def synthesize_report(self, chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory") -> str:
|
||||
async def synthesize_report(self, chunks: List[Dict[str, Any]], query: str, query_type: str = "exploratory", detail_level: str = "standard") -> str:
|
||||
"""
|
||||
Synthesize a report from document chunks using the map-reduce approach.
|
||||
|
||||
|
@ -240,12 +219,14 @@ class ReportSynthesizer:
|
|||
chunks: List of document chunks
|
||||
query: Original search query
|
||||
query_type: Type of query (factual, exploratory, comparative)
|
||||
detail_level: Level of detail for the report (brief, standard, detailed, comprehensive)
|
||||
|
||||
Returns:
|
||||
Synthesized report as a string
|
||||
"""
|
||||
logger.info(f"Synthesizing report for query: {query}")
|
||||
logger.info(f"Using {len(chunks)} document chunks")
|
||||
logger.info(f"Detail level: {detail_level}")
|
||||
|
||||
# Determine query type if not specified
|
||||
if query_type == "exploratory":
|
||||
|
@ -264,7 +245,7 @@ class ReportSynthesizer:
|
|||
|
||||
# Reduce phase: Synthesize processed chunks into a coherent report
|
||||
logger.info("Starting reduce phase: Synthesizing processed chunks into a report")
|
||||
report = await self.reduce_processed_chunks(processed_chunks, query, query_type)
|
||||
report = await self.reduce_processed_chunks(processed_chunks, query, query_type, detail_level)
|
||||
logger.info("Reduce phase complete: Report generated")
|
||||
|
||||
return report
|
||||
|
|
|
@ -22,6 +22,7 @@ from query.query_processor import get_query_processor
|
|||
from execution.search_executor import SearchExecutor
|
||||
from ranking.jina_reranker import get_jina_reranker
|
||||
from report.report_generator import get_report_generator, initialize_report_generator
|
||||
from report.report_detail_levels import get_report_detail_level_manager
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
@ -34,8 +35,9 @@ async def query_to_report(
|
|||
search_engines: Optional[List[str]] = None,
|
||||
num_results: int = 10,
|
||||
token_budget: Optional[int] = None,
|
||||
chunk_size: int = 1000,
|
||||
overlap_size: int = 100,
|
||||
chunk_size: Optional[int] = None,
|
||||
overlap_size: Optional[int] = None,
|
||||
detail_level: str = "standard",
|
||||
use_mock: bool = False
|
||||
) -> str:
|
||||
"""
|
||||
|
@ -49,12 +51,14 @@ async def query_to_report(
|
|||
token_budget: Maximum number of tokens to use for report generation
|
||||
chunk_size: Maximum number of tokens per chunk
|
||||
overlap_size: Number of tokens to overlap between chunks
|
||||
detail_level: Level of detail for the report (brief, standard, detailed, comprehensive)
|
||||
use_mock: If True, use mock data instead of making actual API calls
|
||||
|
||||
Returns:
|
||||
Path to the generated report
|
||||
"""
|
||||
logger.info(f"Processing query: {query}")
|
||||
logger.info(f"Detail level: {detail_level}")
|
||||
|
||||
# Step 1: Process the query
|
||||
query_processor = get_query_processor()
|
||||
|
@ -76,6 +80,14 @@ async def query_to_report(
|
|||
|
||||
# Step 3: Execute search
|
||||
search_executor = SearchExecutor()
|
||||
|
||||
# If detail level is specified, adjust num_results based on the detail level
|
||||
if detail_level and not num_results:
|
||||
detail_level_manager = get_report_detail_level_manager()
|
||||
config = detail_level_manager.get_detail_level_config(detail_level)
|
||||
num_results = config.get("num_results", 10)
|
||||
logger.info(f"Using {num_results} results per search engine based on detail level: {detail_level}")
|
||||
|
||||
search_results = search_executor.execute_search(
|
||||
structured_query,
|
||||
search_engines=search_engines,
|
||||
|
@ -124,13 +136,14 @@ async def query_to_report(
|
|||
report_generator = get_report_generator()
|
||||
|
||||
# Step 6: Generate report
|
||||
logger.info(f"Generating report...")
|
||||
logger.info(f"Generating report with detail level: {detail_level}...")
|
||||
report = await report_generator.generate_report(
|
||||
search_results=reranked_results,
|
||||
query=query,
|
||||
token_budget=token_budget,
|
||||
chunk_size=chunk_size,
|
||||
overlap_size=overlap_size
|
||||
overlap_size=overlap_size,
|
||||
detail_level=detail_level
|
||||
)
|
||||
|
||||
logger.info(f"Report generated. Length: {len(report)} characters")
|
||||
|
@ -150,12 +163,17 @@ def main():
|
|||
parser.add_argument('query', help='The query to process')
|
||||
parser.add_argument('--output', '-o', default='report.md', help='Output file path')
|
||||
parser.add_argument('--search-engines', '-s', nargs='+', help='Search engines to use')
|
||||
parser.add_argument('--num-results', '-n', type=int, default=10, help='Number of results per search engine')
|
||||
parser.add_argument('--num-results', '-n', type=int, help='Number of results per search engine')
|
||||
parser.add_argument('--token-budget', '-t', type=int, help='Maximum number of tokens for report generation')
|
||||
parser.add_argument('--chunk-size', '-c', type=int, default=1000, help='Maximum tokens per chunk')
|
||||
parser.add_argument('--overlap-size', '-l', type=int, default=100, help='Tokens to overlap between chunks')
|
||||
parser.add_argument('--chunk-size', '-c', type=int, help='Maximum tokens per chunk')
|
||||
parser.add_argument('--overlap-size', '-l', type=int, help='Tokens to overlap between chunks')
|
||||
parser.add_argument('--detail-level', '-d', type=str, default='standard',
|
||||
choices=['brief', 'standard', 'detailed', 'comprehensive'],
|
||||
help='Level of detail for the report')
|
||||
parser.add_argument('--use-mock', '-m', action='store_true', help='Use mock data instead of API calls')
|
||||
parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')
|
||||
parser.add_argument('--list-detail-levels', action='store_true',
|
||||
help='List available detail levels with descriptions and exit')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -163,6 +181,15 @@ def main():
|
|||
if args.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
|
||||
# List detail levels if requested
|
||||
if args.list_detail_levels:
|
||||
detail_level_manager = get_report_detail_level_manager()
|
||||
detail_levels = detail_level_manager.get_available_detail_levels()
|
||||
print("Available detail levels:")
|
||||
for level, description in detail_levels:
|
||||
print(f" {level}: {description}")
|
||||
return
|
||||
|
||||
# Run the workflow
|
||||
asyncio.run(query_to_report(
|
||||
query=args.query,
|
||||
|
@ -172,6 +199,7 @@ def main():
|
|||
token_budget=args.token_budget,
|
||||
chunk_size=args.chunk_size,
|
||||
overlap_size=args.overlap_size,
|
||||
detail_level=args.detail_level,
|
||||
use_mock=args.use_mock
|
||||
))
|
||||
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Test Detail Levels Script
|
||||
|
||||
This script tests the report generation with different detail levels
|
||||
for the same query to demonstrate the differences.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
# Add parent directory to path to import modules
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from scripts.query_to_report import query_to_report
|
||||
from report.report_detail_levels import get_report_detail_level_manager, DetailLevel
|
||||
|
||||
|
||||
async def run_detail_level_test(query: str, use_mock: bool = False):
    """
    Run the query to report workflow once per detail level and print statistics.

    For every member of ``DetailLevel`` this prints the level's configuration,
    generates a report into ``report_<timestamp>_<level>.md`` and then prints
    the elapsed time, the file size and the word count of the generated report.

    Args:
        query: The query to process
        use_mock: If True, use mock data instead of making actual API calls
    """
    # One timestamp is shared by all output files of this run so they sort together
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Get detail level manager
    detail_level_manager = get_report_detail_level_manager()

    # Get all detail levels (as their string values)
    detail_levels = [level.value for level in DetailLevel]

    print(f"Processing query: {query}")
    print(f"Testing {len(detail_levels)} detail levels: {', '.join(detail_levels)}")
    print("This may take several minutes to complete all detail levels...")

    # Process each detail level
    for detail_level in detail_levels:
        print(f"\n{'=' * 80}")
        print(f"Processing detail level: {detail_level}")

        # Get detail level configuration
        config = detail_level_manager.get_detail_level_config(detail_level)

        # Print detail level configuration
        print("Detail level configuration:")
        print(f" Number of results per search engine: {config.get('num_results')}")
        print(f" Token budget: {config.get('token_budget')}")
        print(f" Chunk size: {config.get('chunk_size')}")
        print(f" Overlap size: {config.get('overlap_size')}")
        print(f" Model: {config.get('model')}")

        # Set output file
        output_file = f"report_{timestamp}_{detail_level}.md"

        # Run the workflow
        start_time = datetime.now()
        print(f"Started at: {start_time.strftime('%H:%M:%S')}")

        await query_to_report(
            query=query,
            output_file=output_file,
            detail_level=detail_level,
            use_mock=use_mock
        )

        end_time = datetime.now()
        duration = end_time - start_time
        print(f"Completed at: {end_time.strftime('%H:%M:%S')}")
        print(f"Duration: {duration.total_seconds():.2f} seconds")

        # Report statistics are best-effort: a missing or unreadable report must
        # not abort the remaining detail levels, so the size lookup is guarded
        # by the same handler as the read.
        try:
            file_size = os.path.getsize(output_file)
            with open(output_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"Error reading report: {e}")
            continue

        print(f"Report saved to: {output_file}")
        print(f"Report size: {file_size} bytes")
        print(f"Word count: {len(content.split())}")

    print(f"\n{'=' * 80}")
    print("All detail levels processed successfully!")
    print(f"Reports saved with prefix: report_{timestamp}_")
|
||||
|
||||
|
||||
def main():
    """Parse the command line, then either list the detail levels or run the test."""
    default_query = (
        "What is the environmental and economic impact of electric vehicles "
        "compared to traditional vehicles?"
    )

    cli = argparse.ArgumentParser(description='Test report generation with different detail levels')
    cli.add_argument('--query', '-q', type=str, default=default_query,
                     help='The query to process')
    cli.add_argument('--use-mock', '-m', action='store_true',
                     help='Use mock data instead of API calls')
    cli.add_argument('--list-detail-levels', action='store_true',
                     help='List available detail levels with descriptions and exit')
    options = cli.parse_args()

    # Normal path: run the comparison across all detail levels
    if not options.list_detail_levels:
        asyncio.run(run_detail_level_test(query=options.query, use_mock=options.use_mock))
        return

    # Listing path: show each level with its description and exit
    available = get_report_detail_level_manager().get_available_detail_levels()
    print("Available detail levels:")
    for name, summary in available:
        print(f" {name}: {summary}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -15,13 +15,15 @@ from datetime import datetime
|
|||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from scripts.query_to_report import query_to_report
|
||||
from report.report_detail_levels import get_report_detail_level_manager
|
||||
|
||||
|
||||
async def run_ev_test(use_mock: bool = False):
|
||||
async def run_ev_test(detail_level: str = "standard", use_mock: bool = False):
|
||||
"""
|
||||
Run a test of the query to report workflow with an electric vehicles query.
|
||||
|
||||
Args:
|
||||
detail_level: Level of detail for the report (brief, standard, detailed, comprehensive)
|
||||
use_mock: If True, use mock data instead of making actual API calls
|
||||
"""
|
||||
# Query about electric vehicles
|
||||
|
@ -29,16 +31,29 @@ async def run_ev_test(use_mock: bool = False):
|
|||
|
||||
# Generate timestamp for unique output file
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
output_file = f"ev_report_{timestamp}.md"
|
||||
output_file = f"ev_report_{timestamp}_{detail_level}.md"
|
||||
|
||||
print(f"Processing query: {query}")
|
||||
print(f"Detail level: {detail_level}")
|
||||
print(f"This may take a few minutes depending on the number of search results and API response times...")
|
||||
|
||||
# Get detail level configuration
|
||||
detail_level_manager = get_report_detail_level_manager()
|
||||
config = detail_level_manager.get_detail_level_config(detail_level)
|
||||
|
||||
# Print detail level configuration
|
||||
print(f"\nDetail level configuration:")
|
||||
print(f" Number of results per search engine: {config.get('num_results')}")
|
||||
print(f" Token budget: {config.get('token_budget')}")
|
||||
print(f" Chunk size: {config.get('chunk_size')}")
|
||||
print(f" Overlap size: {config.get('overlap_size')}")
|
||||
print(f" Model: {config.get('model')}")
|
||||
|
||||
# Run the workflow
|
||||
await query_to_report(
|
||||
query=query,
|
||||
output_file=output_file,
|
||||
num_results=7, # Get a good number of results for a comprehensive report
|
||||
detail_level=detail_level,
|
||||
use_mock=use_mock
|
||||
)
|
||||
|
||||
|
@ -60,12 +75,26 @@ async def run_ev_test(use_mock: bool = False):
|
|||
def main():
    """
    Main function to parse arguments and run the test.

    With ``--list-detail-levels`` the available levels and their descriptions
    are printed and the function returns without running the workflow.
    Otherwise the EV test is run exactly once with the selected detail level.
    """
    parser = argparse.ArgumentParser(description='Test the query to report workflow with EV query')
    parser.add_argument('--detail-level', '-d', type=str, default='standard',
                        choices=['brief', 'standard', 'detailed', 'comprehensive'],
                        help='Level of detail for the report')
    parser.add_argument('--use-mock', '-m', action='store_true', help='Use mock data instead of API calls')
    parser.add_argument('--list-detail-levels', action='store_true',
                        help='List available detail levels with descriptions and exit')

    args = parser.parse_args()

    # List detail levels if requested
    if args.list_detail_levels:
        detail_level_manager = get_report_detail_level_manager()
        detail_levels = detail_level_manager.get_available_detail_levels()
        print("Available detail levels:")
        for level, description in detail_levels:
            print(f" {level}: {description}")
        return

    # Run the test once, honoring --detail-level. The earlier call that passed
    # only use_mock is dropped: it ignored the selected level and made the
    # whole workflow run twice.
    asyncio.run(run_ev_test(detail_level=args.detail_level, use_mock=args.use_mock))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue