Implement batch processing for report generation to avoid payload limits

Steve White 2025-02-28 11:10:19 -06:00
parent fd92885a65
commit 76cedb9528
2 changed files with 20 additions and 6 deletions


@@ -56,10 +56,10 @@ class ReportDetailLevelManager:
             "description": "A comprehensive report with in-depth analysis, methodology, and implications."
         },
         DetailLevel.COMPREHENSIVE: {
-            "num_results": 10,
-            "token_budget": 120000,
-            "chunk_size": 1000,
-            "overlap_size": 100,
+            "num_results": 8,
+            "token_budget": 80000,
+            "chunk_size": 800,
+            "overlap_size": 80,
             "model": "llama-3.3-70b-versatile",
             "description": "An exhaustive report with all available information, extensive analysis, and detailed references."
         }


@@ -354,8 +354,22 @@ class ReportSynthesizer:
         logger.info(f"Starting map phase for {len(chunks)} document chunks with query type '{query_type}' and detail level '{detail_level}'")
         # Map phase: Process individual chunks to extract key information
-        processed_chunks = await self.map_document_chunks(chunks, query, detail_level)
+        # Process chunks in batches to avoid hitting payload limits
+        batch_size = 3  # Process 3 chunks at a time
+        processed_chunks = []
+        for i in range(0, len(chunks), batch_size):
+            batch = chunks[i:i+batch_size]
+            logger.info(f"Processing batch {i//batch_size + 1}/{(len(chunks) + batch_size - 1)//batch_size} with {len(batch)} chunks")
+            # Process this batch
+            batch_results = await self.map_document_chunks(batch, query, detail_level)
+            processed_chunks.extend(batch_results)
+            # Add a small delay between batches to avoid rate limiting
+            if i + batch_size < len(chunks):
+                logger.info("Pausing briefly between batches...")
+                await asyncio.sleep(2)
         logger.info(f"Starting reduce phase to synthesize report from {len(processed_chunks)} processed chunks")