From 21f75c0d25e1bb834d8486cc8efc50dd805f1723 Mon Sep 17 00:00:00 2001
From: Steve White <stwhite@mac.com>
Date: Wed, 12 Mar 2025 11:20:40 -0500
Subject: [PATCH] Add progress tracking to report generation UI

---
 report/report_generator.py | 14 ++++++++++++
 report/report_synthesis.py | 43 +++++++++++++++++++++++++++++++++++++
 run_ui.py                  |  7 ++++++
 ui/gradio_interface.py     | 44 ++++++++++++++++++++++++++++++++++----
 4 files changed, 104 insertions(+), 4 deletions(-)
diff --git a/report/report_generator.py b/report/report_generator.py
index aaf4094..2e8efad 100644
--- a/report/report_generator.py
+++ b/report/report_generator.py
@@ -187,6 +187,20 @@ class ReportGenerator:
         
         return selected_chunks
     
+    def set_progress_callback(self, callback):
+        """
+        Set the progress callback for both synthesizers.
+        
+        Args:
+            callback: Function that takes (current_progress, total, current_report) as arguments
+        """
+        # Set the callback for both synthesizers
+        if hasattr(self.report_synthesizer, 'set_progress_callback'):
+            self.report_synthesizer.set_progress_callback(callback)
+        
+        if hasattr(self.progressive_report_synthesizer, 'set_progress_callback'):
+            self.progressive_report_synthesizer.set_progress_callback(callback)
+    
     async def generate_report(self, 
                              search_results: List[Dict[str, Any]],
                              query: str,
diff --git a/report/report_synthesis.py b/report/report_synthesis.py
index 4e78566..b300b52 100644
--- a/report/report_synthesis.py
+++ b/report/report_synthesis.py
@@ -57,6 +57,26 @@ class ReportSynthesizer:
         
         # Flag to process <thinking> tags in model output
         self.process_thinking_tags = False
+        
+        # Progress tracking
+        self.progress_callback = None
+        self.total_chunks = 0
+        self.processed_chunk_count = 0
+    
+    def set_progress_callback(self, callback):
+        """
+        Set a callback function to report progress.
+        
+        Args:
+            callback: Function that takes (current_progress, total, current_report) as arguments
+        """
+        self.progress_callback = callback
+    
+    def _report_progress(self, current_report=None):
+        """Report progress through the callback if set."""
+        if self.progress_callback and self.total_chunks > 0:
+            progress = min(self.processed_chunk_count / self.total_chunks, 1.0)
+            self.progress_callback(progress, self.total_chunks, current_report)
     
     def _setup_provider(self) -> None:
         """Set up the LLM provider based on the model configuration."""
@@ -289,6 +309,10 @@ class ReportSynthesizer:
                     processed_chunk['extracted_info'] = extracted_info
                     batch_results.append(processed_chunk)
                     
+                    # Update progress
+                    self.processed_chunk_count += 1
+                    self._report_progress()
+                    
                     logger.info(f"Completed chunk {chunk_index}/{total_chunks} ({chunk_index/total_chunks*100:.1f}% complete)")
                 except Exception as e:
                     logger.error(f"Error processing chunk {chunk_index}/{total_chunks}: {str(e)}")
@@ -296,6 +320,10 @@ class ReportSynthesizer:
                     processed_chunk = chunk.copy()
                     processed_chunk['extracted_info'] = f"Error extracting information: {str(e)}"
                     batch_results.append(processed_chunk)
+                    
+                    # Update progress even for failed chunks
+                    self.processed_chunk_count += 1
+                    self._report_progress()
             
             processed_chunks.extend(batch_results)
             
@@ -510,6 +538,10 @@ class ReportSynthesizer:
             logger.warning("No document chunks provided for report synthesis.")
             return "No information found for the given query."
         
+        # Reset progress tracking
+        self.total_chunks = len(chunks)
+        self.processed_chunk_count = 0
+        
         # Verify that a template exists for the given query type and detail level
         template = self._get_template_from_strings(query_type, detail_level)
         if not template:
@@ -545,6 +577,9 @@ class ReportSynthesizer:
             # Recalculate estimated tokens
             total_tokens = sum(len(chunk.get('content', '').split()) * 1.3 for chunk in chunks)
             logger.info(f"Reduced to {len(chunks)} chunks with estimated {total_tokens} tokens")
+            
+            # Update total chunks for progress tracking
+            self.total_chunks = len(chunks)
         
         logger.info(f"Starting map phase for {len(chunks)} document chunks with query type '{query_type}' and detail level '{detail_level}'")
         
@@ -578,6 +613,10 @@ class ReportSynthesizer:
         
         logger.info(f"Starting reduce phase to synthesize report from {len(processed_chunks)} processed chunks")
         
+        # Update progress status for reduce phase
+        if self.progress_callback:
+            self.progress_callback(0.9, self.total_chunks, "Synthesizing final report...")
+        
         # Reduce phase: Synthesize processed chunks into a coherent report
         report = await self.reduce_processed_chunks(processed_chunks, query, query_type, detail_level)
         
@@ -586,6 +625,10 @@ class ReportSynthesizer:
             logger.info("Processing thinking tags in report")
             report = self._process_thinking_tags(report)
         
+        # Final progress update
+        if self.progress_callback:
+            self.progress_callback(1.0, self.total_chunks, report)
+        
         return report
 
 
diff --git a/run_ui.py b/run_ui.py
index 628d463..116b793 100755
--- a/run_ui.py
+++ b/run_ui.py
@@ -35,8 +35,15 @@ def main():
     args = parse_args()
     
     print("Starting Intelligent Research System UI...")
+    
+    # Create interface and initialize async components
+    import asyncio
     interface = GradioInterface()
     
+    # Run the async initialization in the event loop
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(interface.async_init())
+    
     # Launch with the specified arguments
     interface.launch(
         share=args.share,
diff --git a/ui/gradio_interface.py b/ui/gradio_interface.py
index 41db991..3de40ef 100644
--- a/ui/gradio_interface.py
+++ b/ui/gradio_interface.py
@@ -185,7 +185,7 @@ class GradioInterface:
         return markdown
 
     async def generate_report(self, query, detail_level="standard", custom_model=None, 
-                       results_file=None, process_thinking_tags=False):
+                       results_file=None, process_thinking_tags=False, progress=gr.Progress()):
         """
         Generate a report for the given query.
         
@@ -195,6 +195,7 @@ class GradioInterface:
             custom_model: Custom model to use for report generation
             results_file: Path to a file containing search results
             process_thinking_tags: Whether to process thinking tags in the model output
+            progress: Gradio progress indicator
             
         Returns:
             Path to the generated report
@@ -221,8 +222,15 @@ class GradioInterface:
             # If custom model is provided, use it
             if custom_model:
                 config["model"] = custom_model
-                # This will update the report synthesizer to use the custom model
-                self.report_generator.set_detail_level(detail_level)
+            
+            # Ensure report generator is initialized
+            if self.report_generator is None:
+                print("Initializing report generator...")
+                await initialize_report_generator()
+                self.report_generator = get_report_generator()
+            
+            # This will update the report synthesizer to use the custom model
+            self.report_generator.set_detail_level(detail_level)
                 
             print(f"Generating report with detail level: {detail_level}")
             print(f"Detail level configuration: {config}")
@@ -323,11 +331,36 @@ class GradioInterface:
                         top_n=config["num_results"]
                     )
             
+            # Set up progress tracking
+            self.progress_status = "Preparing documents..."
+            self.progress_value = 0
+            self.progress_total = 1  # Will be updated when we know the total chunks
+            
+            # Define progress callback function
+            def progress_callback(current_progress, total_chunks, current_report):
+                self.progress_value = current_progress
+                self.progress_total = total_chunks
+                # Update the progress bar
+                progress(current_progress)
+            
+            # Set the progress callback for the report generator
+            if hasattr(self.report_generator, 'set_progress_callback'):
+                self.report_generator.set_progress_callback(progress_callback)
+            
             # Generate the report
             print(f"Generating report with {len(search_results)} search results")
             if len(search_results) == 0:
                 print("WARNING: No search results found. Report generation may fail.")
-                
+            
+            # Update progress status based on detail level
+            if detail_level.lower() == "comprehensive":
+                self.progress_status = "Generating progressive report..."
+            else:
+                self.progress_status = "Processing document chunks..."
+            
+            # Initial progress update
+            progress(0)
+            
             report = await self.report_generator.generate_report(
                 search_results=search_results,
                 query=query,
@@ -337,6 +370,9 @@ class GradioInterface:
                 detail_level=detail_level
             )
             
+            # Final progress update
+            progress(1.0)
+            
             # Process thinking tags if requested
             if process_thinking_tags:
                 report = self._process_thinking_tags(report)