# ira/sim-search-api/app/services/report_service.py
#
# 356 lines
# 13 KiB
# Python

"""
Report service for the sim-search API.
This module provides services for report generation and management.
"""
import sys
import os
import time
import json
import asyncio
import tempfile
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from sqlalchemy.orm import Session
from app.core.config import settings
from app.db.models import Search, Report
from app.schemas.report import ReportCreate
# Add sim-search to the python path
sim_search_path = Path(settings.SIM_SEARCH_PATH)
sys.path.append(str(sim_search_path))
# Import sim-search components
from report.report_generator import get_report_generator, initialize_report_generator
from report.report_detail_levels import get_report_detail_level_manager
from services.search_service import SearchService
class ReportService:
    """
    Service for report generation and management.

    This class provides methods to generate and manage reports using
    the sim-search report generation functionality.
    """

    def __init__(self):
        """Initialize the report service."""
        # The generator is created lazily by initialize().
        self.report_generator = None
        self.detail_level_manager = get_report_detail_level_manager()
        self.search_service = SearchService()
        # Exported report files are written below the system temp directory.
        self.temp_dir = Path(tempfile.gettempdir()) / "sim-search-api"
        self.temp_dir.mkdir(parents=True, exist_ok=True)

    async def initialize(self):
        """Initialize the report generator."""
        await initialize_report_generator()
        self.report_generator = get_report_generator()

    async def generate_report_background(
        self,
        report_id: str,
        report_in: ReportCreate,
        search: Optional[Search] = None,
        db: Optional[Session] = None,
        progress_dict: Optional[Dict[str, Dict[str, Any]]] = None,
    ) -> None:
        """
        Generate a report in the background.

        Search results are taken from ``search`` when given, otherwise from
        ``report_in.search_results``, otherwise a new search is executed for
        ``report_in.query``.

        Args:
            report_id: ID of the report
            report_in: Report creation parameters
            search: Search record
            db: Database session
            progress_dict: Dictionary to store progress information

        Raises:
            Exception: Any error raised during generation is recorded in
                ``progress_dict`` and on the report row, then re-raised.
        """
        try:
            # Initialize report generator if not already initialized
            if self.report_generator is None:
                await self.initialize()

            # Get search results
            search_results = []
            if search:
                # Use search results from the database
                search_results = search.results
            elif report_in.search_results:
                # Use search results provided in the request
                search_results = report_in.search_results
            else:
                # Execute a new search
                structured_query = {
                    "original_query": report_in.query,
                    "enhanced_query": report_in.query,
                }
                search_results_dict = await self.search_service.execute_search(
                    structured_query=structured_query,
                    num_results=10,
                )
                # Flatten the per-engine result lists into a single list
                for engine_results in search_results_dict["results"].values():
                    search_results.extend(engine_results)

            # Set up progress tracking
            if progress_dict is not None:
                def progress_callback(current_progress, total_chunks, current_report):
                    # Only update entries that are still being tracked.
                    if report_id not in progress_dict:
                        return
                    current_chunk = int(current_progress * total_chunks)
                    progress_dict[report_id] = {
                        "progress": current_progress,
                        "status": f"Processing chunk {current_chunk}/{total_chunks}...",
                        "current_chunk": current_chunk,
                        "total_chunks": total_chunks,
                        "current_report": current_report,
                    }

                self.report_generator.set_progress_callback(progress_callback)

            # Set detail level
            if report_in.detail_level:
                self.report_generator.set_detail_level(report_in.detail_level)

            # Set model if provided
            if report_in.model:
                self.report_generator.set_model(report_in.model)

            # Generate report
            report_content = await self.report_generator.generate_report(
                search_results=search_results,
                query=report_in.query,
                token_budget=report_in.token_budget,
                chunk_size=report_in.chunk_size,
                overlap_size=report_in.overlap_size,
                detail_level=report_in.detail_level,
                query_type=report_in.query_type,
            )

            # Update report in database
            if db:
                report = db.query(Report).filter(Report.id == report_id).first()
                if report:
                    report.content = report_content
                    report.model_used = self.report_generator.model_name
                    db.commit()

            # Update progress
            if progress_dict is not None and report_id in progress_dict:
                progress_dict[report_id] = {
                    "progress": 1.0,
                    "status": "Report generation complete",
                    "current_chunk": 0,
                    "total_chunks": 0,
                    "current_report": None,
                }
        except Exception as e:
            error_message = f"Error generating report: {str(e)}"

            # Update progress with error
            if progress_dict is not None and report_id in progress_dict:
                progress_dict[report_id] = {
                    "progress": 1.0,
                    "status": error_message,
                    "current_chunk": 0,
                    "total_chunks": 0,
                    "current_report": None,
                }

            # Update report in database with error
            if db:
                try:
                    # The failure may have come from the commit above; roll
                    # back first so the session can be used again.
                    db.rollback()
                    report = db.query(Report).filter(Report.id == report_id).first()
                    if report:
                        report.content = error_message
                        db.commit()
                except Exception:
                    # Recording the error is best-effort: log the secondary
                    # failure so the original exception is the one re-raised.
                    import logging

                    logging.getLogger(__name__).exception(
                        "Could not record generation error for report %s", report_id
                    )

            # Re-raise the original exception
            raise

    async def generate_report_file(self, report: Report, format: str = "markdown") -> str:
        """
        Generate a report file in the specified format.

        The file is always created inside ``self.temp_dir`` and written as
        UTF-8. When the format is not one of ``markdown``, ``html`` or
        ``pdf``, or when ``pdf`` is requested but ``markdown``/``weasyprint``
        cannot be imported, the report is written as markdown and the
        returned path carries a ``.markdown`` extension.

        Args:
            report: Report record
            format: Format of the report (markdown, html, pdf)

        Returns:
            Path to the generated file
        """
        # A report that has not been generated yet may have no content.
        content = report.content or ""

        if format == "html":
            import markdown

            document = self._render_html_document(
                report.title, markdown.markdown(content)
            )
            return self._write_text_file(f"report_{report.id}.html", document)

        if format == "pdf":
            try:
                import markdown
                from weasyprint import HTML
            except ImportError:
                # PDF dependencies are not installed; the markdown fallback
                # at the end of this method is used instead.
                HTML = None

            if HTML is not None:
                document = self._render_html_document(
                    report.title, markdown.markdown(content)
                )
                pdf_path = self.temp_dir / f"report_{report.id}.pdf"
                # Render from the in-memory string: an intermediate
                # report_<id>.html file would collide with (and then delete)
                # the file produced by the "html" format.
                HTML(string=document, base_url=str(self.temp_dir)).write_pdf(
                    str(pdf_path)
                )
                return str(pdf_path)

        # Markdown, an unsupported format, or PDF without its dependencies.
        # The file name never contains the caller-supplied format string, so
        # the output cannot be placed outside temp_dir.
        return self._write_text_file(f"report_{report.id}.markdown", content)

    def _write_text_file(self, file_name: str, text: str) -> str:
        """Write ``text`` as UTF-8 to ``file_name`` in temp_dir; return the path."""
        file_path = self.temp_dir / file_name
        file_path.write_text(text, encoding="utf-8")
        return str(file_path)

    @staticmethod
    def _render_html_document(title: Any, body_html: str) -> str:
        """Wrap an HTML fragment in the standalone, styled report page."""
        from html import escape

        # The title is plain text from the report record; escape it so it
        # cannot inject markup into the page head.
        safe_title = escape(str(title or ""))
        return f"""
        <!DOCTYPE html>
        <html>
        <head>
        <title>{safe_title}</title>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <style>
        body {{
        font-family: Arial, sans-serif;
        line-height: 1.6;
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        }}
        h1, h2, h3, h4, h5, h6 {{
        margin-top: 1.5em;
        margin-bottom: 0.5em;
        }}
        a {{
        color: #0366d6;
        text-decoration: none;
        }}
        a:hover {{
        text-decoration: underline;
        }}
        pre {{
        background-color: #f6f8fa;
        border-radius: 3px;
        padding: 16px;
        overflow: auto;
        }}
        code {{
        background-color: #f6f8fa;
        border-radius: 3px;
        padding: 0.2em 0.4em;
        font-family: monospace;
        }}
        blockquote {{
        border-left: 4px solid #dfe2e5;
        padding-left: 16px;
        margin-left: 0;
        color: #6a737d;
        }}
        table {{
        border-collapse: collapse;
        width: 100%;
        }}
        table, th, td {{
        border: 1px solid #dfe2e5;
        }}
        th, td {{
        padding: 8px 16px;
        text-align: left;
        }}
        tr:nth-child(even) {{
        background-color: #f6f8fa;
        }}
        </style>
        </head>
        <body>
{body_html}
        </body>
        </html>
        """