356 lines
13 KiB
Python
356 lines
13 KiB
Python
"""
|
|
Report service for the sim-search API.
|
|
|
|
This module provides services for report generation and management.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import json
|
|
import asyncio
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List, Optional, Union
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.config import settings
|
|
from app.db.models import Search, Report
|
|
from app.schemas.report import ReportCreate
|
|
|
|
# Add sim-search to the python path
|
|
sim_search_path = Path(settings.SIM_SEARCH_PATH)
|
|
sys.path.append(str(sim_search_path))
|
|
|
|
# Import sim-search components
|
|
from report.report_generator import get_report_generator, initialize_report_generator
|
|
from report.report_detail_levels import get_report_detail_level_manager
|
|
from app.services.search_service import SearchService
|
|
|
|
|
|
class ReportService:
    """
    Service for report generation and management.

    This class provides methods to generate and manage reports using
    the sim-search report generation functionality.
    """

    # Formats generate_report_file knows how to render. Any other value is
    # treated as "markdown" (content and file extension alike).
    _SUPPORTED_FORMATS = ("markdown", "html", "pdf")

    # Stylesheet shared by the HTML and PDF renderings.
    _REPORT_CSS = """\
body {
    font-family: Arial, sans-serif;
    line-height: 1.6;
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
}
h1, h2, h3, h4, h5, h6 {
    margin-top: 1.5em;
    margin-bottom: 0.5em;
}
a {
    color: #0366d6;
    text-decoration: none;
}
a:hover {
    text-decoration: underline;
}
pre {
    background-color: #f6f8fa;
    border-radius: 3px;
    padding: 16px;
    overflow: auto;
}
code {
    background-color: #f6f8fa;
    border-radius: 3px;
    padding: 0.2em 0.4em;
    font-family: monospace;
}
blockquote {
    border-left: 4px solid #dfe2e5;
    padding-left: 16px;
    margin-left: 0;
    color: #6a737d;
}
table {
    border-collapse: collapse;
    width: 100%;
}
table, th, td {
    border: 1px solid #dfe2e5;
}
th, td {
    padding: 8px 16px;
    text-align: left;
}
tr:nth-child(even) {
    background-color: #f6f8fa;
}
"""

    def __init__(self):
        """Initialize the report service."""
        # The generator is created lazily by initialize().
        self.report_generator = None
        self.detail_level_manager = get_report_detail_level_manager()
        self.search_service = SearchService()
        # Directory under the system temp dir where report files are written.
        self.temp_dir = Path(tempfile.gettempdir()) / "sim-search-api"
        self.temp_dir.mkdir(exist_ok=True)

    async def initialize(self):
        """Initialize the report generator."""
        await initialize_report_generator()
        self.report_generator = get_report_generator()

    async def generate_report_background(
        self,
        report_id: str,
        report_in: ReportCreate,
        search: Optional[Search] = None,
        db: Optional[Session] = None,
        progress_dict: Optional[Dict[str, Dict[str, Any]]] = None,
    ) -> None:
        """
        Generate a report in the background.

        Search results are taken from ``search`` when given, otherwise from
        ``report_in.search_results``, otherwise from a fresh search on
        ``report_in.query``. The generated content is stored on the matching
        ``Report`` row when ``db`` is given.

        Args:
            report_id: ID of the report
            report_in: Report creation parameters
            search: Search record
            db: Database session
            progress_dict: Dictionary to store progress information; only an
                existing ``report_id`` entry is ever updated

        Raises:
            Exception: Any error raised during generation is recorded in
                ``progress_dict`` and on the report row (best effort), then
                re-raised unchanged.
        """
        try:
            # Initialize report generator if not already initialized
            if self.report_generator is None:
                await self.initialize()

            search_results = await self._collect_search_results(report_in, search)

            # Set up progress tracking
            if progress_dict is not None:

                def progress_callback(current_progress, total_chunks, current_report):
                    if report_id in progress_dict:
                        current_chunk = int(current_progress * total_chunks)
                        progress_dict[report_id] = {
                            "progress": current_progress,
                            "status": f"Processing chunk {current_chunk}/{total_chunks}...",
                            "current_chunk": current_chunk,
                            "total_chunks": total_chunks,
                            "current_report": current_report,
                        }

                self.report_generator.set_progress_callback(progress_callback)

            # NOTE(review): the setters below mutate self.report_generator. If
            # get_report_generator() returns a shared instance, concurrent
            # reports could affect each other's settings - confirm.
            if report_in.detail_level:
                self.report_generator.set_detail_level(report_in.detail_level)

            if report_in.model:
                self.report_generator.set_model(report_in.model)

            # Generate report
            report_content = await self.report_generator.generate_report(
                search_results=search_results,
                query=report_in.query,
                token_budget=report_in.token_budget,
                chunk_size=report_in.chunk_size,
                overlap_size=report_in.overlap_size,
                detail_level=report_in.detail_level,
                query_type=report_in.query_type,
            )

            # Update report in database
            if db is not None:
                report = db.query(Report).filter(Report.id == report_id).first()
                if report:
                    report.content = report_content
                    report.model_used = self.report_generator.model_name
                    db.commit()

            self._finish_progress(progress_dict, report_id, "Report generation complete")

        except Exception as e:
            message = f"Error generating report: {str(e)}"
            self._finish_progress(progress_dict, report_id, message)
            if db is not None:
                self._record_failure(db, report_id, message)
            # Re-raise the original exception
            raise

    async def _collect_search_results(self, report_in, search):
        """Return the search results to build the report from."""
        if search:
            # Use search results from the database
            return search.results
        if report_in.search_results:
            # Use search results provided in the request
            return report_in.search_results

        # Execute a new search
        structured_query = {
            "original_query": report_in.query,
            "enhanced_query": report_in.query,
        }
        search_results_dict = await self.search_service.execute_search(
            structured_query=structured_query,
            num_results=10,
        )

        # Flatten the per-engine result lists into one list
        flattened = []
        for engine_results in search_results_dict["results"].values():
            flattened.extend(engine_results)
        return flattened

    @staticmethod
    def _finish_progress(progress_dict, report_id, status):
        """Write a terminal (progress 1.0) entry for report_id if it is tracked."""
        if progress_dict is not None and report_id in progress_dict:
            progress_dict[report_id] = {
                "progress": 1.0,
                "status": status,
                "current_chunk": 0,
                "total_chunks": 0,
                "current_report": None,
            }

    @staticmethod
    def _record_failure(db, report_id, message):
        """Best-effort: store the error message as the report content."""
        import logging

        try:
            # A failed commit leaves the session unusable until rolled back,
            # so reset it before touching the database again.
            db.rollback()
            report = db.query(Report).filter(Report.id == report_id).first()
            if report:
                report.content = message
                db.commit()
        except Exception:
            # Never let bookkeeping mask the original error, which the caller
            # re-raises right after this returns.
            logging.getLogger(__name__).exception(
                "Could not record failure for report %s", report_id
            )

    async def generate_report_file(self, report: Report, format: str = "markdown") -> str:
        """
        Generate a report file in the specified format.

        The file is written as UTF-8 to ``self.temp_dir`` and named
        ``report_<id>.<format>``.

        Args:
            report: Report record
            format: Format of the report (markdown, html, pdf). Any other
                value falls back to markdown.

        Returns:
            Path to the generated file

        Raises:
            ImportError: For ``html`` when the ``markdown`` package is missing.
                For ``pdf`` a missing ``markdown``/``weasyprint`` package falls
                back to writing the markdown text instead.
        """
        # Unsupported formats fall back to markdown. Normalising before the
        # file name is built also keeps caller-supplied text out of the path.
        if format not in self._SUPPORTED_FORMATS:
            format = "markdown"

        file_path = self.temp_dir / f"report_{report.id}.{format}"
        # A report without content yet is written as an empty file.
        content = report.content or ""

        if format == "html":
            document = self._render_html(report.title, content)
            file_path.write_text(document, encoding="utf-8")
        elif format == "pdf":
            try:
                document = self._render_html(report.title, content)
                from weasyprint import HTML
            except ImportError:
                # If markdown/weasyprint is not installed, fall back to markdown
                file_path.write_text(content, encoding="utf-8")
            else:
                # Use a uniquely named scratch file: "report_<id>.html" is the
                # path returned for the html format and must not be clobbered.
                with tempfile.NamedTemporaryFile(
                    "w",
                    suffix=".html",
                    dir=self.temp_dir,
                    delete=False,
                    encoding="utf-8",
                ) as scratch:
                    scratch.write(document)
                html_file_path = Path(scratch.name)
                try:
                    HTML(filename=str(html_file_path)).write_pdf(str(file_path))
                finally:
                    # Remove the scratch file even when the conversion fails
                    html_file_path.unlink()
        else:
            file_path.write_text(content, encoding="utf-8")

        return str(file_path)

    @classmethod
    def _render_html(cls, title, markdown_text):
        """Convert markdown text to a complete, styled HTML document."""
        from html import escape

        import markdown

        # NOTE(review): markdown.markdown() passes raw HTML in the source
        # through unchanged; sanitise separately if report content is untrusted.
        body = markdown.markdown(markdown_text)
        # The title is plain text, so escape it before embedding in markup.
        safe_title = escape("" if title is None else str(title))

        return (
            "<!DOCTYPE html>\n"
            "<html>\n"
            "<head>\n"
            f"<title>{safe_title}</title>\n"
            '<meta charset="utf-8">\n'
            '<meta name="viewport" content="width=device-width, initial-scale=1">\n'
            f"<style>\n{cls._REPORT_CSS}</style>\n"
            "</head>\n"
            "<body>\n"
            f"{body}\n"
            "</body>\n"
            "</html>\n"
        )
|