papers/server.go

488 lines
13 KiB
Go
Raw Normal View History

package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strings"
"sync"
"time"
"gitea.r8z.us/stwhite/arxiva"
"gitea.r8z.us/stwhite/paperformatter"
"gitea.r8z.us/stwhite/paperprocessor"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/go-chi/cors"
)
// ProcessingJob is the bookkeeping record for a single paper-processing run:
// its identity, its current state, and where its outputs live.
type ProcessingJob struct {
	// ID is the unique job identifier.
	ID string
	// Status is one of "pending", "processing", "completed", or "failed".
	Status string
	// StartTime is when the job started.
	StartTime time.Time
	// Error carries the error message when the job has failed.
	Error string
	// JSONPath is the path to the JSON output file.
	JSONPath string
	// MDPath is the path to the Markdown output file.
	MDPath string
	// MarkdownText is the content of the Markdown file once the job completed.
	MarkdownText string
}
// Server holds the HTTP router together with the configuration and the
// in-memory job table used by the request handlers.
type Server struct {
	// router dispatches incoming HTTP requests to the handlers.
	router *chi.Mux
	// port is the TCP port the server listens on.
	port string
	// apiEndpoint is handed to the paper processor as its API endpoint.
	apiEndpoint string
	// jobs tracks processing jobs, keyed by job ID.
	jobs map[string]*ProcessingJob
	// jobsMutex protects the jobs map.
	jobsMutex sync.RWMutex
}
// NewServer returns a Server configured with the given listen port and
// processor API endpoint. The job table starts empty and all routes are
// registered before the server is returned.
func NewServer(port string, apiEndpoint string) *Server {
	srv := new(Server)
	srv.router = chi.NewRouter()
	srv.port = port
	srv.apiEndpoint = apiEndpoint
	srv.jobs = map[string]*ProcessingJob{}

	srv.setupRoutes()
	return srv
}
// setupRoutes installs the middleware chain (logging, panic recovery, CORS)
// and binds every API path to its handler.
func (s *Server) setupRoutes() {
	mux := s.router

	// Request logging followed by panic recovery.
	mux.Use(middleware.Logger, middleware.Recoverer)

	// Cross-origin policy.
	corsOptions := cors.Options{
		AllowedOrigins:   []string{"*"}, // every origin is accepted (development setting)
		AllowedMethods:   []string{"GET", "POST", "OPTIONS"},
		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type"},
		ExposedHeaders:   []string{},
		AllowCredentials: false,
		MaxAge:           300, // largest value that no major browser ignores
	}
	mux.Use(cors.Handler(corsOptions))

	// Paper endpoints.
	mux.Post("/api/papers/search", s.handleSearch)
	mux.Post("/api/papers/process", s.handleProcess)
	mux.Post("/api/papers/search-process", s.handleSearchAndProcess)

	// Job polling endpoint.
	mux.Get("/api/jobs/{jobID}", s.handleJobStatus)
}
// Run starts the HTTP server on the configured port and blocks until the
// server stops. It always returns a non-nil error, as reported by
// (*http.Server).ListenAndServe.
func (s *Server) Run() error {
	addr := fmt.Sprintf(":%s", s.port)
	log.Printf("Starting server on %s", addr)

	srv := &http.Server{
		Addr:    addr,
		Handler: s.router,
		// Bound the time a client may take to send its request headers so
		// that stalled connections cannot be held open indefinitely.
		// Read/Write timeouts are deliberately not set: handlers call
		// upstream services synchronously and their duration is not bounded
		// here.
		ReadHeaderTimeout: 10 * time.Second,
	}
	return srv.ListenAndServe()
}
// handleSearch decodes a JSON search request (start_date, end_date, query,
// max_results), validates both dates, fetches matching papers through
// arxiva.FetchPapers, and writes them back as JSON.
//
// It responds with 400 when the body is not valid JSON or a date fails
// IsValidDate, and with 500 when the fetch itself fails.
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
	var req struct {
		StartDate  string `json:"start_date"`
		EndDate    string `json:"end_date"`
		Query      string `json:"query"`
		MaxResults int    `json:"max_results"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Reuse existing validation
	if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) {
		http.Error(w, "Invalid date format", http.StatusBadRequest)
		return
	}

	papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Declare the payload type explicitly; otherwise net/http sniffs the
	// body and labels the JSON as text/plain.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(papers); err != nil {
		// The status line is already sent, so the failure can only be logged.
		log.Printf("handleSearch: encoding response: %v", err)
	}
}
// handleProcess validates a processing request, stages any inline papers or
// criteria into temporary files, registers a ProcessingJob, and runs the
// process-then-format pipeline in a background goroutine. The HTTP response
// is a JSON object {"job_id": "..."} written as soon as the job is
// registered; the job's progress is recorded on the ProcessingJob stored in
// s.jobs.
//
// Request fields:
//   - papers or input_file: at least one is required; papers takes precedence.
//   - criteria or criteria_file: at least one is required; criteria takes
//     precedence.
//   - api_key: required.
//   - model: optional.
//
// It responds with 400 for a malformed body or missing fields and with 500
// when a temporary file cannot be created or written. Failures inside the
// pipeline are reported through the job's Status/Error fields rather than
// through the HTTP response.
func (s *Server) handleProcess(w http.ResponseWriter, r *http.Request) {
	var req struct {
		Papers       []arxiva.Paper `json:"papers,omitempty"`        // Optional: Direct paper data
		InputFile    string         `json:"input_file,omitempty"`    // Optional: Input file path
		CriteriaFile string         `json:"criteria_file,omitempty"` // Optional: Criteria file path
		Criteria     string         `json:"criteria,omitempty"`      // Optional: Direct criteria text
		ApiKey       string         `json:"api_key"`                 // Required: API key
		Model        string         `json:"model,omitempty"`         // Optional: Model name
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate required fields up front so no temporary file is created for
	// a request that is going to be rejected anyway.
	if req.CriteriaFile == "" && req.Criteria == "" {
		http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest)
		return
	}
	if req.ApiKey == "" {
		http.Error(w, "api_key is required", http.StatusBadRequest)
		return
	}
	if len(req.Papers) == 0 && req.InputFile == "" {
		http.Error(w, "either papers or input_file must be provided", http.StatusBadRequest)
		return
	}

	// NOTE(review): input_file and criteria_file are client-supplied paths
	// that are handed to the processor unchanged. Confirm this endpoint is
	// only reachable by trusted callers, or restrict the accepted paths.

	// Create processor configuration
	config := paperprocessor.Config{
		APIEndpoint:  s.apiEndpoint,
		APIKey:       req.ApiKey,
		Model:        req.Model,
		RequestDelay: 2 * time.Second,
	}

	// Temporary files are read by the background goroutine, so they must
	// outlive this handler. removeTemps is deferred inside the goroutine and
	// called directly on the early-failure paths below.
	var tempPaths []string
	removeTemps := func() {
		for _, p := range tempPaths {
			os.Remove(p) // best-effort cleanup; a failure here is not actionable
		}
	}

	// Stage the papers.
	inputJSON := req.InputFile
	if len(req.Papers) > 0 {
		tempFile, err := os.CreateTemp("", "papers-*.json")
		if err != nil {
			http.Error(w, fmt.Sprintf("Failed to create temp file: %v", err), http.StatusInternalServerError)
			return
		}
		tempPaths = append(tempPaths, tempFile.Name())

		writeErr := json.NewEncoder(tempFile).Encode(req.Papers)
		// Always close; a failed Close can mean the data never hit the disk.
		if closeErr := tempFile.Close(); writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			removeTemps()
			http.Error(w, fmt.Sprintf("Failed to write papers: %v", writeErr), http.StatusInternalServerError)
			return
		}
		inputJSON = tempFile.Name()
	}

	// Stage the criteria.
	criteriaFile := req.CriteriaFile
	if req.Criteria != "" {
		tempFile, err := os.CreateTemp("", "criteria-*.md")
		if err != nil {
			removeTemps()
			http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError)
			return
		}
		tempPaths = append(tempPaths, tempFile.Name())

		_, writeErr := tempFile.WriteString(req.Criteria)
		if closeErr := tempFile.Close(); writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			removeTemps()
			http.Error(w, fmt.Sprintf("Failed to write criteria: %v", writeErr), http.StatusInternalServerError)
			return
		}
		criteriaFile = tempFile.Name()
	}

	// Generate output filenames. Nanosecond resolution keeps the output
	// files and the job ID from colliding when several requests arrive
	// within the same second.
	now := time.Now()
	timestamp := now.Format("20060102150405.000000000")
	outputJSON := fmt.Sprintf("processed-%s.json", timestamp)
	outputMD := fmt.Sprintf("processed-%s.md", timestamp)

	// Generate job ID and create job
	jobID := fmt.Sprintf("job-%s", timestamp)
	job := &ProcessingJob{
		ID:        jobID,
		Status:    "pending",
		StartTime: now,
		JSONPath:  outputJSON,
		MDPath:    outputMD,
	}

	// Store job
	s.jobsMutex.Lock()
	s.jobs[jobID] = job
	s.jobsMutex.Unlock()

	// Run the pipeline exactly once, in the background.
	go func() {
		defer removeTemps()

		// fail records a terminal error on the job under the write lock.
		fail := func(stage string, err error) {
			s.jobsMutex.Lock()
			defer s.jobsMutex.Unlock()
			job.Status = "failed"
			job.Error = fmt.Sprintf("%s: %v", stage, err)
		}

		s.jobsMutex.Lock()
		job.Status = "processing"
		s.jobsMutex.Unlock()

		// Process the papers
		if err := paperprocessor.ProcessFile(inputJSON, outputJSON, criteriaFile, config); err != nil {
			fail("Processing failed", err)
			return
		}

		// Format to markdown
		if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
			fail("Formatting failed", err)
			return
		}

		// Read markdown content
		mdContent, err := os.ReadFile(outputMD)
		if err != nil {
			fail("Failed to read markdown", err)
			return
		}

		// Convert Windows line endings to Unix
		mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n")
		// Ensure the text ends with a newline
		if !strings.HasSuffix(mdString, "\n") {
			mdString += "\n"
		}

		// Update job with success
		s.jobsMutex.Lock()
		job.Status = "completed"
		job.MarkdownText = mdString
		s.jobsMutex.Unlock()
	}()

	// Return job ID immediately
	w.Header().Set("Content-Type", "application/json")
	resp := struct {
		JobID string `json:"job_id"`
	}{
		JobID: jobID,
	}
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		// The job is already running; the failure can only be logged.
		log.Printf("handleProcess: encoding response: %v", err)
	}
}
// handleJobStatus looks up the job named by the {jobID} URL parameter and
// writes its id, status, start time, and (when present) error as JSON. The
// markdown text is included only once the job's status is "completed".
//
// It responds with 404 when no job with that ID is registered.
func (s *Server) handleJobStatus(w http.ResponseWriter, r *http.Request) {
	jobID := chi.URLParam(r, "jobID")

	// Copy the job while holding the read lock. The background goroutines
	// mutate Status, Error, and MarkdownText under jobsMutex, so reading
	// those fields after releasing the lock would be a data race and could
	// observe a half-updated job.
	var snapshot ProcessingJob
	s.jobsMutex.RLock()
	job, exists := s.jobs[jobID]
	if exists {
		snapshot = *job
	}
	s.jobsMutex.RUnlock()

	if !exists {
		http.Error(w, "Job not found", http.StatusNotFound)
		return
	}

	response := struct {
		ID           string    `json:"id"`
		Status       string    `json:"status"`
		StartTime    time.Time `json:"start_time"`
		Error        string    `json:"error,omitempty"`
		MarkdownText string    `json:"markdown_text,omitempty"`
	}{
		ID:        snapshot.ID,
		Status:    snapshot.Status,
		StartTime: snapshot.StartTime,
		Error:     snapshot.Error,
	}
	// Only include markdown text if job is completed
	if snapshot.Status == "completed" {
		response.MarkdownText = snapshot.MarkdownText
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// The status line is already sent, so the failure can only be logged.
		log.Printf("handleJobStatus: encoding response: %v", err)
	}
}
// handleSearchAndProcess combines a paper search with background processing.
// It validates the request, fetches papers via arxiva.FetchPapers, saves
// them with arxiva.SaveToFile, registers a ProcessingJob, and runs the
// process-then-format pipeline in a background goroutine. The HTTP response
// is a JSON object {"job_id": "..."} written as soon as the job is
// registered.
//
// Request fields: start_date, end_date, query, max_results, plus criteria or
// criteria_file (at least one; criteria takes precedence), api_key, and an
// optional model.
//
// It responds with 400 for a malformed body, an invalid date, or missing
// criteria, and with 500 when fetching, saving, or staging the criteria
// fails. Failures inside the pipeline are reported through the job's
// Status/Error fields rather than through the HTTP response.
func (s *Server) handleSearchAndProcess(w http.ResponseWriter, r *http.Request) {
	var req struct {
		StartDate    string `json:"start_date"`
		EndDate      string `json:"end_date"`
		Query        string `json:"query"`
		MaxResults   int    `json:"max_results"`
		CriteriaFile string `json:"criteria_file,omitempty"`
		Criteria     string `json:"criteria,omitempty"`
		ApiKey       string `json:"api_key"`
		Model        string `json:"model,omitempty"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate dates
	if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) {
		http.Error(w, "Invalid date format", http.StatusBadRequest)
		return
	}

	// Reject requests without criteria before doing any remote fetch or
	// writing any file.
	if req.Criteria == "" && req.CriteriaFile == "" {
		http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest)
		return
	}

	// NOTE(review): api_key is not checked for emptiness in this handler;
	// confirm whether an empty key is acceptable for the configured endpoint.
	// NOTE(review): criteria_file is a client-supplied path handed to the
	// processor unchanged; confirm callers are trusted or restrict the path.

	// Fetch papers
	papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Save papers to a JSON file. inputJSON is presumed to be the name
	// arxiva.SaveToFile writes for these arguments; verify against arxiva.
	baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, SanitizeFilename(req.Query))
	inputJSON := baseFilename + ".json"
	if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil {
		http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError)
		return
	}

	// Create processor configuration
	config := paperprocessor.Config{
		APIEndpoint:  s.apiEndpoint,
		APIKey:       req.ApiKey,
		Model:        req.Model,
		RequestDelay: 2 * time.Second,
	}

	// Stage the criteria. Inline criteria text is written to a temporary
	// file that the background goroutine removes once it has finished.
	criteriaFile := req.CriteriaFile
	var tempCriteriaFile string // Track temporary file for cleanup
	if req.Criteria != "" {
		tempFile, err := os.CreateTemp("", "criteria-*.md")
		if err != nil {
			http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError)
			return
		}
		tempCriteriaFile = tempFile.Name()

		_, writeErr := tempFile.WriteString(req.Criteria)
		// Always close; a failed Close can mean the data never hit the disk.
		if closeErr := tempFile.Close(); writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			os.Remove(tempCriteriaFile) // Clean up on error
			http.Error(w, fmt.Sprintf("Failed to write criteria: %v", writeErr), http.StatusInternalServerError)
			return
		}
		criteriaFile = tempCriteriaFile
	}

	outputJSON := baseFilename + "-processed.json"
	outputMD := baseFilename + "-processed.md"

	// Generate job ID and create job.
	// NOTE(review): the ID is derived from the dates and query only, so a
	// repeated identical search replaces the earlier job entry and reuses
	// the same output files.
	jobID := fmt.Sprintf("job-%s", baseFilename)
	job := &ProcessingJob{
		ID:        jobID,
		Status:    "pending",
		StartTime: time.Now(),
		JSONPath:  outputJSON,
		MDPath:    outputMD,
	}

	// Store job
	s.jobsMutex.Lock()
	s.jobs[jobID] = job
	s.jobsMutex.Unlock()

	// Run the pipeline exactly once, in the background.
	go func() {
		defer func() {
			if tempCriteriaFile != "" {
				os.Remove(tempCriteriaFile) // Clean up temp file after processing
			}
		}()

		// fail records a terminal error on the job under the write lock.
		fail := func(stage string, err error) {
			s.jobsMutex.Lock()
			defer s.jobsMutex.Unlock()
			job.Status = "failed"
			job.Error = fmt.Sprintf("%s: %v", stage, err)
		}

		s.jobsMutex.Lock()
		job.Status = "processing"
		s.jobsMutex.Unlock()

		// Process the papers
		if err := paperprocessor.ProcessFile(inputJSON, outputJSON, criteriaFile, config); err != nil {
			fail("Processing failed", err)
			return
		}

		// Format to markdown
		if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
			fail("Formatting failed", err)
			return
		}

		// Read markdown content
		mdContent, err := os.ReadFile(outputMD)
		if err != nil {
			fail("Failed to read markdown", err)
			return
		}

		// Convert Windows line endings to Unix
		mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n")
		// Ensure the text ends with a newline
		if !strings.HasSuffix(mdString, "\n") {
			mdString += "\n"
		}

		// Update job with success
		s.jobsMutex.Lock()
		job.Status = "completed"
		job.MarkdownText = mdString
		s.jobsMutex.Unlock()
	}()

	// Return job ID immediately
	w.Header().Set("Content-Type", "application/json")
	resp := struct {
		JobID string `json:"job_id"`
	}{
		JobID: jobID,
	}
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		// The job is already running; the failure can only be logged.
		log.Printf("handleSearchAndProcess: encoding response: %v", err)
	}
}