From e552411298c258d2e61014266183b1fb55ad1ec5 Mon Sep 17 00:00:00 2001 From: Steve White Date: Wed, 29 Jan 2025 11:48:13 -0600 Subject: [PATCH] enabled server mode and updated README.md --- README.md | 18 ++ go.sum | 2 + papers.go | 43 ++-- server.go | 623 +++++++++++++++++++++++++++++++++++++++--------------- utils.go | 27 +++ 5 files changed, 516 insertions(+), 197 deletions(-) create mode 100644 utils.go diff --git a/README.md b/README.md index acfba25..5b4fe10 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ This is hard to pull off with keyword searches. You might exclude every paper th - Generate both JSON and Markdown outputs - Customizable evaluation criteria - Rate-limited API requests (2-second delay between requests) +- HTTP API server mode for integration with other services ## Installation @@ -65,6 +66,8 @@ papers -input papers.json -api-key "your-key" -criteria criteria.md ### Optional Flags +- `-serve`: Run in server mode with HTTP API endpoints +- `-port`: Port to run server on (default: "8080") - `-search-only`: Fetch papers from arXiv and save to JSON file without processing - `-input`: Input JSON file containing papers (optional) - `-maxResults`: Maximum number of results to fetch (1-2000, default: 100) @@ -76,6 +79,21 @@ papers -input papers.json -api-key "your-key" -criteria criteria.md **NB: default API endpoint is LMStudio, and Phi-4 does a great job filtering papers** +## Server Mode + +The tool can be run as an HTTP server providing API endpoints for paper search and processing: + +```bash +papers -serve -port 8080 +``` + +This starts a server with the following endpoints: +- `POST /api/papers/search` - Search for papers on arXiv +- `POST /api/papers/process` - Process papers using LLM +- `POST /api/papers/search-process` - Combined search and process + +See [API.md](API.md) for detailed API documentation. + ## Pipeline 1. **Fetch**: Retrieves papers from arXiv based on specified date range and query diff --git a/go.sum b/go.sum index b882b57..20c7cf7 100644 --- a/go.sum +++ b/go.sum @@ -4,3 +4,5 @@ gitea.r8z.us/stwhite/paperformatter v0.1.3 h1:Z8yIdfCmQ+c5A5To+Y3XPHNOdK5B/q5VqB gitea.r8z.us/stwhite/paperformatter v0.1.3/go.mod h1:As2zIT0NSsMirYqdvIfIXXxIHOcdsHANhnh0VNcAluQ= gitea.r8z.us/stwhite/paperprocessor v0.1.8 h1:pV810JZQFhuKcle4ix7stUz12LZNIgFCVWxSC/RYWpE= gitea.r8z.us/stwhite/paperprocessor v0.1.8/go.mod h1:0wHe7XjtQICFrPKbO53SVrUiVw9yi8GOGo9J7znpo+E= +github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA= +github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= diff --git a/papers.go b/papers.go index f47142e..4451c7a 100644 --- a/papers.go +++ b/papers.go @@ -7,7 +7,6 @@ import ( "io" "log" "os" - "regexp" "strings" "time" @@ -57,26 +56,6 @@ func validateInputFile(path string) ([]Paper, error) { return papers, nil } -// sanitizeFilename replaces invalid filename characters to match arxiva's sanitization -func sanitizeFilename(s string) string { - s = strings.ReplaceAll(s, ":", "_") - s = strings.ReplaceAll(s, " ", "_") - return s -} - -// isValidDate checks if the date string is in YYYYMMDD format -func isValidDate(date string) bool { - // Check basic format with regex - matched, err := regexp.MatchString(`^\d{8}$`, date) - if err != nil || !matched { - return false - } - - // Parse date to verify it's a valid date - _, err = time.Parse("20060102", date) - return err == nil -} - func main() { // Set custom usage message flag.Usage = func() { @@ -105,10 +84,14 @@ func main() { fmt.Fprintf(os.Stderr, " With custom options:\n") fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0]) fmt.Fprintf(os.Stderr, " Search only:\n") - fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n", os.Args[0]) + fmt.Fprintf(os.Stderr, " Server mode:\n") + fmt.Fprintf(os.Stderr, " %s -serve -port 8080\n\n", os.Args[0]) } // Parse command line arguments + serve := flag.Bool("serve", false, "Run in server mode") + port := flag.String("port", "8080", "Port to run server on") searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)") inputFile := flag.String("input", "", "Input JSON file containing papers (optional)") startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)") @@ -124,6 +107,14 @@ func main() { flag.Parse() // Validate required flags and input + if *serve { + server := NewServer(*port, *apiEndpoint) + if err := server.Run(); err != nil { + log.Fatal(err) + } + return + } + if *searchOnly { if *startDate == "" || *endDate == "" || *query == "" { fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n") @@ -132,7 +123,7 @@ func main() { } // Validate date format - if !isValidDate(*startDate) || !isValidDate(*endDate) { + if !IsValidDate(*startDate) || !IsValidDate(*endDate) { fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n") os.Exit(1) } @@ -154,7 +145,7 @@ func main() { log.Fatalf("Failed to save papers: %v", err) } - log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, sanitizeFilename(*query)) + log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, SanitizeFilename(*query)) os.Exit(0) } @@ -195,7 +186,7 @@ func main() { } // Validate date format - if !isValidDate(*startDate) || !isValidDate(*endDate) { + if !IsValidDate(*startDate) || !IsValidDate(*endDate) { fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n") os.Exit(1) } @@ -217,7 +208,7 @@ func main() { log.Fatalf("Failed to save papers: %v", err) } - baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, sanitizeFilename(*query)) + baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, SanitizeFilename(*query)) } // Create processor configuration diff --git a/server.go b/server.go index bf73150..0fadd06 100644 --- a/server.go +++ b/server.go @@ -1,193 +1,474 @@ package main import ( - "encoding/json" - "fmt" - "log" - "net/http" - - "github.com/go-chi/chi/v5" - "github.com/go-chi/chi/v5/middleware" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "strings" + "sync" + "time" + + "gitea.r8z.us/stwhite/arxiva" + "gitea.r8z.us/stwhite/paperformatter" + "gitea.r8z.us/stwhite/paperprocessor" + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" ) -type Server struct { - router *chi.Mux - port string +type ProcessingJob struct { + ID string // Unique job identifier + Status string // "pending", "processing", "completed", "failed" + StartTime time.Time // When the job started + Error string // Error message if failed + JSONPath string // Path to JSON output file + MDPath string // Path to Markdown output file + MarkdownText string // Content of markdown file when completed } -func NewServer(port string) *Server { - s := &Server{ - router: chi.NewRouter(), - port: port, - } - - s.setupRoutes() - return s +type Server struct { + router *chi.Mux + port string + apiEndpoint string + jobs map[string]*ProcessingJob // Track processing jobs + jobsMutex sync.RWMutex // Protect jobs map +} + +func NewServer(port string, apiEndpoint string) *Server { + s := &Server{ + router: chi.NewRouter(), + port: port, + apiEndpoint: apiEndpoint, + jobs: make(map[string]*ProcessingJob), + } + + s.setupRoutes() + return s } func (s *Server) setupRoutes() { - s.router.Use(middleware.Logger) - s.router.Use(middleware.Recoverer) - - s.router.Post("/api/papers/search", s.handleSearch) - s.router.Post("/api/papers/process", s.handleProcess) - s.router.Post("/api/papers/search-process", s.handleSearchAndProcess) + s.router.Use(middleware.Logger) + s.router.Use(middleware.Recoverer) + + s.router.Post("/api/papers/search", s.handleSearch) + s.router.Post("/api/papers/process", s.handleProcess) + s.router.Post("/api/papers/search-process", s.handleSearchAndProcess) + s.router.Get("/api/jobs/{jobID}", s.handleJobStatus) } func (s *Server) Run() error { - addr := fmt.Sprintf(":%s", s.port) - log.Printf("Starting server on %s", addr) - return http.ListenAndServe(addr, s.router) + addr := fmt.Sprintf(":%s", s.port) + log.Printf("Starting server on %s", addr) + return http.ListenAndServe(addr, s.router) } func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) { - var req struct { - StartDate string `json:"start_date"` - EndDate string `json:"end_date"` - Query string `json:"query"` - MaxResults int `json:"max_results"` - } - - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, "Invalid request body", http.StatusBadRequest) - return - } - - // Reuse existing validation - if !isValidDate(req.StartDate) || !isValidDate(req.EndDate) { - http.Error(w, "Invalid date format", http.StatusBadRequest) - return - } - - papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - json.NewEncoder(w).Encode(papers) + var req struct { + StartDate string `json:"start_date"` + EndDate string `json:"end_date"` + Query string `json:"query"` + MaxResults int `json:"max_results"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + // Reuse existing validation + if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) { + http.Error(w, "Invalid date format", http.StatusBadRequest) + return + } + + papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + json.NewEncoder(w).Encode(papers) } func (s *Server) handleProcess(w http.ResponseWriter, r *http.Request) { - var req struct { - InputFile string `json:"input_file"` - CriteriaFile string `json:"criteria_file"` - ApiKey string `json:"api_key"` - Model string `json:"model"` - } - - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, "Invalid request body", http.StatusBadRequest) - return - } - - // Create processor configuration - config := paperprocessor.Config{ - APIEndpoint: *apiEndpoint, // This would need to be passed to Server struct - APIKey: req.ApiKey, - Model: req.Model, - RequestDelay: 2 * time.Second, - } - - // Process the papers - outputJSON := req.InputFile + "-processed.json" - if err := paperprocessor.ProcessFile( - req.InputFile, - outputJSON, - req.CriteriaFile, - config, - ); err != nil { - http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError) - return - } - - // Format to markdown - outputMD := req.InputFile + "-processed.md" - if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil { - http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError) - return - } - - // Return the paths to the generated files - json.NewEncoder(w).Encode(struct { - JSONOutput string `json:"json_output"` - MDOutput string `json:"md_output"` - }{ - JSONOutput: outputJSON, - MDOutput: outputMD, - }) + var req struct { + Papers []arxiva.Paper `json:"papers,omitempty"` // Optional: Direct paper data + InputFile string `json:"input_file,omitempty"` // Optional: Input file path + CriteriaFile string `json:"criteria_file,omitempty"` // Optional: Criteria file path + Criteria string `json:"criteria,omitempty"` // Optional: Direct criteria text + ApiKey string `json:"api_key"` // Required: API key + Model string `json:"model,omitempty"` // Optional: Model name + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + // Validate required fields + if req.CriteriaFile == "" && req.Criteria == "" { + http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest) + return + } + if req.ApiKey == "" { + http.Error(w, "api_key is required", http.StatusBadRequest) + return + } + + // Create processor configuration + config := paperprocessor.Config{ + APIEndpoint: s.apiEndpoint, + APIKey: req.ApiKey, + Model: req.Model, + RequestDelay: 2 * time.Second, + } + + var inputJSON string + + // Handle direct paper data + if len(req.Papers) > 0 { + // Create temporary file for paper data + tempFile, err := os.CreateTemp("", "papers-*.json") + if err != nil { + http.Error(w, fmt.Sprintf("Failed to create temp file: %v", err), http.StatusInternalServerError) + return + } + defer os.Remove(tempFile.Name()) // Clean up temp file + + // Write papers to temp file + if err := json.NewEncoder(tempFile).Encode(req.Papers); err != nil { + http.Error(w, fmt.Sprintf("Failed to write papers: %v", err), http.StatusInternalServerError) + return + } + tempFile.Close() + inputJSON = tempFile.Name() + } else if req.InputFile != "" { + inputJSON = req.InputFile + } else { + http.Error(w, "either papers or input_file must be provided", http.StatusBadRequest) + return + } + + // Generate output filenames + timestamp := time.Now().Format("20060102150405") + outputJSON := fmt.Sprintf("processed-%s.json", timestamp) + outputMD := fmt.Sprintf("processed-%s.md", timestamp) + + // Handle criteria + var criteriaFile string + if req.Criteria != "" { + // Create temporary file for criteria + tempFile, err := os.CreateTemp("", "criteria-*.md") + if err != nil { + http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError) + return + } + defer os.Remove(tempFile.Name()) // Clean up temp file + + // Write criteria to temp file + if _, err := tempFile.WriteString(req.Criteria); err != nil { + http.Error(w, fmt.Sprintf("Failed to write criteria: %v", err), http.StatusInternalServerError) + return + } + tempFile.Close() + criteriaFile = tempFile.Name() + } else { + criteriaFile = req.CriteriaFile + } + + // Process the papers + if err := paperprocessor.ProcessFile( + inputJSON, + outputJSON, + criteriaFile, + config, + ); err != nil { + http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError) + return + } + + // Format to markdown + if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil { + http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError) + return + } + + // Generate job ID and create job + jobID := fmt.Sprintf("job-%s", timestamp) + job := &ProcessingJob{ + ID: jobID, + Status: "pending", + StartTime: time.Now(), + JSONPath: outputJSON, + MDPath: outputMD, + } + + // Store job + s.jobsMutex.Lock() + s.jobs[jobID] = job + s.jobsMutex.Unlock() + + // Start processing in background + go func() { + // Process the papers + if err := paperprocessor.ProcessFile( + inputJSON, + outputJSON, + criteriaFile, + config, + ); err != nil { + s.jobsMutex.Lock() + job.Status = "failed" + job.Error = fmt.Sprintf("Processing failed: %v", err) + s.jobsMutex.Unlock() + return + } + + // Format to markdown + if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil { + s.jobsMutex.Lock() + job.Status = "failed" + job.Error = fmt.Sprintf("Formatting failed: %v", err) + s.jobsMutex.Unlock() + return + } + + // Read markdown content + mdContent, err := os.ReadFile(outputMD) + if err != nil { + s.jobsMutex.Lock() + job.Status = "failed" + job.Error = fmt.Sprintf("Failed to read markdown: %v", err) + s.jobsMutex.Unlock() + return + } + + // Convert Windows line endings to Unix + mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n") + // Ensure file ends with newline + if !strings.HasSuffix(mdString, "\n") { + mdString += "\n" + } + + // Update job with success + s.jobsMutex.Lock() + job.Status = "completed" + job.MarkdownText = mdString + s.jobsMutex.Unlock() + }() + + // Return job ID immediately + json.NewEncoder(w).Encode(struct { + JobID string `json:"job_id"` + }{ + JobID: jobID, + }) +} + +func (s *Server) handleJobStatus(w http.ResponseWriter, r *http.Request) { + jobID := chi.URLParam(r, "jobID") + + s.jobsMutex.RLock() + job, exists := s.jobs[jobID] + s.jobsMutex.RUnlock() + + if !exists { + http.Error(w, "Job not found", http.StatusNotFound) + return + } + + response := struct { + ID string `json:"id"` + Status string `json:"status"` + StartTime time.Time `json:"start_time"` + Error string `json:"error,omitempty"` + MarkdownText string `json:"markdown_text,omitempty"` + }{ + ID: job.ID, + Status: job.Status, + StartTime: job.StartTime, + Error: job.Error, + } + + // Only include markdown text if job is completed + if job.Status == "completed" { + response.MarkdownText = job.MarkdownText + } + + json.NewEncoder(w).Encode(response) } func (s *Server) handleSearchAndProcess(w http.ResponseWriter, r *http.Request) { - var req struct { - StartDate string `json:"start_date"` - EndDate string `json:"end_date"` - Query string `json:"query"` - MaxResults int `json:"max_results"` - CriteriaFile string `json:"criteria_file"` - ApiKey string `json:"api_key"` - Model string `json:"model"` - } - - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, "Invalid request body", http.StatusBadRequest) - return - } - - // Validate dates - if !isValidDate(req.StartDate) || !isValidDate(req.EndDate) { - http.Error(w, "Invalid date format", http.StatusBadRequest) - return - } - - // Fetch papers - papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Save papers to temporary JSON file - baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, sanitizeFilename(req.Query)) - inputJSON := baseFilename + ".json" - if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil { - http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError) - return - } - - // Create processor configuration - config := paperprocessor.Config{ - APIEndpoint: *apiEndpoint, // This would need to be passed to Server struct - APIKey: req.ApiKey, - Model: req.Model, - RequestDelay: 2 * time.Second, - } - - // Process the papers - outputJSON := baseFilename + "-processed.json" - if err := paperprocessor.ProcessFile( - inputJSON, - outputJSON, - req.CriteriaFile, - config, - ); err != nil { - http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError) - return - } - - // Format to markdown - outputMD := baseFilename + "-processed.md" - if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil { - http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError) - return - } - - // Return the paths to the generated files - json.NewEncoder(w).Encode(struct { - JSONOutput string `json:"json_output"` - MDOutput string `json:"md_output"` - }{ - JSONOutput: outputJSON, - MDOutput: outputMD, - }) -} \ No newline at end of file + var req struct { + StartDate string `json:"start_date"` + EndDate string `json:"end_date"` + Query string `json:"query"` + MaxResults int `json:"max_results"` + CriteriaFile string `json:"criteria_file,omitempty"` + Criteria string `json:"criteria,omitempty"` + ApiKey string `json:"api_key"` + Model string `json:"model,omitempty"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + // Validate dates + if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) { + http.Error(w, "Invalid date format", http.StatusBadRequest) + return + } + + // Fetch papers + papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + // Save papers to temporary JSON file + baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, SanitizeFilename(req.Query)) + inputJSON := baseFilename + ".json" + if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil { + http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError) + return + } + + // Create processor configuration + config := paperprocessor.Config{ + APIEndpoint: s.apiEndpoint, + APIKey: req.ApiKey, + Model: req.Model, + RequestDelay: 2 * time.Second, + } + + // Handle criteria + var criteriaFile string + var tempCriteriaFile string // Track temporary file for cleanup + + if req.Criteria != "" { + // Create temporary file for criteria + tempFile, err := os.CreateTemp("", "criteria-*.md") + if err != nil { + http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError) + return + } + tempCriteriaFile = tempFile.Name() // Save for cleanup after processing + + // Write criteria to temp file + if _, err := tempFile.WriteString(req.Criteria); err != nil { + os.Remove(tempCriteriaFile) // Clean up on error + http.Error(w, fmt.Sprintf("Failed to write criteria: %v", err), http.StatusInternalServerError) + return + } + tempFile.Close() + criteriaFile = tempCriteriaFile + } else if req.CriteriaFile != "" { + criteriaFile = req.CriteriaFile + } else { + http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest) + return + } + + // Process the papers + outputJSON := baseFilename + "-processed.json" + if err := paperprocessor.ProcessFile( + inputJSON, + outputJSON, + criteriaFile, + config, + ); err != nil { + if tempCriteriaFile != "" { + os.Remove(tempCriteriaFile) // Clean up temp file on error + } + http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError) + return + } + + // Format to markdown + outputMD := baseFilename + "-processed.md" + if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil { + http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError) + return + } + + // Generate job ID and create job + jobID := fmt.Sprintf("job-%s", baseFilename) + job := &ProcessingJob{ + ID: jobID, + Status: "pending", + StartTime: time.Now(), + JSONPath: outputJSON, + MDPath: outputMD, + } + + // Store job + s.jobsMutex.Lock() + s.jobs[jobID] = job + s.jobsMutex.Unlock() + + // Start processing in background + go func() { + defer func() { + if tempCriteriaFile != "" { + os.Remove(tempCriteriaFile) // Clean up temp file after processing + } + }() + + // Process the papers + if err := paperprocessor.ProcessFile( + inputJSON, + outputJSON, + criteriaFile, + config, + ); err != nil { + s.jobsMutex.Lock() + job.Status = "failed" + job.Error = fmt.Sprintf("Processing failed: %v", err) + s.jobsMutex.Unlock() + return + } + + // Format to markdown + if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil { + s.jobsMutex.Lock() + job.Status = "failed" + job.Error = fmt.Sprintf("Formatting failed: %v", err) + s.jobsMutex.Unlock() + return + } + + // Read markdown content + mdContent, err := os.ReadFile(outputMD) + if err != nil { + s.jobsMutex.Lock() + job.Status = "failed" + job.Error = fmt.Sprintf("Failed to read markdown: %v", err) + s.jobsMutex.Unlock() + return + } + + // Convert Windows line endings to Unix + mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n") + // Ensure file ends with newline + if !strings.HasSuffix(mdString, "\n") { + mdString += "\n" + } + + // Update job with success + s.jobsMutex.Lock() + job.Status = "completed" + job.MarkdownText = mdString + s.jobsMutex.Unlock() + }() + + // Return job ID immediately + json.NewEncoder(w).Encode(struct { + JobID string `json:"job_id"` + }{ + JobID: jobID, + }) +} diff --git a/utils.go b/utils.go new file mode 100644 index 0000000..3ccb2c7 --- /dev/null +++ b/utils.go @@ -0,0 +1,27 @@ +package main + +import ( + "regexp" + "strings" + "time" +) + +// IsValidDate checks if the date string is in YYYYMMDD format +func IsValidDate(date string) bool { + // Check basic format with regex + matched, err := regexp.MatchString(`^\d{8}$`, date) + if err != nil || !matched { + return false + } + + // Parse date to verify it's a valid date + _, err = time.Parse("20060102", date) + return err == nil +} + +// SanitizeFilename replaces invalid filename characters to match arxiva's sanitization +func SanitizeFilename(s string) string { + s = strings.ReplaceAll(s, ":", "_") + s = strings.ReplaceAll(s, " ", "_") + return s +}