enabled server mode and updated README.md
This commit is contained in:
parent
ac208fabdd
commit
e552411298
18
README.md
18
README.md
|
@ -27,6 +27,7 @@ This is hard to pull off with keyword searches. You might exclude every paper th
|
||||||
- Generate both JSON and Markdown outputs
|
- Generate both JSON and Markdown outputs
|
||||||
- Customizable evaluation criteria
|
- Customizable evaluation criteria
|
||||||
- Rate-limited API requests (2-second delay between requests)
|
- Rate-limited API requests (2-second delay between requests)
|
||||||
|
- HTTP API server mode for integration with other services
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
@ -65,6 +66,8 @@ papers -input papers.json -api-key "your-key" -criteria criteria.md
|
||||||
|
|
||||||
### Optional Flags
|
### Optional Flags
|
||||||
|
|
||||||
|
- `-serve`: Run in server mode with HTTP API endpoints
|
||||||
|
- `-port`: Port to run server on (default: "8080")
|
||||||
- `-search-only`: Fetch papers from arXiv and save to JSON file without processing
|
- `-search-only`: Fetch papers from arXiv and save to JSON file without processing
|
||||||
- `-input`: Input JSON file containing papers (optional)
|
- `-input`: Input JSON file containing papers (optional)
|
||||||
- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100)
|
- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100)
|
||||||
|
@ -76,6 +79,21 @@ papers -input papers.json -api-key "your-key" -criteria criteria.md
|
||||||
|
|
||||||
**NB: default API endpoint is LMStudio, and Phi-4 does a great job filtering papers**
|
**NB: default API endpoint is LMStudio, and Phi-4 does a great job filtering papers**
|
||||||
|
|
||||||
|
## Server Mode
|
||||||
|
|
||||||
|
The tool can be run as an HTTP server providing API endpoints for paper search and processing:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
papers -serve -port 8080
|
||||||
|
```
|
||||||
|
|
||||||
|
This starts a server with the following endpoints:
|
||||||
|
- `POST /api/papers/search` - Search for papers on arXiv
|
||||||
|
- `POST /api/papers/process` - Process papers using an LLM
|
||||||
|
- `POST /api/papers/search-process` - Combined search and process
- `GET /api/jobs/{jobID}` - Check the status and result of a processing job
|
||||||
|
|
||||||
|
See [API.md](API.md) for detailed API documentation.
|
||||||
|
|
||||||
## Pipeline
|
## Pipeline
|
||||||
|
|
||||||
1. **Fetch**: Retrieves papers from arXiv based on specified date range and query
|
1. **Fetch**: Retrieves papers from arXiv based on specified date range and query
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -4,3 +4,5 @@ gitea.r8z.us/stwhite/paperformatter v0.1.3 h1:Z8yIdfCmQ+c5A5To+Y3XPHNOdK5B/q5VqB
|
||||||
gitea.r8z.us/stwhite/paperformatter v0.1.3/go.mod h1:As2zIT0NSsMirYqdvIfIXXxIHOcdsHANhnh0VNcAluQ=
|
gitea.r8z.us/stwhite/paperformatter v0.1.3/go.mod h1:As2zIT0NSsMirYqdvIfIXXxIHOcdsHANhnh0VNcAluQ=
|
||||||
gitea.r8z.us/stwhite/paperprocessor v0.1.8 h1:pV810JZQFhuKcle4ix7stUz12LZNIgFCVWxSC/RYWpE=
|
gitea.r8z.us/stwhite/paperprocessor v0.1.8 h1:pV810JZQFhuKcle4ix7stUz12LZNIgFCVWxSC/RYWpE=
|
||||||
gitea.r8z.us/stwhite/paperprocessor v0.1.8/go.mod h1:0wHe7XjtQICFrPKbO53SVrUiVw9yi8GOGo9J7znpo+E=
|
gitea.r8z.us/stwhite/paperprocessor v0.1.8/go.mod h1:0wHe7XjtQICFrPKbO53SVrUiVw9yi8GOGo9J7znpo+E=
|
||||||
|
github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA=
|
||||||
|
github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
|
||||||
|
|
43
papers.go
43
papers.go
|
@ -7,7 +7,6 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -57,26 +56,6 @@ func validateInputFile(path string) ([]Paper, error) {
|
||||||
return papers, nil
|
return papers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanitizeFilename replaces invalid filename characters to match arxiva's sanitization
|
|
||||||
func sanitizeFilename(s string) string {
|
|
||||||
s = strings.ReplaceAll(s, ":", "_")
|
|
||||||
s = strings.ReplaceAll(s, " ", "_")
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
// isValidDate checks if the date string is in YYYYMMDD format
|
|
||||||
func isValidDate(date string) bool {
|
|
||||||
// Check basic format with regex
|
|
||||||
matched, err := regexp.MatchString(`^\d{8}$`, date)
|
|
||||||
if err != nil || !matched {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse date to verify it's a valid date
|
|
||||||
_, err = time.Parse("20060102", date)
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Set custom usage message
|
// Set custom usage message
|
||||||
flag.Usage = func() {
|
flag.Usage = func() {
|
||||||
|
@ -105,10 +84,14 @@ func main() {
|
||||||
fmt.Fprintf(os.Stderr, " With custom options:\n")
|
fmt.Fprintf(os.Stderr, " With custom options:\n")
|
||||||
fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0])
|
||||||
fmt.Fprintf(os.Stderr, " Search only:\n")
|
fmt.Fprintf(os.Stderr, " Search only:\n")
|
||||||
fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " Server mode:\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -serve -port 8080\n\n", os.Args[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse command line arguments
|
// Parse command line arguments
|
||||||
|
serve := flag.Bool("serve", false, "Run in server mode")
|
||||||
|
port := flag.String("port", "8080", "Port to run server on")
|
||||||
searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)")
|
searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)")
|
||||||
inputFile := flag.String("input", "", "Input JSON file containing papers (optional)")
|
inputFile := flag.String("input", "", "Input JSON file containing papers (optional)")
|
||||||
startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)")
|
startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)")
|
||||||
|
@ -124,6 +107,14 @@ func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Validate required flags and input
|
// Validate required flags and input
|
||||||
|
if *serve {
|
||||||
|
server := NewServer(*port, *apiEndpoint)
|
||||||
|
if err := server.Run(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if *searchOnly {
|
if *searchOnly {
|
||||||
if *startDate == "" || *endDate == "" || *query == "" {
|
if *startDate == "" || *endDate == "" || *query == "" {
|
||||||
fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n")
|
fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n")
|
||||||
|
@ -132,7 +123,7 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate date format
|
// Validate date format
|
||||||
if !isValidDate(*startDate) || !isValidDate(*endDate) {
|
if !IsValidDate(*startDate) || !IsValidDate(*endDate) {
|
||||||
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
@ -154,7 +145,7 @@ func main() {
|
||||||
log.Fatalf("Failed to save papers: %v", err)
|
log.Fatalf("Failed to save papers: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, sanitizeFilename(*query))
|
log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, SanitizeFilename(*query))
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,7 +186,7 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate date format
|
// Validate date format
|
||||||
if !isValidDate(*startDate) || !isValidDate(*endDate) {
|
if !IsValidDate(*startDate) || !IsValidDate(*endDate) {
|
||||||
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
@ -217,7 +208,7 @@ func main() {
|
||||||
log.Fatalf("Failed to save papers: %v", err)
|
log.Fatalf("Failed to save papers: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, sanitizeFilename(*query))
|
baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, SanitizeFilename(*query))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create processor configuration
|
// Create processor configuration
|
||||||
|
|
581
server.go
581
server.go
|
@ -1,193 +1,474 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
"gitea.r8z.us/stwhite/arxiva"
|
||||||
"github.com/go-chi/chi/v5/middleware"
|
"gitea.r8z.us/stwhite/paperformatter"
|
||||||
|
"gitea.r8z.us/stwhite/paperprocessor"
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
"github.com/go-chi/chi/v5/middleware"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Server struct {
|
type ProcessingJob struct {
|
||||||
router *chi.Mux
|
ID string // Unique job identifier
|
||||||
port string
|
Status string // "pending", "processing", "completed", "failed"
|
||||||
|
StartTime time.Time // When the job started
|
||||||
|
Error string // Error message if failed
|
||||||
|
JSONPath string // Path to JSON output file
|
||||||
|
MDPath string // Path to Markdown output file
|
||||||
|
MarkdownText string // Content of markdown file when completed
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewServer(port string) *Server {
|
type Server struct {
|
||||||
s := &Server{
|
router *chi.Mux
|
||||||
router: chi.NewRouter(),
|
port string
|
||||||
port: port,
|
apiEndpoint string
|
||||||
}
|
jobs map[string]*ProcessingJob // Track processing jobs
|
||||||
|
jobsMutex sync.RWMutex // Protect jobs map
|
||||||
|
}
|
||||||
|
|
||||||
s.setupRoutes()
|
func NewServer(port string, apiEndpoint string) *Server {
|
||||||
return s
|
s := &Server{
|
||||||
|
router: chi.NewRouter(),
|
||||||
|
port: port,
|
||||||
|
apiEndpoint: apiEndpoint,
|
||||||
|
jobs: make(map[string]*ProcessingJob),
|
||||||
|
}
|
||||||
|
|
||||||
|
s.setupRoutes()
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) setupRoutes() {
|
func (s *Server) setupRoutes() {
|
||||||
s.router.Use(middleware.Logger)
|
s.router.Use(middleware.Logger)
|
||||||
s.router.Use(middleware.Recoverer)
|
s.router.Use(middleware.Recoverer)
|
||||||
|
|
||||||
s.router.Post("/api/papers/search", s.handleSearch)
|
s.router.Post("/api/papers/search", s.handleSearch)
|
||||||
s.router.Post("/api/papers/process", s.handleProcess)
|
s.router.Post("/api/papers/process", s.handleProcess)
|
||||||
s.router.Post("/api/papers/search-process", s.handleSearchAndProcess)
|
s.router.Post("/api/papers/search-process", s.handleSearchAndProcess)
|
||||||
|
s.router.Get("/api/jobs/{jobID}", s.handleJobStatus)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) Run() error {
|
func (s *Server) Run() error {
|
||||||
addr := fmt.Sprintf(":%s", s.port)
|
addr := fmt.Sprintf(":%s", s.port)
|
||||||
log.Printf("Starting server on %s", addr)
|
log.Printf("Starting server on %s", addr)
|
||||||
return http.ListenAndServe(addr, s.router)
|
return http.ListenAndServe(addr, s.router)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
|
||||||
var req struct {
|
var req struct {
|
||||||
StartDate string `json:"start_date"`
|
StartDate string `json:"start_date"`
|
||||||
EndDate string `json:"end_date"`
|
EndDate string `json:"end_date"`
|
||||||
Query string `json:"query"`
|
Query string `json:"query"`
|
||||||
MaxResults int `json:"max_results"`
|
MaxResults int `json:"max_results"`
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reuse existing validation
|
// Reuse existing validation
|
||||||
if !isValidDate(req.StartDate) || !isValidDate(req.EndDate) {
|
if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) {
|
||||||
http.Error(w, "Invalid date format", http.StatusBadRequest)
|
http.Error(w, "Invalid date format", http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
json.NewEncoder(w).Encode(papers)
|
json.NewEncoder(w).Encode(papers)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) handleProcess(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) handleProcess(w http.ResponseWriter, r *http.Request) {
|
||||||
var req struct {
|
var req struct {
|
||||||
InputFile string `json:"input_file"`
|
Papers []arxiva.Paper `json:"papers,omitempty"` // Optional: Direct paper data
|
||||||
CriteriaFile string `json:"criteria_file"`
|
InputFile string `json:"input_file,omitempty"` // Optional: Input file path
|
||||||
ApiKey string `json:"api_key"`
|
CriteriaFile string `json:"criteria_file,omitempty"` // Optional: Criteria file path
|
||||||
Model string `json:"model"`
|
Criteria string `json:"criteria,omitempty"` // Optional: Direct criteria text
|
||||||
}
|
ApiKey string `json:"api_key"` // Required: API key
|
||||||
|
Model string `json:"model,omitempty"` // Optional: Model name
|
||||||
|
}
|
||||||
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create processor configuration
|
// Validate required fields
|
||||||
config := paperprocessor.Config{
|
if req.CriteriaFile == "" && req.Criteria == "" {
|
||||||
APIEndpoint: *apiEndpoint, // This would need to be passed to Server struct
|
http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest)
|
||||||
APIKey: req.ApiKey,
|
return
|
||||||
Model: req.Model,
|
}
|
||||||
RequestDelay: 2 * time.Second,
|
if req.ApiKey == "" {
|
||||||
}
|
http.Error(w, "api_key is required", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Process the papers
|
// Create processor configuration
|
||||||
outputJSON := req.InputFile + "-processed.json"
|
config := paperprocessor.Config{
|
||||||
if err := paperprocessor.ProcessFile(
|
APIEndpoint: s.apiEndpoint,
|
||||||
req.InputFile,
|
APIKey: req.ApiKey,
|
||||||
outputJSON,
|
Model: req.Model,
|
||||||
req.CriteriaFile,
|
RequestDelay: 2 * time.Second,
|
||||||
config,
|
}
|
||||||
); err != nil {
|
|
||||||
http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format to markdown
|
var inputJSON string
|
||||||
outputMD := req.InputFile + "-processed.md"
|
|
||||||
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
|
||||||
http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the paths to the generated files
|
// Handle direct paper data
|
||||||
json.NewEncoder(w).Encode(struct {
|
if len(req.Papers) > 0 {
|
||||||
JSONOutput string `json:"json_output"`
|
// Create temporary file for paper data
|
||||||
MDOutput string `json:"md_output"`
|
tempFile, err := os.CreateTemp("", "papers-*.json")
|
||||||
}{
|
if err != nil {
|
||||||
JSONOutput: outputJSON,
|
http.Error(w, fmt.Sprintf("Failed to create temp file: %v", err), http.StatusInternalServerError)
|
||||||
MDOutput: outputMD,
|
return
|
||||||
})
|
}
|
||||||
|
defer os.Remove(tempFile.Name()) // Clean up temp file
|
||||||
|
|
||||||
|
// Write papers to temp file
|
||||||
|
if err := json.NewEncoder(tempFile).Encode(req.Papers); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to write papers: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
inputJSON = tempFile.Name()
|
||||||
|
} else if req.InputFile != "" {
|
||||||
|
inputJSON = req.InputFile
|
||||||
|
} else {
|
||||||
|
http.Error(w, "either papers or input_file must be provided", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate output filenames
|
||||||
|
timestamp := time.Now().Format("20060102150405")
|
||||||
|
outputJSON := fmt.Sprintf("processed-%s.json", timestamp)
|
||||||
|
outputMD := fmt.Sprintf("processed-%s.md", timestamp)
|
||||||
|
|
||||||
|
// Handle criteria
|
||||||
|
var criteriaFile string
|
||||||
|
if req.Criteria != "" {
|
||||||
|
// Create temporary file for criteria
|
||||||
|
tempFile, err := os.CreateTemp("", "criteria-*.md")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer os.Remove(tempFile.Name()) // Clean up temp file
|
||||||
|
|
||||||
|
// Write criteria to temp file
|
||||||
|
if _, err := tempFile.WriteString(req.Criteria); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to write criteria: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
criteriaFile = tempFile.Name()
|
||||||
|
} else {
|
||||||
|
criteriaFile = req.CriteriaFile
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process the papers
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate job ID and create job
|
||||||
|
jobID := fmt.Sprintf("job-%s", timestamp)
|
||||||
|
job := &ProcessingJob{
|
||||||
|
ID: jobID,
|
||||||
|
Status: "pending",
|
||||||
|
StartTime: time.Now(),
|
||||||
|
JSONPath: outputJSON,
|
||||||
|
MDPath: outputMD,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
s.jobs[jobID] = job
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
|
||||||
|
// Start processing in background
|
||||||
|
go func() {
|
||||||
|
// Process the papers
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Processing failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Formatting failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read markdown content
|
||||||
|
mdContent, err := os.ReadFile(outputMD)
|
||||||
|
if err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Failed to read markdown: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Windows line endings to Unix
|
||||||
|
mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n")
|
||||||
|
// Ensure file ends with newline
|
||||||
|
if !strings.HasSuffix(mdString, "\n") {
|
||||||
|
mdString += "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update job with success
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "completed"
|
||||||
|
job.MarkdownText = mdString
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Return job ID immediately
|
||||||
|
json.NewEncoder(w).Encode(struct {
|
||||||
|
JobID string `json:"job_id"`
|
||||||
|
}{
|
||||||
|
JobID: jobID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) handleJobStatus(w http.ResponseWriter, r *http.Request) {
|
||||||
|
jobID := chi.URLParam(r, "jobID")
|
||||||
|
|
||||||
|
s.jobsMutex.RLock()
|
||||||
|
job, exists := s.jobs[jobID]
|
||||||
|
s.jobsMutex.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, "Job not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
StartTime time.Time `json:"start_time"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
MarkdownText string `json:"markdown_text,omitempty"`
|
||||||
|
}{
|
||||||
|
ID: job.ID,
|
||||||
|
Status: job.Status,
|
||||||
|
StartTime: job.StartTime,
|
||||||
|
Error: job.Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only include markdown text if job is completed
|
||||||
|
if job.Status == "completed" {
|
||||||
|
response.MarkdownText = job.MarkdownText
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(w).Encode(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) handleSearchAndProcess(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) handleSearchAndProcess(w http.ResponseWriter, r *http.Request) {
|
||||||
var req struct {
|
var req struct {
|
||||||
StartDate string `json:"start_date"`
|
StartDate string `json:"start_date"`
|
||||||
EndDate string `json:"end_date"`
|
EndDate string `json:"end_date"`
|
||||||
Query string `json:"query"`
|
Query string `json:"query"`
|
||||||
MaxResults int `json:"max_results"`
|
MaxResults int `json:"max_results"`
|
||||||
CriteriaFile string `json:"criteria_file"`
|
CriteriaFile string `json:"criteria_file,omitempty"`
|
||||||
ApiKey string `json:"api_key"`
|
Criteria string `json:"criteria,omitempty"`
|
||||||
Model string `json:"model"`
|
ApiKey string `json:"api_key"`
|
||||||
}
|
Model string `json:"model,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate dates
|
// Validate dates
|
||||||
if !isValidDate(req.StartDate) || !isValidDate(req.EndDate) {
|
if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) {
|
||||||
http.Error(w, "Invalid date format", http.StatusBadRequest)
|
http.Error(w, "Invalid date format", http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch papers
|
// Fetch papers
|
||||||
papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save papers to temporary JSON file
|
// Save papers to temporary JSON file
|
||||||
baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, sanitizeFilename(req.Query))
|
baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, SanitizeFilename(req.Query))
|
||||||
inputJSON := baseFilename + ".json"
|
inputJSON := baseFilename + ".json"
|
||||||
if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil {
|
if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create processor configuration
|
// Create processor configuration
|
||||||
config := paperprocessor.Config{
|
config := paperprocessor.Config{
|
||||||
APIEndpoint: *apiEndpoint, // This would need to be passed to Server struct
|
APIEndpoint: s.apiEndpoint,
|
||||||
APIKey: req.ApiKey,
|
APIKey: req.ApiKey,
|
||||||
Model: req.Model,
|
Model: req.Model,
|
||||||
RequestDelay: 2 * time.Second,
|
RequestDelay: 2 * time.Second,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the papers
|
// Handle criteria
|
||||||
outputJSON := baseFilename + "-processed.json"
|
var criteriaFile string
|
||||||
if err := paperprocessor.ProcessFile(
|
var tempCriteriaFile string // Track temporary file for cleanup
|
||||||
inputJSON,
|
|
||||||
outputJSON,
|
|
||||||
req.CriteriaFile,
|
|
||||||
config,
|
|
||||||
); err != nil {
|
|
||||||
http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format to markdown
|
if req.Criteria != "" {
|
||||||
outputMD := baseFilename + "-processed.md"
|
// Create temporary file for criteria
|
||||||
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
tempFile, err := os.CreateTemp("", "criteria-*.md")
|
||||||
http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
if err != nil {
|
||||||
return
|
http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError)
|
||||||
}
|
return
|
||||||
|
}
|
||||||
|
tempCriteriaFile = tempFile.Name() // Save for cleanup after processing
|
||||||
|
|
||||||
// Return the paths to the generated files
|
// Write criteria to temp file
|
||||||
json.NewEncoder(w).Encode(struct {
|
if _, err := tempFile.WriteString(req.Criteria); err != nil {
|
||||||
JSONOutput string `json:"json_output"`
|
os.Remove(tempCriteriaFile) // Clean up on error
|
||||||
MDOutput string `json:"md_output"`
|
http.Error(w, fmt.Sprintf("Failed to write criteria: %v", err), http.StatusInternalServerError)
|
||||||
}{
|
return
|
||||||
JSONOutput: outputJSON,
|
}
|
||||||
MDOutput: outputMD,
|
tempFile.Close()
|
||||||
})
|
criteriaFile = tempCriteriaFile
|
||||||
|
} else if req.CriteriaFile != "" {
|
||||||
|
criteriaFile = req.CriteriaFile
|
||||||
|
} else {
|
||||||
|
http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process the papers
|
||||||
|
outputJSON := baseFilename + "-processed.json"
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
if tempCriteriaFile != "" {
|
||||||
|
os.Remove(tempCriteriaFile) // Clean up temp file on error
|
||||||
|
}
|
||||||
|
http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
outputMD := baseFilename + "-processed.md"
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate job ID and create job
|
||||||
|
jobID := fmt.Sprintf("job-%s", baseFilename)
|
||||||
|
job := &ProcessingJob{
|
||||||
|
ID: jobID,
|
||||||
|
Status: "pending",
|
||||||
|
StartTime: time.Now(),
|
||||||
|
JSONPath: outputJSON,
|
||||||
|
MDPath: outputMD,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
s.jobs[jobID] = job
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
|
||||||
|
// Start processing in background
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
if tempCriteriaFile != "" {
|
||||||
|
os.Remove(tempCriteriaFile) // Clean up temp file after processing
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Process the papers
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Processing failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Formatting failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read markdown content
|
||||||
|
mdContent, err := os.ReadFile(outputMD)
|
||||||
|
if err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Failed to read markdown: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Windows line endings to Unix
|
||||||
|
mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n")
|
||||||
|
// Ensure file ends with newline
|
||||||
|
if !strings.HasSuffix(mdString, "\n") {
|
||||||
|
mdString += "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update job with success
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "completed"
|
||||||
|
job.MarkdownText = mdString
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Return job ID immediately
|
||||||
|
json.NewEncoder(w).Encode(struct {
|
||||||
|
JobID string `json:"job_id"`
|
||||||
|
}{
|
||||||
|
JobID: jobID,
|
||||||
|
})
|
||||||
}
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// validDatePattern matches exactly eight ASCII digits. It is compiled once at
// package initialization rather than on every IsValidDate call.
var validDatePattern = regexp.MustCompile(`^\d{8}$`)

// IsValidDate reports whether date is a real calendar date written in
// YYYYMMDD format.
func IsValidDate(date string) bool {
	// Check basic format: eight digits and nothing else.
	if !validDatePattern.MatchString(date) {
		return false
	}

	// Parse the date so that impossible values such as 20240230 are rejected.
	_, err := time.Parse("20060102", date)
	return err == nil
}
|
||||||
|
|
||||||
|
// SanitizeFilename returns s with every colon and every space turned into an
// underscore, mirroring the sanitization arxiva applies to file names.
func SanitizeFilename(s string) string {
	return strings.NewReplacer(":", "_", " ", "_").Replace(s)
}
|
Loading…
Reference in New Issue