Compare commits
6 Commits
main
...
claude-edi
Author | SHA1 | Date |
---|---|---|
Steve White | 9e0a7e594d | |
Steve White | 9329294305 | |
Steve White | e552411298 | |
Steve White | ac208fabdd | |
Steve White | 3f628f0805 | |
Steve White | 5b89d0b04a |
48
.clinerules
48
.clinerules
|
@ -1,47 +1 @@
|
||||||
## Here are the api signatures for arxiva
|
After all major changes, update git with an informative commit message.
|
||||||
### FetchPapers(startDate, endDate, query string, maxResults int) ([]Paper, error)
|
|
||||||
startDate: Start date in format "YYYYMMDD"
|
|
||||||
endDate: End date in format "YYYYMMDD"
|
|
||||||
query: Search query
|
|
||||||
maxResults: Maximum number of results (1-2000)
|
|
||||||
Fetches papers from arXiv API
|
|
||||||
|
|
||||||
### SaveToFile(papers []Paper, startDate, endDate, query string) error
|
|
||||||
papers: Array of Paper structs
|
|
||||||
startDate: Start date in format "YYYYMMDD"
|
|
||||||
endDate: End date in format "YYYYMMDD"
|
|
||||||
query: Search query
|
|
||||||
Saves papers to a JSON file
|
|
||||||
|
|
||||||
JSON file is named "YYYYMMDD-YYYYMMDD-query.json" (where the first YYYYMMDD is the start date, the second YYYYMMDD is the end date, and query is the search query)
|
|
||||||
|
|
||||||
## here is the API signature for paperprocessor:
|
|
||||||
|
|
||||||
### ProcessFile
|
|
||||||
`func ProcessFile(inputPath, outputPath, criteriaPath string, config Config, debug bool) error`
|
|
||||||
|
|
||||||
Processes papers from input JSON file and writes results to output JSON file
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- inputPath: Path to input JSON file containing papers array
|
|
||||||
- outputPath: Path to write processing results JSON
|
|
||||||
- criteriaPath: Path to text file with evaluation criteria
|
|
||||||
- config: Configuration settings for API and processing
|
|
||||||
- debug: Enable debug logging when true
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- error: Processing error or nil if successful
|
|
||||||
|
|
||||||
You create config like this:
|
|
||||||
config := paperprocessor.Config{
|
|
||||||
APIEndpoint: "http://localhost:1234/v1/chat/completions",
|
|
||||||
APIKey: apiKey,
|
|
||||||
Model: "qwen2-7b-instruct",
|
|
||||||
RequestDelay: 2 * time.Second, // 2 second delay between requests
|
|
||||||
|
|
||||||
|
|
||||||
## Here is the usage for paperformatter:
|
|
||||||
err := paperformatter.FormatPapers("input.json", "output.md")
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
# Markdown files except documentation
|
||||||
*.md
|
*.md
|
||||||
|
!README.md
|
||||||
|
!API.md
|
||||||
*.json
|
*.json
|
||||||
papers
|
papers
|
||||||
|
|
|
@ -0,0 +1,220 @@
|
||||||
|
# Papers API Reference
|
||||||
|
|
||||||
|
This document describes the HTTP API endpoints available when running Papers in server mode.
|
||||||
|
|
||||||
|
## Running the Server
|
||||||
|
|
||||||
|
Start the server using:
|
||||||
|
```bash
|
||||||
|
papers -serve -port 8080
|
||||||
|
```
|
||||||
|
|
||||||
|
The server will listen on the specified port (default: 8080).
|
||||||
|
|
||||||
|
## Important Notes
|
||||||
|
|
||||||
|
### CORS
|
||||||
|
CORS is enabled on the server with the following configuration:
|
||||||
|
- Allowed Origins: All origins (`*`) in development
|
||||||
|
- Allowed Methods: `GET`, `POST`, `OPTIONS`
|
||||||
|
- Allowed Headers: `Accept`, `Authorization`, `Content-Type`
|
||||||
|
- Credentials: Not allowed
|
||||||
|
- Max Age: 300 seconds
|
||||||
|
|
||||||
|
Note: In production, you should restrict allowed origins to your specific domain(s).
|
||||||
|
|
||||||
|
### Authentication
|
||||||
|
No authentication is required beyond the API key for LLM processing. The API key should be included in the request body for processing endpoints.
|
||||||
|
|
||||||
|
### Timing Considerations
|
||||||
|
- Initial paper search: typically < 5 seconds
|
||||||
|
- Processing time: up to 30 minutes for large batches
|
||||||
|
- Job status polling: recommended interval is 15 seconds
|
||||||
|
- LLM rate limiting: 2-second delay between requests
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
### Search Papers
|
||||||
|
`POST /api/papers/search`
|
||||||
|
|
||||||
|
Search for papers on arXiv based on date range and query.
|
||||||
|
|
||||||
|
**Request Body:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"start_date": "20240101", // Required: Start date in YYYYMMDD format
|
||||||
|
"end_date": "20240131", // Required: End date in YYYYMMDD format
|
||||||
|
"query": "machine learning", // Required: Search query
|
||||||
|
"max_results": 5 // Optional: Maximum number of results (1-2000, default: 100)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "Paper Title",
|
||||||
|
"abstract": "Paper Abstract",
|
||||||
|
"arxiv_id": "2401.12345"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Process Papers
|
||||||
|
`POST /api/papers/process`
|
||||||
|
|
||||||
|
Process papers using the specified LLM model. Papers can be provided either directly in the request or by referencing a JSON file.
|
||||||
|
|
||||||
|
**Request Body:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
// Option 1: Direct paper data
|
||||||
|
"papers": [ // Optional: Array of papers
|
||||||
|
{
|
||||||
|
"title": "Paper Title",
|
||||||
|
"abstract": "Paper Abstract",
|
||||||
|
"arxiv_id": "2401.12345"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
|
||||||
|
// Option 2: File reference
|
||||||
|
"input_file": "papers.json", // Optional: Path to input JSON file
|
||||||
|
|
||||||
|
// Criteria (one of these is required)
|
||||||
|
"criteria": "Accepted papers MUST:\n* primarily address LLMs...", // Optional: Direct criteria text
|
||||||
|
"criteria_file": "criteria.md", // Optional: Path to criteria markdown file
|
||||||
|
|
||||||
|
// Required fields
|
||||||
|
"api_key": "your-key", // Required: API key for LLM service
|
||||||
|
|
||||||
|
// Optional fields
|
||||||
|
"model": "phi-4" // Optional: Model to use (default: phi-4)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- Either `papers` or `input_file` must be provided, but not both
|
||||||
|
- Either `criteria` or `criteria_file` must be provided, but not both
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job_id": "job-20240129113500"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The endpoint returns immediately with a job ID. Use this ID with the job status endpoint to check progress and get results.
|
||||||
|
|
||||||
|
### Search and Process Papers
|
||||||
|
`POST /api/papers/search-process`
|
||||||
|
|
||||||
|
Combined endpoint to search for papers and process them in one request. This endpoint automatically saves the papers to a file and processes them.
|
||||||
|
|
||||||
|
**Request Body:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"start_date": "20240101", // Required: Start date in YYYYMMDD format
|
||||||
|
"end_date": "20240131", // Required: End date in YYYYMMDD format
|
||||||
|
"query": "machine learning", // Required: Search query
|
||||||
|
"max_results": 5, // Optional: Maximum number of results (1-2000, default: 100)
|
||||||
|
// Criteria (one of these is required)
|
||||||
|
"criteria": "Accepted papers MUST:\n* primarily address LLMs...", // Optional: Direct criteria text
|
||||||
|
"criteria_file": "criteria.md", // Optional: Path to criteria markdown file
|
||||||
|
|
||||||
|
// Required fields
|
||||||
|
"api_key": "your-key", // Required: API key for LLM service
|
||||||
|
|
||||||
|
// Optional fields
|
||||||
|
"model": "phi-4" // Optional: Model to use (default: phi-4)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job_id": "job-20240101-20240131-machine_learning"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get Job Status
|
||||||
|
`GET /api/jobs/{jobID}`
|
||||||
|
|
||||||
|
Check the status of a processing job and retrieve results when complete.
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "job-20240129113500",
|
||||||
|
"status": "completed", // "pending", "processing", "completed", or "failed"
|
||||||
|
"start_time": "2024-01-29T11:35:00Z",
|
||||||
|
"error": "", // Error message if status is "failed"
|
||||||
|
"markdown_text": "# Results\n..." // Full markdown content when completed
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Processing Flow
|
||||||
|
|
||||||
|
1. Submit a processing request (either `/api/papers/process` or `/api/papers/search-process`)
|
||||||
|
2. Receive a job ID immediately
|
||||||
|
3. Poll the job status endpoint until the job is completed
|
||||||
|
4. When completed, the markdown content will be in the `markdown_text` field of the response
|
||||||
|
|
||||||
|
Example workflow:
|
||||||
|
```bash
|
||||||
|
# 1. Submit processing request
|
||||||
|
curl -X POST -H "Content-Type: application/json" -d '{
|
||||||
|
"start_date": "20240101",
|
||||||
|
"end_date": "20240131",
|
||||||
|
"query": "machine learning",
|
||||||
|
"criteria": "Accepted papers MUST:\n* primarily address LLMs...",
|
||||||
|
"api_key": "your-key",
|
||||||
|
"model": "phi-4"
|
||||||
|
}' http://localhost:8080/api/papers/search-process
|
||||||
|
|
||||||
|
# Response: {"job_id": "job-20240101-20240131-machine_learning"}
|
||||||
|
|
||||||
|
# 2. Check job status and get results
|
||||||
|
curl http://localhost:8080/api/jobs/job-20240101-20240131-machine_learning
|
||||||
|
|
||||||
|
# Response when completed:
|
||||||
|
{
|
||||||
|
"id": "job-20240101-20240131-machine_learning",
|
||||||
|
"status": "completed",
|
||||||
|
"start_time": "2024-01-29T11:35:00Z",
|
||||||
|
"markdown_text": "# Results\n\n## Accepted Papers\n\n1. Paper Title..."
|
||||||
|
}
|
||||||
|
|
||||||
|
# 3. Save markdown to file (example using jq)
|
||||||
|
# The -r flag is important to get raw output without JSON escaping
|
||||||
|
curl http://localhost:8080/api/jobs/job-20240101-20240131-machine_learning | jq -r '.markdown_text' > results.md
|
||||||
|
|
||||||
|
# Alternative using Python (handles JSON escaping)
|
||||||
|
curl http://localhost:8080/api/jobs/job-20240101-20240131-machine_learning | python3 -c '
|
||||||
|
import json, sys
|
||||||
|
response = json.load(sys.stdin)
|
||||||
|
if response.get("status") == "completed":
|
||||||
|
with open("results.md", "w") as f:
|
||||||
|
f.write(response["markdown_text"])
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: Processing can take up to 30 minutes depending on the number of papers and LLM response times. The job status endpoint can be polled periodically (e.g., every 15 seconds, as recommended under Timing Considerations) to check progress.
|
||||||
|
|
||||||
|
## Error Responses
|
||||||
|
|
||||||
|
All endpoints return appropriate HTTP status codes:
|
||||||
|
|
||||||
|
- 200: Success
|
||||||
|
- 400: Bad Request (invalid parameters)
|
||||||
|
- 500: Internal Server Error
|
||||||
|
|
||||||
|
Error responses include a message explaining the error:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"error": "Invalid date format"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rate Limiting
|
||||||
|
|
||||||
|
The server includes built-in rate limiting for LLM API requests (2-second delay between requests) to prevent overwhelming the LLM service.
|
18
README.md
18
README.md
|
@ -27,6 +27,7 @@ This is hard to pull off with keyword searches. You might exclude every paper th
|
||||||
- Generate both JSON and Markdown outputs
|
- Generate both JSON and Markdown outputs
|
||||||
- Customizable evaluation criteria
|
- Customizable evaluation criteria
|
||||||
- Rate-limited API requests (2-second delay between requests)
|
- Rate-limited API requests (2-second delay between requests)
|
||||||
|
- HTTP API server mode for integration with other services
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
@ -65,6 +66,8 @@ papers -input papers.json -api-key "your-key" -criteria criteria.md
|
||||||
|
|
||||||
### Optional Flags
|
### Optional Flags
|
||||||
|
|
||||||
|
- `-serve`: Run in server mode with HTTP API endpoints
|
||||||
|
- `-port`: Port to run server on (default: "8080")
|
||||||
- `-search-only`: Fetch papers from arXiv and save to JSON file without processing
|
- `-search-only`: Fetch papers from arXiv and save to JSON file without processing
|
||||||
- `-input`: Input JSON file containing papers (optional)
|
- `-input`: Input JSON file containing papers (optional)
|
||||||
- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100)
|
- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100)
|
||||||
|
@ -76,6 +79,21 @@ papers -input papers.json -api-key "your-key" -criteria criteria.md
|
||||||
|
|
||||||
**NB: default API endpoint is LMStudio, and Phi-4 does a great job filtering papers**
|
**NB: default API endpoint is LMStudio, and Phi-4 does a great job filtering papers**
|
||||||
|
|
||||||
|
## Server Mode
|
||||||
|
|
||||||
|
The tool can be run as an HTTP server providing API endpoints for paper search and processing:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
papers -serve -port 8080
|
||||||
|
```
|
||||||
|
|
||||||
|
This starts a server with the following endpoints:
|
||||||
|
- `POST /api/papers/search` - Search for papers on arXiv
|
||||||
|
- `POST /api/papers/process` - Process papers using LLM
|
||||||
|
- `POST /api/papers/search-process` - Combined search and process
|
||||||
|
|
||||||
|
See [API.md](API.md) for detailed API documentation.
|
||||||
|
|
||||||
## Pipeline
|
## Pipeline
|
||||||
|
|
||||||
1. **Fetch**: Retrieves papers from arXiv based on specified date range and query
|
1. **Fetch**: Retrieves papers from arXiv based on specified date range and query
|
||||||
|
|
|
@ -0,0 +1,258 @@
|
||||||
|
diff --git a/go.mod b/go.mod
|
||||||
|
--- a/go.mod
|
||||||
|
+++ b/go.mod
|
||||||
|
@@ -7,4 +7,5 @@ require (
|
||||||
|
gitea.r8z.us/stwhite/arxiva v0.1.0
|
||||||
|
gitea.r8z.us/stwhite/paperformatter v0.1.3
|
||||||
|
gitea.r8z.us/stwhite/paperprocessor v0.1.8
|
||||||
|
+ github.com/go-chi/chi/v5 v5.0.11
|
||||||
|
)
|
||||||
|
|
||||||
|
diff --git a/papers.go b/papers.go
|
||||||
|
--- a/papers.go
|
||||||
|
+++ b/papers.go
|
||||||
|
@@ -6,6 +6,7 @@ import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
+ "net/http" // Added for server mode
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
@@ -47,6 +48,8 @@ func main() {
|
||||||
|
flag.Usage = func() {
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, "Description:\n")
|
||||||
|
- fmt.Fprintf(os.Stderr, " Fetches papers from arXiv (or uses input file), processes them using an LLM, and generates both JSON and Markdown outputs.\n\n")
|
||||||
|
+ fmt.Fprintf(os.Stderr, " Fetches papers from arXiv (or uses input file), processes them using an LLM,\n")
|
||||||
|
+ fmt.Fprintf(os.Stderr, " and generates both JSON and Markdown outputs. Can also run as an HTTP server.\n\n")
|
||||||
|
+ fmt.Fprintf(os.Stderr, "Server Mode:\n")
|
||||||
|
+ fmt.Fprintf(os.Stderr, " Run as an HTTP server with: %s -serve [-port 8080]\n\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, "Pipeline:\n")
|
||||||
|
// ... rest of usage function
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse command line arguments
|
||||||
|
+ serveMode := flag.Bool("serve", false, "Run as HTTP server")
|
||||||
|
+ port := flag.String("port", "8080", "Port to run server on when using -serve")
|
||||||
|
searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file")
|
||||||
|
// ... rest of flag declarations
|
||||||
|
|
||||||
|
+ // Check if we should run in server mode
|
||||||
|
+ if *serveMode {
|
||||||
|
+ server := NewServer(*port)
|
||||||
|
+ server.apiEndpoint = *apiEndpoint
|
||||||
|
+ server.apiKey = *apiKey
|
||||||
|
+ server.model = *model
|
||||||
|
+
|
||||||
|
+ log.Printf("Starting server on port %s...", *port)
|
||||||
|
+ if err := server.Run(); err != nil {
|
||||||
|
+ log.Fatalf("Server failed: %v", err)
|
||||||
|
+ }
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// Rest of main function unchanged
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/server.go b/server.go
|
||||||
|
new file mode 100644
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/server.go
|
||||||
|
@@ -0,0 +1,189 @@
|
||||||
|
+package main
|
||||||
|
+
|
||||||
|
+import (
|
||||||
|
+ "encoding/json"
|
||||||
|
+ "fmt"
|
||||||
|
+ "log"
|
||||||
|
+ "net/http"
|
||||||
|
+
|
||||||
|
+ "github.com/go-chi/chi/v5"
|
||||||
|
+ "github.com/go-chi/chi/v5/middleware"
|
||||||
|
+)
|
||||||
|
+
|
||||||
|
+type Server struct {
|
||||||
|
+ router *chi.Mux
|
||||||
|
+ port string
|
||||||
|
+ apiEndpoint string
|
||||||
|
+ apiKey string
|
||||||
|
+ model string
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func NewServer(port string) *Server {
|
||||||
|
+ s := &Server{
|
||||||
|
+ router: chi.NewRouter(),
|
||||||
|
+ port: port,
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ s.setupRoutes()
|
||||||
|
+ return s
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (s *Server) setupRoutes() {
|
||||||
|
+ s.router.Use(middleware.Logger)
|
||||||
|
+ s.router.Use(middleware.Recoverer)
|
||||||
|
+
|
||||||
|
+ s.router.Post("/api/papers/search", s.handleSearch)
|
||||||
|
+ s.router.Post("/api/papers/process", s.handleProcess)
|
||||||
|
+ s.router.Post("/api/papers/search-process", s.handleSearchAndProcess)
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (s *Server) Run() error {
|
||||||
|
+ addr := fmt.Sprintf(":%s", s.port)
|
||||||
|
+ log.Printf("Starting server on %s", addr)
|
||||||
|
+ return http.ListenAndServe(addr, s.router)
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
|
||||||
|
+ var req struct {
|
||||||
|
+ StartDate string `json:"start_date"`
|
||||||
|
+ EndDate string `json:"end_date"`
|
||||||
|
+ Query string `json:"query"`
|
||||||
|
+ MaxResults int `json:"max_results"`
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
+ http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Reuse existing validation
|
||||||
|
+ if !isValidDate(req.StartDate) || !isValidDate(req.EndDate) {
|
||||||
|
+ http.Error(w, "Invalid date format", http.StatusBadRequest)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
||||||
|
+ if err != nil {
|
||||||
|
+ http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ json.NewEncoder(w).Encode(papers)
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (s *Server) handleProcess(w http.ResponseWriter, r *http.Request) {
|
||||||
|
+ var req struct {
|
||||||
|
+ InputFile string `json:"input_file"`
|
||||||
|
+ CriteriaFile string `json:"criteria_file"`
|
||||||
|
+ ApiKey string `json:"api_key"`
|
||||||
|
+ Model string `json:"model"`
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
+ http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Create processor configuration
|
||||||
|
+ config := paperprocessor.Config{
|
||||||
|
+ APIEndpoint: s.apiEndpoint,
|
||||||
|
+ APIKey: req.ApiKey,
|
||||||
|
+ Model: req.Model,
|
||||||
|
+ RequestDelay: 2 * time.Second,
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Process the papers
|
||||||
|
+ outputJSON := req.InputFile + "-processed.json"
|
||||||
|
+ if err := paperprocessor.ProcessFile(
|
||||||
|
+ req.InputFile,
|
||||||
|
+ outputJSON,
|
||||||
|
+ req.CriteriaFile,
|
||||||
|
+ config,
|
||||||
|
+ ); err != nil {
|
||||||
|
+ http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Format to markdown
|
||||||
|
+ outputMD := req.InputFile + "-processed.md"
|
||||||
|
+ if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
+ http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Return the paths to the generated files
|
||||||
|
+ json.NewEncoder(w).Encode(struct {
|
||||||
|
+ JSONOutput string `json:"json_output"`
|
||||||
|
+ MDOutput string `json:"md_output"`
|
||||||
|
+ }{
|
||||||
|
+ JSONOutput: outputJSON,
|
||||||
|
+ MDOutput: outputMD,
|
||||||
|
+ })
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (s *Server) handleSearchAndProcess(w http.ResponseWriter, r *http.Request) {
|
||||||
|
+ var req struct {
|
||||||
|
+ StartDate string `json:"start_date"`
|
||||||
|
+ EndDate string `json:"end_date"`
|
||||||
|
+ Query string `json:"query"`
|
||||||
|
+ MaxResults int `json:"max_results"`
|
||||||
|
+ CriteriaFile string `json:"criteria_file"`
|
||||||
|
+ ApiKey string `json:"api_key"`
|
||||||
|
+ Model string `json:"model"`
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
+ http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Validate dates
|
||||||
|
+ if !isValidDate(req.StartDate) || !isValidDate(req.EndDate) {
|
||||||
|
+ http.Error(w, "Invalid date format", http.StatusBadRequest)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Fetch papers
|
||||||
|
+ papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
||||||
|
+ if err != nil {
|
||||||
|
+ http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Save papers to temporary JSON file
|
||||||
|
+ baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, sanitizeFilename(req.Query))
|
||||||
|
+ inputJSON := baseFilename + ".json"
|
||||||
|
+ if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil {
|
||||||
|
+ http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Create processor configuration
|
||||||
|
+ config := paperprocessor.Config{
|
||||||
|
+ APIEndpoint: s.apiEndpoint,
|
||||||
|
+ APIKey: req.ApiKey,
|
||||||
|
+ Model: req.Model,
|
||||||
|
+ RequestDelay: 2 * time.Second,
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Process the papers
|
||||||
|
+ outputJSON := baseFilename + "-processed.json"
|
||||||
|
+ if err := paperprocessor.ProcessFile(
|
||||||
|
+ inputJSON,
|
||||||
|
+ outputJSON,
|
||||||
|
+ req.CriteriaFile,
|
||||||
|
+ config,
|
||||||
|
+ ); err != nil {
|
||||||
|
+ http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Format to markdown
|
||||||
|
+ outputMD := baseFilename + "-processed.md"
|
||||||
|
+ if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
+ http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Return the paths to the generated files
|
||||||
|
+ json.NewEncoder(w).Encode(struct {
|
||||||
|
+ JSONOutput string `json:"json_output"`
|
||||||
|
+ MDOutput string `json:"md_output"`
|
||||||
|
+ }{
|
||||||
|
+ JSONOutput: outputJSON,
|
||||||
|
+ MDOutput: outputMD,
|
||||||
|
+ })
|
||||||
|
+}
|
2
go.mod
2
go.mod
|
@ -8,4 +8,6 @@ require (
|
||||||
gitea.r8z.us/stwhite/arxiva v0.1.0
|
gitea.r8z.us/stwhite/arxiva v0.1.0
|
||||||
gitea.r8z.us/stwhite/paperformatter v0.1.3
|
gitea.r8z.us/stwhite/paperformatter v0.1.3
|
||||||
gitea.r8z.us/stwhite/paperprocessor v0.1.8
|
gitea.r8z.us/stwhite/paperprocessor v0.1.8
|
||||||
|
github.com/go-chi/chi/v5 v5.0.11
|
||||||
|
github.com/go-chi/cors v1.2.1
|
||||||
)
|
)
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -4,3 +4,7 @@ gitea.r8z.us/stwhite/paperformatter v0.1.3 h1:Z8yIdfCmQ+c5A5To+Y3XPHNOdK5B/q5VqB
|
||||||
gitea.r8z.us/stwhite/paperformatter v0.1.3/go.mod h1:As2zIT0NSsMirYqdvIfIXXxIHOcdsHANhnh0VNcAluQ=
|
gitea.r8z.us/stwhite/paperformatter v0.1.3/go.mod h1:As2zIT0NSsMirYqdvIfIXXxIHOcdsHANhnh0VNcAluQ=
|
||||||
gitea.r8z.us/stwhite/paperprocessor v0.1.8 h1:pV810JZQFhuKcle4ix7stUz12LZNIgFCVWxSC/RYWpE=
|
gitea.r8z.us/stwhite/paperprocessor v0.1.8 h1:pV810JZQFhuKcle4ix7stUz12LZNIgFCVWxSC/RYWpE=
|
||||||
gitea.r8z.us/stwhite/paperprocessor v0.1.8/go.mod h1:0wHe7XjtQICFrPKbO53SVrUiVw9yi8GOGo9J7znpo+E=
|
gitea.r8z.us/stwhite/paperprocessor v0.1.8/go.mod h1:0wHe7XjtQICFrPKbO53SVrUiVw9yi8GOGo9J7znpo+E=
|
||||||
|
github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA=
|
||||||
|
github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
|
||||||
|
github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4=
|
||||||
|
github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58=
|
||||||
|
|
43
papers.go
43
papers.go
|
@ -7,7 +7,6 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -57,26 +56,6 @@ func validateInputFile(path string) ([]Paper, error) {
|
||||||
return papers, nil
|
return papers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanitizeFilename replaces invalid filename characters to match arxiva's sanitization
|
|
||||||
func sanitizeFilename(s string) string {
|
|
||||||
s = strings.ReplaceAll(s, ":", "_")
|
|
||||||
s = strings.ReplaceAll(s, " ", "_")
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
// isValidDate checks if the date string is in YYYYMMDD format
|
|
||||||
func isValidDate(date string) bool {
|
|
||||||
// Check basic format with regex
|
|
||||||
matched, err := regexp.MatchString(`^\d{8}$`, date)
|
|
||||||
if err != nil || !matched {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse date to verify it's a valid date
|
|
||||||
_, err = time.Parse("20060102", date)
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Set custom usage message
|
// Set custom usage message
|
||||||
flag.Usage = func() {
|
flag.Usage = func() {
|
||||||
|
@ -105,10 +84,14 @@ func main() {
|
||||||
fmt.Fprintf(os.Stderr, " With custom options:\n")
|
fmt.Fprintf(os.Stderr, " With custom options:\n")
|
||||||
fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0])
|
||||||
fmt.Fprintf(os.Stderr, " Search only:\n")
|
fmt.Fprintf(os.Stderr, " Search only:\n")
|
||||||
fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " Server mode:\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -serve -port 8080\n\n", os.Args[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse command line arguments
|
// Parse command line arguments
|
||||||
|
serve := flag.Bool("serve", false, "Run in server mode")
|
||||||
|
port := flag.String("port", "8080", "Port to run server on")
|
||||||
searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)")
|
searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)")
|
||||||
inputFile := flag.String("input", "", "Input JSON file containing papers (optional)")
|
inputFile := flag.String("input", "", "Input JSON file containing papers (optional)")
|
||||||
startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)")
|
startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)")
|
||||||
|
@ -124,6 +107,14 @@ func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Validate required flags and input
|
// Validate required flags and input
|
||||||
|
if *serve {
|
||||||
|
server := NewServer(*port, *apiEndpoint)
|
||||||
|
if err := server.Run(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if *searchOnly {
|
if *searchOnly {
|
||||||
if *startDate == "" || *endDate == "" || *query == "" {
|
if *startDate == "" || *endDate == "" || *query == "" {
|
||||||
fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n")
|
fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n")
|
||||||
|
@ -132,7 +123,7 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate date format
|
// Validate date format
|
||||||
if !isValidDate(*startDate) || !isValidDate(*endDate) {
|
if !IsValidDate(*startDate) || !IsValidDate(*endDate) {
|
||||||
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
@ -154,7 +145,7 @@ func main() {
|
||||||
log.Fatalf("Failed to save papers: %v", err)
|
log.Fatalf("Failed to save papers: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, sanitizeFilename(*query))
|
log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, SanitizeFilename(*query))
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,7 +186,7 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate date format
|
// Validate date format
|
||||||
if !isValidDate(*startDate) || !isValidDate(*endDate) {
|
if !IsValidDate(*startDate) || !IsValidDate(*endDate) {
|
||||||
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
@ -217,7 +208,7 @@ func main() {
|
||||||
log.Fatalf("Failed to save papers: %v", err)
|
log.Fatalf("Failed to save papers: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, sanitizeFilename(*query))
|
baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, SanitizeFilename(*query))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create processor configuration
|
// Create processor configuration
|
||||||
|
|
|
@ -0,0 +1,487 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.r8z.us/stwhite/arxiva"
|
||||||
|
"gitea.r8z.us/stwhite/paperformatter"
|
||||||
|
"gitea.r8z.us/stwhite/paperprocessor"
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
"github.com/go-chi/chi/v5/middleware"
|
||||||
|
"github.com/go-chi/cors"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ProcessingJob struct {
|
||||||
|
ID string // Unique job identifier
|
||||||
|
Status string // "pending", "processing", "completed", "failed"
|
||||||
|
StartTime time.Time // When the job started
|
||||||
|
Error string // Error message if failed
|
||||||
|
JSONPath string // Path to JSON output file
|
||||||
|
MDPath string // Path to Markdown output file
|
||||||
|
MarkdownText string // Content of markdown file when completed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Server is the HTTP front end: it owns the router, listen port, the
// upstream API endpoint passed to the paper processor, and the in-memory
// registry of background processing jobs.
type Server struct {
	router      *chi.Mux
	port        string
	apiEndpoint string
	jobs        map[string]*ProcessingJob // Track processing jobs
	jobsMutex   sync.RWMutex              // Protect jobs map
}
|
||||||
|
|
||||||
|
func NewServer(port string, apiEndpoint string) *Server {
|
||||||
|
s := &Server{
|
||||||
|
router: chi.NewRouter(),
|
||||||
|
port: port,
|
||||||
|
apiEndpoint: apiEndpoint,
|
||||||
|
jobs: make(map[string]*ProcessingJob),
|
||||||
|
}
|
||||||
|
|
||||||
|
s.setupRoutes()
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// setupRoutes installs middleware and registers every HTTP endpoint.
// Registration order matters with chi: middleware must be attached
// before any route is added.
func (s *Server) setupRoutes() {
	// Basic middleware
	s.router.Use(middleware.Logger)
	s.router.Use(middleware.Recoverer)

	// CORS middleware
	s.router.Use(cors.Handler(cors.Options{
		AllowedOrigins:   []string{"*"}, // Allow all origins in development
		AllowedMethods:   []string{"GET", "POST", "OPTIONS"},
		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type"},
		ExposedHeaders:   []string{},
		AllowCredentials: false,
		MaxAge:           300, // Maximum value not ignored by any of major browsers
	}))

	// Routes
	s.router.Post("/api/papers/search", s.handleSearch)
	s.router.Post("/api/papers/process", s.handleProcess)
	s.router.Post("/api/papers/search-process", s.handleSearchAndProcess)
	s.router.Get("/api/jobs/{jobID}", s.handleJobStatus)
}
|
||||||
|
|
||||||
|
func (s *Server) Run() error {
|
||||||
|
addr := fmt.Sprintf(":%s", s.port)
|
||||||
|
log.Printf("Starting server on %s", addr)
|
||||||
|
return http.ListenAndServe(addr, s.router)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
StartDate string `json:"start_date"`
|
||||||
|
EndDate string `json:"end_date"`
|
||||||
|
Query string `json:"query"`
|
||||||
|
MaxResults int `json:"max_results"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reuse existing validation
|
||||||
|
if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) {
|
||||||
|
http.Error(w, "Invalid date format", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(w).Encode(papers)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) handleProcess(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
Papers []arxiva.Paper `json:"papers,omitempty"` // Optional: Direct paper data
|
||||||
|
InputFile string `json:"input_file,omitempty"` // Optional: Input file path
|
||||||
|
CriteriaFile string `json:"criteria_file,omitempty"` // Optional: Criteria file path
|
||||||
|
Criteria string `json:"criteria,omitempty"` // Optional: Direct criteria text
|
||||||
|
ApiKey string `json:"api_key"` // Required: API key
|
||||||
|
Model string `json:"model,omitempty"` // Optional: Model name
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate required fields
|
||||||
|
if req.CriteriaFile == "" && req.Criteria == "" {
|
||||||
|
http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if req.ApiKey == "" {
|
||||||
|
http.Error(w, "api_key is required", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create processor configuration
|
||||||
|
config := paperprocessor.Config{
|
||||||
|
APIEndpoint: s.apiEndpoint,
|
||||||
|
APIKey: req.ApiKey,
|
||||||
|
Model: req.Model,
|
||||||
|
RequestDelay: 2 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
var inputJSON string
|
||||||
|
|
||||||
|
// Handle direct paper data
|
||||||
|
if len(req.Papers) > 0 {
|
||||||
|
// Create temporary file for paper data
|
||||||
|
tempFile, err := os.CreateTemp("", "papers-*.json")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to create temp file: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer os.Remove(tempFile.Name()) // Clean up temp file
|
||||||
|
|
||||||
|
// Write papers to temp file
|
||||||
|
if err := json.NewEncoder(tempFile).Encode(req.Papers); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to write papers: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
inputJSON = tempFile.Name()
|
||||||
|
} else if req.InputFile != "" {
|
||||||
|
inputJSON = req.InputFile
|
||||||
|
} else {
|
||||||
|
http.Error(w, "either papers or input_file must be provided", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate output filenames
|
||||||
|
timestamp := time.Now().Format("20060102150405")
|
||||||
|
outputJSON := fmt.Sprintf("processed-%s.json", timestamp)
|
||||||
|
outputMD := fmt.Sprintf("processed-%s.md", timestamp)
|
||||||
|
|
||||||
|
// Handle criteria
|
||||||
|
var criteriaFile string
|
||||||
|
if req.Criteria != "" {
|
||||||
|
// Create temporary file for criteria
|
||||||
|
tempFile, err := os.CreateTemp("", "criteria-*.md")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer os.Remove(tempFile.Name()) // Clean up temp file
|
||||||
|
|
||||||
|
// Write criteria to temp file
|
||||||
|
if _, err := tempFile.WriteString(req.Criteria); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to write criteria: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
criteriaFile = tempFile.Name()
|
||||||
|
} else {
|
||||||
|
criteriaFile = req.CriteriaFile
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process the papers
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate job ID and create job
|
||||||
|
jobID := fmt.Sprintf("job-%s", timestamp)
|
||||||
|
job := &ProcessingJob{
|
||||||
|
ID: jobID,
|
||||||
|
Status: "pending",
|
||||||
|
StartTime: time.Now(),
|
||||||
|
JSONPath: outputJSON,
|
||||||
|
MDPath: outputMD,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
s.jobs[jobID] = job
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
|
||||||
|
// Start processing in background
|
||||||
|
go func() {
|
||||||
|
// Process the papers
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Processing failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Formatting failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read markdown content
|
||||||
|
mdContent, err := os.ReadFile(outputMD)
|
||||||
|
if err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Failed to read markdown: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Windows line endings to Unix
|
||||||
|
mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n")
|
||||||
|
// Ensure file ends with newline
|
||||||
|
if !strings.HasSuffix(mdString, "\n") {
|
||||||
|
mdString += "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update job with success
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "completed"
|
||||||
|
job.MarkdownText = mdString
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Return job ID immediately
|
||||||
|
json.NewEncoder(w).Encode(struct {
|
||||||
|
JobID string `json:"job_id"`
|
||||||
|
}{
|
||||||
|
JobID: jobID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) handleJobStatus(w http.ResponseWriter, r *http.Request) {
|
||||||
|
jobID := chi.URLParam(r, "jobID")
|
||||||
|
|
||||||
|
s.jobsMutex.RLock()
|
||||||
|
job, exists := s.jobs[jobID]
|
||||||
|
s.jobsMutex.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, "Job not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
StartTime time.Time `json:"start_time"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
MarkdownText string `json:"markdown_text,omitempty"`
|
||||||
|
}{
|
||||||
|
ID: job.ID,
|
||||||
|
Status: job.Status,
|
||||||
|
StartTime: job.StartTime,
|
||||||
|
Error: job.Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only include markdown text if job is completed
|
||||||
|
if job.Status == "completed" {
|
||||||
|
response.MarkdownText = job.MarkdownText
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(w).Encode(response)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) handleSearchAndProcess(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
StartDate string `json:"start_date"`
|
||||||
|
EndDate string `json:"end_date"`
|
||||||
|
Query string `json:"query"`
|
||||||
|
MaxResults int `json:"max_results"`
|
||||||
|
CriteriaFile string `json:"criteria_file,omitempty"`
|
||||||
|
Criteria string `json:"criteria,omitempty"`
|
||||||
|
ApiKey string `json:"api_key"`
|
||||||
|
Model string `json:"model,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate dates
|
||||||
|
if !IsValidDate(req.StartDate) || !IsValidDate(req.EndDate) {
|
||||||
|
http.Error(w, "Invalid date format", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch papers
|
||||||
|
papers, err := arxiva.FetchPapers(req.StartDate, req.EndDate, req.Query, req.MaxResults)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save papers to temporary JSON file
|
||||||
|
baseFilename := fmt.Sprintf("%s-%s-%s", req.StartDate, req.EndDate, SanitizeFilename(req.Query))
|
||||||
|
inputJSON := baseFilename + ".json"
|
||||||
|
if err := arxiva.SaveToFile(papers, req.StartDate, req.EndDate, req.Query); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to save papers: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create processor configuration
|
||||||
|
config := paperprocessor.Config{
|
||||||
|
APIEndpoint: s.apiEndpoint,
|
||||||
|
APIKey: req.ApiKey,
|
||||||
|
Model: req.Model,
|
||||||
|
RequestDelay: 2 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle criteria
|
||||||
|
var criteriaFile string
|
||||||
|
var tempCriteriaFile string // Track temporary file for cleanup
|
||||||
|
|
||||||
|
if req.Criteria != "" {
|
||||||
|
// Create temporary file for criteria
|
||||||
|
tempFile, err := os.CreateTemp("", "criteria-*.md")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to create temp criteria file: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tempCriteriaFile = tempFile.Name() // Save for cleanup after processing
|
||||||
|
|
||||||
|
// Write criteria to temp file
|
||||||
|
if _, err := tempFile.WriteString(req.Criteria); err != nil {
|
||||||
|
os.Remove(tempCriteriaFile) // Clean up on error
|
||||||
|
http.Error(w, fmt.Sprintf("Failed to write criteria: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
criteriaFile = tempCriteriaFile
|
||||||
|
} else if req.CriteriaFile != "" {
|
||||||
|
criteriaFile = req.CriteriaFile
|
||||||
|
} else {
|
||||||
|
http.Error(w, "either criteria_file or criteria must be provided", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process the papers
|
||||||
|
outputJSON := baseFilename + "-processed.json"
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
if tempCriteriaFile != "" {
|
||||||
|
os.Remove(tempCriteriaFile) // Clean up temp file on error
|
||||||
|
}
|
||||||
|
http.Error(w, fmt.Sprintf("Processing failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
outputMD := baseFilename + "-processed.md"
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
http.Error(w, fmt.Sprintf("Formatting failed: %v", err), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate job ID and create job
|
||||||
|
jobID := fmt.Sprintf("job-%s", baseFilename)
|
||||||
|
job := &ProcessingJob{
|
||||||
|
ID: jobID,
|
||||||
|
Status: "pending",
|
||||||
|
StartTime: time.Now(),
|
||||||
|
JSONPath: outputJSON,
|
||||||
|
MDPath: outputMD,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
s.jobs[jobID] = job
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
|
||||||
|
// Start processing in background
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
if tempCriteriaFile != "" {
|
||||||
|
os.Remove(tempCriteriaFile) // Clean up temp file after processing
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Process the papers
|
||||||
|
if err := paperprocessor.ProcessFile(
|
||||||
|
inputJSON,
|
||||||
|
outputJSON,
|
||||||
|
criteriaFile,
|
||||||
|
config,
|
||||||
|
); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Processing failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format to markdown
|
||||||
|
if err := paperformatter.FormatPapers(outputJSON, outputMD); err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Formatting failed: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read markdown content
|
||||||
|
mdContent, err := os.ReadFile(outputMD)
|
||||||
|
if err != nil {
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "failed"
|
||||||
|
job.Error = fmt.Sprintf("Failed to read markdown: %v", err)
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Windows line endings to Unix
|
||||||
|
mdString := strings.ReplaceAll(string(mdContent), "\r\n", "\n")
|
||||||
|
// Ensure file ends with newline
|
||||||
|
if !strings.HasSuffix(mdString, "\n") {
|
||||||
|
mdString += "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update job with success
|
||||||
|
s.jobsMutex.Lock()
|
||||||
|
job.Status = "completed"
|
||||||
|
job.MarkdownText = mdString
|
||||||
|
s.jobsMutex.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Return job ID immediately
|
||||||
|
json.NewEncoder(w).Encode(struct {
|
||||||
|
JobID string `json:"job_id"`
|
||||||
|
}{
|
||||||
|
JobID: jobID,
|
||||||
|
})
|
||||||
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// dateFormatRE matches exactly eight ASCII digits (YYYYMMDD). Compiled once
// at package init instead of on every IsValidDate call.
var dateFormatRE = regexp.MustCompile(`^\d{8}$`)

// IsValidDate reports whether date is a real calendar date in YYYYMMDD format.
func IsValidDate(date string) bool {
	// Check basic format: exactly eight digits.
	if !dateFormatRE.MatchString(date) {
		return false
	}
	// Parse to reject impossible dates such as month 13 or day 32.
	_, err := time.Parse("20060102", date)
	return err == nil
}
|
||||||
|
|
||||||
|
// SanitizeFilename returns s with filename-unsafe characters (":" and " ")
// replaced by underscores, mirroring arxiva's own filename sanitization so
// generated names match the files arxiva writes.
func SanitizeFilename(s string) string {
	replacer := strings.NewReplacer(":", "_", " ", "_")
	return replacer.Replace(s)
}
|
Loading…
Reference in New Issue