Compare commits
No commits in common. "4813904fc75994362297c6581d01beb19cd471a4" and "175a4f57c6a13904a541c8e5129b51a1bf72c796" have entirely different histories.

README.md (95 lines removed)

# Papers

A Go CLI tool for fetching, processing, and analyzing academic papers from arXiv using LLM-based evaluation.

## Features

- Fetch papers from arXiv API based on date range and search query
- Process papers using configurable LLM models (default: phi-4)
- Generate both JSON and Markdown outputs
- Customizable evaluation criteria
- Rate-limited API requests (2-second delay between requests; see the sketch after this list)

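The 2-second spacing matches the `RequestDelay: 2 * time.Second` value that papers.go passes to paperprocessor (visible in the diff further down). A minimal, self-contained sketch of the idea follows; the helper here is illustrative only and is not part of papers, arxiva, or paperprocessor:

```go
package main

import (
    "fmt"
    "time"
)

// rateLimited invokes fn for each item, sleeping for delay between calls.
// Illustrative sketch; the real throttling lives inside paperprocessor.
func rateLimited(items []string, delay time.Duration, fn func(string)) {
    for i, item := range items {
        if i > 0 {
            time.Sleep(delay) // fixed spacing between API requests
        }
        fn(item)
    }
}

func main() {
    ids := []string{"2401.00001", "2401.00002", "2401.00003"} // placeholder arXiv IDs
    rateLimited(ids, 2*time.Second, func(id string) {
        fmt.Println("processing", id) // stand-in for one LLM evaluation request
    })
}
```
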
## Installation

```bash
go install gitea.r8z.us/stwhite/papers@latest
```

## Usage

Basic usage:

```bash
papers -start 20240101 -end 20240131 -query "machine learning" -api-key "your-key"
```

With custom model and output paths:

```bash
papers -start 20240101 -end 20240131 -query "machine learning" -api-key "your-key" \
  -model "gpt-4" -json-output "results.json" -md-output "summary.md"
```

Fetch papers without processing:

```bash
papers -search-only -start 20240101 -end 20240131 -query "machine learning"
```

Use an input file instead of fetching:

```bash
papers -input papers.json -api-key "your-key"
```

### Required Flags

- `-start`: Start date (YYYYMMDD format)
- `-end`: End date (YYYYMMDD format)
- `-query`: Search query
- `-api-key`: API key for the LLM service (not needed with `-search-only`)

### Optional Flags

- `-search-only`: Fetch papers from arXiv and save to JSON file without processing
- `-input`: Input JSON file containing papers (optional)
- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100)
- `-model`: LLM model to use for processing (default: "phi-4")
- `-api-endpoint`: API endpoint URL (default: "http://localhost:1234/v1/chat/completions")
- `-criteria`: Path to evaluation criteria markdown file (default: "criteria.md")
- `-json-output`: Custom JSON output file path (default: YYYYMMDD-YYYYMMDD-query.json)
- `-md-output`: Custom Markdown output file path (default: YYYYMMDD-YYYYMMDD-query.md)

## Pipeline

1. **Fetch**: Retrieves papers from arXiv based on specified date range and query
2. **Save**: Stores raw paper data in JSON format
3. **Process**: Evaluates papers using the specified LLM model according to criteria
4. **Format**: Generates both JSON and Markdown outputs of the processed results

A condensed sketch of steps 1-3 follows below.

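For orientation, this sketch strings together the fetch, save, and process calls exactly as they appear in the papers.go diff below. The final formatting step via paperformatter is omitted because its call is not shown there, only the two `Config` fields visible in the diff are set, and the start/end/query values are placeholders:

```go
package main

import (
    "fmt"
    "log"
    "time"

    "gitea.r8z.us/stwhite/arxiva"
    "gitea.r8z.us/stwhite/paperprocessor"
)

func main() {
    start, end, query := "20240101", "20240131", "machine learning"

    // 1. Fetch: retrieve papers for the date range and query.
    papers, err := arxiva.FetchPapers(start, end, query, 100)
    if err != nil {
        log.Fatalf("fetch: %v", err)
    }

    // 2. Save: write the raw papers to YYYYMMDD-YYYYMMDD-query.json.
    if err := arxiva.SaveToFile(papers, start, end, query); err != nil {
        log.Fatalf("save: %v", err)
    }

    // 3. Process: evaluate the saved file against criteria.md with the LLM.
    // papers.go sanitizes the query before building this filename; the real
    // program also passes the API key and model from flags, but those Config
    // field names are not visible in the diff, so they are left out here.
    config := paperprocessor.Config{
        APIEndpoint:  "http://localhost:1234/v1/chat/completions",
        RequestDelay: 2 * time.Second,
    }
    input := fmt.Sprintf("%s-%s-%s.json", start, end, query)
    if err := paperprocessor.ProcessFile(input, "results.json", "criteria.md", config); err != nil {
        log.Fatalf("process: %v", err)
    }
}
```
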
## Output Files

The tool generates two types of output files:

1. **JSON Output**: Contains the raw processing results
   - Default name format: `YYYYMMDD-YYYYMMDD-query.json`
   - Can be customized with `-json-output` flag

2. **Markdown Output**: Human-readable formatted results
   - Default name format: `YYYYMMDD-YYYYMMDD-query.md`
   - Can be customized with `-md-output` flag

How the default base name is derived is sketched below.

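The default base name combines the start date, end date, and a sanitized copy of the query. Only the `:` replacement inside `sanitizeFilename` is visible in the diff further down, so the sketch below mirrors just that part; the real function may normalize other characters as well:

```go
package main

import (
    "fmt"
    "strings"
)

// sanitizeQuery mirrors the ':' replacement visible in papers.go's
// sanitizeFilename; the full implementation is not shown in the diff.
func sanitizeQuery(s string) string {
    return strings.ReplaceAll(s, ":", "_")
}

func main() {
    start, end, query := "20240101", "20240131", "cat:cs.LG"
    base := fmt.Sprintf("%s-%s-%s", start, end, sanitizeQuery(query))
    fmt.Println(base + ".json") // 20240101-20240131-cat_cs.LG.json
    fmt.Println(base + ".md")   // 20240101-20240131-cat_cs.LG.md
}
```
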
## Dependencies

- [arxiva](https://gitea.r8z.us/stwhite/arxiva): Paper fetching from arXiv
- [paperprocessor](https://gitea.r8z.us/stwhite/paperprocessor): LLM-based paper processing
- [paperformatter](https://gitea.r8z.us/stwhite/paperformatter): Output formatting

## Error Handling

The tool includes various error checks:

- Date format validation (YYYYMMDD)
- Required flag validation
- Maximum results range validation (1-2000)
- File system operations verification
- API request error handling

A minimal sketch of the date and range checks follows below.

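The `isValidDate` implementation is not shown in the diff, so this sketch uses an assumed `^\d{8}$` pattern for the YYYYMMDD check and pairs it with the 1-2000 range check that does appear in papers.go:

```go
package main

import (
    "fmt"
    "regexp"
)

// yyyymmdd is an assumed stand-in for the pattern isValidDate might use;
// it checks the shape of the string only, not calendar validity.
var yyyymmdd = regexp.MustCompile(`^\d{8}$`)

func isValidDate(s string) bool {
    return yyyymmdd.MatchString(s)
}

func validateArgs(start, end string, maxResults int) error {
    if !isValidDate(start) || !isValidDate(end) {
        return fmt.Errorf("dates must be in YYYYMMDD format")
    }
    if maxResults < 1 || maxResults > 2000 {
        return fmt.Errorf("maxResults must be between 1 and 2000")
    }
    return nil
}

func main() {
    if err := validateArgs("20240101", "20240131", 100); err != nil {
        fmt.Println("error:", err)
        return
    }
    fmt.Println("arguments look valid")
}
```
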
## License

[License information not provided in source]

papers.go (201 changed lines)

@@ -1,10 +1,8 @@
 package main
 
 import (
-    "encoding/json"
     "flag"
     "fmt"
-    "io"
     "log"
     "os"
     "regexp"
@@ -16,47 +14,6 @@ import (
     "gitea.r8z.us/stwhite/paperprocessor"
 )
 
-// Paper represents the expected structure of papers in the input JSON file
-type Paper struct {
-    Title    string `json:"title"`
-    Abstract string `json:"abstract"`
-    ArxivID  string `json:"arxiv_id"`
-}
-
-// validateInputFile checks if the input file exists and has valid JSON structure
-func validateInputFile(path string) ([]Paper, error) {
-    file, err := os.Open(path)
-    if err != nil {
-        return nil, fmt.Errorf("failed to open input file: %v", err)
-    }
-    defer file.Close()
-
-    content, err := io.ReadAll(file)
-    if err != nil {
-        return nil, fmt.Errorf("failed to read input file: %v", err)
-    }
-
-    var papers []Paper
-    if err := json.Unmarshal(content, &papers); err != nil {
-        return nil, fmt.Errorf("invalid JSON format: %v", err)
-    }
-
-    // Validate required fields
-    for i, paper := range papers {
-        if paper.Title == "" {
-            return nil, fmt.Errorf("paper at index %d missing title", i)
-        }
-        if paper.Abstract == "" {
-            return nil, fmt.Errorf("paper at index %d missing abstract", i)
-        }
-        if paper.ArxivID == "" {
-            return nil, fmt.Errorf("paper at index %d missing arxiv_id", i)
-        }
-    }
-
-    return papers, nil
-}
-
 // sanitizeFilename replaces invalid filename characters to match arxiva's sanitization
 func sanitizeFilename(s string) string {
     s = strings.ReplaceAll(s, ":", "_")
@@ -82,38 +39,31 @@ func main() {
     flag.Usage = func() {
         fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0])
         fmt.Fprintf(os.Stderr, "Description:\n")
-        fmt.Fprintf(os.Stderr, " Fetches papers from arXiv (or uses input file), processes them using an LLM, and generates both JSON and Markdown outputs.\n\n")
+        fmt.Fprintf(os.Stderr, " Fetches papers from arXiv, processes them using an LLM, and generates both JSON and Markdown outputs.\n\n")
         fmt.Fprintf(os.Stderr, "Pipeline:\n")
-        fmt.Fprintf(os.Stderr, " 1. Either:\n")
-        fmt.Fprintf(os.Stderr, " a) Fetches papers from arXiv based on date range and query, or\n")
-        fmt.Fprintf(os.Stderr, " b) Uses papers from provided input file\n")
-        fmt.Fprintf(os.Stderr, " 2. Processes papers using specified LLM model\n")
-        fmt.Fprintf(os.Stderr, " 3. Formats results to both JSON and Markdown\n\n")
+        fmt.Fprintf(os.Stderr, " 1. Fetches papers from arXiv based on date range and query\n")
+        fmt.Fprintf(os.Stderr, " 2. Saves raw papers to JSON (format: YYYYMMDD-YYYYMMDD-query.json)\n")
+        fmt.Fprintf(os.Stderr, " 3. Processes papers using specified LLM model\n")
+        fmt.Fprintf(os.Stderr, " 4. Formats results to both JSON and Markdown\n\n")
         fmt.Fprintf(os.Stderr, "Required flags:\n")
-        fmt.Fprintf(os.Stderr, " -api-key : API key for LLM service\n\n")
-        fmt.Fprintf(os.Stderr, "Required for arXiv fetching (if not using -input):\n")
         fmt.Fprintf(os.Stderr, " -start : Start date (YYYYMMDD)\n")
         fmt.Fprintf(os.Stderr, " -end : End date (YYYYMMDD)\n")
-        fmt.Fprintf(os.Stderr, " -query : Search query\n\n")
+        fmt.Fprintf(os.Stderr, " -query : Search query\n")
+        fmt.Fprintf(os.Stderr, " -api-key : API key for LLM service\n\n")
         fmt.Fprintf(os.Stderr, "Options:\n")
         flag.PrintDefaults()
         fmt.Fprintf(os.Stderr, "\nExamples:\n")
-        fmt.Fprintf(os.Stderr, " Using arXiv:\n")
+        fmt.Fprintf(os.Stderr, " Basic usage:\n")
         fmt.Fprintf(os.Stderr, " %s -start 20240101 -end 20240131 -query \"machine learning\" -api-key \"your-key\"\n\n", os.Args[0])
-        fmt.Fprintf(os.Stderr, " Using input file:\n")
-        fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\"\n\n", os.Args[0])
-        fmt.Fprintf(os.Stderr, " With custom options:\n")
-        fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0])
-        fmt.Fprintf(os.Stderr, " Search only:\n")
-        fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n\n", os.Args[0])
+        fmt.Fprintf(os.Stderr, " With custom model and outputs:\n")
+        fmt.Fprintf(os.Stderr, " %s -start 20240101 -end 20240131 -query \"machine learning\" -api-key \"your-key\" \\\n", os.Args[0])
+        fmt.Fprintf(os.Stderr, " -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n")
     }
 
     // Parse command line arguments
-    searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)")
-    inputFile := flag.String("input", "", "Input JSON file containing papers (optional)")
-    startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)")
-    endDate := flag.String("end", "", "End date in YYYYMMDD format (required if not using -input)")
-    query := flag.String("query", "", "Search query (required if not using -input)")
+    startDate := flag.String("start", "", "Start date in YYYYMMDD format")
+    endDate := flag.String("end", "", "End date in YYYYMMDD format")
+    query := flag.String("query", "", "Search query")
     maxResults := flag.Int("maxResults", 100, "Maximum number of results (1-2000)")
     model := flag.String("model", "phi-4", "Model to use for processing")
     apiKey := flag.String("api-key", "", "API key for service authentication")
@@ -123,10 +73,20 @@ func main() {
     mdOutput := flag.String("md-output", "", "Markdown output file path (default: YYYYMMDD-YYYYMMDD-query.md)")
     flag.Parse()
 
-    // Validate required flags and input
-    if *searchOnly {
-        if *startDate == "" || *endDate == "" || *query == "" {
-            fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n")
+    // Generate base filename from parameters with sanitization
+    baseFilename := fmt.Sprintf("%s-%s-%s", *startDate, *endDate, sanitizeFilename(*query))
+
+    // Set default output filenames if not provided
+    if *jsonOutput == "" {
+        *jsonOutput = baseFilename + ".json"
+    }
+    if *mdOutput == "" {
+        *mdOutput = baseFilename + ".md"
+    }
+
+    // Validate required flags
+    if *startDate == "" || *endDate == "" || *query == "" || *apiKey == "" {
+        fmt.Fprintf(os.Stderr, "Error: start date, end date, query, and api-key are required\n\n")
         flag.Usage()
         os.Exit(1)
     }
@@ -143,83 +103,6 @@ func main() {
         os.Exit(1)
     }
 
-        // Fetch papers from arXiv
-        papers, err := arxiva.FetchPapers(*startDate, *endDate, *query, *maxResults)
-        if err != nil {
-            log.Fatalf("Failed to fetch papers: %v", err)
-        }
-
-        // Save papers to JSON file using the same naming convention
-        if err := arxiva.SaveToFile(papers, *startDate, *endDate, *query); err != nil {
-            log.Fatalf("Failed to save papers: %v", err)
-        }
-
-        log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, sanitizeFilename(*query))
-        os.Exit(0)
-    }
-
-    var (
-        papers       []arxiva.Paper
-        err          error
-        baseFilename string
-    )
-
-    if *inputFile != "" {
-        // Use input file
-        inputPapers, err := validateInputFile(*inputFile)
-        if err != nil {
-            log.Fatalf("Invalid input file: %v", err)
-        }
-
-        // Convert input papers to arxiva.Paper format
-        papers = make([]arxiva.Paper, len(inputPapers))
-        for i, p := range inputPapers {
-            papers[i] = arxiva.Paper{
-                Title:    p.Title,
-                Abstract: p.Abstract,
-                ArxivID:  p.ArxivID,
-            }
-        }
-
-        // Use input filename as base for outputs
-        baseFilename = *inputFile
-        if ext := ".json"; strings.HasSuffix(baseFilename, ext) {
-            baseFilename = baseFilename[:len(baseFilename)-len(ext)]
-        }
-    } else {
-        // Validate arXiv fetching parameters
-        if *startDate == "" || *endDate == "" || *query == "" {
-            fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when not using -input\n\n")
-            flag.Usage()
-            os.Exit(1)
-        }
-
-        // Validate date format
-        if !isValidDate(*startDate) || !isValidDate(*endDate) {
-            fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
-            os.Exit(1)
-        }
-
-        // Validate maxResults range
-        if *maxResults < 1 || *maxResults > 2000 {
-            fmt.Fprintf(os.Stderr, "Error: maxResults must be between 1 and 2000\n")
-            os.Exit(1)
-        }
-
-        // Fetch papers from arXiv
-        papers, err = arxiva.FetchPapers(*startDate, *endDate, *query, *maxResults)
-        if err != nil {
-            log.Fatalf("Failed to fetch papers: %v", err)
-        }
-
-        // Save papers to JSON file using the same naming convention
-        if err := arxiva.SaveToFile(papers, *startDate, *endDate, *query); err != nil {
-            log.Fatalf("Failed to save papers: %v", err)
-        }
-
-        baseFilename = fmt.Sprintf("%s-%s-%s", *startDate, *endDate, sanitizeFilename(*query))
-    }
-
     // Create processor configuration
     config := paperprocessor.Config{
         APIEndpoint: *apiEndpoint,
@@ -228,24 +111,26 @@ func main() {
         RequestDelay: 2 * time.Second,
     }
 
-    // Get criteria filename without extension for output naming
-    criteriaBase := *criteriaFile
-    if ext := ".md"; strings.HasSuffix(criteriaBase, ext) {
-        criteriaBase = criteriaBase[:len(criteriaBase)-len(ext)]
+    // Fetch papers using command line args
+    papers, err := arxiva.FetchPapers(*startDate, *endDate, *query, *maxResults)
+    if err != nil {
+        log.Fatalf("Failed to fetch papers: %v", err)
     }
 
-    // Set default output filenames if not provided
-    if *jsonOutput == "" {
-        *jsonOutput = fmt.Sprintf("%s-%s.json", baseFilename, criteriaBase)
-    }
-    if *mdOutput == "" {
-        *mdOutput = fmt.Sprintf("%s-%s.md", baseFilename, criteriaBase)
+    // Save papers to JSON file using the same naming convention
+    if err := arxiva.SaveToFile(papers, *startDate, *endDate, *query); err != nil {
+        log.Fatalf("Failed to save papers: %v", err)
     }
 
-    // Process the papers
-    inputJson := baseFilename + ".json"
+    // Wait briefly for file system to sync and verify file exists
+    time.Sleep(100 * time.Millisecond)
+    if _, err := os.Stat(baseFilename + ".json"); os.IsNotExist(err) {
+        log.Fatalf("Failed to find saved papers file: %s", baseFilename+".json")
+    }
+
+    // Process the saved file using the base filename
     if err := paperprocessor.ProcessFile(
-        inputJson,
+        baseFilename+".json",
         *jsonOutput,
         *criteriaFile,
         config,