From 4813904fc75994362297c6581d01beb19cd471a4 Mon Sep 17 00:00:00 2001 From: Steve White Date: Wed, 29 Jan 2025 09:25:20 -0600 Subject: [PATCH] Updated README.md and papers to add -search-only and -input options --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ papers.go | 39 ++++++++++++++++++++--- 2 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..b1b9e57 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# Papers + +A Go CLI tool for fetching, processing, and analyzing academic papers from arXiv using LLM-based evaluation. + +## Features + +- Fetch papers from arXiv API based on date range and search query +- Process papers using configurable LLM models (default: phi-4) +- Generate both JSON and Markdown outputs +- Customizable evaluation criteria +- Rate-limited API requests (2-second delay between requests) + +## Installation + +```bash +go install gitea.r8z.us/stwhite/papers@latest +``` + +## Usage + +Basic usage: +```bash +papers -start 20240101 -end 20240131 -query "machine learning" -api-key "your-key" +``` + +With custom model and output paths: +```bash +papers -start 20240101 -end 20240131 -query "machine learning" -api-key "your-key" \ + -model "gpt-4" -json-output "results.json" -md-output "summary.md" +``` + +Fetch papers without processing: +```bash +papers -search-only -start 20240101 -end 20240131 -query "machine learning" +``` + +Use input file: +```bash +papers -input papers.json -api-key "your-key" +``` + +### Required Flags + +- `-start`: Start date (YYYYMMDD format) +- `-end`: End date (YYYYMMDD format) +- `-query`: Search query + +### Optional Flags + +- `-search-only`: Fetch papers from arXiv and save to JSON file without processing +- `-input`: Input JSON file containing papers (optional) +- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100) +- `-model`: LLM model to use for processing (default: "phi-4") +- `-api-endpoint`: API endpoint URL (default: "http://localhost:1234/v1/chat/completions") +- `-criteria`: Path to evaluation criteria markdown file (default: "criteria.md") +- `-json-output`: Custom JSON output file path (default: YYYYMMDD-YYYYMMDD-query.json) +- `-md-output`: Custom Markdown output file path (default: YYYYMMDD-YYYYMMDD-query.md) + +## Pipeline + +1. **Fetch**: Retrieves papers from arXiv based on specified date range and query +2. **Save**: Stores raw paper data in JSON format +3. **Process**: Evaluates papers using the specified LLM model according to criteria +4. **Format**: Generates both JSON and Markdown outputs of the processed results + +## Output Files + +The tool generates two types of output files: + +1. **JSON Output**: Contains the raw processing results + - Default name format: `YYYYMMDD-YYYYMMDD-query.json` + - Can be customized with `-json-output` flag + +2. **Markdown Output**: Human-readable formatted results + - Default name format: `YYYYMMDD-YYYYMMDD-query.md` + - Can be customized with `-md-output` flag + +## Dependencies + +- [arxiva](gitea.r8z.us/stwhite/arxiva): Paper fetching from arXiv +- [paperprocessor](gitea.r8z.us/stwhite/paperprocessor): LLM-based paper processing +- [paperformatter](gitea.r8z.us/stwhite/paperformatter): Output formatting + +## Error Handling + +The tool includes various error checks: +- Date format validation (YYYYMMDD) +- Required flag validation +- Maximum results range validation (1-2000) +- File system operations verification +- API request error handling + +## License + +[License information not provided in source] diff --git a/papers.go b/papers.go index 754f3a5..f47142e 100644 --- a/papers.go +++ b/papers.go @@ -104,9 +104,12 @@ func main() { fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\"\n\n", os.Args[0]) fmt.Fprintf(os.Stderr, " With custom options:\n") fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " Search only:\n") + fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n\n", os.Args[0]) } // Parse command line arguments + searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)") inputFile := flag.String("input", "", "Input JSON file containing papers (optional)") startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)") endDate := flag.String("end", "", "End date in YYYYMMDD format (required if not using -input)") @@ -121,10 +124,38 @@ func main() { flag.Parse() // Validate required flags and input - if *apiKey == "" { - fmt.Fprintf(os.Stderr, "Error: api-key is required\n\n") - flag.Usage() - os.Exit(1) + if *searchOnly { + if *startDate == "" || *endDate == "" || *query == "" { + fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n") + flag.Usage() + os.Exit(1) + } + + // Validate date format + if !isValidDate(*startDate) || !isValidDate(*endDate) { + fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n") + os.Exit(1) + } + + // Validate maxResults range + if *maxResults < 1 || *maxResults > 2000 { + fmt.Fprintf(os.Stderr, "Error: maxResults must be between 1 and 2000\n") + os.Exit(1) + } + + // Fetch papers from arXiv + papers, err := arxiva.FetchPapers(*startDate, *endDate, *query, *maxResults) + if err != nil { + log.Fatalf("Failed to fetch papers: %v", err) + } + + // Save papers to JSON file using the same naming convention + if err := arxiva.SaveToFile(papers, *startDate, *endDate, *query); err != nil { + log.Fatalf("Failed to save papers: %v", err) + } + + log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, sanitizeFilename(*query)) + os.Exit(0) } var (