Updated README.md and papers to add -search-only and -input options
This commit is contained in:
parent
5983a9b8ac
commit
4813904fc7
|
@ -0,0 +1,95 @@
|
|||
# Papers
|
||||
|
||||
A Go CLI tool for fetching, processing, and analyzing academic papers from arXiv using LLM-based evaluation.
|
||||
|
||||
## Features
|
||||
|
||||
- Fetch papers from arXiv API based on date range and search query
|
||||
- Process papers using configurable LLM models (default: phi-4)
|
||||
- Generate both JSON and Markdown outputs
|
||||
- Customizable evaluation criteria
|
||||
- Rate-limited API requests (2-second delay between requests)
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
go install gitea.r8z.us/stwhite/papers@latest
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Basic usage:
|
||||
```bash
|
||||
papers -start 20240101 -end 20240131 -query "machine learning" -api-key "your-key"
|
||||
```
|
||||
|
||||
With custom model and output paths:
|
||||
```bash
|
||||
papers -start 20240101 -end 20240131 -query "machine learning" -api-key "your-key" \
|
||||
-model "gpt-4" -json-output "results.json" -md-output "summary.md"
|
||||
```
|
||||
|
||||
Fetch papers without processing:
|
||||
```bash
|
||||
papers -search-only -start 20240101 -end 20240131 -query "machine learning"
|
||||
```
|
||||
|
||||
Process papers from a previously saved JSON input file (skips the arXiv fetch):
|
||||
```bash
|
||||
papers -input papers.json -api-key "your-key"
|
||||
```
|
||||
|
||||
### Required Flags
|
||||
|
||||
- `-start`: Start date (YYYYMMDD format; required unless using `-input`)
|
||||
- `-end`: End date (YYYYMMDD format; required unless using `-input`)
|
||||
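- `-query`: Search query (required unless using `-input`)
- `-api-key`: API key for the LLM API (required when processing papers; not needed with `-search-only`)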
|
||||
|
||||
### Optional Flags
|
||||
|
||||
- `-search-only`: Fetch papers from arXiv and save to JSON file without processing
|
||||
- `-input`: Input JSON file containing papers; when provided, `-start`, `-end`, and `-query` are not required
|
||||
- `-maxResults`: Maximum number of results to fetch (1-2000, default: 100)
|
||||
- `-model`: LLM model to use for processing (default: "phi-4")
|
||||
- `-api-endpoint`: API endpoint URL (default: "http://localhost:1234/v1/chat/completions")
|
||||
- `-criteria`: Path to evaluation criteria markdown file (default: "criteria.md")
|
||||
- `-json-output`: Custom JSON output file path (default: YYYYMMDD-YYYYMMDD-query.json)
|
||||
- `-md-output`: Custom Markdown output file path (default: YYYYMMDD-YYYYMMDD-query.md)
|
||||
|
||||
## Pipeline
|
||||
|
||||
1. **Fetch**: Retrieves papers from arXiv based on specified date range and query
|
||||
2. **Save**: Stores raw paper data in JSON format
|
||||
3. **Process**: Evaluates papers using the specified LLM model according to criteria
|
||||
4. **Format**: Generates both JSON and Markdown outputs of the processed results
|
||||
|
||||
## Output Files
|
||||
|
||||
The tool generates two types of output files:
|
||||
|
||||
1. **JSON Output**: Contains the raw processing results
|
||||
   - Default name format: `YYYYMMDD-YYYYMMDD-query.json`
|
||||
   - Can be customized with `-json-output` flag
|
||||
|
||||
2. **Markdown Output**: Human-readable formatted results
|
||||
   - Default name format: `YYYYMMDD-YYYYMMDD-query.md`
|
||||
   - Can be customized with `-md-output` flag
|
||||
|
||||
## Dependencies
|
||||
|
||||
- [arxiva](https://gitea.r8z.us/stwhite/arxiva): Paper fetching from arXiv
|
||||
- [paperprocessor](https://gitea.r8z.us/stwhite/paperprocessor): LLM-based paper processing
|
||||
- [paperformatter](https://gitea.r8z.us/stwhite/paperformatter): Output formatting
|
||||
|
||||
## Error Handling
|
||||
|
||||
The tool includes various error checks:
|
||||
- Date format validation (YYYYMMDD)
|
||||
- Required flag validation
|
||||
- Maximum results range validation (1-2000)
|
||||
- File system operations verification
|
||||
- API request error handling
|
||||
|
||||
## License
|
||||
|
||||
[License information not provided in source]
|
39
papers.go
39
papers.go
|
@ -104,9 +104,12 @@ func main() {
|
|||
fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\"\n\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " With custom options:\n")
|
||||
fmt.Fprintf(os.Stderr, " %s -input papers.json -api-key \"your-key\" -model \"gpt-4\" -json-output \"results.json\" -md-output \"summary.md\"\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " Search only:\n")
|
||||
fmt.Fprintf(os.Stderr, " %s -search-only -start 20240101 -end 20240131 -query \"machine learning\" \n\n", os.Args[0])
|
||||
}
|
||||
|
||||
// Parse command line arguments
|
||||
searchOnly := flag.Bool("search-only", false, "Only fetch papers from arXiv and save to JSON file (do not process)")
|
||||
inputFile := flag.String("input", "", "Input JSON file containing papers (optional)")
|
||||
startDate := flag.String("start", "", "Start date in YYYYMMDD format (required if not using -input)")
|
||||
endDate := flag.String("end", "", "End date in YYYYMMDD format (required if not using -input)")
|
||||
|
@ -121,10 +124,38 @@ func main() {
|
|||
flag.Parse()
|
||||
|
||||
// Validate required flags and input
|
||||
if *apiKey == "" {
|
||||
fmt.Fprintf(os.Stderr, "Error: api-key is required\n\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
if *searchOnly {
|
||||
if *startDate == "" || *endDate == "" || *query == "" {
|
||||
fmt.Fprintf(os.Stderr, "Error: start date, end date, and query are required when using -search-only\n\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Validate date format
|
||||
if !isValidDate(*startDate) || !isValidDate(*endDate) {
|
||||
fmt.Fprintf(os.Stderr, "Error: dates must be in YYYYMMDD format\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Validate maxResults range
|
||||
if *maxResults < 1 || *maxResults > 2000 {
|
||||
fmt.Fprintf(os.Stderr, "Error: maxResults must be between 1 and 2000\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Fetch papers from arXiv
|
||||
papers, err := arxiva.FetchPapers(*startDate, *endDate, *query, *maxResults)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to fetch papers: %v", err)
|
||||
}
|
||||
|
||||
// Save papers to JSON file using the same naming convention
|
||||
if err := arxiva.SaveToFile(papers, *startDate, *endDate, *query); err != nil {
|
||||
log.Fatalf("Failed to save papers: %v", err)
|
||||
}
|
||||
|
||||
log.Printf("Successfully fetched and saved papers to %s-%s-%s.json", *startDate, *endDate, sanitizeFilename(*query))
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
Loading…
Reference in New Issue