From be9b9f1ff8c92da5ba6ebc72f8116e73e1a623fc Mon Sep 17 00:00:00 2001
From: Steve White
Date: Sat, 25 Jan 2025 13:53:40 -0600
Subject: [PATCH] Initial Commit.

---
 README.md         | 102 ++++++++++++++++
 go.mod            |   3 +
 paperprocessor.go | 294 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 399 insertions(+)
 create mode 100644 README.md
 create mode 100644 go.mod
 create mode 100644 paperprocessor.go

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f20fd5e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,102 @@
+# Paper Processor
+
+Go package for automated evaluation of academic papers using LLM-based criteria
+
+## Features
+
+- Process multiple papers with configurable API settings
+- Structured evaluation results (accepted/rejected)
+- Rate limiting with request delay configuration
+- File-based processing (JSON input/output)
+- Customizable evaluation criteria
+
+## Installation
+
+```bash
+go get github.com/yourusername/paperprocessor
+```
+
+## Usage
+
+### Basic Configuration
+```go
+import "github.com/yourusername/paperprocessor"
+
+config := paperprocessor.Config{
+    APIEndpoint:  "https://api.llm-provider.com/v1/chat/completions",
+    APIKey:       "your-api-key",
+    Model:        "llm-model-name",
+    RequestDelay: time.Second * 2,
+}
+```
+
+### File Processing
+```go
+err := paperprocessor.ProcessFile(
+    "input/papers.json",
+    "output/results.json",
+    "criteria.txt",
+    config,
+)
+```
+
+## Input Formats
+
+### Papers JSON
+```json
+[
+  {
+    "title": "Paper Title",
+    "abstract": "Paper abstract text...",
+    "arxiv_id": "1234.56789"
+  }
+]
+```
+
+### Criteria File
+```
+Evaluation criteria:
+- Relevance to quantum computing
+- Novelty of approach
+- Technical rigor
+```
+
+## Output Example
+```json
+{
+  "accepted": [
+    {
+      "paper": {
+        "title": "Advanced Quantum Computing Methods",
+        "abstract": "...",
+        "arxiv_id": "2301.12345"
+      },
+      "decision": "ACCEPT",
+      "explanation": "Fulfills all criteria..."
+    }
+  ],
+  "rejected": [
+    {
+      "paper": {
+        "title": "Basic Classical Algorithms",
+        "abstract": "...",
+        "arxiv_id": "2301.67890"
+      },
+      "decision": "REJECT",
+      "explanation": "Doesn't meet novelty requirements..."
+    }
+  ]
+}
+```
+
+## Configuration Options
+
+| Parameter       | Description                          | Default       |
+|-----------------|--------------------------------------|---------------|
+| APIEndpoint     | LLM API endpoint URL                 | Required      |
+| APIKey          | API authentication key               | Required      |
+| Model           | LLM model name                       | Required      |
+| RequestDelay    | Delay between API requests           | 1 second      |
+
+## License
+MIT
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..0275cca
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module paperprocessor
+
+go 1.23.1
diff --git a/paperprocessor.go b/paperprocessor.go
new file mode 100644
index 0000000..686521f
--- /dev/null
+++ b/paperprocessor.go
@@ -0,0 +1,294 @@
+// Package paperprocessor evaluates academic papers against free-form criteria
+// by asking a chat-completions style LLM endpoint for an ACCEPT/REJECT decision
+// on each paper.
+package paperprocessor
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+)
+
+const (
+	// defaultRequestDelay is used when Config.RequestDelay is zero.
+	defaultRequestDelay = time.Second
+
+	// defaultHTTPTimeout bounds a single API call so that a stalled
+	// connection cannot block processing forever.
+	defaultHTTPTimeout = 2 * time.Minute
+
+	// maxErrorBodyLen caps how much of a failed response body is quoted in
+	// the returned error.
+	maxErrorBodyLen = 512
+
+	// Decisions the LLM is asked to put on the first line of its reply.
+	decisionAccept = "ACCEPT"
+	decisionReject = "REJECT"
+
+	// decisionCutset lists decoration (quotes, markdown emphasis, trailing
+	// punctuation) stripped from the decision line before it is compared.
+	decisionCutset = "\"'`*_.:!"
+)
+
+// Paper represents a single academic paper.
+type Paper struct {
+	Title    string `json:"title"`
+	Abstract string `json:"abstract"`
+	ArxivID  string `json:"arxiv_id"`
+}
+
+// PaperResult represents the decision for a single paper.
+type PaperResult struct {
+	Paper       Paper  `json:"paper"`
+	Decision    string `json:"decision"`
+	Explanation string `json:"explanation"`
+}
+
+// ProcessingResult represents the final output structure.
+type ProcessingResult struct {
+	Accepted []PaperResult `json:"accepted"`
+	Rejected []PaperResult `json:"rejected"`
+}
+
+// Config holds the configuration for the processor.
+type Config struct {
+	APIEndpoint  string        // URL the evaluation requests are POSTed to
+	APIKey       string        // sent as a Bearer token
+	Model        string        // model name placed in each request
+	RequestDelay time.Duration // delay between API requests; 0 means 1 second
+}
+
+// Processor handles the paper processing workflow.
+type Processor struct {
+	config Config
+	client *http.Client // shared by all requests so connections are reused
+}
+
+// NewProcessor creates a new processor instance. It returns an error when the
+// API key, API endpoint or model name is empty. A zero RequestDelay is replaced
+// by the one-second default.
+func NewProcessor(config Config) (*Processor, error) {
+	if config.RequestDelay == 0 {
+		config.RequestDelay = defaultRequestDelay
+	}
+
+	// Validate required configuration.
+	switch {
+	case config.APIKey == "":
+		return nil, errors.New("API key is required")
+	case config.APIEndpoint == "":
+		return nil, errors.New("API endpoint is required")
+	case config.Model == "":
+		return nil, errors.New("model name is required")
+	}
+
+	return &Processor{
+		config: config,
+		client: &http.Client{Timeout: defaultHTTPTimeout},
+	}, nil
+}
+
+// ProcessPapers evaluates each paper against criteria, one request at a time,
+// and sorts the results into accepted and rejected lists. It waits
+// RequestDelay between consecutive requests. The first failed evaluation
+// aborts the run and is returned as the error; no partial result is returned.
+func (p *Processor) ProcessPapers(papers []Paper, criteria string) (*ProcessingResult, error) {
+	// Non-nil empty slices make the JSON output contain [] instead of null.
+	result := &ProcessingResult{
+		Accepted: []PaperResult{},
+		Rejected: []PaperResult{},
+	}
+
+	for i, paper := range papers {
+		if i > 0 { // don't delay before the first request
+			time.Sleep(p.config.RequestDelay)
+		}
+
+		decision, err := p.evaluatePaper(paper, criteria)
+		if err != nil {
+			return nil, fmt.Errorf("error processing paper %s: %w", paper.ArxivID, err)
+		}
+
+		paperResult := PaperResult{
+			Paper:       paper,
+			Decision:    decision.Decision,
+			Explanation: decision.Explanation,
+		}
+		if decision.Decision == decisionAccept {
+			result.Accepted = append(result.Accepted, paperResult)
+		} else {
+			result.Rejected = append(result.Rejected, paperResult)
+		}
+	}
+
+	return result, nil
+}
+
+// llmRequest is the JSON body sent to the API endpoint.
+type llmRequest struct {
+	Model    string    `json:"model"`
+	Messages []message `json:"messages"`
+}
+
+// message is a single chat message in an llmRequest.
+type message struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+// llmResponse holds the parts of the API reply that are used: the message
+// content of each returned choice.
+type llmResponse struct {
+	Choices []struct {
+		Message struct {
+			Content string `json:"content"`
+		} `json:"message"`
+	} `json:"choices"`
+}
+
+// decisionResult is the parsed form of one LLM reply.
+type decisionResult struct {
+	Decision    string
+	Explanation string
+}
+
+// evaluatePaper asks the LLM to judge one paper against criteria and returns
+// the parsed decision. It fails on transport errors, non-2xx HTTP statuses,
+// undecodable bodies, replies without choices and unrecognized decisions.
+func (p *Processor) evaluatePaper(paper Paper, criteria string) (*decisionResult, error) {
+	prompt := fmt.Sprintf(`Please evaluate the following academic paper against the provided criteria.
+Respond with either "ACCEPT" or "REJECT" followed by a brief explanation of your decision.
+Your response should be in the format:
+DECISION
+Explanation
+
+Criteria:
+%s
+
+Paper Title: %s
+Abstract: %s`, criteria, paper.Title, paper.Abstract)
+
+	reqJSON, err := json.Marshal(llmRequest{
+		Model:    p.config.Model,
+		Messages: []message{{Role: "user", Content: prompt}},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("error marshaling request: %w", err)
+	}
+
+	req, err := http.NewRequest(http.MethodPost, p.config.APIEndpoint, bytes.NewReader(reqJSON))
+	if err != nil {
+		return nil, fmt.Errorf("error creating request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+p.config.APIKey)
+
+	client := p.client
+	if client == nil { // Processor was not built by NewProcessor
+		client = &http.Client{Timeout: defaultHTTPTimeout}
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("error making request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading response: %w", err)
+	}
+
+	// An error status usually carries an error document rather than
+	// choices; report it instead of failing later with a vaguer message.
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		detail := truncate(strings.TrimSpace(string(body)), maxErrorBodyLen)
+		return nil, fmt.Errorf("unexpected API status %s: %s", resp.Status, detail)
+	}
+
+	var llmResp llmResponse
+	if err := json.Unmarshal(body, &llmResp); err != nil {
+		return nil, fmt.Errorf("error unmarshaling response: %w", err)
+	}
+	if len(llmResp.Choices) == 0 {
+		return nil, errors.New("no response from LLM")
+	}
+
+	return parseDecision(llmResp.Choices[0].Message.Content)
+}
+
+// parseDecision splits an LLM reply into its decision (first non-blank line)
+// and explanation (everything after it). The decision is matched
+// case-insensitively after stripping surrounding decoration and must be
+// ACCEPT or REJECT; anything else is an error rather than a silent rejection.
+func parseDecision(content string) (*decisionResult, error) {
+	first, rest, _ := strings.Cut(strings.TrimSpace(content), "\n")
+	first = strings.TrimSpace(first)
+
+	decision := strings.ToUpper(strings.Trim(first, decisionCutset))
+	if decision != decisionAccept && decision != decisionReject {
+		return nil, fmt.Errorf("invalid response format: unrecognized decision %q", first)
+	}
+
+	return &decisionResult{
+		Decision:    decision,
+		Explanation: strings.TrimSpace(rest),
+	}, nil
+}
+
+// truncate shortens s to at most limit bytes, marking the cut with "...".
+func truncate(s string, limit int) string {
+	if len(s) <= limit {
+		return s
+	}
+	return strings.ToValidUTF8(s[:limit], "") + "..."
+}
+
+// ProcessFile reads papers (a JSON array) from inputPath and criteria text from
+// criteriaPath, evaluates every paper, and writes the indented JSON result to
+// outputPath.
+func ProcessFile(inputPath, outputPath, criteriaPath string, config Config) error {
+	// Read input papers.
+	inputData, err := os.ReadFile(inputPath)
+	if err != nil {
+		return fmt.Errorf("error reading input file: %w", err)
+	}
+
+	var papers []Paper
+	if err := json.Unmarshal(inputData, &papers); err != nil {
+		return fmt.Errorf("error parsing input JSON: %w", err)
+	}
+
+	// Read criteria.
+	criteriaData, err := os.ReadFile(criteriaPath)
+	if err != nil {
+		return fmt.Errorf("error reading criteria file: %w", err)
+	}
+
+	// Process papers.
+	processor, err := NewProcessor(config)
+	if err != nil {
+		return fmt.Errorf("error creating processor: %w", err)
+	}
+	result, err := processor.ProcessPapers(papers, string(criteriaData))
+	if err != nil {
+		return fmt.Errorf("error processing papers: %w", err)
+	}
+
+	// Write results.
+	outputData, err := json.MarshalIndent(result, "", " ")
+	if err != nil {
+		return fmt.Errorf("error marshaling output JSON: %w", err)
+	}
+
+	if err := os.WriteFile(outputPath, outputData, 0644); err != nil {
+		return fmt.Errorf("error writing output file: %w", err)
+	}
+
+	return nil
+}