Initial Commit.
This commit is contained in:
commit
be9b9f1ff8
|
@ -0,0 +1,102 @@
|
|||
# Paper Processor
|
||||
|
||||
A Go package that uses an LLM to automatically evaluate academic papers against user-defined criteria.
|
||||
|
||||
## Features
|
||||
|
||||
- Process multiple papers with configurable API settings
|
||||
- Structured evaluation results (accepted/rejected)
|
||||
- Rate limiting with request delay configuration
|
||||
- File-based processing (JSON input/output)
|
||||
- Customizable evaluation criteria
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
go get github.com/yourusername/paperprocessor
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Configuration
|
||||
```go
|
||||
import (
	"time"

	"github.com/yourusername/paperprocessor"
)
|
||||
|
||||
config := paperprocessor.Config{
|
||||
APIEndpoint: "https://api.llm-provider.com/v1/chat/completions",
|
||||
APIKey: "your-api-key",
|
||||
Model: "llm-model-name",
|
||||
RequestDelay: time.Second * 2,
|
||||
}
|
||||
```
|
||||
|
||||
### File Processing
|
||||
```go
|
||||
err := paperprocessor.ProcessFile(
|
||||
"input/papers.json",
|
||||
"output/results.json",
|
||||
"criteria.txt",
|
||||
config,
|
||||
)
|
||||
```
|
||||
|
||||
## Input Formats
|
||||
|
||||
### Papers JSON
|
||||
```json
|
||||
[
|
||||
{
|
||||
"title": "Paper Title",
|
||||
"abstract": "Paper abstract text...",
|
||||
"arxiv_id": "1234.56789"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Criteria File
|
||||
```
|
||||
Evaluation criteria:
|
||||
- Relevance to quantum computing
|
||||
- Novelty of approach
|
||||
- Technical rigor
|
||||
```
|
||||
|
||||
## Output Example
|
||||
```json
|
||||
{
|
||||
"accepted": [
|
||||
{
|
||||
"paper": {
|
||||
"title": "Advanced Quantum Computing Methods",
|
||||
"abstract": "...",
|
||||
"arxiv_id": "2301.12345"
|
||||
},
|
||||
"decision": "ACCEPT",
|
||||
"explanation": "Fulfills all criteria..."
|
||||
}
|
||||
],
|
||||
"rejected": [
|
||||
{
|
||||
"paper": {
|
||||
"title": "Basic Classical Algorithms",
|
||||
"abstract": "...",
|
||||
"arxiv_id": "2301.67890"
|
||||
},
|
||||
"decision": "REJECT",
|
||||
"explanation": "Doesn't meet novelty requirements..."
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration Options
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------------|--------------------------------------|---------------|
|
||||
| APIEndpoint | LLM API endpoint URL | Required |
|
||||
| APIKey | API authentication key | Required |
|
||||
| Model | LLM model name | Required |
|
||||
| RequestDelay | Delay between API requests | 1 second |
|
||||
|
||||
## License
|
||||
MIT
|
|
@ -0,0 +1,236 @@
|
|||
package paperprocessor
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Paper describes one academic paper submitted for evaluation. Papers are
// decoded from the input JSON and copied unchanged into each PaperResult.
type Paper struct {
	// Title is the paper title; it is included in the evaluation prompt.
	Title string `json:"title"`
	// Abstract is the abstract text; it is included in the evaluation prompt.
	Abstract string `json:"abstract"`
	// ArxivID identifies the paper in error messages.
	ArxivID string `json:"arxiv_id"`
}
|
||||
|
||||
// PaperResult represents the decision for a single paper
|
||||
type PaperResult struct {
|
||||
Paper Paper `json:"paper"`
|
||||
Decision string `json:"decision"`
|
||||
Explanation string `json:"explanation"`
|
||||
}
|
||||
|
||||
// ProcessingResult represents the final output structure
|
||||
type ProcessingResult struct {
|
||||
Accepted []PaperResult `json:"accepted"`
|
||||
Rejected []PaperResult `json:"rejected"`
|
||||
}
|
||||
|
||||
// Config carries the settings a Processor needs to reach the LLM API.
type Config struct {
	// APIEndpoint is the URL evaluation requests are POSTed to. Required.
	APIEndpoint string
	// APIKey is sent as a Bearer token in the Authorization header. Required.
	APIKey string
	// Model is the value of the "model" field in each request. Required.
	Model string
	// RequestDelay is the pause between consecutive API requests. NewProcessor
	// replaces a zero value with a default of one second.
	RequestDelay time.Duration
}
|
||||
|
||||
// Processor handles the paper processing workflow
|
||||
type Processor struct {
|
||||
config Config
|
||||
}
|
||||
|
||||
// NewProcessor creates a new processor instance
|
||||
func NewProcessor(config Config) (*Processor, error) {
|
||||
// Set default delay if not specified
|
||||
if config.RequestDelay == 0 {
|
||||
config.RequestDelay = time.Second // Default 1 second delay
|
||||
}
|
||||
|
||||
// Validate required configuration
|
||||
if config.APIKey == "" {
|
||||
return nil, fmt.Errorf("API key is required")
|
||||
}
|
||||
if config.APIEndpoint == "" {
|
||||
return nil, fmt.Errorf("API endpoint is required")
|
||||
}
|
||||
if config.Model == "" {
|
||||
return nil, fmt.Errorf("model name is required")
|
||||
}
|
||||
|
||||
return &Processor{
|
||||
config: config,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ProcessPapers processes a list of papers against given criteria
|
||||
func (p *Processor) ProcessPapers(papers []Paper, criteria string) (*ProcessingResult, error) {
|
||||
result := &ProcessingResult{
|
||||
Accepted: make([]PaperResult, 0),
|
||||
Rejected: make([]PaperResult, 0),
|
||||
}
|
||||
|
||||
for i, paper := range papers {
|
||||
if i > 0 { // Don't delay before the first request
|
||||
time.Sleep(p.config.RequestDelay)
|
||||
}
|
||||
decision, err := p.evaluatePaper(paper, criteria)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error processing paper %s: %v", paper.ArxivID, err)
|
||||
}
|
||||
|
||||
paperResult := PaperResult{
|
||||
Paper: paper,
|
||||
Decision: decision.Decision,
|
||||
Explanation: decision.Explanation,
|
||||
}
|
||||
|
||||
if decision.Decision == "ACCEPT" {
|
||||
result.Accepted = append(result.Accepted, paperResult)
|
||||
} else {
|
||||
result.Rejected = append(result.Rejected, paperResult)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type llmRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages []message `json:"messages"`
|
||||
}
|
||||
|
||||
// message is a single chat message inside an llmRequest.
type message struct {
	// Role names the author of the message; this package only sends "user".
	Role string `json:"role"`
	// Content is the message text.
	Content string `json:"content"`
}
|
||||
|
||||
// llmResponse is the part of the API response this package decodes. Only the
// message content of each choice is read; other response fields are ignored.
type llmResponse struct {
	// Choices lists the candidate replies; the first one is used.
	Choices []struct {
		Message struct {
			// Content is the reply text produced by the model.
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
}
|
||||
|
||||
// decisionResult is the parsed form of one LLM reply.
type decisionResult struct {
	// Decision is the verdict taken from the first line of the reply.
	Decision string
	// Explanation is the text of the reply that follows the verdict.
	Explanation string
}
|
||||
|
||||
func (p *Processor) evaluatePaper(paper Paper, criteria string) (*decisionResult, error) {
|
||||
prompt := fmt.Sprintf(`Please evaluate the following academic paper against the provided criteria.
|
||||
Respond with either "ACCEPT" or "REJECT" followed by a brief explanation of your decision.
|
||||
Your response should be in the format:
|
||||
DECISION
|
||||
Explanation
|
||||
|
||||
Criteria:
|
||||
%s
|
||||
|
||||
Paper Title: %s
|
||||
Abstract: %s`, criteria, paper.Title, paper.Abstract)
|
||||
|
||||
reqBody := llmRequest{
|
||||
Model: p.config.Model,
|
||||
Messages: []message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: prompt,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
reqJSON, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error marshaling request: %v", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", p.config.APIEndpoint, bytes.NewBuffer(reqJSON))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating request: %v", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", p.config.APIKey))
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading response: %v", err)
|
||||
}
|
||||
|
||||
var llmResp llmResponse
|
||||
if err := json.Unmarshal(body, &llmResp); err != nil {
|
||||
return nil, fmt.Errorf("error unmarshaling response: %v", err)
|
||||
}
|
||||
|
||||
if len(llmResp.Choices) == 0 {
|
||||
return nil, fmt.Errorf("no response from LLM")
|
||||
}
|
||||
|
||||
content := llmResp.Choices[0].Message.Content
|
||||
lines := bytes.Split([]byte(content), []byte("\n"))
|
||||
|
||||
if len(lines) < 2 {
|
||||
return nil, fmt.Errorf("invalid response format")
|
||||
}
|
||||
|
||||
decision := string(bytes.TrimSpace(lines[0]))
|
||||
explanation := string(bytes.TrimSpace(bytes.Join(lines[1:], []byte("\n"))))
|
||||
|
||||
return &decisionResult{
|
||||
Decision: decision,
|
||||
Explanation: explanation,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ProcessFile processes a JSON file containing papers and writes results to an output file
|
||||
func ProcessFile(inputPath, outputPath, criteriaPath string, config Config) error {
|
||||
// Read input papers
|
||||
inputData, err := ioutil.ReadFile(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading input file: %v", err)
|
||||
}
|
||||
|
||||
var papers []Paper
|
||||
if err := json.Unmarshal(inputData, &papers); err != nil {
|
||||
return fmt.Errorf("error parsing input JSON: %v", err)
|
||||
}
|
||||
|
||||
// Read criteria
|
||||
criteriaData, err := ioutil.ReadFile(criteriaPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading criteria file: %v", err)
|
||||
}
|
||||
|
||||
// Process papers
|
||||
processor, err := NewProcessor(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating processor: %v", err)
|
||||
}
|
||||
result, err := processor.ProcessPapers(papers, string(criteriaData))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error processing papers: %v", err)
|
||||
}
|
||||
|
||||
// Write results
|
||||
outputData, err := json.MarshalIndent(result, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error marshaling output JSON: %v", err)
|
||||
}
|
||||
|
||||
if err := ioutil.WriteFile(outputPath, outputData, 0644); err != nil {
|
||||
return fmt.Errorf("error writing output file: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
Loading…
Reference in New Issue