Armored decisions more; smaller models have more variance

Armored response parsing, dump failed analysis to a file
Armored decision parsing more
2025-01-26 15:07:34 -06:00 · 2025-01-26 14:37:29 -06:00 · 2025-01-26 14:23:24 -06:00
3 changed files with 84 additions and 25 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+*.swp
--- a/README.md
+++ b/README.md
@@ -9,6 +9,8 @@ Go package for automated evaluation of academic papers using LLM-based criteria
 - Rate limiting with request delay configuration
 - File-based processing (JSON input/output)
 - Customizable evaluation criteria
+- Robust error handling with failure tracking
+- Automatic dump file for failed analyses

 ## Installation

@@ -19,7 +21,7 @@ go get gitea.r8z.us/stwhite/paperprocessor
 ## API Reference

 ### ProcessFile
-`func ProcessFile(inputPath, outputPath, criteriaPath string, config Config, debug bool) error`
+`func ProcessFile(inputPath, outputPath, criteriaPath string, config Config) error`

 Processes papers from input JSON file and writes results to output JSON file

@@ -28,7 +30,6 @@ Parameters:
 - outputPath: Path to write processing results JSON
 - criteriaPath: Path to text file with evaluation criteria
 - config: Configuration settings for API and processing
-- debug: Enable debug logging when true

 Returns:
 - error: Processing error or nil if successful
@@ -54,7 +55,6 @@ err := paperprocessor.ProcessFile(
    "output/results.json",
    "criteria.txt",
    config,
-    true, // debug mode
 )
 if err != nil {
    log.Fatal("Processing failed:", err)
@@ -106,10 +106,23 @@ Evaluation criteria:
            "decision": "REJECT",
            "explanation": "Doesn't meet novelty requirements..."
        }
+    ],
+    "failed": [
+        {
+            "paper": {
+                "title": "Problematic Paper",
+                "abstract": "...",
+                "arxiv_id": "2301.11111"
+            },
+            "error": "invalid decision format",
+            "output": ""
+        }
    ]
 }
 ```

+When papers fail processing, they are added to the "failed" list in the output JSON and also written to a `dump.json` file for detailed review.
+
 ## Configuration Options

 | Parameter       | Description                          | Default       |
--- a/paperprocessor.go
+++ b/paperprocessor.go
@@ -8,7 +8,6 @@ import (
 	"net/http"
 	"strings"
 	"time"
-	"unicode"
 )

 // Paper represents a single academic paper
@@ -29,6 +28,11 @@ type PaperResult struct {
 type ProcessingResult struct {
 	Accepted []PaperResult `json:"accepted"`
 	Rejected []PaperResult `json:"rejected"`
+	Failed   []struct {
+		Paper  Paper  `json:"paper"`
+		Error  string `json:"error"`
+		Output string `json:"output"`
+	} `json:"failed"`
 }

 // Config holds the configuration for the processor
@@ -80,7 +84,17 @@ func (p *Processor) ProcessPapers(papers []Paper, criteria string) (*ProcessingR
 		}
 		decision, err := p.evaluatePaper(paper, criteria)
 		if err != nil {
-			return nil, fmt.Errorf("error processing paper %s: %v", paper.ArxivID, err)
+			// Instead of returning error, add to failed list
+			result.Failed = append(result.Failed, struct {
+				Paper  Paper  `json:"paper"`
+				Error  string `json:"error"`
+				Output string `json:"output"`
+			}{
+				Paper:  paper,
+				Error:  err.Error(),
+				Output: decision.RawOutput, // Include raw output for debugging
+			})
+			continue
 		}

 		paperResult := PaperResult{
@@ -96,6 +110,14 @@ func (p *Processor) ProcessPapers(papers []Paper, criteria string) (*ProcessingR
 		}
 	}

+	// Write failed analyses to dump file if any exist
+	if len(result.Failed) > 0 {
+		dumpData, err := json.MarshalIndent(result.Failed, "", "  ")
+		if err == nil { // Only try to write if marshaling succeeded
+			ioutil.WriteFile("dump.json", dumpData, 0644)
+		}
+	}
+
 	return result, nil
 }

@@ -120,12 +142,14 @@ type llmResponse struct {
 type decisionResult struct {
 	Decision    string
 	Explanation string
+	RawOutput   string // Store raw output for error reporting
 }

 func (p *Processor) evaluatePaper(paper Paper, criteria string) (*decisionResult, error) {
 	prompt := fmt.Sprintf(`Please evaluate the following academic paper against the provided criteria.
-Respond with either "ACCEPT" or "REJECT" followed by a brief explanation of your decision. 
-Do not use markdown emphasis in your response. Keep your response clear and concise.
+Respond with either "ACCEPT" or "REJECT" followed by an explanation of your decision. 
+For ACCEPT decisions, provide a thorough explanation. For REJECT decisions, keep the explanation brief and focused on the key reason.
+Do not use markdown, bullet points, or quotes in your response. Keep your response clear and concise.
 Your response should be in the format:
 DECISION
 Explanation
@@ -148,12 +172,12 @@ Abstract: %s`, criteria, paper.Title, paper.Abstract)

 	reqJSON, err := json.Marshal(reqBody)
 	if err != nil {
-		return nil, fmt.Errorf("error marshaling request: %v", err)
+		return &decisionResult{RawOutput: ""}, fmt.Errorf("error marshaling request: %v", err)
 	}

 	req, err := http.NewRequest("POST", p.config.APIEndpoint, bytes.NewBuffer(reqJSON))
 	if err != nil {
-		return nil, fmt.Errorf("error creating request: %v", err)
+		return &decisionResult{RawOutput: ""}, fmt.Errorf("error creating request: %v", err)
 	}

 	req.Header.Set("Content-Type", "application/json")
@@ -162,51 +186,63 @@ Abstract: %s`, criteria, paper.Title, paper.Abstract)
 	client := &http.Client{}
 	resp, err := client.Do(req)
 	if err != nil {
-		return nil, fmt.Errorf("error making request: %v", err)
+		return &decisionResult{RawOutput: ""}, fmt.Errorf("error making request: %v", err)
 	}
 	defer resp.Body.Close()

 	body, err := ioutil.ReadAll(resp.Body)
 	if err != nil {
-		return nil, fmt.Errorf("error reading response: %v", err)
+		return &decisionResult{RawOutput: ""}, fmt.Errorf("error reading response: %v", err)
 	}

 	var llmResp llmResponse
 	if err := json.Unmarshal(body, &llmResp); err != nil {
-		return nil, fmt.Errorf("error unmarshaling response: %v", err)
+		return &decisionResult{RawOutput: string(body)}, fmt.Errorf("error unmarshaling response: %v", err)
 	}

 	if len(llmResp.Choices) == 0 {
-		return nil, fmt.Errorf("no response from LLM")
+		return &decisionResult{RawOutput: string(body)}, fmt.Errorf("no response from LLM")
 	}

 	content := llmResp.Choices[0].Message.Content

-	// Find first line with ACCEPT/REJECT
+	// Find line with ACCEPT/REJECT
 	var decisionLine string
 	lines := bytes.Split([]byte(content), []byte("\n"))
-	for _, line := range lines {
-		if strings.Contains(strings.ToUpper(string(line)), "ACCEPT") ||
-			strings.Contains(strings.ToUpper(string(line)), "REJECT") {
+	for i, line := range lines {
+		upperLine := strings.ToUpper(string(line))
+		// Check current line
+		if strings.Contains(upperLine, "ACCEPT") || strings.Contains(upperLine, "REJECT") {
 			decisionLine = string(line)
 			break
 		}
+		// If current line is "DECISION", check next line
+		if strings.TrimSpace(upperLine) == "DECISION" && i+1 < len(lines) {
+			nextLine := strings.ToUpper(string(lines[i+1]))
+			if strings.Contains(nextLine, "ACCEPT") || strings.Contains(nextLine, "REJECT") {
+				decisionLine = string(lines[i+1])
+				break
+			}
+		}
 	}

 	if decisionLine == "" {
-		return nil, fmt.Errorf("no decision found in response. Full response:\n%s", content)
+		return &decisionResult{RawOutput: content}, fmt.Errorf("no decision found in response. Full response:\n%s", content)
 	}

 	// Clean and normalize decision
 	rawDecision := strings.TrimSpace(decisionLine)

-	// Remove "DECISION:" prefix if present and trim non-alphabetic characters
-	cleanDecision := strings.TrimPrefix(rawDecision, "DECISION:")
-	cleanDecision = strings.TrimFunc(cleanDecision, func(r rune) bool {
-		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
-	})
+	// Handle common prefixes and clean the decision text
+	cleanDecision := rawDecision
+	for _, prefix := range []string{"DECISION:", "Decision:", "-", "\"", "*", "THIS PAPER IS"} {
+		cleanDecision = strings.TrimPrefix(cleanDecision, prefix)
+	}
+	cleanDecision = strings.TrimSpace(cleanDecision)
+	// Remove any remaining quotes
+	cleanDecision = strings.Trim(cleanDecision, "\"")

-	// Normalize case and check for valid decision
+	// Normalize case
 	upperDecision := strings.ToUpper(cleanDecision)
 	var decision string
 	switch {
@@ -215,16 +251,25 @@ Abstract: %s`, criteria, paper.Title, paper.Abstract)
 	case strings.HasPrefix(upperDecision, "REJECT"):
 		decision = "REJECT"
 	default:
-		return nil, fmt.Errorf("invalid decision value: %q (cleaned: %q). Full response:\n%s",
+		return &decisionResult{RawOutput: content}, fmt.Errorf("invalid decision value: %q (cleaned: %q). Full response:\n%s",
 			rawDecision, cleanDecision, content)
 	}

 	// Get explanation as everything after the decision line
 	explanation := strings.TrimSpace(strings.Replace(content, decisionLine, "", 1))

+	// Remove any "Explanation" header if present
+	explanation = strings.TrimPrefix(strings.TrimSpace(explanation), "Explanation")
+	explanation = strings.TrimSpace(explanation)
+
+	if explanation == "" {
+		return &decisionResult{RawOutput: content}, fmt.Errorf("empty explanation in response. Full response:\n%s", content)
+	}
+
 	return &decisionResult{
 		Decision:    decision,
 		Explanation: explanation,
+		RawOutput:   content,
 	}, nil
 }
Author	SHA1	Message	Date
Steve White	7f4e0ead52	Armored decisions more; smaller models have more variance	2025-01-26 15:07:34 -06:00
Steve White	78db506564	Armored response parsing, dump failed analysis to a file	2025-01-26 14:37:29 -06:00
Steve White	72ba9ac98f	Armored decision parsing more	2025-01-26 14:23:24 -06:00