Fixed JSON input parsing

This commit is contained in:
Steve White 2025-01-25 23:06:45 -06:00
parent 8c8af7dcaa
commit cd6870fa1a
5 changed files with 150 additions and 2 deletions

View File

@ -21,8 +21,9 @@ if err != nil {
## Input Format ## Input Format
The input JSON file should contain an array of entries, where each entry has the following structure: The library supports two JSON input formats:
### Format 1: Direct Array
```json ```json
[ [
{ {
@ -37,6 +38,39 @@ The input JSON file should contain an array of entries, where each entry has the
] ]
``` ```
### Format 2: Categorized Object
```json
{
"accepted": [
{
"paper": {
"title": "Paper Title",
"abstract": "Paper Abstract",
"arxiv_id": "2301.00000"
},
"decision": "accept",
"explanation": "Explanation for the decision"
}
],
"rejected": [
{
"paper": {
"title": "Another Paper",
"abstract": "Another Abstract",
"arxiv_id": "2301.00001"
},
"decision": "reject",
"explanation": "Explanation for rejection"
}
]
}
```
Both formats use the same entry structure. Each entry contains:
- A paper object with title, abstract, and arXiv ID
- A decision (accept/reject, case-insensitive)
- An explanation for the decision
## Output Format ## Output Format
The library generates a Markdown file that: The library generates a Markdown file that:

29
examples/input.json Normal file
View File

@ -0,0 +1,29 @@
[
{
"paper": {
"title": "Deep Learning Advances in Computer Vision",
"abstract": "This paper presents recent advances in deep learning approaches for computer vision tasks. We discuss novel architectures that have achieved state-of-the-art results in image classification, object detection, and semantic segmentation.\n\nOur comprehensive analysis shows that attention mechanisms and multi-scale feature fusion are key components for improving model performance.",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Excellent comprehensive review with novel insights. The analysis of attention mechanisms is particularly valuable."
},
{
"paper": {
"title": "Quantum Computing: A Survey of Current Challenges",
"abstract": "We survey the current challenges in quantum computing, focusing on error correction, qubit coherence, and scalability issues. The paper provides a systematic review of recent approaches to these challenges.",
"arxiv_id": "2301.0002"
},
"decision": "accept",
"explanation": "Well-structured survey that effectively summarizes the field's major challenges."
},
{
"paper": {
"title": "Blockchain in Healthcare",
"abstract": "This paper proposes a blockchain-based system for managing electronic health records. We discuss implementation details and potential privacy concerns.",
"arxiv_id": "2301.0003"
},
"decision": "reject",
"explanation": "The proposed system lacks novelty and does not adequately address existing privacy solutions in the field."
}
]

38
examples/main.go Normal file
View File

@ -0,0 +1,38 @@
package main
import (
	"errors"
	"fmt"
	"log"
	"os"

	"gitea.r8z.us/stwhite/paperformatter"
)
// main is the command-line entry point for the paperformatter example.
//
// It expects exactly two arguments: the path of the input JSON file and the
// path of the Markdown file to write. Both are passed unchanged to
// paperformatter.FormatPapers. The process exits with status 1 when the
// argument count is wrong or when formatting fails.
func main() {
	// os.Args[0] is the program name, so two operands means a length of 3.
	if len(os.Args) != 3 {
		// Usage text is a diagnostic: send it to stderr so it never mixes
		// with regular output that a caller may be capturing from stdout.
		fmt.Fprintln(os.Stderr, "Usage: go run main.go <input.json> <output.md>")
		fmt.Fprintln(os.Stderr, "Example: go run main.go input.json output.md")
		os.Exit(1)
	}

	inputFile := os.Args[1]
	outputFile := os.Args[2]

	// Format the papers and classify any failure by its error type.
	if err := paperformatter.FormatPapers(inputFile, outputFile); err != nil {
		var (
			validationErr *paperformatter.ValidationError
			fileErr       *paperformatter.FileError
			jsonErr       *paperformatter.JSONError
		)

		// errors.As walks the whole error chain, so the classification keeps
		// working even if the error is wrapped with extra context. A plain
		// type switch would only match an unwrapped error.
		switch {
		case errors.As(err, &validationErr):
			log.Fatalf("Validation error: %v", err)
		case errors.As(err, &fileErr):
			log.Fatalf("File error: %v", err)
		case errors.As(err, &jsonErr):
			log.Fatalf("JSON error: %v", err)
		default:
			log.Fatalf("Error: %v", err)
		}
	}

	fmt.Printf("Successfully formatted papers from %s to %s\n", inputFile, outputFile)
}

View File

@ -61,9 +61,20 @@ func FormatPapers(inputFile, outputFile string) error {
} }
} }
// Try to parse as direct array first
var entries []Entry var entries []Entry
if err := json.Unmarshal(data, &entries); err != nil { if err := json.Unmarshal(data, &entries); err != nil {
return &JSONError{Wrapped: err} // If that fails, try parsing as object with accepted/rejected arrays
var input struct {
Accepted []Entry `json:"accepted"`
Rejected []Entry `json:"rejected,omitempty"`
}
if err := json.Unmarshal(data, &input); err != nil {
return &JSONError{Wrapped: err}
}
// Combine accepted and rejected entries
entries = append(entries, input.Accepted...)
entries = append(entries, input.Rejected...)
} }
// Validate all entries before processing // Validate all entries before processing

View File

@ -141,6 +141,42 @@ func TestFormatPapers(t *testing.T) {
wantErr: true, wantErr: true,
errContains: "decision must be either 'accept' or 'reject'", errContains: "decision must be either 'accept' or 'reject'",
}, },
{
name: "categorized format",
input: `{
"accepted": [
{
"paper": {
"title": "Test Paper 1",
"abstract": "Abstract 1",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good paper"
}
],
"rejected": [
{
"paper": {
"title": "Test Paper 2",
"abstract": "Abstract 2",
"arxiv_id": "2301.0002"
},
"decision": "reject",
"explanation": "Needs work"
}
]
}`,
wantErr: false,
checkOutput: func(t *testing.T, output string) {
if !strings.Contains(output, "## Test Paper 1") {
t.Error("Output missing accepted paper")
}
if !strings.Contains(output, "## Test Paper 2") {
t.Error("Output missing rejected paper")
}
},
},
{ {
name: "case insensitive decision", name: "case insensitive decision",
input: `[ input: `[