From cd6870fa1a83a2b5f300208779bed1884a095d64 Mon Sep 17 00:00:00 2001 From: Steve White Date: Sat, 25 Jan 2025 23:06:45 -0600 Subject: [PATCH] Fixed json input parsing --- README.md | 36 +++++++++++++++++++++++++++++++++++- examples/input.json | 29 +++++++++++++++++++++++++++++ examples/main.go | 38 ++++++++++++++++++++++++++++++++++++++ paperformatter.go | 13 ++++++++++++- paperformatter_test.go | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 examples/input.json create mode 100644 examples/main.go diff --git a/README.md b/README.md index a97b7d8..ca0763c 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,9 @@ if err != nil { ## Input Format -The input JSON file should contain an array of entries, where each entry has the following structure: +The library supports two JSON input formats: +### Format 1: Direct Array ```json [ { @@ -37,6 +38,39 @@ The input JSON file should contain an array of entries, where each entry has the ] ``` +### Format 2: Categorized Object +```json +{ + "accepted": [ + { + "paper": { + "title": "Paper Title", + "abstract": "Paper Abstract", + "arxiv_id": "2301.00000" + }, + "decision": "accept", + "explanation": "Explanation for the decision" + } + ], + "rejected": [ + { + "paper": { + "title": "Another Paper", + "abstract": "Another Abstract", + "arxiv_id": "2301.00001" + }, + "decision": "reject", + "explanation": "Explanation for rejection" + } + ] +} +``` + +Both formats support the same entry structure, with each entry containing: +- A paper object with title, abstract, and arXiv ID +- A decision (accept/reject, case-insensitive) +- An explanation for the decision + ## Output Format The library generates a Markdown file that: diff --git a/examples/input.json b/examples/input.json new file mode 100644 index 0000000..ff53d3d --- /dev/null +++ b/examples/input.json @@ -0,0 +1,29 @@ +[ + { + "paper": { + "title": "Deep Learning Advances in Computer Vision", + "abstract": 
"This paper presents recent advances in deep learning approaches for computer vision tasks. We discuss novel architectures that have achieved state-of-the-art results in image classification, object detection, and semantic segmentation.\n\nOur comprehensive analysis shows that attention mechanisms and multi-scale feature fusion are key components for improving model performance.", + "arxiv_id": "2301.0001" + }, + "decision": "accept", + "explanation": "Excellent comprehensive review with novel insights. The analysis of attention mechanisms is particularly valuable." + }, + { + "paper": { + "title": "Quantum Computing: A Survey of Current Challenges", + "abstract": "We survey the current challenges in quantum computing, focusing on error correction, qubit coherence, and scalability issues. The paper provides a systematic review of recent approaches to these challenges.", + "arxiv_id": "2301.0002" + }, + "decision": "accept", + "explanation": "Well-structured survey that effectively summarizes the field's major challenges." + }, + { + "paper": { + "title": "Blockchain in Healthcare", + "abstract": "This paper proposes a blockchain-based system for managing electronic health records. We discuss implementation details and potential privacy concerns.", + "arxiv_id": "2301.0003" + }, + "decision": "reject", + "explanation": "The proposed system lacks novelty and does not adequately address existing privacy solutions in the field." 
+ } +] diff --git a/examples/main.go b/examples/main.go new file mode 100644 index 0000000..761cceb --- /dev/null +++ b/examples/main.go @@ -0,0 +1,38 @@ +package main + +import ( + "fmt" + "log" + "os" + + "gitea.r8z.us/stwhite/paperformatter" +) + +func main() { + // Check if input and output files are provided + if len(os.Args) != 3 { + fmt.Println("Usage: go run main.go <input_file> <output_file>") + fmt.Println("Example: go run main.go input.json output.md") + os.Exit(1) + } + + inputFile := os.Args[1] + outputFile := os.Args[2] + + // Format the papers + err := paperformatter.FormatPapers(inputFile, outputFile) + if err != nil { + switch e := err.(type) { + case *paperformatter.ValidationError: + log.Fatalf("Validation error: %v", e) + case *paperformatter.FileError: + log.Fatalf("File error: %v", e) + case *paperformatter.JSONError: + log.Fatalf("JSON error: %v", e) + default: + log.Fatalf("Error: %v", err) + } + } + + fmt.Printf("Successfully formatted papers from %s to %s\n", inputFile, outputFile) +} diff --git a/paperformatter.go b/paperformatter.go index 9a8387c..e96da3f 100644 --- a/paperformatter.go +++ b/paperformatter.go @@ -61,9 +61,20 @@ func FormatPapers(inputFile, outputFile string) error { } } + // Try to parse as direct array first var entries []Entry if err := json.Unmarshal(data, &entries); err != nil { - return &JSONError{Wrapped: err} + // If that fails, try parsing as object with accepted/rejected arrays + var input struct { + Accepted []Entry `json:"accepted"` + Rejected []Entry `json:"rejected,omitempty"` + } + if err := json.Unmarshal(data, &input); err != nil { + return &JSONError{Wrapped: err} + } + // Combine accepted and rejected entries + entries = append(entries, input.Accepted...) + entries = append(entries, input.Rejected...) 
} // Validate all entries before processing diff --git a/paperformatter_test.go b/paperformatter_test.go index 04e2d7f..ddf1819 100644 --- a/paperformatter_test.go +++ b/paperformatter_test.go @@ -141,6 +141,42 @@ func TestFormatPapers(t *testing.T) { wantErr: true, errContains: "decision must be either 'accept' or 'reject'", }, + { + name: "categorized format", + input: `{ + "accepted": [ + { + "paper": { + "title": "Test Paper 1", + "abstract": "Abstract 1", + "arxiv_id": "2301.0001" + }, + "decision": "accept", + "explanation": "Good paper" + } + ], + "rejected": [ + { + "paper": { + "title": "Test Paper 2", + "abstract": "Abstract 2", + "arxiv_id": "2301.0002" + }, + "decision": "reject", + "explanation": "Needs work" + } + ] + }`, + wantErr: false, + checkOutput: func(t *testing.T, output string) { + if !strings.Contains(output, "## Test Paper 1") { + t.Error("Output missing accepted paper") + } + if !strings.Contains(output, "## Test Paper 2") { + t.Error("Output missing rejected paper") + } + }, + }, { name: "case insensitive decision", input: `[