Initial commit

2025-01-25 22:26:30 -06:00 · 2025-01-25 22:26:30 -06:00 · 8c8af7dcaa
commit 8c8af7dcaa
6 changed files with 764 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,123 @@
 # Paper Formatter
 A Go library that converts JSON files containing paper reviews into formatted Markdown documents. The library organizes papers into accepted and rejected categories, formatting each entry with title, arXiv link, abstract, decision, and explanation.
 ## Installation
 ```bash
 go get gitea.r8z.us/stwhite/paperformatter
 ```
 ## Usage
 ```go
 import "gitea.r8z.us/stwhite/paperformatter"
 err := paperformatter.FormatPapers("input.json", "output.md")
 if err != nil {
    log.Fatal(err)
 }
 ```
 ## Input Format
 The input JSON file should contain an array of entries, where each entry has the following structure:
 ```json
 [
  {
    "paper": {
      "title": "Paper Title",
      "abstract": "Paper Abstract",
      "arxiv_id": "2301.00000"
    },
    "decision": "accept",
    "explanation": "Explanation for the decision"
  }
 ]
 ```
 ## Output Format
 The library generates a Markdown file that:
 1. Separates papers into "Accepted Papers" and "Rejected Papers" sections
 2. For each paper, includes:
   - Title as a heading
   - ArXiv link
   - Abstract (formatted as a blockquote)
   - Decision
   - Explanation for the decision
 Example output:
 ```markdown
 # Accepted Papers
 ## Example Paper Title
 [arXiv:2301.00000](https://arxiv.org/abs/2301.00000)
 > This is the paper's abstract...
 **Decision:** accept
 **Explanation:** This is the explanation for accepting the paper...
 # Rejected Papers
 [Additional papers would be listed here...]
 ```
 ## Error Handling
 The library provides detailed error handling with custom error types:
 ### Validation Errors
 - Empty or missing required fields (title, abstract, arxiv_id)
 - Invalid decision values (must be "accept" or "reject")
 - Empty explanation
 ### File Operation Errors
 - Input file cannot be read
 - Output file cannot be written
 - Includes detailed information about the failed operation and file path
 ### JSON Parsing Errors
 - Invalid JSON format
 - JSON values whose type does not match the expected structure (fields that are missing or empty are reported as validation errors instead)
 Each error type provides specific information about what went wrong:
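 ```go
 // Example error handling.
 // Validation errors are returned wrapped with the entry number ("entry 1: ..."),
 // so match error types with errors.As rather than a type switch.
 err := paperformatter.FormatPapers("input.json", "output.md")
 if err != nil {
    var (
        validationErr *paperformatter.ValidationError
        fileErr       *paperformatter.FileError
        jsonErr       *paperformatter.JSONError
    )
    switch {
    case errors.As(err, &validationErr):
        fmt.Printf("Validation failed for %s: %s\n", validationErr.Field, validationErr.Message)
    case errors.As(err, &fileErr):
        fmt.Printf("File operation '%s' failed for '%s': %v\n", fileErr.Op, fileErr.Path, fileErr.Wrapped)
    case errors.As(err, &jsonErr):
        fmt.Printf("JSON parsing failed: %v\n", jsonErr.Wrapped)
    default:
        fmt.Printf("Error: %v\n", err)
    }
 }
 ```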
 ## Requirements
 - Go 1.23.1 or later
 ## Testing
 The library includes comprehensive unit tests covering:
 - Successful format conversion
 - Error handling cases
 - Edge cases (empty input, case sensitivity)
 - File I/O operations
 - Multiline text formatting
 Run the tests using:
 ```bash
 go test -v
 ```
--- a/errors.go
+++ b/errors.go
@ -0,0 +1,108 @@
 package paperformatter
 import (
 	"fmt"
 	"strings"
 )
 // ValidationError reports that a piece of input failed validation.
 // Field names the offending input field and Message explains what is wrong
 // with it.
 type ValidationError struct {
 	Field   string
 	Message string
 }
 // Error implements the error interface. The text has the form
 // "validation error for <Field>: <Message>".
 func (v *ValidationError) Error() string {
 	field, reason := v.Field, v.Message
 	return fmt.Sprintf("validation error for %s: %s", field, reason)
 }
 // FileError describes a failed file operation. Path is the file involved,
 // Op names the operation that failed (e.g., "read" or "write"), and Wrapped
 // holds the underlying error.
 type FileError struct {
 	Path    string
 	Op      string
 	Wrapped error
 }
 // Error implements the error interface, reporting the operation, the path,
 // and the underlying error.
 func (fe *FileError) Error() string {
 	const layout = "file operation '%s' failed for '%s': %v"
 	return fmt.Sprintf(layout, fe.Op, fe.Path, fe.Wrapped)
 }
 // Unwrap returns the underlying error so that errors.Is and errors.As can
 // inspect it.
 func (fe *FileError) Unwrap() error {
 	return fe.Wrapped
 }
 // JSONError reports that the input could not be parsed as JSON.
 // Wrapped holds the underlying parsing error.
 type JSONError struct {
 	Wrapped error
 }
 // Error implements the error interface, prefixing the underlying error with
 // "JSON parsing error: ".
 func (je *JSONError) Error() string {
 	cause := je.Wrapped
 	return fmt.Sprintf("JSON parsing error: %v", cause)
 }
 // Unwrap returns the underlying error so that errors.Is and errors.As can
 // inspect it.
 func (je *JSONError) Unwrap() error {
 	return je.Wrapped
 }
 // validatePaper verifies that the title, abstract and arXiv ID of a Paper are
 // all non-empty. The fields are checked in that order, and the first empty one
 // is reported as a *ValidationError; nil is returned when all are present.
 func validatePaper(paper Paper) error {
 	required := [...]struct {
 		field, value, message string
 	}{
 		{"title", paper.Title, "title cannot be empty"},
 		{"abstract", paper.Abstract, "abstract cannot be empty"},
 		{"arxiv_id", paper.ArxivID, "arxiv_id cannot be empty"},
 	}
 	for _, req := range required {
 		if req.value != "" {
 			continue
 		}
 		return &ValidationError{Field: req.field, Message: req.message}
 	}
 	return nil
 }
 // validateEntry validates a complete Entry. The embedded Paper is checked
 // first, then the decision (non-empty and, ignoring case, either "accept" or
 // "reject"), and finally that an explanation is present.
 // The first failure is returned as a *ValidationError; nil means the entry
 // is valid.
 func validateEntry(entry Entry) error {
 	if paperErr := validatePaper(entry.Paper); paperErr != nil {
 		return paperErr
 	}
 	// Upper-casing an empty string yields an empty string, so the empty case
 	// below fires exactly when the decision itself is empty.
 	switch strings.ToUpper(entry.Decision) {
 	case "":
 		return &ValidationError{
 			Field:   "decision",
 			Message: "decision cannot be empty",
 		}
 	case "ACCEPT", "REJECT":
 		// Recognized decision; continue with the remaining checks.
 	default:
 		return &ValidationError{
 			Field:   "decision",
 			Message: "decision must be either 'accept' or 'reject'",
 		}
 	}
 	if entry.Explanation != "" {
 		return nil
 	}
 	return &ValidationError{
 		Field:   "explanation",
 		Message: "explanation cannot be empty",
 	}
 }
--- a/example_test.go
+++ b/example_test.go
@ -0,0 +1,166 @@
 package paperformatter_test
 import (
 	"errors"
 	"fmt"
 	"log"
 	"os"

 	"gitea.r8z.us/stwhite/paperformatter"
 )
 // ExampleFormatPapers converts a JSON file holding a single accepted review
 // into Markdown and prints the generated document.
 func ExampleFormatPapers() {
 	const (
 		jsonPath     = "example_input.json"
 		markdownPath = "example_output.md"
 	)
 	// One accepted review; this is the full content of the input file.
 	reviews := `[
 		{
 			"paper": {
 				"title": "Example Research Paper",
 				"abstract": "This is a sample abstract for the example paper.",
 				"arxiv_id": "2301.0001"
 			},
 			"decision": "accept",
 			"explanation": "Well-written paper with novel contributions."
 		}
 	]`
 	// Stage the input on disk. Both files are removed when the example returns.
 	writeErr := os.WriteFile(jsonPath, []byte(reviews), 0644)
 	if writeErr != nil {
 		log.Fatal(writeErr)
 	}
 	defer os.Remove(jsonPath)
 	defer os.Remove(markdownPath)
 	// Run the conversion.
 	formatErr := paperformatter.FormatPapers(jsonPath, markdownPath)
 	if formatErr != nil {
 		log.Fatal(formatErr)
 	}
 	// Echo the generated Markdown so it can be compared with the expected output.
 	markdown, readErr := os.ReadFile(markdownPath)
 	if readErr != nil {
 		log.Fatal(readErr)
 	}
 	fmt.Println(string(markdown))
 	// Output:
 	// # Accepted Papers
 	//
 	// ## Example Research Paper
 	//
 	// [arXiv:2301.0001](https://arxiv.org/abs/2301.0001)
 	//
 	// > This is a sample abstract for the example paper.
 	//
 	// **Decision:** accept
 	//
 	// **Explanation:** Well-written paper with novel contributions.
 	//
 	// # Rejected Papers
 }
 // ExampleFormatPapers_multipleEntries formats one accepted and one rejected
 // review, showing how entries are grouped under the two section headings.
 func ExampleFormatPapers_multipleEntries() {
 	const (
 		jsonPath     = "example_multi_input.json"
 		markdownPath = "example_multi_output.md"
 	)
 	// Two reviews: the first is accepted, the second rejected.
 	reviews := `[
 		{
 			"paper": {
 				"title": "First Paper",
 				"abstract": "Abstract for the first paper.",
 				"arxiv_id": "2301.0001"
 			},
 			"decision": "accept",
 			"explanation": "Good contribution."
 		},
 		{
 			"paper": {
 				"title": "Second Paper",
 				"abstract": "Abstract for the second paper.",
 				"arxiv_id": "2301.0002"
 			},
 			"decision": "reject",
 			"explanation": "Needs more work."
 		}
 	]`
 	// Stage the input on disk. Both files are removed when the example returns.
 	writeErr := os.WriteFile(jsonPath, []byte(reviews), 0644)
 	if writeErr != nil {
 		log.Fatal(writeErr)
 	}
 	defer os.Remove(jsonPath)
 	defer os.Remove(markdownPath)
 	formatErr := paperformatter.FormatPapers(jsonPath, markdownPath)
 	if formatErr != nil {
 		log.Fatal(formatErr)
 	}
 	// Echo the generated Markdown so it can be compared with the expected output.
 	markdown, readErr := os.ReadFile(markdownPath)
 	if readErr != nil {
 		log.Fatal(readErr)
 	}
 	fmt.Println(string(markdown))
 	// Output:
 	// # Accepted Papers
 	//
 	// ## First Paper
 	//
 	// [arXiv:2301.0001](https://arxiv.org/abs/2301.0001)
 	//
 	// > Abstract for the first paper.
 	//
 	// **Decision:** accept
 	//
 	// **Explanation:** Good contribution.
 	//
 	// # Rejected Papers
 	//
 	// ## Second Paper
 	//
 	// [arXiv:2301.0002](https://arxiv.org/abs/2301.0002)
 	//
 	// > Abstract for the second paper.
 	//
 	// **Decision:** reject
 	//
 	// **Explanation:** Needs more work.
 }
 // ExampleFormatPapers_errorHandling shows how to tell the package's error
 // types apart. FormatPapers returns validation errors wrapped with the entry
 // number ("entry 1: ..."), so a plain type switch on the returned error would
 // not match *ValidationError; errors.As looks through the wrapping.
 func ExampleFormatPapers_errorHandling() {
 	// The entry has an empty title, so validation fails.
 	input := `[
 		{
 			"paper": {
 				"title": "", 
 				"abstract": "Abstract",
 				"arxiv_id": "2301.0001"
 			},
 			"decision": "accept",
 			"explanation": "Good paper"
 		}
 	]`
 	inputFile := "example_error_input.json"
 	outputFile := "example_error_output.md"
 	if err := os.WriteFile(inputFile, []byte(input), 0644); err != nil {
 		log.Fatal(err)
 	}
 	defer os.Remove(inputFile)
 	defer os.Remove(outputFile)
 	err := paperformatter.FormatPapers(inputFile, outputFile)
 	if err != nil {
 		var (
 			validationErr *paperformatter.ValidationError
 			fileErr       *paperformatter.FileError
 			jsonErr       *paperformatter.JSONError
 		)
 		// Print the full error in every branch so the entry number is kept.
 		switch {
 		case errors.As(err, &validationErr):
 			fmt.Printf("Validation error: %v\n", err)
 		case errors.As(err, &fileErr):
 			fmt.Printf("File error: %v\n", err)
 		case errors.As(err, &jsonErr):
 			fmt.Printf("JSON error: %v\n", err)
 		default:
 			fmt.Printf("Error: %v\n", err)
 		}
 	}
 	// Output: Validation error: entry 1: validation error for title: title cannot be empty
 }
--- a/go.mod
+++ b/go.mod
@ -0,0 +1,3 @@
 module gitea.r8z.us/stwhite/paperformatter
 go 1.23.1
--- a/paperformatter.go
+++ b/paperformatter.go
@ -0,0 +1,118 @@
 // Package paperformatter provides functionality to convert JSON files containing paper reviews
 // into formatted Markdown documents. It organizes papers into accepted and rejected categories,
 // with each entry containing a title, arXiv link, abstract, decision, and explanation.
 //
 // The package provides robust error handling with custom error types for validation,
 // file operations, and JSON parsing issues. It supports case-insensitive decision values
 // and properly formats multiline abstracts.
 //
 // Basic usage:
 //
 //	err := paperformatter.FormatPapers("input.json", "output.md")
 //	if err != nil {
 //	    log.Fatal(err)
 //	}
 //
 // For detailed examples, see the examples in the documentation.
 package paperformatter
 import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"strings"
 )
 // Paper represents a research paper with its basic metadata.
 // Each field is decoded from the JSON key named in its struct tag.
 type Paper struct {
 	// Title is the paper's title; formatEntry renders it as a level-2 heading.
 	Title    string `json:"title"`
 	// Abstract is the paper's abstract; formatEntry renders it as a blockquote.
 	Abstract string `json:"abstract"`
 	// ArxivID is the arXiv identifier; formatEntry uses it to build the
 	// https://arxiv.org/abs/<id> link.
 	ArxivID  string `json:"arxiv_id"`
 }
 // Entry represents a paper review entry containing the paper details,
 // the review decision, and an explanation for the decision.
 // Each field is decoded from the JSON key named in its struct tag.
 type Entry struct {
 	// Paper is the reviewed paper.
 	Paper       Paper  `json:"paper"`
 	// Decision is the review outcome. FormatPapers compares it
 	// case-insensitively against "accept" to pick the output section.
 	Decision    string `json:"decision"`
 	// Explanation is the free-text justification for the decision.
 	Explanation string `json:"explanation"`
 }
 // FormatPapers reads paper reviews from a JSON file and generates a formatted Markdown document.
 // The output document organizes papers into "Accepted Papers" and "Rejected Papers" sections,
 // keeping the entries of each section in their input order.
 //
 // The input JSON file should contain an array of Entry objects. Each entry must have:
 //   - A paper with a title, abstract, and arXiv ID
 //   - A decision ("accept" or "reject", case-insensitive)
 //   - An explanation for the decision
 //
 // The function returns an error if:
 //   - The input file cannot be read (*FileError with Op "read")
 //   - The JSON is invalid (*JSONError)
 //   - Any entry fails validation (a *ValidationError wrapped with the 1-based
 //     entry number, e.g. "entry 2: ..."; use errors.As to extract it)
 //   - The output file cannot be written (*FileError with Op "write")
 //
 // All entries are validated before anything is written, so no output file is
 // produced when validation fails.
 func FormatPapers(inputFile, outputFile string) error {
 	data, err := os.ReadFile(inputFile)
 	if err != nil {
 		return &FileError{
 			Path:    inputFile,
 			Op:      "read",
 			Wrapped: err,
 		}
 	}
 	var entries []Entry
 	if err := json.Unmarshal(data, &entries); err != nil {
 		return &JSONError{Wrapped: err}
 	}
 	// Validate and partition in a single pass. Returning on the first invalid
 	// entry still guarantees nothing is written unless every entry is valid.
 	var accepted, rejected []Entry
 	for i, entry := range entries {
 		if err := validateEntry(entry); err != nil {
 			return fmt.Errorf("entry %d: %w", i+1, err)
 		}
 		if strings.ToUpper(entry.Decision) == "ACCEPT" {
 			accepted = append(accepted, entry)
 		} else {
 			rejected = append(rejected, entry)
 		}
 	}
 	// Build the document with a strings.Builder; repeated string concatenation
 	// would re-copy the whole document for every entry.
 	var doc strings.Builder
 	doc.WriteString("# Accepted Papers\n\n")
 	for _, entry := range accepted {
 		doc.WriteString(formatEntry(entry))
 	}
 	doc.WriteString("# Rejected Papers\n\n")
 	for _, entry := range rejected {
 		doc.WriteString(formatEntry(entry))
 	}
 	if err := os.WriteFile(outputFile, []byte(doc.String()), 0644); err != nil {
 		return &FileError{
 			Path:    outputFile,
 			Op:      "write",
 			Wrapped: err,
 		}
 	}
 	return nil
 }
 // formatEntry renders one review as a Markdown fragment: a level-2 heading
 // with the title, an arXiv link, the abstract as a blockquote, and the
 // decision and explanation in bold-labelled paragraphs. Every part is
 // followed by a blank line.
 func formatEntry(entry Entry) string {
 	const layout = "## %s\n\n" +
 		"[arXiv:%s](https://arxiv.org/abs/%s)\n\n" +
 		"> %s\n\n" +
 		"**Decision:** %s\n\n" +
 		"**Explanation:** %s\n\n"
 	paper := entry.Paper
 	// Prefix continuation lines so a multi-line abstract stays in one blockquote.
 	quotedAbstract := strings.ReplaceAll(paper.Abstract, "\n", "\n> ")
 	return fmt.Sprintf(layout,
 		paper.Title,
 		paper.ArxivID,
 		paper.ArxivID,
 		quotedAbstract,
 		entry.Decision,
 		entry.Explanation)
 }
--- a/paperformatter_test.go
+++ b/paperformatter_test.go
@ -0,0 +1,246 @@
 package paperformatter
 import (
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 )
 // TestFormatPapers runs FormatPapers against a table of JSON inputs. Error
 // cases check that the returned error contains an expected substring; success
 // cases read the generated Markdown back and hand it to the case's checkOutput
 // callback.
 func TestFormatPapers(t *testing.T) {
 	tests := []struct {
 		name        string
 		input       string
 		wantErr     bool
 		errContains string
 		checkOutput func(t *testing.T, output string)
 	}{
 		{
 			name: "happy path - mixed decisions",
 			input: `[
 				{
 					"paper": {
 						"title": "Test Paper 1",
 						"abstract": "Abstract 1",
 						"arxiv_id": "2301.0001"
 					},
 					"decision": "accept",
 					"explanation": "Good paper"
 				},
 				{
 					"paper": {
 						"title": "Test Paper 2",
 						"abstract": "Abstract 2",
 						"arxiv_id": "2301.0002"
 					},
 					"decision": "reject",
 					"explanation": "Needs work"
 				}
 			]`,
 			wantErr: false,
 			checkOutput: func(t *testing.T, output string) {
 				// Check sections exist
 				if !strings.Contains(output, "# Accepted Papers") {
 					t.Error("Output missing Accepted Papers section")
 				}
 				if !strings.Contains(output, "# Rejected Papers") {
 					t.Error("Output missing Rejected Papers section")
 				}
 				// Check paper details
 				if !strings.Contains(output, "## Test Paper 1") {
 					t.Error("Output missing accepted paper title")
 				}
 				if !strings.Contains(output, "## Test Paper 2") {
 					t.Error("Output missing rejected paper title")
 				}
 				// Check formatting
 				if !strings.Contains(output, "[arXiv:2301.0001]") {
 					t.Error("Output missing arXiv link for accepted paper")
 				}
 				if !strings.Contains(output, "> Abstract 1") {
 					t.Error("Output missing abstract blockquote for accepted paper")
 				}
 				if !strings.Contains(output, "**Decision:** accept") {
 					t.Error("Output missing decision for accepted paper")
 				}
 			},
 		},
 		{
 			name:    "empty input array",
 			input:   `[]`,
 			wantErr: false,
 			checkOutput: func(t *testing.T, output string) {
 				if !strings.Contains(output, "# Accepted Papers") {
 					t.Error("Output missing Accepted Papers section")
 				}
 				if !strings.Contains(output, "# Rejected Papers") {
 					t.Error("Output missing Rejected Papers section")
 				}
 			},
 		},
 		{
 			name:        "invalid JSON",
 			input:       `not json`,
 			wantErr:     true,
 			errContains: "JSON parsing error",
 		},
 		{
 			name: "missing title",
 			input: `[
 				{
 					"paper": {
 						"title": "",
 						"abstract": "Abstract",
 						"arxiv_id": "2301.0001"
 					},
 					"decision": "accept",
 					"explanation": "Good paper"
 				}
 			]`,
 			wantErr:     true,
 			errContains: "validation error for title: title cannot be empty",
 		},
 		{
 			name: "missing abstract",
 			input: `[
 				{
 					"paper": {
 						"title": "Test Paper",
 						"abstract": "",
 						"arxiv_id": "2301.0001"
 					},
 					"decision": "accept",
 					"explanation": "Good paper"
 				}
 			]`,
 			wantErr:     true,
 			errContains: "validation error for abstract: abstract cannot be empty",
 		},
 		{
 			name: "invalid decision",
 			input: `[
 				{
 					"paper": {
 						"title": "Test Paper",
 						"abstract": "Abstract",
 						"arxiv_id": "2301.0001"
 					},
 					"decision": "maybe",
 					"explanation": "Good paper"
 				}
 			]`,
 			wantErr:     true,
 			errContains: "decision must be either 'accept' or 'reject'",
 		},
 		{
 			name: "case insensitive decision",
 			input: `[
 				{
 					"paper": {
 						"title": "Test Paper",
 						"abstract": "Abstract",
 						"arxiv_id": "2301.0001"
 					},
 					"decision": "ACCEPT",
 					"explanation": "Good paper"
 				}
 			]`,
 			wantErr: false,
 			checkOutput: func(t *testing.T, output string) {
 				if !strings.Contains(output, "## Test Paper") {
 					t.Error("Output missing paper with uppercase decision")
 				}
 			},
 		},
 		{
 			name: "multiline abstract",
 			input: `[
 				{
 					"paper": {
 						"title": "Test Paper",
 						"abstract": "Line 1\nLine 2\nLine 3",
 						"arxiv_id": "2301.0001"
 					},
 					"decision": "accept",
 					"explanation": "Good paper"
 				}
 			]`,
 			wantErr: false,
 			checkOutput: func(t *testing.T, output string) {
 				if !strings.Contains(output, "> Line 1\n> Line 2\n> Line 3") {
 					t.Error("Output not handling multiline abstract correctly")
 				}
 			},
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Each subtest gets its own directory (removed automatically by the
 			// testing package) so an output file left behind by an earlier case
 			// can never satisfy this case's assertions.
 			dir := t.TempDir()
 			// Create input file
 			inputPath := filepath.Join(dir, "input.json")
 			if err := os.WriteFile(inputPath, []byte(tt.input), 0644); err != nil {
 				t.Fatalf("Failed to write input file: %v", err)
 			}
 			// Create output path
 			outputPath := filepath.Join(dir, "output.md")
 			// Run the formatter
 			err := FormatPapers(inputPath, outputPath)
 			// Check error cases
 			if tt.wantErr {
 				if err == nil {
 					t.Error("Expected error but got none")
 				} else if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) {
 					t.Errorf("Expected error containing %q but got %q", tt.errContains, err.Error())
 				}
 				return
 			}
 			// Check success cases
 			if err != nil {
 				t.Fatalf("Unexpected error: %v", err)
 			}
 			// Read and verify output
 			output, err := os.ReadFile(outputPath)
 			if err != nil {
 				t.Fatalf("Failed to read output file: %v", err)
 			}
 			if tt.checkOutput != nil {
 				tt.checkOutput(t, string(output))
 			}
 		})
 	}
 }
 // TestFormatPapersFileErrors checks that a failure to read the input file and
 // a failure to write the output file are each reported as a *FileError
 // carrying the matching operation name.
 func TestFormatPapersFileErrors(t *testing.T) {
 	dir := t.TempDir()
 	// Read failure: the input path does not exist.
 	missingInput := filepath.Join(dir, "nonexistent.json")
 	err := FormatPapers(missingInput, filepath.Join(dir, "output.md"))
 	if fileErr, ok := err.(*FileError); !ok {
 		t.Errorf("Expected *FileError for non-existent input file, got %T: %v", err, err)
 	} else if fileErr.Op != "read" {
 		t.Errorf("Expected failed operation %q, got %q", "read", fileErr.Op)
 	}
 	// Write failure: the input must be valid JSON, otherwise FormatPapers fails
 	// while parsing and never reaches the write step this case is meant to
 	// exercise.
 	inputPath := filepath.Join(dir, "input.json")
 	if err := os.WriteFile(inputPath, []byte(`[]`), 0644); err != nil {
 		t.Fatalf("Failed to write input file: %v", err)
 	}
 	// The parent directory of the output path does not exist, so the write fails.
 	badOutput := filepath.Join(dir, "missing", "output.md")
 	err = FormatPapers(inputPath, badOutput)
 	if fileErr, ok := err.(*FileError); !ok {
 		t.Errorf("Expected *FileError for invalid output path, got %T: %v", err, err)
 	} else if fileErr.Op != "write" {
 		t.Errorf("Expected failed operation %q, got %q", "write", fileErr.Op)
 	}
 }
		`@ -0,0 +1,3 @@`
							`module gitea.r8z.us/stwhite/paperformatter`

							`go 1.23.1`