Initial commit

This commit is contained in:
Steve White 2025-01-25 22:26:30 -06:00
commit 8c8af7dcaa
6 changed files with 764 additions and 0 deletions

123
README.md Normal file
View File

@ -0,0 +1,123 @@
# Paper Formatter
A Go library that converts JSON files containing paper reviews into formatted Markdown documents. The library organizes papers into accepted and rejected categories, formatting each entry with title, arXiv link, abstract, decision, and explanation.
## Installation
```bash
go get gitea.r8z.us/stwhite/paperformatter
```
## Usage
```go
import "gitea.r8z.us/stwhite/paperformatter"
err := paperformatter.FormatPapers("input.json", "output.md")
if err != nil {
log.Fatal(err)
}
```
## Input Format
The input JSON file should contain an array of entries, where each entry has the following structure:
```json
[
{
"paper": {
"title": "Paper Title",
"abstract": "Paper Abstract",
"arxiv_id": "2301.00000"
},
"decision": "accept",
"explanation": "Explanation for the decision"
}
]
```
## Output Format
The library generates a Markdown file that:
1. Separates papers into "Accepted Papers" and "Rejected Papers" sections
2. For each paper, includes:
- Title as a heading
- ArXiv link
- Abstract (formatted as a blockquote)
- Decision
- Explanation for the decision
Example output:
```markdown
# Accepted Papers
## Example Paper Title
[arXiv:2301.00000](https://arxiv.org/abs/2301.00000)
> This is the paper's abstract...
**Decision:** accept
**Explanation:** This is the explanation for accepting the paper...
# Rejected Papers
[Additional papers would be listed here...]
```
## Error Handling
The library provides detailed error handling with custom error types:
### Validation Errors
- Empty or missing required fields (title, abstract, arxiv_id)
- Invalid decision values (must be "accept" or "reject")
- Empty explanation
### File Operation Errors
- Input file cannot be read
- Output file cannot be written
- Includes detailed information about the failed operation and file path
### JSON Parsing Errors
- Invalid JSON format
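- JSON whose structure does not match the expected array of entries (fields that are simply missing decode as empty values and are reported as validation errors instead)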
Each error type provides specific information about what went wrong:
```go
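// Example error handling. Validation errors are returned wrapped with the
// entry number ("entry 1: ..."), so match error types with errors.As rather
// than a type switch.
err := paperformatter.FormatPapers("input.json", "output.md")
if err != nil {
	var (
		validationErr *paperformatter.ValidationError
		fileErr       *paperformatter.FileError
		jsonErr       *paperformatter.JSONError
	)
	switch {
	case errors.As(err, &validationErr):
		fmt.Printf("Validation failed for %s: %s\n", validationErr.Field, validationErr.Message)
	case errors.As(err, &fileErr):
		fmt.Printf("File operation '%s' failed for '%s': %v\n", fileErr.Op, fileErr.Path, fileErr.Wrapped)
	case errors.As(err, &jsonErr):
		fmt.Printf("JSON parsing failed: %v\n", jsonErr.Wrapped)
	default:
		fmt.Printf("Error: %v\n", err)
	}
}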
```
## Requirements
- Go 1.23.1 or later
## Testing
The library includes comprehensive unit tests covering:
- Successful format conversion
- Error handling cases
- Edge cases (empty input, case sensitivity)
- File I/O operations
- Multiline text formatting
Run the tests using:
```bash
go test -v
```

108
errors.go Normal file
View File

@ -0,0 +1,108 @@
package paperformatter
import (
"fmt"
"strings"
)
// ValidationError is returned when an input entry fails validation.
// Field names the offending field and Message describes the problem.
type ValidationError struct {
	// Field is the name of the field that failed validation (e.g. "title").
	Field string
	// Message is a human-readable description of the failure.
	Message string
}

// Error implements the error interface, rendering the field name followed by
// the message.
func (v *ValidationError) Error() string {
	const layout = "validation error for %s: %s"
	return fmt.Sprintf(layout, v.Field, v.Message)
}
// FileError is returned when reading or writing a file fails. It records the
// path involved, the operation that was attempted (e.g. "read" or "write"),
// and the underlying error.
type FileError struct {
	// Path is the file the operation was attempted on.
	Path string
	// Op names the failed operation.
	Op string
	// Wrapped is the underlying error that caused the failure.
	Wrapped error
}

// Error implements the error interface. A nil Wrapped error is rendered as
// "<nil>".
func (fe *FileError) Error() string {
	cause := fmt.Sprint(fe.Wrapped)
	return "file operation '" + fe.Op + "' failed for '" + fe.Path + "': " + cause
}

// Unwrap exposes the underlying error so that errors.Is and errors.As can
// inspect it.
func (fe *FileError) Unwrap() error {
	return fe.Wrapped
}
// JSONError is returned when the input cannot be parsed as JSON. It carries
// the underlying parsing error.
type JSONError struct {
	// Wrapped is the underlying parsing error.
	Wrapped error
}

// Error implements the error interface. A nil Wrapped error is rendered as
// "<nil>".
func (je *JSONError) Error() string {
	return "JSON parsing error: " + fmt.Sprint(je.Wrapped)
}

// Unwrap exposes the underlying error so that errors.Is and errors.As can
// inspect it.
func (je *JSONError) Unwrap() error {
	return je.Wrapped
}
// validatePaper reports the first empty required field of paper, checking
// title, abstract and arxiv_id in that order. It returns a *ValidationError
// naming the field, or nil when all three are non-empty.
func validatePaper(paper Paper) error {
	required := []struct {
		name  string
		value string
	}{
		{"title", paper.Title},
		{"abstract", paper.Abstract},
		{"arxiv_id", paper.ArxivID},
	}

	for _, field := range required {
		if field.value != "" {
			continue
		}
		return &ValidationError{
			Field:   field.name,
			Message: field.name + " cannot be empty",
		}
	}
	return nil
}
// validateEntry validates entry and the Paper it contains. Checks run in a
// fixed order (paper fields, then decision, then explanation) and the first
// failure is returned as a *ValidationError; nil means the entry is valid.
// The decision is compared case-insensitively against "accept" and "reject".
func validateEntry(entry Entry) error {
	if paperErr := validatePaper(entry.Paper); paperErr != nil {
		return paperErr
	}

	switch strings.ToUpper(entry.Decision) {
	case "":
		return &ValidationError{Field: "decision", Message: "decision cannot be empty"}
	case "ACCEPT", "REJECT":
		// Recognised decision; continue with the remaining checks.
	default:
		return &ValidationError{
			Field:   "decision",
			Message: "decision must be either 'accept' or 'reject'",
		}
	}

	if len(entry.Explanation) == 0 {
		return &ValidationError{Field: "explanation", Message: "explanation cannot be empty"}
	}
	return nil
}

166
example_test.go Normal file
View File

@ -0,0 +1,166 @@
package paperformatter_test
import (
	"errors"
	"fmt"
	"log"
	"os"

	"gitea.r8z.us/stwhite/paperformatter"
)
// ExampleFormatPapers formats a single accepted review and prints the
// generated Markdown.
func ExampleFormatPapers() {
	// Review data for the example: one accepted paper.
	const reviews = `[
{
"paper": {
"title": "Example Research Paper",
"abstract": "This is a sample abstract for the example paper.",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Well-written paper with novel contributions."
}
]`
	const (
		srcPath = "example_input.json"
		dstPath = "example_output.md"
	)

	// Write the sample input to disk; both files are removed when the
	// example returns.
	err := os.WriteFile(srcPath, []byte(reviews), 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(srcPath)
	defer os.Remove(dstPath)

	// Convert the reviews to Markdown.
	err = paperformatter.FormatPapers(srcPath, dstPath)
	if err != nil {
		log.Fatal(err)
	}

	// Show the generated document.
	rendered, err := os.ReadFile(dstPath)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(rendered))
	// Output:
	// # Accepted Papers
	//
	// ## Example Research Paper
	//
	// [arXiv:2301.0001](https://arxiv.org/abs/2301.0001)
	//
	// > This is a sample abstract for the example paper.
	//
	// **Decision:** accept
	//
	// **Explanation:** Well-written paper with novel contributions.
	//
	// # Rejected Papers
}
// ExampleFormatPapers_multipleEntries formats one accepted and one rejected
// review and prints the generated Markdown, showing both sections populated.
func ExampleFormatPapers_multipleEntries() {
	// Review data for the example: one accepted and one rejected paper.
	const reviews = `[
{
"paper": {
"title": "First Paper",
"abstract": "Abstract for the first paper.",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good contribution."
},
{
"paper": {
"title": "Second Paper",
"abstract": "Abstract for the second paper.",
"arxiv_id": "2301.0002"
},
"decision": "reject",
"explanation": "Needs more work."
}
]`
	const (
		srcPath = "example_multi_input.json"
		dstPath = "example_multi_output.md"
	)

	// Write the sample input to disk; both files are removed when the
	// example returns.
	err := os.WriteFile(srcPath, []byte(reviews), 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(srcPath)
	defer os.Remove(dstPath)

	err = paperformatter.FormatPapers(srcPath, dstPath)
	if err != nil {
		log.Fatal(err)
	}

	rendered, err := os.ReadFile(dstPath)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(rendered))
	// Output:
	// # Accepted Papers
	//
	// ## First Paper
	//
	// [arXiv:2301.0001](https://arxiv.org/abs/2301.0001)
	//
	// > Abstract for the first paper.
	//
	// **Decision:** accept
	//
	// **Explanation:** Good contribution.
	//
	// # Rejected Papers
	//
	// ## Second Paper
	//
	// [arXiv:2301.0002](https://arxiv.org/abs/2301.0002)
	//
	// > Abstract for the second paper.
	//
	// **Decision:** reject
	//
	// **Explanation:** Needs more work.
}
// ExampleFormatPapers_errorHandling shows how to distinguish the error types
// returned by FormatPapers.
//
// Validation errors are returned wrapped with the entry number
// ("entry 1: ..."), so a plain type switch on the returned error never matches
// *ValidationError. errors.As walks the wrap chain and works for every error
// type, wrapped or not.
func ExampleFormatPapers_errorHandling() {
	// The title is empty, so validation of the first entry fails.
	input := `[
{
"paper": {
"title": "",
"abstract": "Abstract",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good paper"
}
]`
	inputFile := "example_error_input.json"
	outputFile := "example_error_output.md"

	if err := os.WriteFile(inputFile, []byte(input), 0644); err != nil {
		log.Fatal(err)
	}
	defer os.Remove(inputFile)
	defer os.Remove(outputFile)

	err := paperformatter.FormatPapers(inputFile, outputFile)
	if err != nil {
		var (
			validationErr *paperformatter.ValidationError
			fileErr       *paperformatter.FileError
			jsonErr       *paperformatter.JSONError
		)
		switch {
		case errors.As(err, &validationErr):
			fmt.Printf("Validation error: %v\n", validationErr)
		case errors.As(err, &fileErr):
			fmt.Printf("File error: %v\n", fileErr)
		case errors.As(err, &jsonErr):
			fmt.Printf("JSON error: %v\n", jsonErr)
		default:
			fmt.Printf("Error: %v\n", err)
		}
	}
	// Output: Validation error: validation error for title: title cannot be empty
}

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module gitea.r8z.us/stwhite/paperformatter
go 1.23.1

118
paperformatter.go Normal file
View File

@ -0,0 +1,118 @@
// Package paperformatter provides functionality to convert JSON files containing paper reviews
// into formatted Markdown documents. It organizes papers into accepted and rejected categories,
// with each entry containing a title, arXiv link, abstract, decision, and explanation.
//
// The package provides robust error handling with custom error types for validation,
// file operations, and JSON parsing issues. It supports case-insensitive decision values
// and properly formats multiline abstracts.
//
// Basic usage:
//
// err := paperformatter.FormatPapers("input.json", "output.md")
// if err != nil {
// log.Fatal(err)
// }
//
// For detailed examples, see the examples in the documentation.
package paperformatter
import (
"encoding/json"
"fmt"
"os"
"strings"
)
// Paper represents a research paper with its basic metadata.
// It is decoded from the "paper" object of an input entry; every field must be
// non-empty for the entry to pass validation.
type Paper struct {
// Title is rendered as the paper's heading in the generated Markdown.
Title string `json:"title"`
// Abstract is rendered as a blockquote; it may span multiple lines.
Abstract string `json:"abstract"`
// ArxivID is used to build the paper's https://arxiv.org/abs/ link.
ArxivID string `json:"arxiv_id"`
}
// Entry represents a paper review entry containing the paper details,
// the review decision, and an explanation for the decision.
// It is the element type of the JSON array read by FormatPapers.
type Entry struct {
// Paper is the paper under review.
Paper Paper `json:"paper"`
// Decision must be "accept" or "reject" (compared case-insensitively); it is
// written to the output exactly as given.
Decision string `json:"decision"`
// Explanation is the reviewer's justification; it must be non-empty.
Explanation string `json:"explanation"`
}
// FormatPapers reads paper reviews from the JSON file at inputFile and writes a
// formatted Markdown document to outputFile. The document has an
// "Accepted Papers" section followed by a "Rejected Papers" section; within
// each section papers keep the order they have in the input.
//
// The input JSON file should contain an array of Entry objects. Each entry must have:
//   - A paper with a title, abstract, and arXiv ID
//   - A decision ("accept" or "reject", case-insensitive)
//   - An explanation for the decision
//
// All entries are validated before anything is written, so no output file is
// produced for invalid input.
//
// The function returns an error if:
//   - The input file cannot be read (*FileError with Op "read")
//   - The JSON is invalid (*JSONError)
//   - Any entry fails validation (a *ValidationError wrapped with the 1-based
//     entry number, e.g. "entry 2: ..."; use errors.As to extract it)
//   - The output file cannot be written (*FileError with Op "write")
func FormatPapers(inputFile, outputFile string) error {
	data, err := os.ReadFile(inputFile)
	if err != nil {
		return &FileError{Path: inputFile, Op: "read", Wrapped: err}
	}

	var entries []Entry
	if err := json.Unmarshal(data, &entries); err != nil {
		return &JSONError{Wrapped: err}
	}

	// Validate all entries before processing so that bad input never results
	// in a partially written report.
	for i, entry := range entries {
		if err := validateEntry(entry); err != nil {
			return fmt.Errorf("entry %d: %w", i+1, err)
		}
	}

	// Render each entry straight into its section. Builders keep this linear
	// in the size of the output, unlike repeated string concatenation.
	var accepted, rejected strings.Builder
	for _, entry := range entries {
		// Validation guarantees the decision is either accept or reject.
		if strings.EqualFold(entry.Decision, "accept") {
			accepted.WriteString(formatEntry(entry))
		} else {
			rejected.WriteString(formatEntry(entry))
		}
	}

	var output strings.Builder
	output.WriteString("# Accepted Papers\n\n")
	output.WriteString(accepted.String())
	output.WriteString("# Rejected Papers\n\n")
	output.WriteString(rejected.String())

	if err := os.WriteFile(outputFile, []byte(output.String()), 0644); err != nil {
		return &FileError{Path: outputFile, Op: "write", Wrapped: err}
	}
	return nil
}
// formatEntry renders a single entry as a Markdown fragment: a level-2 heading
// with the title, an arXiv link, the abstract as a blockquote, and the
// decision and explanation in bold-labelled paragraphs. Every paragraph,
// including the last, is followed by a blank line.
func formatEntry(entry Entry) string {
	// Indexed verbs let the arXiv ID be passed once and used twice.
	const layout = "## %[1]s\n\n" +
		"[arXiv:%[2]s](https://arxiv.org/abs/%[2]s)\n\n" +
		"> %[3]s\n\n" +
		"**Decision:** %[4]s\n\n" +
		"**Explanation:** %[5]s\n\n"

	paper := entry.Paper
	// Continue the blockquote on every line of a multiline abstract; the
	// first line gets its "> " prefix from the layout.
	quoted := strings.Join(strings.Split(paper.Abstract, "\n"), "\n> ")

	return fmt.Sprintf(layout,
		paper.Title,
		paper.ArxivID,
		quoted,
		entry.Decision,
		entry.Explanation,
	)
}

246
paperformatter_test.go Normal file
View File

@ -0,0 +1,246 @@
package paperformatter
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestFormatPapers runs FormatPapers against a table of JSON inputs. Success
// cases inspect the generated Markdown through checkOutput; failure cases
// check that the error message contains errContains and that no output file
// was written.
func TestFormatPapers(t *testing.T) {
	tests := []struct {
		name        string
		input       string
		wantErr     bool
		errContains string
		checkOutput func(t *testing.T, output string)
	}{
		{
			name: "happy path - mixed decisions",
			input: `[
{
"paper": {
"title": "Test Paper 1",
"abstract": "Abstract 1",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good paper"
},
{
"paper": {
"title": "Test Paper 2",
"abstract": "Abstract 2",
"arxiv_id": "2301.0002"
},
"decision": "reject",
"explanation": "Needs work"
}
]`,
			wantErr: false,
			checkOutput: func(t *testing.T, output string) {
				// Check sections exist
				if !strings.Contains(output, "# Accepted Papers") {
					t.Error("Output missing Accepted Papers section")
				}
				if !strings.Contains(output, "# Rejected Papers") {
					t.Error("Output missing Rejected Papers section")
				}
				// Check paper details
				if !strings.Contains(output, "## Test Paper 1") {
					t.Error("Output missing accepted paper title")
				}
				if !strings.Contains(output, "## Test Paper 2") {
					t.Error("Output missing rejected paper title")
				}
				// Check that each paper landed in the right section: the
				// accepted paper precedes the Rejected heading and the
				// rejected paper follows it.
				rejectedHeading := strings.Index(output, "# Rejected Papers")
				if first := strings.Index(output, "## Test Paper 1"); first > rejectedHeading {
					t.Error("Accepted paper listed under Rejected Papers")
				}
				if second := strings.Index(output, "## Test Paper 2"); second < rejectedHeading {
					t.Error("Rejected paper listed under Accepted Papers")
				}
				// Check formatting
				if !strings.Contains(output, "[arXiv:2301.0001]") {
					t.Error("Output missing arXiv link for accepted paper")
				}
				if !strings.Contains(output, "> Abstract 1") {
					t.Error("Output missing abstract blockquote for accepted paper")
				}
				if !strings.Contains(output, "**Decision:** accept") {
					t.Error("Output missing decision for accepted paper")
				}
			},
		},
		{
			name:    "empty input array",
			input:   `[]`,
			wantErr: false,
			checkOutput: func(t *testing.T, output string) {
				if !strings.Contains(output, "# Accepted Papers") {
					t.Error("Output missing Accepted Papers section")
				}
				if !strings.Contains(output, "# Rejected Papers") {
					t.Error("Output missing Rejected Papers section")
				}
			},
		},
		{
			name:        "invalid JSON",
			input:       `not json`,
			wantErr:     true,
			errContains: "JSON parsing error",
		},
		{
			name: "missing title",
			input: `[
{
"paper": {
"title": "",
"abstract": "Abstract",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good paper"
}
]`,
			wantErr:     true,
			errContains: "validation error for title: title cannot be empty",
		},
		{
			name: "missing abstract",
			input: `[
{
"paper": {
"title": "Test Paper",
"abstract": "",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good paper"
}
]`,
			wantErr:     true,
			errContains: "validation error for abstract: abstract cannot be empty",
		},
		{
			name: "invalid decision",
			input: `[
{
"paper": {
"title": "Test Paper",
"abstract": "Abstract",
"arxiv_id": "2301.0001"
},
"decision": "maybe",
"explanation": "Good paper"
}
]`,
			wantErr:     true,
			errContains: "decision must be either 'accept' or 'reject'",
		},
		{
			name: "case insensitive decision",
			input: `[
{
"paper": {
"title": "Test Paper",
"abstract": "Abstract",
"arxiv_id": "2301.0001"
},
"decision": "ACCEPT",
"explanation": "Good paper"
}
]`,
			wantErr: false,
			checkOutput: func(t *testing.T, output string) {
				if !strings.Contains(output, "## Test Paper") {
					t.Error("Output missing paper with uppercase decision")
				}
			},
		},
		{
			name: "multiline abstract",
			input: `[
{
"paper": {
"title": "Test Paper",
"abstract": "Line 1\nLine 2\nLine 3",
"arxiv_id": "2301.0001"
},
"decision": "accept",
"explanation": "Good paper"
}
]`,
			wantErr: false,
			checkOutput: func(t *testing.T, output string) {
				if !strings.Contains(output, "> Line 1\n> Line 2\n> Line 3") {
					t.Error("Output not handling multiline abstract correctly")
				}
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Each subtest gets its own directory (removed automatically by
			// the testing package) so that an output file left behind by one
			// case can never be mistaken for the result of another.
			dir := t.TempDir()

			// Create input file
			inputPath := filepath.Join(dir, "input.json")
			if err := os.WriteFile(inputPath, []byte(tt.input), 0644); err != nil {
				t.Fatalf("Failed to write input file: %v", err)
			}
			outputPath := filepath.Join(dir, "output.md")

			// Run the formatter
			err := FormatPapers(inputPath, outputPath)

			// Check error cases
			if tt.wantErr {
				if err == nil {
					t.Error("Expected error but got none")
				} else if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) {
					t.Errorf("Expected error containing %q but got %q", tt.errContains, err.Error())
				}
				// Parsing and validation happen before writing, so a failed
				// run must not leave an output file behind.
				if _, statErr := os.Stat(outputPath); statErr == nil {
					t.Error("Output file was written despite the error")
				}
				return
			}

			// Check success cases
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}

			// Read and verify output
			output, err := os.ReadFile(outputPath)
			if err != nil {
				t.Fatalf("Failed to read output file: %v", err)
			}
			if tt.checkOutput != nil {
				tt.checkOutput(t, string(output))
			}
		})
	}
}
// TestFormatPapersFileErrors checks that failures to read the input file or
// write the output file are reported as *FileError with the matching Op.
func TestFormatPapersFileErrors(t *testing.T) {
	// All paths live under a directory the testing package cleans up, so the
	// test neither depends on nor pollutes the working directory.
	dir := t.TempDir()

	t.Run("non-existent input file", func(t *testing.T) {
		err := FormatPapers(filepath.Join(dir, "nonexistent.json"), filepath.Join(dir, "output.md"))
		// File errors are returned unwrapped, so a direct type assertion is
		// sufficient here.
		fileErr, ok := err.(*FileError)
		if !ok {
			t.Fatalf("Expected *FileError for non-existent input file, got %T: %v", err, err)
		}
		if fileErr.Op != "read" {
			t.Errorf("Expected Op %q, got %q", "read", fileErr.Op)
		}
	})

	t.Run("invalid output path", func(t *testing.T) {
		// The input must be valid JSON; with an empty file FormatPapers would
		// fail at the parsing stage and never attempt the write under test.
		inputPath := filepath.Join(dir, "input.json")
		if err := os.WriteFile(inputPath, []byte(`[]`), 0644); err != nil {
			t.Fatalf("Failed to write input file: %v", err)
		}

		// The parent directory does not exist, so the write must fail.
		outputPath := filepath.Join(dir, "nonexistent", "directory", "output.md")
		err := FormatPapers(inputPath, outputPath)
		fileErr, ok := err.(*FileError)
		if !ok {
			t.Fatalf("Expected *FileError for invalid output path, got %T: %v", err, err)
		}
		if fileErr.Op != "write" {
			t.Errorf("Expected Op %q, got %q", "write", fileErr.Op)
		}
		if fileErr.Path != outputPath {
			t.Errorf("Expected Path %q, got %q", outputPath, fileErr.Path)
		}
	})
}