paperformatter/paperformatter.go

130 lines
3.7 KiB
Go

// Package paperformatter provides functionality to convert JSON files containing paper reviews
// into formatted Markdown documents. It organizes papers into accepted and rejected categories,
// with each entry containing a title, arXiv link, abstract, decision, and explanation.
//
// The package provides robust error handling with custom error types for validation,
// file operations, and JSON parsing issues. It supports case-insensitive decision values
// and properly formats multiline abstracts.
//
// Basic usage:
//
//	err := paperformatter.FormatPapers("input.json", "output.md")
//	if err != nil {
//		log.Fatal(err)
//	}
//
// For detailed examples, see the examples in the documentation.
package paperformatter
import (
"encoding/json"
"fmt"
"os"
"strings"
)
// Paper represents a research paper with its basic metadata.
// Each field is decoded from the JSON key named in its struct tag.
type Paper struct {
// Title is the paper's title; formatEntry renders it as a level-2 Markdown heading.
Title string `json:"title"`
// Abstract is the paper's abstract; formatEntry renders it as a Markdown
// blockquote, prefixing every line after a newline with "> ".
Abstract string `json:"abstract"`
// ArxivID is the identifier formatEntry places in both the link text and the
// https://arxiv.org/abs/ URL.
ArxivID string `json:"arxiv_id"`
}
// Entry represents a paper review entry containing the paper details,
// the review decision, and an explanation for the decision.
// Each field is decoded from the JSON key named in its struct tag.
type Entry struct {
// Paper holds the metadata of the paper under review.
Paper Paper `json:"paper"`
// Decision is the review outcome. FormatPapers compares it case-insensitively
// with "ACCEPT"; entries that do not match are listed under rejected papers.
Decision string `json:"decision"`
// Explanation is the free-text justification printed after "**Explanation:**".
Explanation string `json:"explanation"`
}
// FormatPapers reads paper reviews from the JSON file inputFile and writes a
// Markdown document to outputFile. The document has an "Accepted Papers"
// section followed by a "Rejected Papers" section; both headings are always
// emitted, even when a section has no entries.
//
// Two input layouts are supported:
//   - a JSON array of Entry objects, or
//   - a JSON object with "accepted" and/or "rejected" arrays of Entry objects.
//
// In both layouts an entry is placed by its own Decision field, not by the
// array it came from: a Decision equal to "accept" (ignoring case) goes to the
// accepted section, every other validated entry goes to the rejected section.
// Entries keep their input order within each section.
//
// The function returns an error if:
//   - the input file cannot be read (*FileError with Op "read")
//   - the JSON cannot be decoded in either layout (*JSONError)
//   - validateEntry rejects an entry; the error is wrapped with the 1-based
//     entry position ("entry N: ...") and remains reachable via errors.As/Is
//   - the output file cannot be written (*FileError with Op "write")
//
// Nothing is written to outputFile unless every entry passes validation.
func FormatPapers(inputFile, outputFile string) error {
	data, err := os.ReadFile(inputFile)
	if err != nil {
		return &FileError{
			Path:    inputFile,
			Op:      "read",
			Wrapped: err,
		}
	}

	// Try the plain-array layout first.
	var entries []Entry
	if err := json.Unmarshal(data, &entries); err != nil {
		// If the document is array-shaped, the array error is the real
		// diagnosis. Falling through to the object layout would replace it
		// with a misleading "cannot unmarshal array into ... struct" message.
		if strings.HasPrefix(strings.TrimLeft(string(data), " \t\r\n"), "[") {
			return &JSONError{Wrapped: err}
		}

		// Otherwise try the object layout with accepted/rejected arrays.
		var input struct {
			Accepted []Entry `json:"accepted"`
			Rejected []Entry `json:"rejected"`
		}
		if err := json.Unmarshal(data, &input); err != nil {
			return &JSONError{Wrapped: err}
		}
		entries = make([]Entry, 0, len(input.Accepted)+len(input.Rejected))
		entries = append(entries, input.Accepted...)
		entries = append(entries, input.Rejected...)
	}

	// Validate and partition in one pass. Any validation failure returns
	// before output is produced, so no partial document is ever written.
	var accepted, rejected []Entry
	for i, entry := range entries {
		if err := validateEntry(entry); err != nil {
			return fmt.Errorf("entry %d: %w", i+1, err)
		}
		if strings.EqualFold(entry.Decision, "accept") {
			accepted = append(accepted, entry)
		} else {
			rejected = append(rejected, entry)
		}
	}

	// A Builder avoids re-copying the whole document on every append.
	var doc strings.Builder
	doc.WriteString("# Accepted Papers\n\n")
	for _, entry := range accepted {
		doc.WriteString(formatEntry(entry))
	}
	doc.WriteString("# Rejected Papers\n\n")
	for _, entry := range rejected {
		doc.WriteString(formatEntry(entry))
	}

	if err := os.WriteFile(outputFile, []byte(doc.String()), 0644); err != nil {
		return &FileError{
			Path:    outputFile,
			Op:      "write",
			Wrapped: err,
		}
	}
	return nil
}
// formatEntry renders a single review entry as a Markdown fragment made of
// five sections, each followed by a blank line:
//
//  1. a level-2 heading with the paper title,
//  2. a link to the paper's arXiv abstract page,
//  3. the abstract as a blockquote,
//  4. the decision, and
//  5. the explanation.
func formatEntry(entry Entry) string {
	paper := entry.Paper

	// Continue the blockquote across line breaks inside the abstract.
	quotedAbstract := strings.ReplaceAll(paper.Abstract, "\n", "\n> ")

	sections := []string{
		"## " + paper.Title,
		// %[1]s reuses the single argument for both the label and the URL.
		fmt.Sprintf("[arXiv:%[1]s](https://arxiv.org/abs/%[1]s)", paper.ArxivID),
		"> " + quotedAbstract,
		"**Decision:** " + entry.Decision,
		"**Explanation:** " + entry.Explanation,
	}

	const blankLine = "\n\n"
	return strings.Join(sections, blankLine) + blankLine
}