paper-system/arxiv-processor/arxiv/client.go

79 lines
1.9 KiB
Go
Raw Permalink Normal View History

2025-01-24 15:26:47 +00:00
package arxiv
import (
"context"
"encoding/xml"
"fmt"
"io"
"net/http"
"net/url"
"time"
)
// Client is an arXiv API client. It pairs the query endpoint with the HTTP
// client used to reach it.
type Client struct {
	// baseURL is the endpoint to which the encoded query string is appended.
	baseURL string
	// httpClient performs the outgoing HTTP requests.
	httpClient *http.Client
}
// NewClient returns a Client that targets the public arXiv export query
// endpoint and uses an HTTP client with a 30-second timeout.
func NewClient() *Client {
	client := new(Client)
	client.baseURL = "http://export.arxiv.org/api/query"
	client.httpClient = &http.Client{Timeout: 30 * time.Second}
	return client
}
// Query holds the search parameters that FetchPapers sends to the arXiv API.
type Query struct {
	// Category is the leading term of the search_query expression.
	Category string
	// DateRange is placed inside the submittedDate:[...] filter.
	DateRange string
	// MaxResults is sent as the max_results parameter.
	MaxResults int
	// StartOffset is sent as the start parameter.
	StartOffset int
}
// Paper is a single arXiv paper, decoded from one <entry> element of the API
// response. Each field is filled from the child element named in its xml tag.
type Paper struct {
	// ID is the text of the <id> element.
	ID string `xml:"id"`
	// Title is the text of the <title> element.
	Title string `xml:"title"`
	// Summary is the text of the <summary> element.
	Summary string `xml:"summary"`
	// Published is the timestamp parsed from the <published> element.
	Published time.Time `xml:"published"`
	// Updated is the timestamp parsed from the <updated> element.
	Updated time.Time `xml:"updated"`
	// Authors has one item per <author> element, in document order.
	Authors []Author `xml:"author"`
}
// Author is one author of a paper.
type Author struct {
	// Name is the text of the <name> child element.
	Name string `xml:"name"`
}
// FetchPapers retrieves papers from the arXiv API.
//
// The search expression sent is "<Category> AND submittedDate:[<DateRange>]";
// MaxResults and StartOffset are sent as the max_results and start
// parameters. The request is bound to ctx, so cancelling ctx or exceeding its
// deadline aborts the call. A nil ctx is treated as context.Background() so
// that callers which previously passed nil keep working.
//
// It returns the decoded entries, or an error when the request cannot be
// built or sent, when the server answers with a status other than 200 OK, or
// when the response body cannot be parsed.
func (c *Client) FetchPapers(ctx context.Context, query Query) ([]Paper, error) {
	if ctx == nil {
		ctx = context.Background()
	}

	params := url.Values{}
	params.Add("search_query", fmt.Sprintf("%s AND submittedDate:[%s]", query.Category, query.DateRange))
	params.Add("max_results", fmt.Sprintf("%d", query.MaxResults))
	params.Add("start", fmt.Sprintf("%d", query.StartOffset))

	// Build the request with ctx attached; a plain Get would ignore the
	// caller's cancellation and deadline.
	endpoint := fmt.Sprintf("%s?%s", c.baseURL, params.Encode())
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to build request: %w", err)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch papers: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}
	return parseResponse(resp.Body)
}
// parseResponse decodes an XML document from r and returns the papers found
// in its <entry> child elements. A decoding failure is wrapped and returned
// with a nil slice.
func parseResponse(r io.Reader) ([]Paper, error) {
	// feedDoc captures only the <entry> children of the document root.
	type feedDoc struct {
		Entries []Paper `xml:"entry"`
	}

	decoder := xml.NewDecoder(r)
	parsed := feedDoc{}
	err := decoder.Decode(&parsed)
	if err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return parsed.Entries, nil
}