// Package arxiva queries the arXiv export API for papers submitted within a
// date range and writes the results to a JSON file.
package arxiva

import (
	"encoding/json"
	"encoding/xml"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
	"time"
)

// httpClient is the shared client used for every arXiv request. The timeout
// bounds the whole exchange (connect, headers and body) so a stalled server
// cannot block FetchPapers forever.
var httpClient = &http.Client{Timeout: 2 * time.Minute}

// filenameReplacer maps every character rejected by SanitizeFilename to "_"
// in a single pass over the input.
var filenameReplacer = strings.NewReplacer(
	"/", "_",
	"\\", "_",
	"?", "_",
	"%", "_",
	"*", "_",
	":", "_",
	"|", "_",
	"\"", "_",
	"<", "_",
	">", "_",
	" ", "_",
)

// Paper is the JSON representation of a single search result.
type Paper struct {
	Title    string `json:"title"`
	Abstract string `json:"abstract"`
	ArxivID  string `json:"arxiv_id"`
}

// Feed is the root <feed> element of the XML document returned by the API.
type Feed struct {
	XMLName xml.Name `xml:"feed"`
	Entries []Entry  `xml:"entry"`
}

// Entry is one <entry> element of a Feed.
type Entry struct {
	Title    string `xml:"title"`
	Abstract string `xml:"summary"`
	ID       string `xml:"id"`
}

// SanitizeFilename returns query with each of the characters
// / \ ? % * : | " < > and the space replaced by an underscore, so the result
// can be embedded in a file name.
func SanitizeFilename(query string) string {
	return filenameReplacer.Replace(query)
}

// extractArxivID returns the arXiv identifier contained in an entry's id URL.
//
// Everything after the "/abs/" path marker is returned, so identifiers that
// themselves contain a slash (for example "hep-th/9901001v1") keep their
// archive prefix. When the marker is absent the text after the last "/" is
// returned, or the whole input if it contains no "/".
func extractArxivID(idURL string) string {
	const absMarker = "/abs/"
	if i := strings.Index(idURL, absMarker); i >= 0 {
		return idURL[i+len(absMarker):]
	}
	// LastIndex yields -1 when there is no slash, which makes the slice start
	// at 0 and returns the input unchanged.
	return idURL[strings.LastIndex(idURL, "/")+1:]
}

// FetchPapers queries the arXiv API for at most maxPapers entries matching
// query whose submittedDate lies between startDate and endDate.
//
// startDate and endDate must parse with the layout "20060102" (YYYYMMDD) and
// maxPapers must be between 1 and 2000. These checks run before any request
// is sent. An error is returned when the request fails, when the server
// answers with a status other than 200 OK, or when the response body is not
// parseable XML. The returned slice is nil when the feed has no entries.
func FetchPapers(startDate string, endDate string, query string, maxPapers int) ([]Paper, error) {
	const (
		baseURL    = "http://export.arxiv.org/api/query"
		dateLayout = "20060102"
	)

	if maxPapers < 1 || maxPapers > 2000 {
		return nil, fmt.Errorf("maxPapers must be between 1 and 2000")
	}
	if _, err := time.Parse(dateLayout, startDate); err != nil {
		return nil, fmt.Errorf("invalid start date format: %w", err)
	}
	if _, err := time.Parse(dateLayout, endDate); err != nil {
		return nil, fmt.Errorf("invalid end date format: %w", err)
	}

	searchQuery := fmt.Sprintf("submittedDate:[%s TO %s] AND %s", startDate, endDate, query)
	params := url.Values{}
	params.Add("search_query", searchQuery)
	params.Add("max_results", fmt.Sprintf("%d", maxPapers))

	req, err := http.NewRequest(http.MethodGet, baseURL+"?"+params.Encode(), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch from arXiv: %w", err)
	}
	defer resp.Body.Close()

	// Reject error responses up front instead of handing their bodies to the
	// XML parser, which would either fail with a misleading message or
	// silently produce bogus results.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected response status from arXiv: %s", resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	var feed Feed
	if err := xml.Unmarshal(body, &feed); err != nil {
		return nil, fmt.Errorf("failed to parse XML: %w", err)
	}

	var papers []Paper
	for _, entry := range feed.Entries {
		papers = append(papers, Paper{
			Title:    strings.TrimSpace(entry.Title),
			Abstract: strings.TrimSpace(entry.Abstract),
			ArxivID:  extractArxivID(entry.ID),
		})
	}
	return papers, nil
}

// SaveToFile writes papers as indented JSON to a file named
// "<startDate>-<endDate>-<sanitized query>.json", relative to the current
// working directory, with permissions 0644.
//
// It returns an error when papers is empty, when JSON encoding fails, or when
// the file cannot be written.
func SaveToFile(papers []Paper, startDate string, endDate string, query string) error {
	if len(papers) == 0 {
		return fmt.Errorf("no papers to save")
	}

	filename := fmt.Sprintf("%s-%s-%s.json", startDate, endDate, SanitizeFilename(query))

	data, err := json.MarshalIndent(papers, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal JSON: %w", err)
	}
	if err := ioutil.WriteFile(filename, data, 0644); err != nil {
		return fmt.Errorf("failed to write file: %w", err)
	}
	return nil
}