Skip to content

Commit e1aa0c9

Browse files
author
R. S. Doiel
committed
Quick Save
1 parent fd34fb1 commit e1aa0c9

File tree

13 files changed

+223
-26
lines changed

13 files changed

+223
-26
lines changed

CITATION.cff

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ cff-version: 1.2.0
33
message: "If you use this software, please cite it as below."
44
type: software
55
title: articlefetch
6-
abstract: "A client that will submit an search to an RDM instance and retrieve the articles in the results."
6+
abstract: "A client that will find the person ID provided in Caltech Library's feed then retrieve the list of articles
7+
before fetching the PDFs from our RDM instance."
78
authors:
89
- family-names: Doiel
910
given-names: R. S.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
# articlefetch
44

5-
A client that will submit an search to an RDM instance and retrieve the articles in the results.
6-
5+
A client that will find the person ID provided in Caltech Library's feed then retrieve the list of articles
6+
before fetching the PDFs from our RDM instance.
77

88

99
### Authors

about.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
---
22
title: articlefetch
3-
abstract: "A client that will submit an search to an RDM instance and retrieve the articles in the results."
3+
abstract: "A client that will find the person ID provided in Caltech Library's feed then retrieve the list of articles
4+
before fetching the PDFs from our RDM instance."
45
authors:
56
- family_name: Doiel
67
given_name: R. S.
@@ -33,7 +34,8 @@ About this software
3334

3435

3536

36-
A client that will submit an search to an RDM instance and retrieve the articles in the results.
37+
A client that will find the person ID provided in Caltech Library's feed then retrieve the list of articles
38+
before fetching the PDFs from our RDM instance.
3739

3840

3941
- GitHub: <https://github.com/caltechlibrary/articlefetch>

articlefetch.1.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
%articlefetch(1) user manual | version 0.0.0 0fd37b8
1+
%articlefetch(1) user manual | version 0.0.0 fd34fb1
22
% R. S. Doiel
33
% 2025-10-23
44

@@ -8,12 +8,12 @@ articlefetch
88

99
# SYNOPSIS
1010

11-
articlefetch [OPTIONS] HOSTNAME QUERY_STRING
11+
articlefetch [OPTIONS] RDM_HOSTNAME CLPID
1212

1313
# DESCRIPTION
1414

15-
Take the HOSTNAME and QUERY_STRING values, retrieve the results from RDM and then using the results
16-
retrieve the PDFs.
15+
Use the CLPID provided to retrieve a list of articles from feeds, then use the
16+
RDM_HOSTNAME to retrieve the PDFs for the articles found.
1717

1818
# OPTIONS
1919

@@ -29,7 +29,7 @@ retrieve the PDFs.
2929
# EXAMPLE
3030

3131
~~~shell
32-
articlefetch authors.library.caltech.edu "Grubbs, Robert"
32+
articlefetch authors.library.caltech.edu Grubbs-R-H
3333
~~~
3434

3535

articlefetch.go

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,50 @@
11
package articlefetch
22

33
import (
4+
"fmt"
45
"io"
6+
"os"
7+
"strings"
8+
"time"
59
)
610

7-
func Run(in io.Reader, out io.Writer, eout io.Writer, appName string, hostname string, query string) int {
8-
return 1 // DEBUG
11+
func Run(in io.Reader, out io.Writer, eout io.Writer, appName string, hostname string, clpid string) int {
12+
// NOTE: URL encode our query string
13+
feedsUrl := FeedsURL(clpid)
14+
rdmIds, err := FeedsRdmIds(feedsUrl)
15+
if err != nil {
16+
fmt.Fprintf(os.Stderr, "%s\n", err)
17+
return 1
18+
}
19+
tot := len(rdmIds)
20+
retrieved := 0
21+
fmt.Printf("retrieving %d records\n", tot)
22+
pdfToRetrieve := []string{}
23+
for i, id := range rdmIds {
24+
rdmUrl := RdmRecordURL(hostname, id)
25+
src, duration, err := RdmFetchJSON(rdmUrl)
26+
if err != nil {
27+
fmt.Fprintf(os.Stderr, "%s\n", err)
28+
continue
29+
}
30+
time.Sleep(duration)
31+
32+
pdfUrls, err := RdmPdfURLs(src)
33+
if err != nil {
34+
fmt.Fprintf(os.Stderr, "failed to find a pdfUrls %q, %s\n", rdmUrl, err)
35+
continue
36+
}
37+
if len(pdfUrls) > 0 {
38+
fmt.Printf("DEBUG pdfUrls (%d)\n\t%+v\n", len(pdfUrls), strings.Join(pdfUrls, "\n\t"))
39+
pdfToRetrieve = append(pdfToRetrieve, pdfUrls...)
40+
time.Sleep(10 * time.Second)
41+
}
42+
retrieved += 1
43+
if (i % 5) == 0{
44+
fmt.Printf("%d/%d processed\n", i+1, tot)
45+
}
46+
}
47+
fmt.Printf("%d/%d retrieved\n", retrieved, tot)
48+
fmt.Printf("Retrieve the following URL:\n\n%s\n\n", strings.Join(pdfToRetrieve, "\n\t"))
49+
return 0
950
}

cmd/articlefetch/articlefetch.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ func main() {
4242

4343
// handle missing hostname and query
4444
if len(args) != 2 {
45-
fmt.Fprintf(os.Stderr, "missing hostname or query")
45+
fmt.Fprintf(os.Stderr, "missing RDM hostname or clpid")
4646
os.Exit(1)
4747
}
48-
hostname, query := args[0], args[1]
49-
os.Exit(articlefetch.Run(os.Stdin, os.Stdout, os.Stderr, appName, hostname, query))
48+
hostname, clpid:= args[0], args[1]
49+
os.Exit(articlefetch.Run(os.Stdin, os.Stdout, os.Stderr, appName, hostname, clpid))
5050
}

codemeta.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"dateCreated": "2025-10-23",
1515
"dateModified": "2025-10-23",
1616
"datePublished": "2025-10-23",
17-
"description": "A client that will submit an search to an RDM instance and retrieve the articles in the results.",
17+
"description": "A client that will find the person ID provided in Caltech Library's feed then retrieve the list of articles\nbefore fetching the PDFs from our RDM instance.",
1818
"name": "articlefetch",
1919
"version": "0.0.0",
2020
"developmentStatus": "concept",

helptext.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ const (
1111
1212
# SYNOPSIS
1313
14-
{app_name} [OPTIONS] HOSTNAME QUERY_STRING
14+
{app_name} [OPTIONS] RDM_HOSTNAME CLPID
1515
1616
# DESCRIPTION
1717
18-
Take the HOSTNAME and QUERY_STRING values, retrieve the results from RDM and then using the results
19-
retrieve the PDFs.
18+
Use the CLPID provided to retrieve a list of articles from feeds, then use the
19+
RDM_HOSTNAME to retrieve the PDFs for the articles found.
2020
2121
# OPTIONS
2222
@@ -32,7 +32,7 @@ retrieve the PDFs.
3232
# EXAMPLE
3333
3434
~~~shell
35-
{app_name} authors.library.caltech.edu "Grubbs, Robert"
35+
{app_name} authors.library.caltech.edu Grubbs-R-H
3636
~~~
3737
3838
`

installer.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env pwsh
2-
# generated with CMTools 0.0.0 0fd37b8
2+
# generated with CMTools 0.0.0 fd34fb1
33

44
#
55
# Set the package name and version to install

installer.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/sh
2-
# generated with CMTools 0.0.0 0fd37b8
2+
# generated with CMTools 0.0.0 fd34fb1
33

44
#
55
# Set the package name and version to install

0 commit comments

Comments
 (0)