Skip to content

Commit 22099ff

Browse files
committed
Enhance config loading and data processing
Added support for loading configuration files from a URL and improved data processing by implementing comprehensive Skolemization and graph association. Enhanced bulkLoader with a new flag for archiving, updated documentation, and incremented the version.
1 parent 98053e1 commit 22099ff

File tree

6 files changed

+119
-20
lines changed

6 files changed

+119
-20
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.0.18-df-development
1+
2.0.19-df-development

docs/httpSPARQL.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,15 @@ curl -H 'Accept: application/sparql-results+json' http://coreos.lan:9090/blazegr
2323

2424
```bash
2525
curl -H 'Accept: application/sparql-results+json' http://coreos.lan:9090/blazegraph/namespace/iow/sparql --data-urlencode 'query=SELECT (COUNT(DISTINCT ?graph) AS ?namedGraphsCount)(COUNT(*) AS ?triplesCount)WHERE {GRAPH ?graph {?subject ?predicate ?object}}'
26+
```
27+
28+
29+
### Oxigraph
30+
31+
```bash
32+
curl -i -X PUT -H 'Content-Type:text/x-nquads' --data-binary @veupathdb_release.nq http://localhost:7878/store
33+
```
34+
35+
```bash
36+
curl -i -X POST -H 'Content-Type:text/x-nquads' --data-binary @veupathdb_release.nq http://localhost:7878/store
2637
```

internal/objects/pipecopy.go

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,6 @@ func PipeCopy(v1 *viper.Viper, mc *minio.Client, name, bucket, prefix, destprefi
4747
}
4848
}(pw)
4949

50-
// Set and use a "single file flag" to bypass skolimaization since if it is a single file
51-
// the JSON-LD to RDF will correctly map blank nodes.
52-
// NOTE: with a background context we can't get the len(channel) so we have to iterate it.
53-
// This is fast, but it means we have to do the ListObjects twice
5450
clen := 0
5551
sf := false
5652
ctx, cancel := context.WithCancel(context.Background())
@@ -67,25 +63,23 @@ func PipeCopy(v1 *viper.Viper, mc *minio.Client, name, bucket, prefix, destprefi
6763

6864
objectCh := mc.ListObjects(context.Background(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: isRecursive})
6965

70-
// for object := range mc.ListObjects(context.Background(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: isRecursive}, doneCh) {
66+
lastProcessed := false
67+
idList := make([]string, 0)
7168
for object := range objectCh {
7269
fo, err := mc.GetObject(context.Background(), bucket, object.Key, minio.GetObjectOptions{})
7370
if err != nil {
7471
fmt.Println(err)
72+
continue
7573
}
76-
7774
var b bytes.Buffer
7875
bw := bufio.NewWriter(&b)
79-
8076
_, err = io.Copy(bw, fo)
8177
if err != nil {
8278
log.Println(err)
79+
continue
8380
}
84-
8581
s := string(b.Bytes())
86-
8782
nq := ""
88-
//log.Println("Calling JSONLDtoNQ")
8983
if strings.HasSuffix(object.Key, ".nq") {
9084
nq = s
9185
} else {
@@ -95,33 +89,59 @@ func PipeCopy(v1 *viper.Viper, mc *minio.Client, name, bucket, prefix, destprefi
9589
return
9690
}
9791
}
98-
9992
var snq string
100-
10193
if sf {
102-
snq = nq // just pass through the RDF without trying to Skolemize since we ar a single fil
94+
snq = nq
10395
} else {
10496
snq, err = graph.Skolemization(nq, object.Key)
10597
if err != nil {
10698
return
10799
}
108100
}
109-
110-
// 1) get graph URI
111101
ctx, err := graph.MakeURN(v1, object.Key)
112102
if err != nil {
113103
return
114104
}
115-
// 2) convert NT to NQ
116105
csnq, err := graph.NtToNq(snq, ctx)
117106
if err != nil {
118107
return
119108
}
120-
121109
_, err = pw.Write([]byte(csnq))
122110
if err != nil {
123111
return
124112
}
113+
idList = append(idList, ctx)
114+
lastProcessed = true
115+
}
116+
117+
// Once we are done with the loop, put in the triples to associate all the graphURIs with the org.
118+
if lastProcessed {
119+
120+
data := `_:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DataCatalog> .
121+
_:b0 <https://schema.org/dateCreated> "2024-09-20" .
122+
_:b0 <https://schema.org/description> "This is an example data catalog containing various datasets from this organization" .
123+
_:b0 <https://schema.org/provider> _:b1 .
124+
_:b0 <https://schema.org/publisher> _:b2 .
125+
_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Organization> .
126+
_:b1 <https://schema.org/name> "Provider XYZ" .
127+
_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Organization> .
128+
_:b2 <https://schema.org/name> "DeCoder" .
129+
`
130+
for _, item := range idList {
131+
data += `_:b0 <https://schema.org/dataset> <` + item + `> .` + "\n"
132+
}
133+
134+
sdata, err := graph.Skolemization(data, "release graph prov for ORG")
135+
if err != nil {
136+
log.Println(err)
137+
}
138+
139+
// Perform the final write to the pipe here
140+
// ilstr := strings.Join(idList, ",")
141+
_, err = pw.Write([]byte(sdata))
142+
if err != nil {
143+
log.Println(err)
144+
}
125145
}
126146
}()
127147

internal/services/releases/bulkLoader.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ func BulkRelease(v1 *viper.Viper, mc *minio.Client) error {
5757
return err
5858
}
5959

60+
// TODO Should this be optional / controlled by flag?
6061
// Copy the "latest" graph just made to archive with a date
6162
// This means the graph in latests is a duplicate of the most recently dated version in archive/{provider}
6263
const layout = "2006-01-02-15-04-05"

pkg/cli/root.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import (
1818
"github.com/spf13/viper"
1919
)
2020

21-
var cfgFile, cfgName, cfgPath, nabuConfName string
21+
var cfgFile, cfgURL, cfgName, cfgPath, nabuConfName string
2222
var minioVal, portVal, accessVal, secretVal, bucketVal string
2323
var sslVal, dangerousVal bool
2424
var viperVal *viper.Viper
@@ -75,6 +75,7 @@ func init() {
7575
// Enpoint Server setting var
7676
rootCmd.PersistentFlags().StringVar(&endpointVal, "endpoint", "", "end point server set for the SPARQL endpoints")
7777

78+
rootCmd.PersistentFlags().StringVar(&cfgURL, "cfgURL", "configs", "URL location for config file")
7879
rootCmd.PersistentFlags().StringVar(&cfgPath, "cfgPath", "configs", "base location for config files (default is configs/)")
7980
rootCmd.PersistentFlags().StringVar(&cfgName, "cfgName", "local", "config file (default is local so configs/local)")
8081
rootCmd.PersistentFlags().StringVar(&nabuConfName, "nabuConfName", "nabu", "config file (default is local so configs/local)")
@@ -105,6 +106,11 @@ func initConfig() {
105106
if err != nil {
106107
log.Fatal("cannot read config %s", err)
107108
}
109+
} else if cfgURL != "" {
110+
viperVal, err = config.ReadNabuConfigURL(cfgURL)
111+
if err != nil {
112+
log.Fatal("cannot read config URL %s", err)
113+
}
108114
} else {
109115
// Find home directory.
110116
//home, err := os.UserHomeDir()

pkg/config/nabuConfig.go

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
package config
22

3-
import "github.com/spf13/viper"
3+
import (
4+
"fmt"
5+
"github.com/spf13/viper"
6+
"io"
7+
"net/http"
8+
"strings"
9+
)
410

511
var nabuTemplate = map[string]interface{}{
612
"minio": MinioTemplate,
@@ -28,3 +34,58 @@ func ReadNabuConfig(filename string, cfgPath string) (*viper.Viper, error) {
2834
err := v.ReadInConfig()
2935
return v, err
3036
}
37+
38+
func ReadNabuConfigURL(configURL string) (*viper.Viper, error) {
39+
v := viper.New()
40+
for key, value := range nabuTemplate {
41+
v.SetDefault(key, value)
42+
}
43+
44+
fmt.Printf("Reading config from URL: %v\n", configURL)
45+
46+
resp, err := http.Get(configURL)
47+
if err != nil {
48+
return v, err
49+
}
50+
defer resp.Body.Close()
51+
52+
if resp.StatusCode != http.StatusOK {
53+
return v, fmt.Errorf("HTTP request failed with status code %v", resp.StatusCode)
54+
}
55+
56+
// Read the content of the config file
57+
configData, err := io.ReadAll(resp.Body)
58+
if err != nil {
59+
return v, err
60+
}
61+
62+
// Convert configData to a string
63+
configString := string(configData)
64+
65+
// Convert the string to an io.Reader
66+
reader := strings.NewReader(configString)
67+
68+
//v.SetConfigName(fileNameWithoutExtTrimSuffix(filename))
69+
//v.AddConfigPath(cfgPath)
70+
v.SetConfigType("yaml")
71+
//v.BindEnv("headless", "GLEANER_HEADLESS_ENDPOINT")
72+
v.BindEnv("minio.address", "MINIO_ADDRESS")
73+
v.BindEnv("minio.port", "MINIO_PORT")
74+
v.BindEnv("minio.ssl", "MINIO_USE_SSL")
75+
v.BindEnv("minio.accesskey", "MINIO_ACCESS_KEY")
76+
v.BindEnv("minio.secretkey", "MINIO_SECRET_KEY")
77+
v.BindEnv("minio.bucket", "MINIO_BUCKET")
78+
v.AutomaticEnv()
79+
80+
err = v.ReadConfig(reader)
81+
if err != nil {
82+
fmt.Printf("Error reading config from URL: %v\n", err)
83+
return v, err
84+
}
85+
86+
fmt.Printf("Config read from URL: %v\n", v.AllSettings())
87+
88+
//err = v.ReadInConfig()
89+
90+
return v, err
91+
}

0 commit comments

Comments
 (0)