-
Notifications
You must be signed in to change notification settings - Fork 1
Df dev with cfgURL and datacatalog in release graph #57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 4 commits
59c259b
98053e1
22099ff
c897b6f
e34ad75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| 2.0.18-df-development | ||
| 2.0.19-df-development |
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ import ( | |
| "io" | ||
| "strings" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/spf13/viper" | ||
|
|
||
|
|
@@ -17,6 +18,11 @@ import ( | |
| "github.com/minio/minio-go/v7" | ||
| ) | ||
|
|
||
// getLastElement returns the final non-empty "/"-separated segment of s.
//
// Trailing slashes are ignored, so "summoned/dataverse/" yields "dataverse"
// rather than the empty string that a plain split-and-index would give.
// A string with no "/" is returned unchanged; an empty string or a string
// made only of slashes yields "".
func getLastElement(s string) string {
	// Drop trailing separators first so the last segment is never empty
	// merely because the caller wrote the prefix in directory style.
	trimmed := strings.TrimRight(s, "/")
	if i := strings.LastIndex(trimmed, "/"); i >= 0 {
		return trimmed[i+1:]
	}
	return trimmed
}
|
|
||
| // PipeCopy writes a new object based on a prefix; this function assumes the objects are valid when concatenated | ||
| // v1: viper config object | ||
| // mc: minio client pointer | ||
|
|
@@ -47,10 +53,6 @@ func PipeCopy(v1 *viper.Viper, mc *minio.Client, name, bucket, prefix, destprefi | |
| } | ||
| }(pw) | ||
|
|
||
| // Set and use a "single file flag" to bypass Skolemization, since if it is a single file | ||
| // the JSON-LD to RDF will correctly map blank nodes. | ||
| // NOTE: with a background context we can't get the len(channel) so we have to iterate it. | ||
| // This is fast, but it means we have to do the ListObjects twice | ||
| clen := 0 | ||
| sf := false | ||
| ctx, cancel := context.WithCancel(context.Background()) | ||
|
|
@@ -67,25 +69,23 @@ func PipeCopy(v1 *viper.Viper, mc *minio.Client, name, bucket, prefix, destprefi | |
|
|
||
| objectCh := mc.ListObjects(context.Background(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: isRecursive}) | ||
|
|
||
| // for object := range mc.ListObjects(context.Background(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: isRecursive}, doneCh) { | ||
| lastProcessed := false | ||
| idList := make([]string, 0) | ||
| for object := range objectCh { | ||
| fo, err := mc.GetObject(context.Background(), bucket, object.Key, minio.GetObjectOptions{}) | ||
| if err != nil { | ||
| fmt.Println(err) | ||
| continue | ||
| } | ||
|
|
||
| var b bytes.Buffer | ||
| bw := bufio.NewWriter(&b) | ||
|
|
||
| _, err = io.Copy(bw, fo) | ||
| if err != nil { | ||
| log.Println(err) | ||
| continue | ||
| } | ||
|
|
||
| s := string(b.Bytes()) | ||
|
|
||
| nq := "" | ||
| //log.Println("Calling JSONLDtoNQ") | ||
| if strings.HasSuffix(object.Key, ".nq") { | ||
| nq = s | ||
| } else { | ||
|
|
@@ -95,33 +95,64 @@ func PipeCopy(v1 *viper.Viper, mc *minio.Client, name, bucket, prefix, destprefi | |
| return | ||
| } | ||
| } | ||
|
|
||
| var snq string | ||
|
|
||
| if sf { | ||
| snq = nq // just pass through the RDF without trying to Skolemize since we ar a single fil | ||
| snq = nq | ||
| } else { | ||
| snq, err = graph.Skolemization(nq, object.Key) | ||
| if err != nil { | ||
| return | ||
| } | ||
| } | ||
|
|
||
| // 1) get graph URI | ||
| ctx, err := graph.MakeURN(v1, object.Key) | ||
| if err != nil { | ||
| return | ||
| } | ||
| // 2) convert NT to NQ | ||
| csnq, err := graph.NtToNq(snq, ctx) | ||
| if err != nil { | ||
| return | ||
| } | ||
|
|
||
| _, err = pw.Write([]byte(csnq)) | ||
| if err != nil { | ||
| return | ||
| } | ||
| idList = append(idList, ctx) | ||
| lastProcessed = true | ||
| } | ||
|
|
||
| // Once we are done with the loop, put in the triples to associate all the graphURIs with the org. | ||
| if lastProcessed { | ||
|
|
||
| data := `_:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DataCatalog> . | ||
|
||
| _:b0 <https://schema.org/dateCreated> "` + time.Now().Format("2006-01-02 15:04:05") + `" . | ||
| _:b0 <https://schema.org/description> "GleanerIO Nabu generated catalog" . | ||
| _:b0 <https://schema.org/provider> _:b1 . | ||
|
||
| _:b0 <https://schema.org/publisher> _:b2 . | ||
|
||
| _:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Organization> . | ||
| _:b1 <https://schema.org/name> "` + getLastElement(prefix) + `" . | ||
| _:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Organization> . | ||
| _:b2 <https://schema.org/name> "` + bucket + `" .` | ||
|
|
||
| for _, item := range idList { | ||
| data += `_:b0 <https://schema.org/dataset> <` + item + `> .` + "\n" | ||
| } | ||
|
|
||
| // TODO MakeURN with _:b0 Q's Will this work with a blank node? do after Skolemization? | ||
| // namedgraph, err := graph.MakeURN(v1, "resource IRI") | ||
| // sdataWithContext, err := graph.NtToNq(sdata, namedgraph) | ||
|
|
||
| // TODO: Skolemize with sdataWithContext | ||
| sdata, err := graph.Skolemization(data, "release graph prov for ORG") | ||
| if err != nil { | ||
| log.Println(err) | ||
| } | ||
|
|
||
| // Perform the final write to the pipe here | ||
| // ilstr := strings.Join(idList, ",") | ||
| _, err = pw.Write([]byte(sdata)) | ||
| if err != nil { | ||
| log.Println(err) | ||
| } | ||
| } | ||
| }() | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nabu release --cfgURL https://provisium.io/data/nabuconfig.yaml --prefix summoned/dataverse --endpoint localoxi