Skip to content

Commit 552a703

Browse files
committed
Introducing Resource.RawData. Already updating README.md with section about reading raw data. #13
1 parent 6756cb0 commit 552a703

File tree

4 files changed

+187
-19
lines changed

4 files changed

+187
-19
lines changed

README.md

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ A Go library for working with [Data Packages](http://specs.frictionlessdata.io/d
88
- [datapackage-go](#datapackage-go)
99
- [Install](#install)
1010
- [Main Features](#main-features)
11-
- [Loading and validating data package descriptors](#loading-and-validating-data-package-descriptors)
11+
- [Loading and validating tabular data package descriptors](#loading-and-validating-tabular-data-package-descriptors)
1212
- [Accessing data package resources](#accessing-data-package-resources)
1313
- [Loading zip bundles](#loading-zip-bundles)
1414
- [Creating a zip bundle with the data package.](#creating-a-zip-bundle-with-the-data-package)
1515
- [CSV dialect support](#csv-dialect-support)
1616
- [Loading multipart resources](#loading-multipart-resources)
17+
- [Loading non-tabular resources](#loading-non-tabular-resources)
1718
- [Manipulating data packages programatically](#manipulating-data-packages-programatically)
1819

1920
<!-- /TOC -->
@@ -26,7 +27,7 @@ $ go get -u github.com/frictionlessdata/datapackage-go/...
2627

2728
## Main Features
2829

29-
### Loading and validating data package descriptors
30+
### Loading and validating tabular data package descriptors
3031

3132
A [data package](http://frictionlessdata.io/specs/data-package/) is a collection of [resources](http://frictionlessdata.io/specs/data-resource/). The [datapackage.Package](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Package) provides various capabilities like loading local or remote data package, saving a data package descriptor and many more.
3233

@@ -192,6 +193,59 @@ And all the rest of the code would still be working.
192193
A complete example can be found [here](https://github.com/frictionlessdata/datapackage-go/tree/master/examples/multipart).
193194

194195

196+
### Loading non-tabular resources
197+
198+
A [Data package](https://frictionlessdata.io/data-packages/) is a container format used to describe and package a collection of data. Even though there is additional support for dealing with tabular resources, it can be used to package any kind of data.
199+
200+
For instance, let's say a user needs to load JSON-LD information along with some tabular data (for more on this use case, please take a look at [this](https://github.com/frictionlessdata/datapackage-go/issues/13) issue). That can be packed together in a data package descriptor:
201+
202+
```json
203+
{
204+
"name": "carp-lake",
205+
"title": "Carp Lake Title",
206+
"description": "Tephra and Lithology from Carp Lake",
207+
"resources": [
208+
{
209+
"name":"data",
210+
"path": "data/carpLakeCoreStratigraphy.csv",
211+
"format": "csv",
212+
"schema": {
213+
"fields": [
214+
{"name": "depth", "type": "number"},
215+
{"name": "notes", "type": "text"},
216+
{"name": "core_segments", "type": "text"}
217+
]
218+
}
219+
},
220+
{
221+
"name": "schemaorg",
222+
"path": "data/schemaorg-ld.json",
223+
"format": "application/ld+json"
224+
}
225+
]
226+
}
227+
```
228+
229+
The package loading proceeds as usual.
230+
231+
232+
```go
233+
pkg, err := datapackage.Load("data/datapackage.json")
234+
// Check error.
235+
```
236+
237+
Once the data package is loaded, we could use the [Resource.RawRead](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Resource.RawRead) method to access the contents of the `schemaorg` resource as a byte slice.
238+
239+
```go
240+
so := pkg.GetResource("schemaorg")
241+
soContents, _ := so.RawRead()
242+
// Use contents. For instance, one could validate the JSON-LD schema and unmarshal it into a data structure.
243+
244+
data := pkg.GetResource("data")
245+
dataContents, err := data.ReadAll()
246+
// As data is a tabular resource, its content can be loaded as [][]string.
247+
```
248+
195249
### Manipulating data packages programatically
196250

197251
The datapackage-go library also makes it easy to save packages. Let's say you're creating a program that produces data packages and would like to add or remove a resource:

datapackage/package_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@ func ExampleLoad_readAll() {
5656
// Output: [[foo] [bar]]
5757
}
5858

59+
func ExampleLoad_readRaw() {
60+
dir, _ := ioutil.TempDir("", "datapackage_exampleload")
61+
defer os.Remove(dir)
62+
descriptorPath := filepath.Join(dir, "pkg.json")
63+
descriptorContents := `{"resources": [{
64+
"name": "res1",
65+
"path": "schemaorg.json",
66+
"format": "application/ld+json",
67+
"profile": "data-resource"
68+
}]}`
69+
ioutil.WriteFile(descriptorPath, []byte(descriptorContents), 0666)
70+
71+
resPath := filepath.Join(dir, "schemaorg.json")
72+
resContent := []byte(`{"@context": {"@vocab": "http://schema.org/"}}`)
73+
ioutil.WriteFile(resPath, resContent, 0666)
74+
75+
pkg, _ := Load(descriptorPath, validator.InMemoryLoader())
76+
contents, _ := pkg.GetResource("res1").RawRead()
77+
fmt.Println(string(contents))
78+
// Output: {"@context": {"@vocab": "http://schema.org/"}}
79+
}
80+
5981
func ExampleLoad_readAllRemote() {
6082
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
6183
// If the request is for data, returns the content.

datapackage/resource.go

Lines changed: 76 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@ import (
66
"fmt"
77
"io"
88
"io/ioutil"
9+
"net/http"
910
"net/url"
11+
"os"
1012
"path"
1113
"path/filepath"
1214
"strings"
15+
"sync"
16+
"time"
1317

1418
"github.com/frictionlessdata/datapackage-go/clone"
1519
"github.com/frictionlessdata/datapackage-go/validator"
@@ -190,19 +194,67 @@ func (r *Resource) GetTable(opts ...csv.CreationOpts) (table.Table, error) {
190194
return nil, fmt.Errorf("only csv and string is supported for inlining data")
191195
}
192196
}
193-
// Paths: create a table from the concatenation of the data in all paths.
194-
var buf bytes.Buffer
195-
for _, p := range r.path {
196-
if r.basePath != "" {
197-
p = joinPaths(r.basePath, p)
197+
buf, err := loadContents(r.basePath, r.path, csvLoadFunc)
198+
if err != nil {
199+
return nil, err
200+
}
201+
t, err := csv.NewTable(func() (io.ReadCloser, error) { return ioutil.NopCloser(bytes.NewReader(buf)), nil }, fullOpts...)
202+
if err != nil {
203+
return nil, err
204+
}
205+
return t, nil
206+
}
207+
208+
func csvLoadFunc(p string) func() (io.ReadCloser, error) {
209+
if strings.HasPrefix(p, "http") {
210+
return csv.Remote(p)
211+
}
212+
return csv.FromFile(p)
213+
}
214+
215+
const (
216+
remoteFetchTimeout = 15 * time.Second
217+
)
218+
219+
var (
220+
httpClient *http.Client
221+
startHTTPClient sync.Once
222+
)
223+
224+
func binaryLoadFunc(p string) func() (io.ReadCloser, error) {
225+
if strings.HasPrefix(p, "http") {
226+
return func() (io.ReadCloser, error) {
227+
startHTTPClient.Do(func() {
228+
httpClient = &http.Client{
229+
Timeout: remoteFetchTimeout,
230+
}
231+
})
232+
resp, err := httpClient.Get(p)
233+
if err != nil {
234+
return nil, err
235+
}
236+
defer resp.Body.Close()
237+
b, err := ioutil.ReadAll(resp.Body)
238+
if err != nil {
239+
return nil, err
240+
}
241+
return ioutil.NopCloser(bytes.NewReader(b)), nil
198242
}
199-
var source csv.Source
200-
if strings.HasPrefix(p, "http") {
201-
source = csv.Remote(p)
202-
} else {
203-
source = csv.FromFile(p)
243+
}
244+
return func() (io.ReadCloser, error) {
245+
return os.Open(p)
246+
}
247+
}
248+
249+
type loadFunc func(string) func() (io.ReadCloser, error)
250+
251+
func loadContents(basePath string, path []string, f loadFunc) ([]byte, error) {
252+
var buf bytes.Buffer
253+
for _, p := range path {
254+
if basePath != "" {
255+
p = joinPaths(basePath, p)
204256
}
205-
rc, err := source()
257+
rc, err := f(p)()
206258
if err != nil {
207259
return nil, err
208260
}
@@ -212,13 +264,11 @@ func (r *Resource) GetTable(opts ...csv.CreationOpts) (table.Table, error) {
212264
return nil, err
213265
}
214266
buf.Write(b)
215-
buf.WriteRune('\n')
216-
}
217-
t, err := csv.NewTable(func() (io.ReadCloser, error) { return ioutil.NopCloser(strings.NewReader(buf.String())), nil }, fullOpts...)
218-
if err != nil {
219-
return nil, err
267+
if len(path) > 1 {
268+
buf.WriteRune('\n')
269+
}
220270
}
221-
return t, nil
271+
return buf.Bytes(), nil
222272
}
223273

224274
func joinPaths(basePath, path string) string {
@@ -239,6 +289,15 @@ func (r *Resource) ReadAll(opts ...csv.CreationOpts) ([][]string, error) {
239289
return t.ReadAll()
240290
}
241291

292+
// RawRead reads all resource contents and returns them as a byte slice.
293+
// It can be used to access the content of non-tabular resources.
294+
func (r *Resource) RawRead() ([]byte, error) {
295+
if r.data != nil {
296+
return []byte(r.data.(string)), nil
297+
}
298+
return loadContents(r.basePath, r.path, binaryLoadFunc)
299+
}
300+
242301
// Iter returns an Iterator to read the tabular resource. Iter returns an error
243302
// if the table physical source can not be iterated.
244303
// The iteration process always start at the beginning of the table.

datapackage/resource_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,3 +418,36 @@ func TestResource_Cast(t *testing.T) {
418418
}
419419
})
420420
}
421+
422+
func TestResource_RawRead(t *testing.T) {
423+
t.Run("Remote", func(t *testing.T) {
424+
is := is.New(t)
425+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
426+
fmt.Fprint(w, "1234")
427+
}))
428+
defer ts.Close()
429+
resStr := fmt.Sprintf(`
430+
{
431+
"name": "ids",
432+
"path": "%s/id1",
433+
"profile": "data-resource"
434+
}`, ts.URL)
435+
res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry())
436+
is.NoErr(err)
437+
contents, err := res.RawRead()
438+
is.NoErr(err)
439+
is.Equal(string(contents), "1234")
440+
})
441+
t.Run("Inline", func(t *testing.T) {
442+
is := is.New(t)
443+
resStr := `
444+
{
445+
"name": "ids", "data": "{\"foo\":\"1234\"}", "profile":"data-resource", "mediatype":"application/json"
446+
}`
447+
res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry())
448+
is.NoErr(err)
449+
contents, err := res.RawRead()
450+
is.NoErr(err)
451+
is.Equal(string(contents), "{\"foo\":\"1234\"}")
452+
})
453+
}

0 commit comments

Comments
 (0)