Skip to content

Commit 0cb68ce

Browse files
committed
Documentation update.
1 parent 8f5b3e2 commit 0cb68ce

File tree

2 files changed

+32
-25
lines changed

2 files changed

+32
-25
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ if err != nil {
3737
return err
3838
}
3939

40-
orders := csvplus.CsvFileDataSource("orders.csv").SelectColumns("order_id", "cust_id", "prod_id", "qty", "ts")
40+
orders := csvplus.CsvFileDataSource("orders.csv").SelectColumns("cust_id", "prod_id", "qty", "ts")
4141

4242
return customers.
4343
Join(orders, "cust_id").
@@ -55,7 +55,7 @@ For more details see the [documentation](https://godoc.org/github.com/maxim2266/
5555

5656
### Design principles
5757

58-
The package functionality is based on operations on the following entities:
58+
The package functionality is based on the operations on the following entities:
5959
- type Row
6060
- interface DataSource
6161
- type Table
@@ -74,11 +74,11 @@ operation on `DataSource` is iteration over the rows. The iteration is performed
7474
an implementation of the interface for `.csv` files.
7575

7676
#### Type `Table`
77-
Type `Table` implements sequential operations on a given data source as well as the `DataSource`
77+
Type `Table` implements sequential operations on a given data source, as well as the `DataSource`
7878
interface itself and other iterating methods. All sequential operations are 'lazy', i.e. they are not
7979
invoked immediately, but instead they return a new table which, when iterated over, invokes
8080
the particular operation. The operations can be chained using a so-called fluent interface.
81-
The actual iteration over a table only happens when any of the following methods are called:
81+
The actual iteration over a table only happens when any of the following methods is called:
8282
- `ForEach`
8383
- `IndexOn`
8484
- `UniqueIndexOn`

csvplus.go

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ It is assumed that each column has a unique name.
4949
In a .csv file, the column names may either come from the first line of the file ("expected header"),
5050
or they can be set-up via configuration of the reader object ("assumed header").
5151
52-
Using meaningful column names instead of indices is usually more convenient as columns get rearranged
52+
Using meaningful column names instead of indices is usually more convenient when the columns get rearranged
5353
during the execution of the processing pipeline.
5454
*/
5555
type Row map[string]string
@@ -125,7 +125,7 @@ func (row Row) SelectExisting(cols ...string) Row {
125125
}
126126

127127
// Select takes a list of column names and returns a new Row
128-
// with all the columns not on the list removed, or an error if any column is not present.
128+
// containing only the specified columns, or an error if any column is not present.
129129
func (row Row) Select(cols ...string) (Row, error) {
130130
r := make(map[string]string, len(cols))
131131

@@ -173,23 +173,27 @@ type RowFunc func(Row) error
173173
// DataSource is the interface to any data that can be represented as a sequence of Rows.
174174
type DataSource interface {
175175
// ForEach should call the given RowFunc once for each Row. The iteration should
176-
// continue for as long as the RowFunc returns 'nil'. When RowFunc returns a non-nil error,
177-
// this function should stop iteration and return an error, which may be either the original one,
178-
// or some other error. The special value of io.EOF should be treated as a 'stop iteration'
179-
// command, in which case this function should return 'nil' error. Given that Rows can be modified
180-
// by the RowFunc, the implementations should only pass copies of their underlying rows
181-
// to the supplied RowFunc.
176+
// continue for as long as the RowFunc returns 'nil'. When RowFunc returns
177+
// a non-nil error, this function should stop iteration and return an error,
178+
// which may be either the original one, or some other error. The special
179+
// value of io.EOF should be treated as a 'stop iteration' command, in which
180+
// case this function should return a 'nil' error. Given that Rows can be modified
181+
// by the RowFunc, the implementations should only pass copies of their
182+
// underlying rows to the supplied RowFunc.
182183
ForEach(RowFunc) error
183184
}
184185

185-
// Table is a DataSource, enriched with a number of lazy stream processing operations
186-
// that can be combined using fluent interface as described in https://en.wikipedia.org/wiki/Fluent_interface
186+
// Table implements sequential operations on a given data source as well as
187+
// the DataSource interface itself and other iterating methods. All sequential
188+
// operations are 'lazy', i.e. they are not invoked immediately, but instead
189+
// they return a new table which, when iterated over, invokes the particular
190+
// operation. The operations can be chained using a so-called fluent interface.
187191
type Table struct {
188192
source DataSource
189193
wrap func(RowFunc) RowFunc
190194
}
191195

192-
// ForEach iterates over the input DataSource invoking all the operations in the processing pipeline,
196+
// ForEach iterates over the Table invoking all the operations in the processing pipeline,
193197
// and calls the specified RowFunc on each resulting Row.
194198
func (t *Table) ForEach(fn RowFunc) error {
195199
return t.source.ForEach(t.wrap(fn))
@@ -204,9 +208,9 @@ func Take(source DataSource) *Table {
204208
}
205209

206210
// Transform is the most generic operation on a Row. It takes a function which
207-
// maps a Row to another Row or an error. Any error returned from that function
211+
// maps a Row to another Row or returns an error. Any error returned from that function
208212
// stops the iteration, otherwise the returned Row, if not empty, gets passed
209-
// further down the processing pipeline.
213+
// down to the next stage of the processing pipeline.
210214
func (t *Table) Transform(trans func(Row) (Row, error)) *Table {
211215
return &Table{
212216
source: t,
@@ -322,7 +326,7 @@ func (t *Table) TakeWhile(pred func(Row) bool) *Table {
322326
}
323327
}
324328

325-
// DropWhile, upon iteration, ignores all the Rows as long as the specified predicate is true;
329+
// DropWhile, upon iteration, ignores all the Rows for as long as the specified predicate is true;
326330
// afterwards all the remaining Rows are passed down the pipeline.
327331
func (t *Table) DropWhile(pred func(Row) bool) *Table {
328332
return &Table{
@@ -343,7 +347,7 @@ func (t *Table) DropWhile(pred func(Row) bool) *Table {
343347

344348
// ToCsvFile iterates the input source writing the selected columns to the file with the given name,
345349
// in "canonical" form with the header on the first line and with all the lines having the same number of fields,
346-
// using default settings for the underlying Writer from encoding/csv package.
350+
// using default settings for the underlying Writer from the encoding/csv package.
347351
func (t *Table) ToCsvFile(fileName string, columns ...string) error {
348352
if len(columns) == 0 {
349353
panic("Empty columns list in ToCsvFile()")
@@ -482,8 +486,11 @@ func (index *Index) SubIndex(values ...string) *Index {
482486

483487
// ResolveDuplicates calls the specified function once for each group of duplicates with the same key.
484488
// The specified function must not modify its parameter and is expected to do one of the following:
489+
//
485490
// - Select and return one row from the input list. The row will be used as the only row with its key;
491+
//
486492
// - Return an empty row. The entire set of rows will be ignored;
493+
//
487494
// - Return an error which will be passed back to the caller of ResolveDuplicates().
488495
func (index *Index) ResolveDuplicates(resolve func(rows []Row) (Row, error)) error {
489496
return index.impl.dedup(resolve)
@@ -758,7 +765,7 @@ type CsvDataSource struct {
758765
}
759766

760767
// CsvFileDataSource constructs a new CsvDataSource bound to the specified
761-
// file name and with default csv.Reader settings.
768+
// file name and with the default csv.Reader settings.
762769
func CsvFileDataSource(name string) *CsvDataSource {
763770
return &CsvDataSource{
764771
name: name,
@@ -791,7 +798,7 @@ func (s *CsvDataSource) TrimLeadingSpace() *CsvDataSource {
791798
return s
792799
}
793800

794-
// AssumeHeader sets the header for input files that do not have their column
801+
// AssumeHeader sets the header for those input files that do not have their column
795802
// names specified on the first line of the file. The header specification is a map
796803
// from assigned column names to their corresponding column indices.
797804
func (s *CsvDataSource) AssumeHeader(spec map[string]int) *CsvDataSource {
@@ -814,7 +821,7 @@ func (s *CsvDataSource) AssumeHeader(spec map[string]int) *CsvDataSource {
814821
// names specified on the first line of the file. The line gets verified
815822
// against this specification each time the input file is opened.
816823
// The header specification is a map from expected column names to their corresponding
817-
// column indices. A negative value for a index means that the real value of the index
824+
// column indices. A negative value for an index means that the real value of the index
818825
// will be found searching the first line of the file for the specified column name.
819826
func (s *CsvDataSource) ExpectHeader(spec map[string]int) *CsvDataSource {
820827
if len(spec) == 0 {
@@ -861,7 +868,7 @@ func (s *CsvDataSource) NumFields(n int) *CsvDataSource {
861868
return s
862869
}
863870

864-
// NumFieldsAuto specifies that the number of field on each line must match that of
871+
// NumFieldsAuto specifies that the number of fields on each line must match that of
865872
// the first line of the input file.
866873
func (s *CsvDataSource) NumFieldsAuto() *CsvDataSource {
867874
return s.NumFields(0)
@@ -875,7 +882,7 @@ func (s *CsvDataSource) NumFieldsAny() *CsvDataSource {
875882
}
876883

877884
// ForEach reads the input file line by line, converts each line to a Row and calls
878-
// the specified RowFunc per each row. ForEach is goroutine-safe and may be called multiple times.
885+
// the supplied RowFunc. ForEach is goroutine-safe and may be called multiple times.
879886
func (s *CsvDataSource) ForEach(fn RowFunc) error {
880887
var lineNo uint64
881888

@@ -1007,7 +1014,7 @@ func (s *CsvDataSource) mapError(err error, lineNo uint64) error {
10071014
}
10081015
}
10091016

1010-
// DataSourceError is the type of the error returned from CsvDataSource
1017+
// DataSourceError is the type of the error returned from the CsvDataSource.ForEach method.
10111018
type DataSourceError struct {
10121019
Name string
10131020
Line uint64

0 commit comments

Comments
 (0)