@@ -49,7 +49,7 @@ It is assumed that each column has a unique name.
4949In a .csv file, the column names may either come from the first line of the file ("expected header"),
5050or they can be set-up via configuration of the reader object ("assumed header").
5151
52- Using meaningful column names instead of indices is usually more convenient as columns get rearranged
52+ Using meaningful column names instead of indices is usually more convenient when the columns get rearranged
5353during the execution of the processing pipeline.
5454*/
5555type Row map [string ]string
@@ -125,7 +125,7 @@ func (row Row) SelectExisting(cols ...string) Row {
125125}
126126
127127// Select takes a list of column names and returns a new Row
128- // with all the columns not on the list removed , or an error if any column is not present.
128+ // containing only the specified columns, or an error if any column is not present.
129129func (row Row ) Select (cols ... string ) (Row , error ) {
130130 r := make (map [string ]string , len (cols ))
131131
@@ -173,23 +173,27 @@ type RowFunc func(Row) error
173173// DataSource is the interface to any data that can be represented as a sequence of Rows.
174174type DataSource interface {
175175 // ForEach should call the given RowFunc once for each Row. The iteration should
176- // continue for as long as the RowFunc returns 'nil'. When RowFunc returns a non-nil error,
177- // this function should stop iteration and return an error, which may be either the original one,
178- // or some other error. The special value of io.EOF should be treated as a 'stop iteration'
179- // command, in which case this function should return 'nil' error. Given that Rows can be modified
180- // by the RowFunc, the implementations should only pass copies of their underlying rows
181- // to the supplied RowFunc.
176+ // continue for as long as the RowFunc returns 'nil'. When RowFunc returns
177+ // a non-nil error, this function should stop iteration and return an error,
178+ // which may be either the original one, or some other error. The special
179+ // value of io.EOF should be treated as a 'stop iteration' command, in which
180+ // case this function should return a nil error. Given that Rows can be modified
181+ // by the RowFunc, the implementations should only pass copies of their
182+ // underlying rows to the supplied RowFunc.
182183 ForEach (RowFunc ) error
183184}
184185
185- // Table is a DataSource, enriched with a number of lazy stream processing operations
186- // that can be combined using fluent interface as described in https://en.wikipedia.org/wiki/Fluent_interface
186+ // Table implements sequential operations on a given data source as well as
187+ // the DataSource interface itself and other iterating methods. All sequential
188+ // operations are 'lazy', i.e. they are not invoked immediately, but instead
189+ // they return a new table which, when iterated over, invokes the particular
190+ // operation. The operations can be chained using a so-called fluent interface.
187191type Table struct {
188192 source DataSource
189193 wrap func (RowFunc ) RowFunc
190194}
191195
192- // ForEach iterates over the input DataSource invoking all the operations in the processing pipeline,
196+ // ForEach iterates over the Table invoking all the operations in the processing pipeline,
193197// and calls the specified RowFunc on each resulting Row.
194198func (t * Table ) ForEach (fn RowFunc ) error {
195199 return t .source .ForEach (t .wrap (fn ))
@@ -204,9 +208,9 @@ func Take(source DataSource) *Table {
204208}
205209
206210// Transform is the most generic operation on a Row. It takes a function which
207- // maps a Row to another Row or an error. Any error returned from that function
211+ // maps a Row to another Row or returns an error. Any error returned from that function
208212// stops the iteration, otherwise the returned Row, if not empty, gets passed
209- // further down the processing pipeline.
213+ // down to the next stage of the processing pipeline.
210214func (t * Table ) Transform (trans func (Row ) (Row , error )) * Table {
211215 return & Table {
212216 source : t ,
@@ -322,7 +326,7 @@ func (t *Table) TakeWhile(pred func(Row) bool) *Table {
322326 }
323327}
324328
325- // DropWhile, upon iteration, ignores all the Rows as long as the specified predicate is true;
329+ // DropWhile, upon iteration, ignores all the Rows for as long as the specified predicate is true;
326330// afterwards all the remaining Rows are passed down the pipeline.
327331func (t * Table ) DropWhile (pred func (Row ) bool ) * Table {
328332 return & Table {
@@ -343,7 +347,7 @@ func (t *Table) DropWhile(pred func(Row) bool) *Table {
343347
344348// ToCsvFile iterates the input source writing the selected columns to the file with the given name,
345349// in "canonical" form with the header on the first line and with all the lines having the same number of fields,
346- // using default settings for the underlying Writer from encoding/csv package.
350+ // using default settings for the underlying Writer from the encoding/csv package.
347351func (t * Table ) ToCsvFile (fileName string , columns ... string ) error {
348352 if len (columns ) == 0 {
349353 panic ("Empty columns list in ToCsvFile()" )
@@ -482,8 +486,11 @@ func (index *Index) SubIndex(values ...string) *Index {
482486
483487// ResolveDuplicates calls the specified function once for each group of duplicates with the same key.
484488// The specified function must not modify its parameter and is expected to do one of the following:
489+ //
485490// - Select and return one row from the input list. The row will be used as the only row with its key;
491+ //
486492// - Return an empty row. The entire set of rows will be ignored;
493+ //
487494// - Return an error which will be passed back to the caller of ResolveDuplicates().
488495func (index * Index ) ResolveDuplicates (resolve func (rows []Row ) (Row , error )) error {
489496 return index .impl .dedup (resolve )
@@ -758,7 +765,7 @@ type CsvDataSource struct {
758765}
759766
760767// CsvFileDataSource constructs a new CsvDataSource bound to the specified
761- // file name and with default csv.Reader settings.
768+ // file name and with the default csv.Reader settings.
762769func CsvFileDataSource (name string ) * CsvDataSource {
763770 return & CsvDataSource {
764771 name : name ,
@@ -791,7 +798,7 @@ func (s *CsvDataSource) TrimLeadingSpace() *CsvDataSource {
791798 return s
792799}
793800
794- // AssumeHeader sets the header for input files that do not have their column
801+ // AssumeHeader sets the header for those input files that do not have their column
795802// names specified on the first line of the file. The header specification is a map
796803// from assigned column names to their corresponding column indices.
797804func (s * CsvDataSource ) AssumeHeader (spec map [string ]int ) * CsvDataSource {
@@ -814,7 +821,7 @@ func (s *CsvDataSource) AssumeHeader(spec map[string]int) *CsvDataSource {
814821// names specified on the first line of the file. The line gets verified
815822// against this specification each time the input file is opened.
816823// The header specification is a map from expected column names to their corresponding
817- // column indices. A negative value for a index means that the real value of the index
824+ // column indices. A negative value for an index means that the real value of the index
818825// will be found searching the first line of the file for the specified column name.
819826func (s * CsvDataSource ) ExpectHeader (spec map [string ]int ) * CsvDataSource {
820827 if len (spec ) == 0 {
@@ -861,7 +868,7 @@ func (s *CsvDataSource) NumFields(n int) *CsvDataSource {
861868 return s
862869}
863870
864- // NumFieldsAuto specifies that the number of field on each line must match that of
871+ // NumFieldsAuto specifies that the number of fields on each line must match that of
865872// the first line of the input file.
866873func (s * CsvDataSource ) NumFieldsAuto () * CsvDataSource {
867874 return s .NumFields (0 )
@@ -875,7 +882,7 @@ func (s *CsvDataSource) NumFieldsAny() *CsvDataSource {
875882}
876883
877884// ForEach reads the input file line by line, converts each line to a Row and calls
878- // the specified RowFunc per each row . ForEach is goroutine-safe and may be called multiple times.
885+ // the supplied RowFunc. ForEach is goroutine-safe and may be called multiple times.
879886func (s * CsvDataSource ) ForEach (fn RowFunc ) error {
880887 var lineNo uint64
881888
@@ -1007,7 +1014,7 @@ func (s *CsvDataSource) mapError(err error, lineNo uint64) error {
10071014 }
10081015}
10091016
1010- // DataSourceError is the type of the error returned from CsvDataSource
1017+ // DataSourceError is the type of the error returned from the CsvDataSource.ForEach method.
10111018type DataSourceError struct {
10121019 Name string
10131020 Line uint64
0 commit comments