Skip to content

Commit 7030235

Browse files
committed
Clone from gulp-etl-tap-csv
0 parents  commit 7030235

37 files changed

+17900
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/node_modules
2+
.serverless
3+
jspm_packages

.vscode/launch.json

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "GulpTester",
9+
"type": "node",
10+
"request": "launch",
11+
"program": "${workspaceRoot}/node_modules/gulp/bin/gulp.js",
12+
"stopOnEntry": false,
13+
"args": ["--gulpfile", "./debug/gulpfile.ts", "default"],
14+
"cwd": "${workspaceRoot}",
15+
"runtimeArgs": [
16+
"--nolazy", "-r", "ts-node/register"
17+
],
18+
"console": "internalConsole",
19+
"env": {"DEBUG_LEVEL":"info"}
20+
},
21+
{
22+
"name": "GulpTester Debug",
23+
"type": "node",
24+
"request": "launch",
25+
"program": "${workspaceRoot}/node_modules/gulp/bin/gulp.js",
26+
"stopOnEntry": false,
27+
"args": ["--gulpfile", "./debug/gulpfile.ts", "default"],
28+
"cwd": "${workspaceRoot}",
29+
"runtimeArgs": [
30+
"--nolazy", "-r", "ts-node/register"
31+
],
32+
"console": "internalConsole",
33+
"env": {"DEBUG_LEVEL":"debug"}
34+
},
35+
{
36+
"name": "GulpTester Debug - Buffer Mode",
37+
"type": "node",
38+
"request": "launch",
39+
"program": "${workspaceRoot}/node_modules/gulp/bin/gulp.js",
40+
"stopOnEntry": false,
41+
"args": ["--gulpfile", "./debug/gulpfile.ts", "runTapCsvBuffer"],
42+
"cwd": "${workspaceRoot}",
43+
"runtimeArgs": [
44+
"--nolazy", "-r", "ts-node/register"
45+
],
46+
"console": "internalConsole",
47+
"env": {"DEBUG_LEVEL":"debug"}
48+
},
49+
{
50+
"name": "csvParseWithoutGulp",
51+
"type": "node",
52+
"request": "launch",
53+
"program": "${workspaceRoot}/node_modules/gulp/bin/gulp.js",
54+
"stopOnEntry": false,
55+
"args": ["--gulpfile", "./debug/gulpfile.ts", "csvParseWithoutGulp"],
56+
"cwd": "${workspaceRoot}",
57+
"runtimeArgs": [
58+
"--nolazy", "-r", "ts-node/register"
59+
],
60+
"console": "internalConsole",
61+
"env": {"DEBUG_LEVEL":"info"}
62+
},
63+
{
64+
"name": "aws-doParse: debug TS w/o transpile using ts-node",
65+
"type": "node",
66+
"request": "launch",
67+
"args": ["src/aws/dev-handler.ts"],
68+
"runtimeArgs": ["-r", "ts-node/register"],
69+
"cwd": "${workspaceRoot}",
70+
"protocol": "inspector",
71+
"internalConsoleOptions": "openOnSessionStart",
72+
},
73+
]
74+
}

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2019 gulp ETL
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# gulp-etl-tap-mysql #
2+
3+
*(this plugin is being developed from **gulp-etl-tap-csv**. The original readme from [gulp-etl-tap-csv](https://github.com/gulpetl/gulp-etl-tap-csv) is below)*
4+
5+
This plugin converts CSV files to **gulp-etl** **Message Stream** files; originally adapted from the [gulp-etl-handlelines](https://github.com/gulpetl/gulp-etl-handlelines) model plugin. It is a **gulp-etl** wrapper for [csv-parse](https://csv.js.org/parse/).
6+
7+
This is a **[gulp-etl](https://gulpetl.com/)** plugin, and as such it is a [gulp](https://gulpjs.com/) plugin. **gulp-etl** plugins work with [ndjson](http://ndjson.org/) data streams/files which we call **Message Streams** and which are compliant with the [Singer specification](https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md#output). In the **gulp-etl** ecosystem, **taps** tap into an outside format or system (in this case, a CSV file) and convert their contents/output to a Message Stream, and **targets** convert/output Message Streams to an outside format or system. In this way, these modules can be stacked to convert from one format or system to another, either directly or with transformations or other parsing in between. Message Streams look like this:
8+
9+
```
10+
{"type": "SCHEMA", "stream": "users", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}}}}
11+
{"type": "RECORD", "stream": "users", "record": {"id": 1, "name": "Chris"}}
12+
{"type": "RECORD", "stream": "users", "record": {"id": 2, "name": "Mike"}}
13+
{"type": "SCHEMA", "stream": "locations", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}}}}
14+
{"type": "RECORD", "stream": "locations", "record": {"id": 1, "name": "Philadelphia"}}
15+
{"type": "STATE", "value": {"users": 2, "locations": 1}}
16+
```
17+
18+
### Usage
19+
**gulp-etl** plugins accept a configObj as the first parameter; the configObj
20+
will contain any info the plugin needs. For this plugin the configObj is the "Options" object for [csv-parse](https://csv.js.org/parse/), described [here](https://csv.js.org/parse/options/); the only difference is that the "columns" property cannot be falsey, since it would result in arrays being returned
21+
for each row instead of objects. A falsey value for columns will be overridden to true.
22+
23+
##### Sample gulpfile.js
24+
```
25+
/* parse all .CSV files in a folder into Message Stream files in a different folder */
26+
27+
let gulp = require('gulp')
28+
var rename = require('gulp-rename')
29+
var tapCsv = require('gulp-etl-tap-csv').tapCsv
30+
31+
exports.default = function() {
32+
return gulp.src('data/*.csv')
33+
.pipe(tapCsv({ columns:true }))
34+
.pipe(rename({ extname: ".ndjson" })) // rename to *.ndjson
35+
.pipe(gulp.dest('output/'));
36+
}
37+
```
38+
### Quick Start for Coding on This Plugin
39+
* Dependencies:
40+
* [git](https://git-scm.com/downloads)
41+
* [nodejs](https://nodejs.org/en/download/releases/) - At least v6.3 (6.9 for Windows) required for TypeScript debugging
42+
* npm (installs with Node)
43+
* typescript - installed as a development dependency
44+
* Clone this repo and run `npm install` to install npm packages
45+
* Debug: with [VScode](https://code.visualstudio.com/download) use `Open Folder` to open the project folder, then hit F5 to debug. This runs without compiling to javascript using [ts-node](https://www.npmjs.com/package/ts-node)
46+
* Test: `npm test` or `npm t`
47+
* Compile to javascript: `npm run build`
48+
49+
### Testing
50+
51+
We are using [Jest](https://facebook.github.io/jest/docs/en/getting-started.html) for our testing. All of our tests are in the `test` folder.
52+
53+
- Run `npm test` to run the test suites
54+
55+
56+
57+
Note: This document is written in [Markdown](https://daringfireball.net/projects/markdown/). We like to use [Typora](https://typora.io/) and [Markdown Preview Plus](https://chrome.google.com/webstore/detail/markdown-preview-plus/febilkbfcbhebfnokafefeacimjdckgl?hl=en-US) for our Markdown work.

debug/gulpfile.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
let gulp = require('gulp')
2+
import { tapCsv } from '../src/plugin'
3+
4+
import * as loglevel from 'loglevel'
5+
const log = loglevel.getLogger('gulpfile')
6+
log.setLevel((process.env.DEBUG_LEVEL || 'warn') as log.LogLevelDesc)
7+
// if needed, you can control the plugin's logging level separately from 'gulpfile' logging above
8+
// const pluginLog = loglevel.getLogger(PLUGIN_NAME)
9+
// pluginLog.setLevel('debug')
10+
11+
import * as rename from 'gulp-rename'
12+
const errorHandler = require('gulp-error-handle'); // handle all errors in one handler, but still stop the stream if there are errors
13+
14+
const pkginfo = require('pkginfo')(module); // project package.json info into module.exports
15+
const PLUGIN_NAME = module.exports.name;
16+
17+
import Vinyl = require('vinyl')
18+
19+
let gulpBufferMode = false;
20+
21+
/**
 * Gulp task that turns on the module-level `gulpBufferMode` flag and then
 * signals completion.
 *
 * `runTapCsv` passes this flag as the `buffer` option of `gulp.src`, so this
 * task has to run before it (see the `runTapCsvBuffer` series).
 *
 * @param callback gulp completion callback; invoked with no arguments
 */
function switchToBuffer(callback: any) {
  gulpBufferMode = true;

  // nothing asynchronous happens here, so report completion right away
  callback();
}
26+
27+
/**
 * Gulp task: pipes every `../testdata/*.csv` file through `tapCsv`, renames the
 * result to `*.ndjson` and writes it to `../testdata/processed`.
 *
 * The module-level `gulpBufferMode` flag is passed as the `buffer` option of
 * `gulp.src`. The destination stream is returned, and `callback` is also
 * invoked: with the error when the error handler fires, or with no arguments
 * once the destination stream emits 'end'.
 *
 * @param callback gulp completion callback
 * @returns the stream produced by `gulp.dest`
 */
function runTapCsv(callback: any) {
  log.info('gulp task starting for ' + PLUGIN_NAME)

  // log the failure, then hand it to gulp
  const reportError = (err: any) => {
    log.error('Error: ' + err)
    callback(err)
  }

  const announceStart = (file: Vinyl) => {
    log.info('Starting processing on ' + file.basename)
  }

  const announceFinish = (file: Vinyl) => {
    log.info('Finished processing on ' + file.basename)
  }

  const announceComplete = () => {
    log.info('gulp task complete')
    callback()
  }

  // source files, routed through the shared error handler
  const guarded = gulp.src('../testdata/*.csv', { buffer: gulpBufferMode })
    .pipe(errorHandler(reportError))
  guarded.on('data', announceStart)

  // CSV -> Message Stream, then rename to *.ndjson and write out
  const parsed = guarded.pipe(tapCsv({ raw: true /*, info:true */ }))
  const renamed = parsed.pipe(rename({ extname: ".ndjson" }))
  const written = renamed.pipe(gulp.dest('../testdata/processed'))

  written.on('data', announceFinish)
  written.on('end', announceComplete)

  return written
}
52+
53+
/**
 * Gulp task that parses `../testdata/cars.csv` directly with csv-parse (no
 * gulp plugin involved) and prints each parsed record to the console.
 *
 * Completion is reported through `callback`: with no error once the parser
 * emits 'end', or with the error if either the file read or the parser fails.
 * Without this signal gulp cannot tell that the task has finished.
 *
 * @param callback gulp completion callback; optional when the function is
 *                 called directly outside gulp
 */
export function csvParseWithoutGulp(callback: any) {
  const parse = require('csv-parse')

  const parser = parse({ delimiter: ',', columns: true })

  // report completion at most once, and tolerate a missing callback
  let finished = false
  const done = (err?: any) => {
    if (finished) return
    finished = true
    if (typeof callback === 'function') callback(err)
  }

  require('fs').createReadStream('../testdata/cars.csv')
    // pipe() does not forward errors, so the read stream needs its own handler
    .on('error', done)
    .pipe(parser)
    .on('data', (data: any) => {
      console.log(data)
    })
    .on('error', done)
    .on('end', () => done())
}
65+
66+
// default task: run the tap with gulp.src's `buffer` option left at false
exports.default = gulp.series(runTapCsv)

// same tap, but switchToBuffer first sets gulpBufferMode to true
exports.runTapCsvBuffer = gulp.series(switchToBuffer, runTapCsv)

dist/js/aws/dev-handler.js

Lines changed: 77 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/js/aws/dev-handler.js.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)