Skip to content

Commit 5b2c17c

Browse files
add alpha parquet data files loading with parquets lib (#13)
1 parent da83ab4 commit 5b2c17c

File tree

6 files changed

+207
-70
lines changed

6 files changed

+207
-70
lines changed

package-lock.json

Lines changed: 92 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"name": "vscode-data-preview",
33
"displayName": "Data Preview",
44
"description": "Data Preview 🈸 extension for importing 📤 viewing 🔎 slicing 🔪 dicing 🎲 charting 📊 & exporting 📥 large JSON array/config, YAML, Apache Arrow, Avro & Excel data files",
5-
"version": "2.2.0",
5+
"version": "2.3.0",
66
"icon": "images/data-preview.png",
77
"publisher": "RandomFractalsInc",
88
"author": "Taras Novak a.k.a. dataPixy devTools maker :)",
@@ -416,8 +416,9 @@
416416
"json5": "^2.1.3",
417417
"jsonc-parser": "2.3.1",
418418
"json-spread": "0.3.2",
419+
"parquets": "^0.10.10",
419420
"properties": "^1.2.1",
420-
"snappy": "^6.3.4",
421+
"snappy": "^6.3.5",
421422
"superagent": "^6.1.0",
422423
"xlsx": "^0.16.7"
423424
}

src/data.manager.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,9 @@ import {JsonDataProvider} from './data.providers/json.data.provider';
1212
import {Json5DataProvider} from './data.providers/json5.data.provider';
1313
import {JsonLineDataProvider} from './data.providers/json.line.data.provider';
1414
import {MarkdownDataProvider} from './data.providers/markdown.data.provider';
15-
import {TextDataProvider} from './data.providers/text.data.provider';
15+
import {ParquetDataProvider} from './data.providers/parquet.data.provider';
1616
import {PropertiesDataProvider} from './data.providers/properties.data.provider';
17+
import {TextDataProvider} from './data.providers/text.data.provider';
1718
import {YamlDataProvider} from './data.providers/yaml.data.provider';
1819

1920
/**
@@ -143,8 +144,9 @@ export class DataManager implements IDataManager {
143144
this.addDataProvider(dataProviders, new Json5DataProvider());
144145
this.addDataProvider(dataProviders, new JsonLineDataProvider());
145146
this.addDataProvider(dataProviders, new MarkdownDataProvider());
146-
this.addDataProvider(dataProviders, new TextDataProvider());
147147
this.addDataProvider(dataProviders, new PropertiesDataProvider());
148+
this.addDataProvider(dataProviders, new ParquetDataProvider());
149+
this.addDataProvider(dataProviders, new TextDataProvider());
148150
this.addDataProvider(dataProviders, new YamlDataProvider());
149151
this._logger.debug('loadDataProviders(): loaded data providers:', Object.keys(dataProviders));
150152
return dataProviders;

src/data.preview.ts

Lines changed: 19 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ import {
1919
// fs data parsing imports
2020
import * as fs from 'fs';
2121
import * as path from 'path';
22-
//import * as parquet from 'parquetjs';
2322

2423
// data preview imports
2524
import * as config from './config';
@@ -404,7 +403,7 @@ export class DataPreview {
404403
// update columns and rows state vars
405404
this._columns = columns;
406405
this._rowCount = rowCount;
407-
let dataStats: string = `Rows: ${rowCount.toLocaleString()}\tColumns: ${columns.length.toLocaleString()}`;
406+
let dataStats: string = `Rows: ${rowCount.toLocaleString()}\tColumns: ${columns?.length.toLocaleString()}`;
408407
if (this._tableNames.length > 0) {
409408
// add tables count to data preview data stats status display
410409
dataStats = `Tables: ${this._tableNames.length.toLocaleString()}\t${dataStats}`;
@@ -666,30 +665,23 @@ export class DataPreview {
666665
*/
667666
private getData(dataUrl: string, dataTable: string = ''): any {
668667
let data: any = [];
669-
if (this._fileExtension === '.parquet') {
670-
// TODO: sort out node-gyp lzo lib loading for parquet data files parse
671-
window.showInformationMessage('Parquet Data Preview 🈸 coming soon!');
672-
// data = this.getParquetData(dataFilePath);
673-
}
674-
else { // get data, table names, and data schema via data.manager api
675-
dataManager.getData(dataUrl, {
676-
dataTable: dataTable,
677-
createJsonFiles: this.createJsonFiles,
678-
createJsonSchema: this.createJsonSchema
679-
}, (data: any) => {
680-
this._tableNames = dataManager.getDataTableNames(dataUrl);
681-
this._dataSchema = dataManager.getDataSchema(dataUrl);
682-
this.loadData(data);
683-
// log data stats
684-
if (typeof data === 'string') {
685-
const dataLines: Array<string> = data.split('\n');
686-
this.logDataStats(dataLines);
687-
}
688-
else {
689-
this.logDataStats(data, this._dataSchema);
690-
}
691-
});
692-
}
668+
dataManager.getData(dataUrl, {
669+
dataTable: dataTable,
670+
createJsonFiles: this.createJsonFiles,
671+
createJsonSchema: this.createJsonSchema
672+
}, (data: any) => {
673+
this._tableNames = dataManager.getDataTableNames(dataUrl);
674+
this._dataSchema = dataManager.getDataSchema(dataUrl);
675+
this.loadData(data);
676+
// log data stats
677+
if (typeof data === 'string') {
678+
const dataLines: Array<string> = data.split('\n');
679+
this.logDataStats(dataLines);
680+
}
681+
else {
682+
this.logDataStats(data, this._dataSchema);
683+
}
684+
});
693685
return data;
694686
} // end of getData()
695687

@@ -716,29 +708,7 @@ export class DataPreview {
716708
}
717709
}
718710

719-
/**
720-
* Gets binary Parquet file data.
721-
* @param dataFilePath Parquet data file path.
722-
* @returns Array of row objects.
723-
*/ /*
724-
private async getParquetData(dataFilePath: string) {
725-
let dataSchema: any = {};
726-
let dataRows: Array<any> = [];
727-
const parquetReader = await parquet.ParquetReader.openFile(dataFilePath);
728-
const cursor = parquetReader.getCursor();
729-
// read all records
730-
let record = null;
731-
while (record = await cursor.next()) {
732-
dataRows.push(record);
733-
}
734-
await parquetReader.close();
735-
dataRows = dataRows.map(rowObject => this.flattenObject(rowObject));
736-
this.logDataStats(dataRows, dataSchema);
737-
// update web view
738-
this.loadData(dataRows);
739-
return dataRows;
740-
} */
741-
711+
742712
/**
743713
* Saves posted data from data view.
744714
* @param fileData File data to save.

0 commit comments

Comments
 (0)