Skip to content

Commit 406e33a

Browse files
committed
feat: sensitiveDataDetection in files like .csv etc added
2 parents 4da2abd + fd26523 commit 406e33a

File tree

13 files changed

+310
-10
lines changed

13 files changed

+310
-10
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ yarn-debug.log*
1111
yarn-error.log*
1212
lerna-debug.log*
1313
package-lock.json
14+
/git-proxy-test
1415

1516

1617
# Diagnostic reports (https://nodejs.org/api/report.html)

.husky/commit-msg

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env sh
2-
. "$(dirname -- "$0")/_/husky.sh"
1+
32

43
npx --no -- commitlint --edit ${1} && npm run lint

proxy.config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
"literals": [],
7979
"patterns": [],
8080
"providers": {},
81-
"ProxyFileTypes" : [".jpg"]
81+
"ProxyFileTypes" : [".jpg",".csv",".json",".xlsx"]
8282
,
8383
"aiMlUsage": {
8484
"enabled": true,

src/proxy/chain.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@ const pushActionChain = [
1111
proc.push.writePack,
1212
proc.push.getDiff,
1313
proc.push.checkForAiMlUsage,
14-
proc.push.checkExifJpeg,
15-
14+
proc.push.checkExifJpeg,
15+
proc.push.checkSensitiveData,
1616
proc.push.clearBareClone,
1717
proc.push.scanDiff,
1818
proc.push.blockForAuth,
1919
];
2020

21+
2122
const pullActionChain = [proc.push.checkRepoInAuthorisedList];
2223

2324
let pluginsInserted = false;
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
const fs = require('fs');
2+
const csv = require('csv-parser');
3+
const XLSX = require('xlsx');
4+
const path = require('path');
5+
const Step = require('../../actions').Step;
6+
const config = require('../../../config');
7+
8+
// const { exec: getDiffExec } = require('./getDiff');
9+
// Function to check for sensitive data patterns
10+
const commitConfig = config.getCommitConfig();
11+
/**
 * Patterns that mark a value as sensitive, each paired with a label for logs.
 * Built once at module level instead of on every call. None of the regexes use
 * the `g`/`y` flags, so `.test()` carries no state between calls.
 */
const SENSITIVE_PATTERNS = [
  {
    label: 'Social Security Number (SSN)',
    // The digit guards stop this from matching inside a longer digit run
    // (e.g. "1234-56-78901"), in line with the `\b` guards used below.
    regex: /(?:^|\D)\d{3}-\d{2}-\d{4}(?!\d)/,
  },
  { label: 'credit card number', regex: /\b\d{16}\b/ },
  { label: 'ZIP+4 code', regex: /\b\d{5}-\d{4}\b/ },
  // Add more patterns as needed
];

/**
 * Tests a single value against the known sensitive-data patterns.
 *
 * @param {*} cell - Value to inspect; it is converted with `String()` first.
 * @returns {boolean} true when any pattern matches, false otherwise.
 */
const checkForSensitiveData = (cell) => {
  const text = String(cell);
  const hit = SENSITIVE_PATTERNS.find(({ regex }) => regex.test(text));
  if (!hit) {
    return false;
  }
  // Log only the kind of data that matched (in red). Echoing `cell` would copy
  // the sensitive value - or a whole file's content - into the logs.
  console.log(`\x1b[31mDetected sensitive data: ${hit.label}\x1b[0m`);
  return true;
};
26+
/**
 * Scans every cell of a CSV file for sensitive data.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @returns {Promise<boolean>} Resolves to true when at least one cell matched a
 *   sensitive pattern, false otherwise. Rejects when the file cannot be opened
 *   or the parser reports an error.
 */
const processCSV = (filePath) => {
  return new Promise((resolve, reject) => {
    let sensitiveDataFound = false;
    let rowNumber = 0;

    const source = fs.createReadStream(filePath);

    const fail = (err) => {
      console.error(`Error reading CSV file: ${err.message}`);
      source.destroy(); // release the file descriptor if the parser failed mid-read
      reject(err);
    };

    // `pipe()` does not forward errors downstream, so the file stream needs its
    // own handler; without it a missing file raises an unhandled 'error' event
    // instead of rejecting this promise.
    source.on('error', fail);

    source
      .pipe(csv())
      .on('data', (row) => {
        rowNumber += 1;
        for (const [key, value] of Object.entries(row)) {
          if (checkForSensitiveData(value)) {
            // Report where the match is (yellow) without echoing the value itself.
            console.log(`\x1b[33mSensitive data found in CSV: ${key} (row ${rowNumber})\x1b[0m`);
            sensitiveDataFound = true;
          }
        }
      })
      .on('end', () => {
        if (!sensitiveDataFound) {
          console.log('No sensitive data found in CSV.');
        }
        resolve(sensitiveDataFound);
      })
      .on('error', fail);
  });
};
52+
/**
 * Scans every sheet of an XLSX workbook for sensitive data.
 *
 * Rows come from `XLSX.utils.sheet_to_json`, so each row is an object keyed by
 * column header and only the cell values are inspected.
 *
 * @param {string} filePath - Path of the workbook to read.
 * @returns {Promise<boolean>} Resolves to true when any cell on any sheet
 *   matched a sensitive pattern, false otherwise. Rejects when the workbook
 *   cannot be read or converted.
 */
const processXLSX = async (filePath) => {
  try {
    const workbook = XLSX.readFile(filePath);
    let sensitiveDataFound = false;

    // Walk all sheets: looking only at the first one would miss sensitive
    // data stored on any later sheet.
    for (const sheetName of workbook.SheetNames) {
      const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName]);
      for (const row of rows) {
        for (const [key, value] of Object.entries(row)) {
          if (checkForSensitiveData(value)) {
            // Report where the match is (yellow) without echoing the value itself.
            console.log(`\x1b[33mSensitive data found in XLSX: ${sheetName} / ${key}\x1b[0m`);
            sensitiveDataFound = true;
          }
        }
      }
    }

    if (!sensitiveDataFound) {
      console.log('No sensitive data found in XLSX.');
    }
    return sensitiveDataFound;
  } catch (error) {
    console.error(`Error reading XLSX file: ${error.message}`);
    throw error; // surfaces to the caller as a rejected promise
  }
};
79+
/**
 * Checks a text file (used for `.log` and `.json`) for sensitive data by
 * testing its whole UTF-8 content at once.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<boolean>} Resolves to true when the content matched a
 *   sensitive pattern, false otherwise. Rejects when the file cannot be read.
 */
const checkLogJsonFiles = (filePath) => {
  return new Promise((resolve, reject) => {
    fs.readFile(filePath, 'utf8', (err, data) => {
      if (err) {
        console.error(`Error reading file ${filePath}: ${err.message}`);
        reject(err);
        return;
      }
      const sensitiveDataFound = checkForSensitiveData(data);
      if (sensitiveDataFound) {
        // Yellow, like the CSV/XLSX reports. The sequence needs the "33m"
        // colour code; a bare "\x1b[" swallows the first letter of the message.
        console.log(`\x1b[33mSensitive data found in ${filePath}\x1b[0m`);
      }
      resolve(sensitiveDataFound);
    });
  });
};
96+
/**
 * Dispatches a file to the scanner that matches its extension.
 *
 * A file is scanned only when its (lower-cased) extension is listed in the
 * commit config under `diff.block`. Both `proxyFileTypes` and `ProxyFileTypes`
 * are accepted as the key name, and a missing list is treated as empty rather
 * than causing a TypeError.
 *
 * @param {string} filePath - Path of the file to scan.
 * @returns {Promise<boolean>} true when sensitive data was found; false when
 *   none was found or the file type is not configured or not supported.
 *   Rejects if the selected scanner fails.
 */
const parseFile = async (filePath) => {
  const ext = path.extname(filePath).toLowerCase();

  const blockConfig = (commitConfig && commitConfig.diff && commitConfig.diff.block) || {};
  const configured = blockConfig.proxyFileTypes || blockConfig.ProxyFileTypes;
  // Compare case-insensitively, since `ext` has already been lower-cased.
  const filesToCheck = Array.isArray(configured)
    ? configured.map((type) => String(type).toLowerCase())
    : [];

  if (!filesToCheck.includes(ext)) {
    console.log(`${ext} should be included in CommitConfig for proxy Check!`);
    return false;
  }

  switch (ext) {
    case '.csv':
      return processCSV(filePath);
    case '.xlsx':
      return processXLSX(filePath);
    case '.log':
    case '.json':
      return checkLogJsonFiles(filePath);
    default:
      // Configured but unsupported types are skipped without logging.
      return false;
  }
};
121+
// Helpers used by the async exec handler defined further down
122+
/**
 * Extracts the paths of files touched by a git diff that still exist after
 * the change.
 *
 * Each `diff --git a/<old> b/<new>` header contributes its post-image (`b/`)
 * path, so a renamed file is reported under its new name. A header followed by
 * a `deleted file mode` line is dropped, because that file no longer exists.
 *
 * @param {string} diffContent - Raw unified diff text (LF or CRLF line endings).
 * @returns {string[]} Post-image paths in the order they appear in the diff.
 */
const extractFilePathsFromDiff = (diffContent) => {
  const filePaths = [];
  // True while reading the extended header lines that follow a `diff --git`
  // line, i.e. before the first hunk of that file.
  let inFileHeader = false;

  for (const line of diffContent.split(/\r?\n/)) {
    const match = line.match(/^diff --git a\/(.+?) b\/(.+)$/);
    if (match) {
      filePaths.push(match[2]);
      inFileHeader = true;
    } else if (inFileHeader && line.startsWith('deleted file mode')) {
      filePaths.pop(); // the file was removed, so there is nothing to scan
      inFileHeader = false;
    } else if (line.startsWith('@@')) {
      inFileHeader = false; // hunk content starts; header lines are over
    }
  }

  return filePaths;
};
136+
137+
/**
 * Push-chain processor that scans the files named in the push diff for
 * sensitive data and blocks the push when any is found.
 *
 * A `checksensitiveData` step is always added to the action, including when a
 * file scan fails: the failure is recorded on the step as an error instead of
 * escaping before the step is attached.
 *
 * NOTE(review): paths from the diff are handed to `parseFile` unchanged, so
 * they look repository-relative and would resolve against the process working
 * directory - confirm that this is where the pushed files are checked out.
 *
 * @param {object} req - Incoming request; not used by this processor.
 * @param {object} action - Action whose `steps` are searched for the step
 *   named 'diff' and which receives the new step via `addStep`.
 * @returns {Promise<object>} The same action, with the step added.
 */
const exec = async (req, action) => {
  const step = new Step('checksensitiveData');
  const diffStep = action.steps.find((s) => s.stepName === 'diff');

  if (!diffStep || !diffStep.content) {
    console.log('No diff content available.');
    action.addStep(step);
    return action;
  }

  // The diff itself is deliberately not logged: it can contain the very data
  // this step is trying to keep out of sight.
  const filePaths = extractFilePathsFromDiff(diffStep.content);

  if (filePaths.length === 0) {
    console.log('No file paths provided in the diff step.');
    action.addStep(step);
    return action;
  }

  try {
    const scanResults = await Promise.all(filePaths.map((filePath) => parseFile(filePath)));

    if (scanResults.some(Boolean)) {
      step.blocked = true;
      step.error = true;
      step.errorMessage = 'Your push has been blocked due to sensitive data detection.';
      console.log(step.errorMessage);
    }
  } catch (err) {
    // A file that could not be scanned cannot be declared clean, so record an
    // error on the step rather than letting the push continue unchecked.
    step.error = true;
    step.errorMessage = `Unable to scan pushed files for sensitive data: ${err.message}`;
    console.error(step.errorMessage);
  }

  action.addStep(step);
  return action; // Returning action for testing purposes
};
167+
168+
169+
170+
// Label this processor by its own name; the previous value
// ('logFileChanges.exec') appears to have been copied from another module.
exec.displayName = 'checkSensitiveData.exec';
exports.exec = exec;

src/proxy/processors/push-action/getDiff.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ const Step = require('../../actions').Step;
22
const simpleGit = require('simple-git')
33

44

5+
56
const exec = async (req, action) => {
67
const step = new Step('diff');
78

src/proxy/processors/push-action/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ exports.checkUserPushPermission = require('./checkUserPushPermission').exec;
1313
exports.clearBareClone = require('./clearBareClone').exec;
1414
exports.checkForAiMlUsage = require('./checkForAiMlUsage').exec;
1515
exports.checkExifJpeg = require('./checkExifJpeg').exec;
16+
exports.checkSensitiveData = require('./checkSensitiveData').exec;

test/CheckSensitive.test.js

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// const path = require('path');
2+
const { exec } = require('../src/proxy/processors/push-action/checkSensitiveData.js'); // Adjust path as necessary
3+
const sinon = require('sinon');
4+
const {Action}=require('../src/proxy/actions/Action.js')
5+
const {Step}=require('../src/proxy/actions/Step.js')
6+
7+
8+
/**
 * Test suite for the checkSensitiveData push processor.
 *
 * Each case builds an action whose 'diff' step names one file, runs `exec`,
 * and checks whether the "push blocked" message was written to console.log.
 */
describe('Sensitive Data Detection', () => {
  const BLOCKED_MESSAGE = /Your push has been blocked due to sensitive data detection/;
  let logStub;

  // console.log is stubbed for every test so the blocked message can be observed.
  beforeEach(() => {
    logStub = sinon.stub(console, 'log');
  });

  afterEach(() => {
    logStub.restore();
  });

  // Renders file paths as `diff --git` header lines, one per file.
  const createDiffContent = (filePaths) => {
    const headers = [];
    for (const filePath of filePaths) {
      headers.push(`diff --git a/${filePath} b/${filePath}`);
    }
    return headers.join('\n');
  };

  // Builds a push action carrying a 'diff' step for the given files.
  const buildAction = (filePaths) => {
    const action = new Action('action_id', 'push', 'create', Date.now(), 'owner/repo');
    const diffStep = new Step('diff');
    diffStep.setContent(createDiffContent(filePaths));
    action.addStep(diffStep);
    return action;
  };

  // make sure the file types are added in proxyfiletypes in proxy.config.json
  const blockedCases = [
    ['should detect sensitive data in CSV file and block execution', 'test/test_data/sensitive_data.csv'],
    ['should detect sensitive data in XLSX file and block execution', 'test/test_data/sensitive_data2.xlsx'],
    ['should detect sensitive data in a log file and block execution', 'test/test_data/sensitive_data3.log'],
    ['should detect sensitive data in a JSON file and block execution', 'test/test_data/sensitive_data4.json'],
  ];

  blockedCases.forEach(([title, filePath]) => {
    it(title, async () => {
      await exec(null, buildAction([filePath]));
      sinon.assert.calledWith(logStub, sinon.match(BLOCKED_MESSAGE));
    });
  });

  const allowedCases = [
    ['should allow execution if no sensitive data is found', 'test_data/no_sensitive_data.txt'],
    ['should allow execution for an empty file', 'test_data/empty_file.txt'],
  ];

  allowedCases.forEach(([title, filePath]) => {
    it(title, async () => {
      await exec(null, buildAction([filePath]));
      sinon.assert.neverCalledWith(logStub, sinon.match(BLOCKED_MESSAGE));
    });
  });

  it('should handle file-not-found scenario gracefully', async () => {
    const action = buildAction(['test_data/non_existent_file.txt']);
    try {
      await exec(null, action);
    } catch (error) {
      // Only reached if exec rejects; then the failure must be a missing file.
      sinon.assert.match(error.message, /ENOENT: no such file or directory/);
    }
  });
});

test/CreateExcel.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
const XLSX = require('xlsx');
2+
const fs = require('fs');
3+
const path = require('path');
4+
// Fixture generator: writes test_data/sensitive_data2.xlsx next to this script,
// holding one sheet named "SensitiveData" with sample rows that include SSNs.

// Where the fixture is written
const outputDir = path.join(__dirname, 'test_data');
const outputFile = path.join(outputDir, 'sensitive_data2.xlsx');

// Sample rows with sensitive information
const sampleRows = [
  { Name: 'John Doe', SSN: '123-45-6789', Email: '[email protected]' },
  { Name: 'Jane Smith', SSN: '987-65-4321', Email: '[email protected]' },
];

// Build a workbook containing a single sheet made from the sample rows
const book = XLSX.utils.book_new();
const sheet = XLSX.utils.json_to_sheet(sampleRows);
XLSX.utils.book_append_sheet(book, sheet, 'SensitiveData');

// Create the output directory (and any missing parents) when it is absent
const outputDirExists = fs.existsSync(outputDir);
if (!outputDirExists) {
  fs.mkdirSync(outputDir, { recursive: true });
}

// Write the Excel file into the output directory
XLSX.writeFile(book, outputFile);

0 commit comments

Comments
 (0)