1+ const { Step } = require ( '../../actions' ) ;
2+ const config = require ( '../../../config' ) ;
3+ const commitConfig = config . getCommitConfig ( ) ;
4+ const file = require ( '../../../config/file' ) ;
5+ const fs = require ( 'fs' ) ;
6+
// Patterns for detecting different types of AI/ML assets.
// The keys double as the identifiers listed in the `blockPatterns` config array.
const FILE_PATTERNS = {
  // File names ending in a model-weight extension: .h5, .pb, .pt, .ckpt or .pkl
  modelWeights: /\.(h5|pb|pt|ckpt|pkl)$/,
  // File names ending in a dataset extension: .csv, .json or .xlsx
  largeDatasets: /\.(csv|json|xlsx)$/,
  // `import <lib>` or `require('<lib>')` for a known AI/ML library or tokenizer.
  // The library alternation inside `require(...)` is grouped on purpose: left
  // ungrouped, `|` splits the whole branch and a bare word such as "torch"
  // anywhere in the content would match.
  aiLibraries:
    /(?:import\s+(tensorflow|torch|keras|sklearn|tokenizer)|require\(['"](?:tensorflow|torch|keras|sklearn|tokenizer)['"]\))/,
  // Whole-word training/config keys, including token-related keys
  configKeys: /\b(epochs|learning_rate|batch_size|token)\b/,
  // Whole-word AI/ML function or class names, including tokenize/tokenizer
  aiFunctionNames: /\b(train_model|predict|evaluate|fit|transform|tokenize|tokenizer)\b/,
};
20+
21+
/**
 * Tells whether a file name matches one of the extension-based AI/ML patterns
 * (model weights or datasets) that the commit configuration has enabled.
 *
 * A pattern only counts when its key is present in
 * `commitConfig.diff.block.aiMlUsage.blockPatterns`.
 *
 * @param {string} fileName - File name to test against the extension patterns.
 * @returns {boolean} `true` when an enabled extension pattern matches.
 */
const isAiMlFileByExtension = (fileName) => {
  const { blockPatterns } = commitConfig.diff.block.aiMlUsage;
  // Checked in this order: model weights first, then datasets.
  const extensionPatternKeys = ['modelWeights', 'largeDatasets'];
  return extensionPatternKeys.some(
    (patternKey) => blockPatterns.includes(patternKey) && FILE_PATTERNS[patternKey].test(fileName),
  );
};
37+
/**
 * Tells whether file content matches one of the content-based AI/ML patterns
 * (library imports, config keys, function/class names) that the commit
 * configuration has enabled.
 *
 * A pattern only counts when its key is present in
 * `commitConfig.diff.block.aiMlUsage.blockPatterns`.
 *
 * @param {string} fileContent - Text content of the file to inspect.
 * @returns {boolean} `true` when an enabled content pattern matches.
 */
const isAiMlFileByContent = (fileContent) => {
  const { blockPatterns } = commitConfig.diff.block.aiMlUsage;
  // Checked in this order: libraries, then config keys, then function names.
  const contentPatternKeys = ['aiLibraries', 'configKeys', 'aiFunctionNames'];
  return contentPatternKeys.some(
    (patternKey) => blockPatterns.includes(patternKey) && FILE_PATTERNS[patternKey].test(fileContent),
  );
};
58+
59+
/**
 * Scans each file for AI/ML indicators, first by file name and, if the name
 * is not a match, by reading the file and testing its content.
 *
 * Mind the polarity of the result: an entry is `true` when the file is clean
 * (no indicator matched) and `false` when an indicator matched. The caller
 * blocks the push when any entry is `false`.
 *
 * @param {string[]} filePaths - Paths to inspect; content is read as UTF-8
 *   via `fs.promises.readFile`.
 * @returns {Promise<boolean[]>} One flag per input path, in input order.
 */
const detectAiMlUsageFiles = async (filePaths) => {
  const results = [];

  for (const filePath of filePaths) {
    try {
      const fileName = filePath.split('/').pop();

      // A matching file name is enough; skip the content read entirely.
      if (isAiMlFileByExtension(fileName)) {
        results.push(false);
        continue;
      }

      const content = await fs.promises.readFile(filePath, 'utf8');
      results.push(!isAiMlFileByContent(content));
    } catch (err) {
      console.error(`Error reading file ${filePath}:`, err);
      // Treat errors as "no AI/ML usage found". A file that cannot be read
      // (for example one deleted by the change) must be reported as clean
      // (`true`); pushing `false` here would block the push with a
      // misleading "AI/ML usage" message.
      results.push(true);
    }
  }

  return results;
};
89+
/**
 * Collects file paths from the `diff --git a/<path> b/<path>` header lines of
 * a git diff.
 *
 * Only the "a/" side of each header is returned; lines that are not headers
 * are ignored.
 *
 * @param {string} diffContent - Diff text, split on '\n'.
 * @returns {string[]} The "a/" paths in the order their headers appear.
 */
const extractFilePathsFromDiff = (diffContent) => {
  const headerPattern = /^diff --git a\/(.+?) b\/(.+?)$/;
  const collectedPaths = [];

  for (const diffLine of diffContent.split('\n')) {
    const header = headerPattern.exec(diffLine);
    if (header !== null) {
      // Capture group 1 is the path on the "a/" side of the header.
      collectedPaths.push(header[1]);
    }
  }

  return collectedPaths;
};
104+
/**
 * Push-processing step that blocks the push when any file referenced by the
 * diff step shows AI/ML indicators.
 *
 * A `checkForAiMlUsage` step is always added to the action. The scan itself
 * only runs when `commitConfig.diff.block.aiMlUsage.enabled` is truthy and a
 * `diff` step with content exists on the action.
 *
 * @param {object} req - Incoming request; not used by this step.
 * @param {object} action - Action being processed; its `steps` are searched
 *   for the `diff` step and the new step is registered via `addStep`.
 * @param {Function} [log=console.log] - Sink for informational messages.
 * @returns {Promise<object>} The same `action` instance that was passed in.
 */
const exec = async (req, action, log = console.log) => {
  const diffStep = action.steps.find((s) => s.stepName === 'diff');
  const step = new Step('checkForAiMlUsage');
  action.addStep(step);

  if (!commitConfig.diff.block.aiMlUsage.enabled) {
    return action;
  }

  if (!diffStep || !diffStep.content) {
    log('No diff content available.');
    return action;
  }

  const filePaths = extractFilePathsFromDiff(diffStep.content);
  if (filePaths.length === 0) {
    log('No file paths found in the diff content.');
    return action;
  }

  // Each flag is true for a clean file and false for a flagged one.
  const cleanFlags = await detectAiMlUsageFiles(filePaths);
  const isBlocked = cleanFlags.some((isClean) => !isClean);

  if (isBlocked) {
    step.blocked = true;
    step.error = true;
    step.errorMessage = 'Your push has been blocked due to AI/ML usage detection';
    log(step.errorMessage);
  }

  return action;
};
141+
// Label this processor after the step it creates ('checkForAiMlUsage'); the
// previous 'logFileChanges.exec' value named a different processor.
exec.displayName = 'checkForAiMlUsage.exec';
module.exports = { exec };
0 commit comments