@@ -7,7 +7,6 @@ module LocalSources {
77 private import semmle.python.Concepts
88 private import semmle.python.dataflow.new.BarrierGuards
99 private import semmle.python.ApiGraphs
10- private import DataFlow:: PathGraph
1110
1211 abstract class Range extends DataFlow:: Node { } // Abstract DataFlow::Node subtype with no members of its own; its extent comes from concrete subclasses. NOTE(review): presumably the extension point for LocalSources nodes - confirm against the subclasses.
1312
@@ -95,18 +94,26 @@ module LocalSources {
9594 call = API:: moduleImport ( [ "json" , "simplejson" ] ) .getMember ( "load" ) .getACall ( )
9695 or
9796 // yaml.load, yaml.load_all, yaml.safe_load, yaml.safe_load_all
98- call = API:: moduleImport ( "yaml" ) .getMember ( [ "load" , "load_all" , "safe_load" , "safe_load_all" ] ) .getACall ( )
97+ call =
98+ API:: moduleImport ( "yaml" )
99+ .getMember ( [ "load" , "load_all" , "safe_load" , "safe_load_all" ] )
100+ .getACall ( )
99101 or
100102 // msgpack.load
101103 call = API:: moduleImport ( "msgpack" ) .getMember ( "load" ) .getACall ( )
102104 or
103105 // pickle.load
104106 // dill.load
105- call = API:: moduleImport ( [ "cPickle" , "_pickle" , "pickle" , "dill" ] ) .getMember ( "load" ) .getACall ( )
107+ call =
108+ API:: moduleImport ( [ "cPickle" , "_pickle" , "pickle" , "dill" ] ) .getMember ( "load" ) .getACall ( )
106109 or
107110 // pickle.Unpickler.load
108111 // dill.Unpickler.load
109- call = API:: moduleImport ( [ "cPickle" , "pickle" , "dill" ] ) .getMember ( "Unpickler" ) .getACall ( ) .getAMethodCall ( "load" )
112+ call =
113+ API:: moduleImport ( [ "cPickle" , "pickle" , "dill" ] )
114+ .getMember ( "Unpickler" )
115+ .getACall ( )
116+ .getAMethodCall ( "load" )
110117 or
111118 // shelve.open
112119 call = API:: moduleImport ( "shelve" ) .getMember ( "open" ) .getACall ( )
@@ -137,45 +144,57 @@ module LocalSources {
137144 // pandas.read_gbq
138145 // pandas.read_stata
139146 // generate call expressions for each of the above pandas functions including ExcelFile.parse and HDFStore.* that have to be handled separately
140- call = API:: moduleImport ( "pandas" )
141- .getMember ( [
142- "read_csv" , "read_fwf" , "read_excel" , "read_json" , "read_html" , "read_xml" ,
143- "read_hdf" , "read_feather" , "read_parquet" , "read_orc" , "read_sas" , "read_spss" , "read_sql_table" ,
144- "read_sql_query" , "read_sql" , "read_gbq" , "read_stata"
145- ] )
146- .getACall ( )
147+ call =
148+ API:: moduleImport ( "pandas" )
149+ .getMember ( [
150+ "read_csv" , "read_fwf" , "read_excel" , "read_json" , "read_html" , "read_xml" ,
151+ "read_hdf" , "read_feather" , "read_parquet" , "read_orc" , "read_sas" , "read_spss" ,
152+ "read_sql_table" , "read_sql_query" , "read_sql" , "read_gbq" , "read_stata"
153+ ] )
154+ .getACall ( )
147155 or
148156 // pandas.ExcelFile.parse
149- call = API:: moduleImport ( "pandas" )
150- .getMember ( "ExcelFile" )
151- .getACall ( )
152- .getAMethodCall ( "parse" )
157+ call =
158+ API:: moduleImport ( "pandas" ) .getMember ( "ExcelFile" ) .getACall ( ) .getAMethodCall ( "parse" )
153159 or
154160 // pandas.HDFStore.get
155161 // pandas.HDFStore.select
156162 // pandas.HDFStore.info
157163 // pandas.HDFStore.keys
158164 // pandas.HDFStore.groups
159165 // pandas.HDFStore.walk
160- call = API:: moduleImport ( "pandas" )
161- .getMember ( "HDFStore" )
162- .getACall ( )
163- .getAMethodCall ( [ "get" , "select" , "info" , "keys" , "groups" , "walk" ] )
166+ call =
167+ API:: moduleImport ( "pandas" )
168+ .getMember ( "HDFStore" )
169+ .getACall ( )
170+ .getAMethodCall ( [ "get" , "select" , "info" , "keys" , "groups" , "walk" ] )
164171 or
165172 // polars.read_csv, polars.read_csv_batched, polars.scan_csv
166- call = API:: moduleImport ( "polars" ) .getMember ( [ "read_csv" , "read_csv_batched" , "scan_csv" ] ) .getACall ( )
173+ call =
174+ API:: moduleImport ( "polars" )
175+ .getMember ( [ "read_csv" , "read_csv_batched" , "scan_csv" ] )
176+ .getACall ( )
167177 or
168178 // polars.read_ipc, polars.scan_ipc, polars.read_ipc_schema
169- call = API:: moduleImport ( "polars" ) .getMember ( [ "read_ipc" , "scan_ipc" , "read_ipc_schema" ] ) .getACall ( )
179+ call =
180+ API:: moduleImport ( "polars" )
181+ .getMember ( [ "read_ipc" , "scan_ipc" , "read_ipc_schema" ] )
182+ .getACall ( )
170183 or
171184 // polars.read_parquet, polars.scan_parquet, polars.read_parquet_schema
172- call = API:: moduleImport ( "polars" ) .getMember ( [ "read_parquet" , "scan_parquet" , "read_parquet_schema" ] ) .getACall ( )
185+ call =
186+ API:: moduleImport ( "polars" )
187+ .getMember ( [ "read_parquet" , "scan_parquet" , "read_parquet_schema" ] )
188+ .getACall ( )
173189 or
174190 // polars.read_sql
175191 call = API:: moduleImport ( "polars" ) .getMember ( "read_sql" ) .getACall ( )
176192 or
177193 // polars.read_json, polars.read_ndjson, polars.scan_ndjson
178- call = API:: moduleImport ( "polars" ) .getMember ( [ "read_json" , "read_ndjson" , "scan_ndjson" ] ) .getACall ( )
194+ call =
195+ API:: moduleImport ( "polars" )
196+ .getMember ( [ "read_json" , "read_ndjson" , "scan_ndjson" ] )
197+ .getACall ( )
179198 or
180199 // polars.read_avro
181200 call = API:: moduleImport ( "polars" ) .getMember ( "read_avro" ) .getACall ( )
@@ -186,24 +205,37 @@ module LocalSources {
186205 // pyarrow.csv.read_csv
187206 // pyarrow.csv.open_csv
188207 // pyarrow.csv.CSVStreamingReader
189- call = API:: moduleImport ( "pyarrow" ) .getMember ( "csv" ) .getMember ( [ "read_csv" , "open_csv" , "CSVStreamingReader" ] ) .getACall ( )
208+ call =
209+ API:: moduleImport ( "pyarrow" )
210+ .getMember ( "csv" )
211+ .getMember ( [ "read_csv" , "open_csv" , "CSVStreamingReader" ] )
212+ .getACall ( )
190213 or
191214 // pyarrow.feather.read_feather
192215 // pyarrow.feather.read_table
193- call = API:: moduleImport ( "pyarrow" ) .getMember ( "feather" ) .getMember ( [ "read_feather" , "read_table" ] ) .getACall ( )
216+ call =
217+ API:: moduleImport ( "pyarrow" )
218+ .getMember ( "feather" )
219+ .getMember ( [ "read_feather" , "read_table" ] )
220+ .getACall ( )
194221 or
195222 // pyarrow.json.read_json
196223 call = API:: moduleImport ( "pyarrow" ) .getMember ( "json" ) .getMember ( "read_json" ) .getACall ( )
224+ or
197225 // pyarrow.parquet.ParquetDataset
198226 // pyarrow.parquet.ParquetFile
199227 // pyarrow.parquet.read_table
200228 // pyarrow.parquet.read_metadata
201229 // pyarrow.parquet.read_pandas
202230 // pyarrow.parquet.read_schema
203- or
204- call = API:: moduleImport ( "pyarrow" ) .getMember ( "parquet" ) .getMember ( [
205- "ParquetDataset" , "ParquetFile" , "read_table" , "read_metadata" , "read_pandas" , "read_schema"
206- ] ) .getACall ( )
231+ call =
232+ API:: moduleImport ( "pyarrow" )
233+ .getMember ( "parquet" )
234+ .getMember ( [
235+ "ParquetDataset" , "ParquetFile" , "read_table" , "read_metadata" , "read_pandas" ,
236+ "read_schema"
237+ ] )
238+ .getACall ( )
207239 ) and
208240 this = call
209241 ) and
0 commit comments