-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapedir.py
More file actions
39 lines (32 loc) · 1.46 KB
/
scrapedir.py
File metadata and controls
39 lines (32 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import sys
from entropycalc import *
def scrapeDirectory(directory, outputfile, chunksize):
    """Run the chunked entropy calculation on every entry of a directory.

    Each name returned by ``os.listdir(directory)`` is joined onto
    ``directory`` and handed to ``entropy_of_file_chunks`` together with
    ``chunksize`` and ``outputfile``. A progress line is printed per entry.

    Args:
        directory: Path of the directory to scan. A trailing path separator
            is optional.
        outputfile: Passed through unchanged to ``entropy_of_file_chunks``
            (presumably the path results are written to -- confirm against
            entropycalc).
        chunksize: Passed through unchanged to ``entropy_of_file_chunks``.
    """
    for filename in os.listdir(directory):
        # os.path.join supplies the separator when it is missing, so both
        # "dir" and "dir/" work; plain concatenation turned "dir" + "a.bin"
        # into the non-existent path "dira.bin".
        path = os.path.join(directory, filename)
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("scraping " + path)
        entropy_of_file_chunks(path, chunksize, outputfile)
def scrapeDirectoryTotals(directory, outputfile):
    """Write one comma-separated line of whole-file entropy per entry.

    ``outputfile`` is opened for writing (truncating any existing content)
    and, for every name returned by ``os.listdir(directory)``, a line of the
    form ``name, extension, entropy, size, `` is written, where ``entropy``
    is the value returned by ``entropy_of_file`` and ``size`` is the entry's
    ``st_size``. A progress line is printed per entry.

    Args:
        directory: Path of the directory to scan. A trailing path separator
            is optional.
        outputfile: Path of the file to (over)write with the results.
    """
    # The with-block guarantees the output is flushed and closed even if one
    # of the entries raises part-way through the scan.
    with open(outputfile, "w") as outfile:
        for filename in os.listdir(directory):
            # os.path.join supplies the separator when it is missing, so
            # both "dir" and "dir/" work.
            path = os.path.join(directory, filename)
            # A single parenthesised argument prints identically on
            # Python 2 and Python 3.
            print("scraping " + path)
            entropy = entropy_of_file(path)
            filesize = os.stat(path).st_size
            filetype = os.path.splitext(filename)[1]
            fields = [filename, filetype, str(entropy), str(filesize)]
            # Every field, including the last one, is followed by ", " --
            # the trailing separator is kept so the output format is
            # unchanged for existing consumers.
            outfile.write(", ".join(fields) + ", " + "\n")
def main(argv):
    """Command-line entry point: scrape a directory in one of two modes.

    With fewer than two arguments after the program name, a usage message is
    printed and nothing else happens. Otherwise the directory is scraped
    either per chunk (when a chunk size is supplied) or per whole file.

    Args:
        argv: Argument list in ``sys.argv`` layout:
            ``[program, input_dir, output_file, size_of_chunk (optional)]``.

    Returns:
        0 in every case, matching the exit status of the original script.
    """
    if len(argv) < 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(argv[0] + " takes as arguments:")
        print(" input_dir - input directory to calculate entropy")
        print(" output_file - the file to put the entropy calculations in.")
        print(" It is formatted as a CSV.")
        print(" size_of_chunk - Optional - size of chunk to calculate on.")
        print(" If not present, calculates over whole file.")
        return 0

    input_dir = argv[1]
    output_file = argv[2]
    if len(argv) > 3:
        # NOTE(review): the chunk size is forwarded as the raw command-line
        # string, as before -- confirm that entropy_of_file_chunks converts
        # it to an integer itself.
        size_of_chunk = argv[3]
        scrapeDirectory(input_dir, output_file, size_of_chunk)
    else:
        # Whole-file totals are only produced when no chunk size is given,
        # as the usage text states. Running them unconditionally reopened
        # output_file in "w" mode right after the chunked scrape.
        scrapeDirectoryTotals(input_dir, output_file)
    return 0


if __name__ == "__main__":
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(main(sys.argv))