Skip to content

Commit 56a2a2c

Browse files
committed
markup-check file extensions
1 parent 4b6581f commit 56a2a2c

File tree

5 files changed

+906
-71
lines changed

5 files changed

+906
-71
lines changed

config.json

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
{
2+
"exclude": {
3+
"pattern": [],
4+
"containers": [
5+
".aar",
6+
".apk",
7+
".bz2",
8+
".class",
9+
".gz",
10+
".jar",
11+
".lzma",
12+
".rpm",
13+
".tar",
14+
".war",
15+
".whl",
16+
".xz",
17+
".zip"
18+
],
19+
"documents": [
20+
".doc",
21+
".docx",
22+
".odp",
23+
".ods",
24+
".odt",
25+
".pdf",
26+
".ppt",
27+
".pptx",
28+
".xls",
29+
".xlsx"
30+
],
31+
"extension": [
32+
".7z",
33+
".a",
34+
".aac",
35+
".avi",
36+
".bin",
37+
".bmp",
38+
".css",
39+
".dmg",
40+
".ear",
41+
".eot",
42+
".elf",
43+
".exe",
44+
".gif",
45+
".gmo",
46+
".ico",
47+
".img",
48+
".info",
49+
".jpeg",
50+
".jpg",
51+
".lib",
52+
".map",
53+
".m4a",
54+
".mat",
55+
".mo",
56+
".mov",
57+
".mp3",
58+
".mp4",
59+
".mpg",
60+
".mkv",
61+
".npy",
62+
".npz",
63+
".obj",
64+
".oga",
65+
".ogg",
66+
".ogv",
67+
".ops",
68+
".pak",
69+
".png",
70+
".psd",
71+
".pyc",
72+
".pyd",
73+
".pyo",
74+
".rar",
75+
".rc",
76+
".rc2",
77+
".realm",
78+
".res",
79+
".s7z",
80+
".scss",
81+
".so",
82+
".sum",
83+
".svg",
84+
".swf",
85+
".tif",
86+
".tiff",
87+
".tlb",
88+
".ttf",
89+
".vcxproj",
90+
".vdproj",
91+
".wav",
92+
".webm",
93+
".webp",
94+
".wma",
95+
".woff",
96+
".woff2",
97+
".yuv"
98+
],
99+
"path": [
100+
"/.git/",
101+
"/.idea/",
102+
"/.svn/",
103+
"/__pycache__/",
104+
"/node_modules/",
105+
"/target/",
106+
"/.venv/",
107+
"/venv/"
108+
],
109+
"lines": [],
110+
"values": []
111+
},
112+
"source_ext": [
113+
".aspx",
114+
".cs",
115+
".cshtml",
116+
".ejs",
117+
".erb",
118+
".go",
119+
".html",
120+
".ipynb",
121+
".jsp",
122+
".jsx",
123+
".php",
124+
".phtml",
125+
".rb",
126+
".sh",
127+
".swift",
128+
".ts",
129+
".twig",
130+
".vue",
131+
".xhtml",
132+
".java",
133+
".js",
134+
".py",
135+
".cpp",
136+
".c",
137+
".h",
138+
".hpp",
139+
".mm",
140+
".cu",
141+
".y",
142+
".vb",
143+
".m",
144+
".cu"
145+
],
146+
"source_quote_ext": [
147+
".cs",
148+
".cc",
149+
".php",
150+
".tf",
151+
".kt",
152+
".go",
153+
".ipynb",
154+
".ts",
155+
".java",
156+
".js",
157+
".py",
158+
".cpp",
159+
".c",
160+
".h",
161+
".hpp"
162+
],
163+
"find_by_ext_list": [
164+
".pem",
165+
".cer",
166+
".csr",
167+
".der",
168+
".pfx",
169+
".p12",
170+
".key",
171+
".jks"
172+
],
173+
"bruteforce_list": [
174+
"",
175+
"changeit",
176+
"changeme",
177+
"tizen"
178+
],
179+
"check_for_literals": true,
180+
"max_password_value_length": 64,
181+
"max_url_cred_value_length": 80,
182+
"line_data_output": [
183+
"line",
184+
"line_num",
185+
"path",
186+
"info",
187+
"variable",
188+
"variable_start",
189+
"variable_end",
190+
"value",
191+
"value_start",
192+
"value_end",
193+
"entropy"
194+
],
195+
"candidate_output": [
196+
"rule",
197+
"severity",
198+
"confidence",
199+
"ml_probability",
200+
"line_data_list"
201+
]
202+
}

download_data.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525

2626
@functools.cache
2727
def get_words_in_path():
28-
# JSON format is used to avoid Python's implicit concatenation of adjacent strings when a comma is missing in a multiline list
29-
with open("word_in_path.json") as f:
30-
# the file should contain the same list as the CredSweeper ml_config
28+
# copy of CredSweeper/ml_model/ml_config.json
29+
with open("ml_config.json") as f:
3130
result = json.load(f)
32-
return result
31+
# use the list to build a new file path
32+
return result["words_in_path"]
3333

3434

3535
def get_file_scope(path_without_extension: str):

0 commit comments

Comments
 (0)