Skip to content

Commit aa6e755

Browse files
Merge branch 'release/2.3.0'
2 parents feeaeab + 95e7a92 commit aa6e755

23 files changed

+38733
-129
lines changed

.travis.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,23 @@ before_install:
2323
build-essential
2424
cmake
2525
libfuzzy-dev
26+
p7zip-full
2627
unrar
28+
unzip
2729
- sudo apt-get -y -o Dpkg::Options::="--force-confnew" install docker-ce
2830

29-
- git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH
3031

3132
# Build latest images spamscope-root, spamscope-elasticsearch
3233
# make images
3334
- if [ "$TRAVIS_BRANCH" == "master" ]; then
35+
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH;
3436
cd $DOCKER_ELASTICSEARCH_PATH && docker build --build-arg SPAMSCOPE_VER=master -t $DOCKER_USERNAME/spamscope-elasticsearch . && cd -;
3537
docker run --rm -it $DOCKER_USERNAME/spamscope-elasticsearch /bin/sh -c 'python -m unittest discover -s tests -f -v';
3638
docker run --rm -it $DOCKER_USERNAME/spamscope-elasticsearch /bin/sh -c 'thug -V && spamscope-topology -v && spamscope-elasticsearch -v';
3739
fi
3840

3941
- if [ "$TRAVIS_BRANCH" == "develop" ]; then
42+
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH;
4043
cd $DOCKER_ELASTICSEARCH_PATH && docker build --build-arg SPAMSCOPE_VER=develop -t $DOCKER_USERNAME/spamscope-elasticsearch:develop . && cd -;
4144
docker run --rm -it $DOCKER_USERNAME/spamscope-elasticsearch:develop /bin/sh -c 'python -m unittest discover -s tests -f -v';
4245
docker run --rm -it $DOCKER_USERNAME/spamscope-elasticsearch:develop /bin/sh -c 'thug -V && spamscope-topology -v && spamscope-elasticsearch -v';

conf/spamscope.example.yml

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,22 @@ raw_mail:
107107

108108
# Attachments bolt configuration
109109
attachments:
110-
content_types_blacklist:
111-
# All content types to remove from results
112-
# Example in content_types/blacklist/generic.example.yml
113-
generic: /path/to/generic_content_types
114-
custom: /path/to/custom_content_types
110+
# The lists of all components must be under the `lists` keyword to be loaded
111+
# automatically
112+
commons:
113+
lists:
114+
blacklist_content_types:
115+
# All content types to remove from results
116+
# Example in content_types/blacklist/generic.example.yml
117+
generic: /path/to/generic_content_types
118+
custom: /path/to/custom_content_types
119+
120+
not_extract_content_types:
121+
# All content types that you don't want to extract from archives
122+
# Example: application/java-archive (jar), you can save the jar
123+
# but do not extract the classes inside it.
124+
generic: /path/to/generic_content_types
125+
custom: /path/to/custom_content_types
115126

116127
# Apache Tika analysis: https://tika.apache.org/
117128
tika:
@@ -125,9 +136,10 @@ attachments:
125136

126137
# All content types to extract details from
127138
# Example in content_types/tika/generic.example.yml
128-
valid_content_types:
129-
generic: /path/to/generic_content_types
130-
custom: /path/to/custom_content_types
139+
lists:
140+
whitelist_content_types:
141+
generic: /path/to/generic_content_types
142+
custom: /path/to/custom_content_types
131143

132144
# VirusTotal analysis: https://www.virustotal.com/
133145
virustotal:
@@ -138,9 +150,10 @@ attachments:
138150
# All content types to analyze with virustotal
139151
# Example in content_types/virustotal/generic.example.yml
140152
# Currently not active
141-
content_types_details:
142-
generic: /path/to/generic_content_types
143-
custom: /path/to/custom_content_types
153+
lists:
154+
whitelist_content_types:
155+
generic: /path/to/generic_content_types
156+
custom: /path/to/custom_content_types
144157

145158
# Thug analysis: https://github.com/buffer/thug
146159
thug:
@@ -188,6 +201,12 @@ attachments:
188201
ApiKey: xxxxx
189202
useragent: SpamScope
190203

204+
# This plugin stores the samples on the file system
205+
# in date-formatted subfolders (YYYY-MM-DD)
206+
store_samples:
207+
enabled: false
208+
base_path: /tmp
209+
191210

192211
# Urls
193212
urls:
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"order": 0,
3+
"version": 1,
4+
"index_patterns": "spamscope_attachments-*",
5+
"settings": {
6+
"analysis": {
7+
"analyzer": {
8+
"header": {
9+
"tokenizer": "uax_url_email",
10+
"filter": [
11+
"lowercase"
12+
]
13+
},
14+
"html_body": {
15+
"char_filter": [
16+
"html_strip"
17+
],
18+
"tokenizer": "uax_url_email",
19+
"filter": [
20+
"lowercase"
21+
]
22+
},
23+
"path_pattern": {
24+
"tokenizer": "path_hierarchy",
25+
"filter": [
26+
"lowercase"
27+
]
28+
}
29+
}
30+
},
31+
"index.codec": "best_compression",
32+
"index.number_of_shards": 1,
33+
"index.number_of_replicas": 0,
34+
"index.refresh_interval": "5s",
35+
"index.mapping.ignore_malformed": true
36+
},
37+
"mappings": {
38+
"_doc": {
39+
"dynamic_templates": [
40+
{
41+
"hashes": {
42+
"mapping": {
43+
"type": "keyword",
44+
"eager_global_ordinals": true
45+
},
46+
"match_pattern": "regex",
47+
"match": "(^|.*\\.)(md5|sha1|sha256|sha512|ssdeep|extension)$"
48+
}
49+
},
50+
{
51+
"payload": {
52+
"mapping": {
53+
"type": "binary",
54+
"norms": false
55+
},
56+
"match_pattern": "regex",
57+
"match": "(^|.*\\.)(payload)$"
58+
}
59+
},
60+
{
61+
"structured": {
62+
"mapping": {
63+
"type": "keyword",
64+
"eager_global_ordinals": true
65+
},
66+
"match_pattern": "regex",
67+
"match": "(^|.*\\.)(charset|mail_server|mailbox|message_id|Content-Type|content_transfer_encoding|mail_content_type)$"
68+
}
69+
}
70+
],
71+
"properties": {
72+
"@timestamp": {
73+
"type": "date"
74+
},
75+
"@version": {
76+
"type": "keyword"
77+
}
78+
}
79+
}
80+
}
81+
}
Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"order": 0,
3-
"version": 5,
4-
"index_patterns": "spamscope_*-*",
3+
"version": 6,
4+
"index_patterns": "spamscope_mails-*",
55
"settings": {
66
"analysis": {
77
"analyzer": {
@@ -29,18 +29,19 @@
2929
}
3030
},
3131
"index.codec": "best_compression",
32-
"index.number_of_shards": 2,
32+
"index.number_of_shards": 1,
3333
"index.number_of_replicas": 0,
3434
"index.refresh_interval": "5s",
3535
"index.mapping.ignore_malformed": true
3636
},
3737
"mappings": {
38-
"analysis": {
38+
"_doc": {
3939
"dynamic_templates": [
4040
{
4141
"cert_serial": {
4242
"mapping": {
43-
"type": "text"
43+
"type": "text",
44+
"norms": false
4445
},
4546
"path_match": "network.shodan.data.ssl.cert.serial"
4647
}
@@ -58,7 +59,8 @@
5859
{
5960
"payload": {
6061
"mapping": {
61-
"type": "binary"
62+
"type": "binary",
63+
"norms": false
6264
},
6365
"match_pattern": "regex",
6466
"match": "(^|.*\\.)(payload)$"
@@ -71,7 +73,7 @@
7173
"analyzer": "header"
7274
},
7375
"match_pattern": "regex",
74-
"match": "^(from|to|headers)$"
76+
"match": "^(from|to|headers|bcc|cc|delivered_to|reply_to)$"
7577
}
7678
},
7779
{
@@ -103,28 +105,13 @@
103105
}
104106
},
105107
{
106-
"all_not_analyzed": {
108+
"structured": {
107109
"mapping": {
108110
"type": "keyword",
109111
"eager_global_ordinals": true
110112
},
111113
"match_pattern": "regex",
112-
"match": "(^|.*\\.)(charset|mail_server|mailbox|message_id|Content-Type|content_transfer_encoding|mail_content_type)$"
113-
}
114-
},
115-
{
116-
"all_string": {
117-
"mapping": {
118-
"type": "text",
119-
"fields": {
120-
"raw": {
121-
"ignore_above": 256,
122-
"type": "keyword",
123-
"eager_global_ordinals": true
124-
}
125-
}
126-
},
127-
"match_mapping_type": "string"
114+
"match": "(^|.*\\.)(charset|mail_server|mailbox|message_id|Content-Type|content_transfer_encoding|mail_content_type|to_domains)$"
128115
}
129116
}
130117
],

config.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
"localhost"
1212
],
1313
"log": {
14-
"path": "/var/log/spamscope",
14+
"path": "/logs",
1515
"max_bytes": 5000000,
16-
"backup_count": 3,
16+
"backup_count": 1,
1717
"level": "info"
1818
},
1919
"use_virtualenv": false,

project.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
(defproject spamscope "2.2.1-SNAPSHOT"
1+
(defproject spamscope "2.3.0-SNAPSHOT"
22
:resource-paths ["_resources"]
33
:target-path "_build"
44
:min-lein-version "2.0.0"

src/bolts/attachments.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,20 @@ def initialize(self, stormconf, context):
3434

3535
def _load_settings(self):
3636
# Loading configuration
37-
self._load_lists()
38-
39-
settings = copy.deepcopy(self.conf)
40-
settings.update(
41-
{"filter_cont_types": self._filter_cont_types,
42-
"tika_whitelist_cont_types": self._tika_whitelist_cont_types})
43-
37+
settings = self._load_lists()
4438
self.attach.reload(**settings)
4539

4640
def _load_lists(self):
41+
settings = copy.deepcopy(self.conf)
4742

48-
# Load content types to filter
49-
self._filter_cont_types = load_keywords_list(
50-
self.conf.get("content_types_blacklist", {}), lower=False)
51-
self.log("Content types to filter reloaded", "debug")
52-
53-
# Load Tika content types to analyze
54-
self._tika_whitelist_cont_types = set()
55-
if self.conf["tika"]["enabled"]:
56-
self._tika_whitelist_cont_types = load_keywords_list(
57-
self.conf["tika"].get("valid_content_types", {}), lower=False)
58-
self.log("Whitelist Tika content types reloaded", "debug")
43+
for k in self.conf:
44+
for i, j in self.conf[k].get("lists", {}).items():
45+
settings[k][i] = load_keywords_list(j)
46+
self.log("Loaded lists {!r} for {!r}".format(i, k), "debug")
47+
self.log("Keys[{!r}][{!r}]: {}".format(
48+
k, i, ", ".join(settings[k][i])), "debug")
49+
else:
50+
return settings
5951

6052
def process_tick(self, freq):
6153
"""Every freq seconds you reload the keywords. """

src/cli/spamscope_elasticsearch.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,8 @@ def update_nr_replicas(es, max_retry, nr_replicas, index):
200200
try:
201201
es.indices.put_settings(
202202
body={"index": {"number_of_replicas": int(nr_replicas)}},
203-
index=index)
203+
index=index,
204+
allow_no_indices=True)
204205
log.info("Updating replicas done")
205206
return
206207

src/modules/attachments/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@
1616

1717
from .utils import *
1818
from .attachments import Attachments as MailAttachments
19-
from .post_processing import processors, tika, virustotal, thug, zemana
19+
from .post_processing import processors, tika, virustotal, thug, zemana, store_samples

0 commit comments

Comments
 (0)