Skip to content

Commit 2792b15

Browse files
authored
Support for VersaDataset (#3270)
1 parent 8269ddf commit 2792b15

File tree

3 files changed

+74
-0
lines changed

3 files changed

+74
-0
lines changed

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,11 @@ The main difference between this converter and `super_resolution` in data organi
832832
* `images_dir` - path to directory with images (optional, default image_2).
833833
* `label_start` - specifies label index start in label map. Optional, default value is 1. You can provide another value, if you want to use this dataset for separate label validation.
834834
* `images_suffix` - suffix for image file names (Optional, default: `.png`).
835+
* `malware_classification_converter` - converts a dataset for the malware detection task to `ClassificationAnnotation`.
836+
* `annotation_file` - path to dataset annotation file.
837+
* `data_dir` - path to dataset root folder.
838+
* `known_folder` - name of folder containing known files (optional, default `KNOWN_1000`).
839+
* `malicious_folder` - name of folder containing malicious files (optional, default `MALICIOUS_1000`).
835840

836841
## <a name="customizing-dataset-meta"></a>Customizing Dataset Meta
837842
There are situations when we need to customize some default dataset parameters (e.g. replace original dataset label map with own.)

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
from .gan_annotation_converter import GANAnnotationConverter
130130
from .kitti_converter import KITTIConverter
131131
from .smartlab_action_recognition import SmartLabActionRecognition
132+
from .malware_classification import MalwareClassificationDatasetConverter
132133

133134
__all__ = [
134135
'BaseFormatConverter',
@@ -258,4 +259,5 @@
258259
'GANAnnotationConverter',
259260
'KITTIConverter',
260261
'SmartLabActionRecognition',
262+
'MalwareClassificationDatasetConverter',
261263
]
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""
2+
Copyright (c) 2018-2022 Intel Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
from ..representation import ClassificationAnnotation
18+
from ..utils import get_path, read_csv, check_file_existence
19+
from ..config import StringField, PathField
20+
from .format_converter import FileBasedAnnotationConverter
21+
from .format_converter import ConverterReturn
22+
23+
class MalwareClassificationDatasetConverter(FileBasedAnnotationConverter):
    """Build ``ClassificationAnnotation`` records for a malware classification dataset.

    The annotation file is read as CSV rows of ``(identifier, kind)``.
    Samples whose ``kind`` equals 1 are looked up in the malicious folder,
    all other samples in the known folder; both folders live directly under
    the dataset root (``data_dir``).
    """

    __provider__ = 'malware_classification_converter'

    @classmethod
    def parameters(cls):
        """Extend the inherited configuration schema with dataset layout options.

        Returns:
            The parent's parameter mapping with ``known_folder``,
            ``malicious_folder`` and ``data_dir`` added.
        """
        params = super().parameters()
        own_fields = {
            'known_folder': StringField(
                optional=True, default='KNOWN_1000', description="Known files folder"
            ),
            'malicious_folder': StringField(
                optional=True, default='MALICIOUS_1000', description="Malicious files folder"
            ),
            'data_dir': PathField(
                optional=False, default=None, is_directory=True, description="Dataset root folder"
            ),
        }
        params.update(own_fields)

        return params

    def configure(self):
        """Read configuration values and resolve the dataset directories."""
        read_option = self.get_value_from_config
        self.annotation_file = read_option('annotation_file')
        self.data_folder = read_option('data_dir')
        self.known_folder = read_option('known_folder')
        self.malicious_folder = read_option('malicious_folder')

        # Resolution order (root, known, malicious) is kept so that the first
        # failing path reported by get_path stays the same.
        root = self.data_folder
        self.data_dir = get_path(root, is_directory=True)
        self.known_dir = get_path(root / self.known_folder, is_directory=True)
        self.malicious_dir = get_path(root / self.malicious_folder, is_directory=True)

    def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
        """Turn every annotation row into a ``ClassificationAnnotation``.

        Args:
            check_content: when true, record an error for every sample file
                that ``check_file_existence`` reports as missing.
            progress_callback: optional callable that receives the completed
                percentage.
            progress_interval: number of rows between two progress reports.
            **kwargs: accepted for interface compatibility; unused here.

        Returns:
            ``ConverterReturn`` holding the annotations, ``None`` as meta and
            the list of collected content errors.
        """
        rows = read_csv(self.annotation_file, is_dict=False)
        converted = []
        missing_files = []

        for row_index, row in enumerate(rows):
            raw_name, raw_kind = row
            # Colons in identifiers are replaced by underscores to form the file name.
            file_name = raw_name.replace(':', '_')
            label = int(raw_kind)

            if label == 1:
                base_dir = self.malicious_dir
            else:
                base_dir = self.known_dir
            sample_path = base_dir / file_name

            if check_content and not check_file_existence(sample_path):
                missing_files.append('{}: does not exist'.format(sample_path))

            # Identifiers are stored relative to the dataset root.
            relative_name = str(sample_path.relative_to(self.data_dir))
            converted.append(ClassificationAnnotation(relative_name, [label]))

            if progress_callback is not None and row_index % progress_interval == 0:
                progress_callback(row_index * 100 / len(rows))

        return ConverterReturn(converted, None, missing_files)

0 commit comments

Comments
 (0)