|
1 | | -from py_fcm.fcm import FuzzyCognitiveMap, from_json, join_maps |
2 | | -from py_fcm.__const import TYPE_SIMPLE, TYPE_DECISION, TYPE_FUZZY |
| 1 | +from py_fcm.utils.functions import Relation |
| 2 | +from py_fcm.fcm_estimator import FcmEstimator |
| 3 | +from py_fcm.loader import from_json, join_maps, FuzzyCognitiveMap |
| 4 | +from py_fcm.utils.__const import TYPE_SIMPLE, TYPE_DECISION, TYPE_FUZZY |
| 5 | + |
| 6 | + |
def load_csv_dataset(dataset_dir, ds_name, factorize=False):
    """Load a comma-separated dataset file into a de-duplicated DataFrame.

    The first line of the file supplies the column names. Every following
    line is stripped and split on ``,``; all values are kept as strings.
    Lines whose field count differs from the header are reported on stdout
    and skipped. No quoting or escaping is interpreted: a comma always
    separates two fields.

    Args:
        dataset_dir: Directory that contains the dataset file.
        ds_name: File name of the dataset inside ``dataset_dir``.
        factorize: When true, every column is replaced by the integer codes
            returned by ``pandas.factorize`` (0..N per distinct value).

    Returns:
        A ``pandas.DataFrame`` with duplicate rows removed.

    Raises:
        ValueError: If the file contains no lines at all (no header).
        Exception: If pandas cannot build the frame from the parsed columns;
            the original pandas error is attached as ``__cause__``.
    """
    import pandas
    from collections import OrderedDict

    with open(dataset_dir + "/" + ds_name) as file:
        content = [line.strip() for line in file]

    # Without a header line there are no column names to build the frame from.
    if not content:
        raise ValueError(ds_name + " is empty: no header line found")

    # The first line describes the attribute names.
    attributes = content[0].split(',')
    # Repeated header names share a single list, so such a column ends up
    # longer than the others and is rejected when the frame is built below.
    data_dict = OrderedDict((name, []) for name in attributes)

    for line_index, raw_line in enumerate(content[1:], 1):
        data_line = raw_line.split(',')
        # Skip rows whose field count does not match the header.
        if len(data_line) != len(attributes):
            print("Errors in line: ", line_index, len(data_line), len(attributes))
            continue
        for name, value in zip(attributes, data_line):
            data_dict[name].append(value)

    print("\n===> Dataset for test: ", ds_name)
    try:
        dataset_frame = pandas.DataFrame(data_dict).drop_duplicates()
    except Exception as err:
        # Dump the parsed columns to help locate the inconsistent one, then
        # re-raise with the dataset name while keeping the original cause.
        for key, value in data_dict.items():
            print(key, value)
        raise Exception(ds_name + " " + str(err)) from err

    # Transform the discrete values of each column into 0..N integer codes.
    if factorize:
        dataset_frame = dataset_frame.apply(lambda x: pandas.factorize(x)[0])
    return dataset_frame
0 commit comments