Skip to content

Commit 7e5b6ef

Browse files
committed
Add package nectar
1 parent 578b4f5 commit 7e5b6ef

26 files changed

+2619
-0
lines changed

nectar/config/cflp.ini

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[Problem]
2+
n_facility = 10
3+
n_client = 10
4+
n_scenario = 50
5+
extensive_optimality_gap = 0.02
6+
# Time limit in seconds to solve the extensive form
7+
extensive_time_limit = 600
8+
surrogate_optimality_gap = 0.001
9+
# Time limit in seconds to solve the surrogate form
10+
surrogate_time_limit = 300

nectar/config/meta.ini

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Configurations for data_manager package
2+
3+
# All the sections and corresponding keys in this file are mandatory,
4+
# i.e., you may change the values, but not the section or key names.
5+
6+
[Run]
7+
# `problem` value should match with the folder containing data
8+
# management scripts for a given problem type.
9+
# For example, we have data-management scripts related to
10+
# Stochastic Capacitated Facility Location (S-CFLP) inside the `cflp`.
11+
# Hence, for data management of S-CFLP we assign the `problem` key the
12+
# value `cflp`.
13+
problem = cflp
14+
# Number of processes to run in parallel
15+
n_worker = 4
16+
from_pid = 0
17+
to_pid = 100
18+
19+
# Values in Directory and File section are optional. If nothing is passed,
20+
# we will automatically set the default values.
21+
[Directory]
22+
data = data
23+
result_extensive = result_ext
24+
result_xi = result_xi
25+
[File]
26+
instance = instances.pkl
27+
result_extensive = result_ext.pkl
28+
result_xi = result_xi.pkl
29+

nectar/data_manager/__init__.py

Whitespace-only changes.

nectar/data_manager/__main__.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""Dataset management
2+
3+
Data manager comprises of following modules:
4+
1. generate_instance.py
5+
2. generate_optimal_sol.py
6+
3. generate_xi_star.py (with different heuristics)
7+
4. improve_xi_star.py
8+
5. generate_dataset (responsible for creating dataset for ML models)
9+
10+
One should run modules 1 to 5 in order to create the dataset for ML model.
11+
"""
12+
from argparse import ArgumentParser
13+
from configparser import ConfigParser
14+
from importlib import import_module
15+
from pathlib import Path
16+
17+
18+
def _load(module_name, attr):
    """Import ``module_name`` and return the attribute named ``attr``.

    Each data-management stage lives in a problem-specific module that is
    only known at run time (from meta.ini), so stages must be resolved
    dynamically instead of with static imports.
    """
    return getattr(import_module(module_name), attr)


def main():
    """Run the data-management stage(s) selected via ``--run``.

    Reads ``config/meta.ini`` to discover the problem type and the
    directory/file layout, reads the problem-specific
    ``config/<problem>.ini``, builds the artefact paths, then dispatches
    to the requested stage module(s).
    """
    # Load and set configuration
    meta_config, problem_config = ConfigParser(), ConfigParser()
    root = Path(__file__).parent.parent

    # Meta config: which problem to manage plus directory/file names.
    meta_config.read(root / "config" / "meta.ini")
    data_dir = meta_config.get('Directory', 'data')
    problem = meta_config.get('Run', 'problem')
    instance_file = meta_config.get('File', 'instance')
    result_ext_file = meta_config.get('File', 'result_extensive')
    result_xi_file = meta_config.get('File', 'result_xi')

    # Problem config, e.g. config/cflp.ini when problem = cflp.
    problem_config.read(root / "config" / (problem + ".ini"))
    problem_path = "nectar.data_manager." + problem
    get_problem_identifier = _load(
        "nectar.utils.combinatorics." + problem, "get_problem_identifier")
    identifier = get_problem_identifier(problem_config)

    # Paths to the data directory and the pickled instance/result files.
    data_dir_path = root / data_dir / (problem + "_" + identifier)
    path = {
        "data": data_dir_path,
        "result_xi": data_dir_path / result_xi_file,
        "result_ext": data_dir_path / result_ext_file,
        "instance": data_dir_path / instance_file,
    }

    # Specify the module to run. `choices` makes argparse reject unknown
    # stage names instead of silently doing nothing (the previous behaviour
    # for a typo such as --run=ints).
    parser = ArgumentParser()
    parser.add_argument('--run', type=str,
                        choices=['inst', 'opt', 'repr', 'imp', 'dataset', 'all'],
                        help='specify the data_manager module to execute. '
                             'inst: to generate instances. '
                             'opt: to generate optimal solutions. '
                             'repr: to find a representative scenario. '
                             'imp: to improve a representative scenario. '
                             'dataset: to create the dataset for ML. '
                             'all: to run all modules one after the other.',
                        default='inst')
    args = parser.parse_args()

    if args.run in ("inst", "all"):
        generate_instance = _load(problem_path + ".generate_instance",
                                  "generate_instance")
        generate_instance(meta_config, problem_config, path)
    if args.run in ("opt", "all"):
        generate_optimal_sol = _load(problem_path + ".generate_optimal_sol",
                                     "generate_optimal_sol")
        generate_optimal_sol(meta_config, problem_config, path)
    if args.run in ("repr", "all"):
        generate_xi_hat = _load(problem_path + ".generate_xi_hat",
                                "generate_xi_hat")
        # One run section per heuristic configuration in <problem>/runs.ini.
        runs = ConfigParser()
        runs.read(Path(__file__).parent / meta_config['Run']['problem'] / "runs.ini")
        for idx in runs.sections():
            generate_xi_hat(meta_config, runs[idx], path)
    if args.run in ("imp", "all"):
        improve_xi_hat = _load(problem_path + ".improve_xi_hat",
                               "improve_xi_hat")
        improve_xi_hat(meta_config, path)
    if args.run in ("dataset", "all"):
        generate_dataset = _load(problem_path + ".generate_dataset",
                                 "generate_dataset")
        generate_dataset(path)


if __name__ == "__main__":
    main()

nectar/data_manager/cflp/__init__.py

Whitespace-only changes.
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import random
2+
from collections import defaultdict
3+
import time
4+
import numpy as np
5+
6+
from ...utils import load_pickle
7+
8+
np.random.seed(7)
9+
random.seed(11)
10+
11+
MIN_C_F, MAX_C_F = 15, 19
12+
MIN_C_V, MAX_C_V = 5, 9
13+
14+
MEAN_C_F = (MAX_C_F - MIN_C_F) / 2
15+
MEAN_C_V = (MAX_C_V - MIN_C_V) / 2
16+
17+
18+
def fetch_scenario(idxs, data):
    """Stack the ``'scenario'`` entry of each problem in *idxs*.

    Returns a 2-D numpy array with one row per index, in the order given
    by *idxs*.
    """
    return np.asarray([data[pid]['scenario'] for pid in idxs])
25+
26+
27+
def normalize_scenario(scenario, MIN_SCE, MAX_SCE):
    """Min-max scale *scenario* into [-1, 1] using the given bounds.

    The bounds are typically computed on the training split so that the
    same scaling is applied to train and test data.
    """
    unit_scaled = (scenario - MIN_SCE) / (MAX_SCE - MIN_SCE)
    # Map [0, 1] onto [-1, 1].
    return 2 * unit_scaled - 1
33+
34+
35+
def extract_scenario_features(scenario):
    """Compute summary features of a (n_scenario, n_dim) scenario matrix.

    Features are, in order: per-column max, min, median, 0.75-quantile,
    0.25-quantile, mean and std, followed — for each tolerance k in
    {0.9, 1, 1.1, 1.2, 1.5} — by the fraction of scenarios in which each
    column approximately dominates (resp. is dominated by) every other
    column within factor (1 + k).

    Returns a tuple ``(features, elapsed_seconds)``.
    """
    t0 = time.time()

    per_column_stats = [
        np.max(scenario, axis=0),
        np.min(scenario, axis=0),
        np.median(scenario, axis=0),
        np.quantile(scenario, 0.75, axis=0),
        np.quantile(scenario, 0.25, axis=0),
        np.mean(scenario, axis=0),
        np.std(scenario, axis=0),
    ]
    features = [value for stat in per_column_stats for value in stat]

    n_sample, n_dim = scenario.shape
    for k in [0.9, 1, 1.1, 1.2, 1.5]:
        dominates = []
        dominated = []
        for i in range(n_dim):
            ge_mask = np.ones(n_sample, dtype=bool)
            le_mask = np.ones(n_sample, dtype=bool)
            for j in range(n_dim):
                if i == j:
                    continue
                ge_mask = np.logical_and(ge_mask, (1 + k) * scenario[:, i] >= scenario[:, j])
                le_mask = np.logical_and(le_mask, scenario[:, i] <= (1 + k) * scenario[:, j])
            dominates.append(sum(ge_mask) / n_sample)
            dominated.append(sum(le_mask) / n_sample)
        features.extend(dominates)
        features.extend(dominated)

    return np.asarray(features), time.time() - t0
68+
69+
70+
def create_model_input(idxs, instance, cost_normalized, scenarios_normalized):
    """Assemble one model-input record per problem id in *idxs*.

    Each record is a copy of the raw instance dict augmented with its pid,
    normalized costs, normalized scenario matrix and extracted scenario
    features. Returns ``{"input": array_of_records, "total_time":
    seconds_spent_in_feature_extraction}``.
    """
    # One row of scenarios_normalized per requested problem, in order.
    assert len(idxs) == scenarios_normalized.shape[0]

    feature_time = 0
    records = []
    for rank, pid in enumerate(idxs):
        record = dict(instance[pid])
        record["pid"] = pid
        record["c_f_normalized"] = cost_normalized[pid]['c_f']
        record["c_v_normalized"] = cost_normalized[pid]['c_v']
        record["scenario_normalized"] = scenarios_normalized[rank]
        record["scenario_features"], elapsed = extract_scenario_features(
            scenarios_normalized[rank])
        feature_time += elapsed
        records.append(record)

    return {"input": np.asarray(records), "total_time": feature_time}
85+
86+
87+
def generate_dataset(path, train_test_split=0.7):
    """Build and save the raw ML dataset for the CFLP problem.

    Loads the pickled instances and representative-scenario results from
    ``path``, keeps only the instances whose representative scenario was
    solved, normalizes costs and scenarios, splits into train/test, and
    writes ``x/y_{train,test}_raw.npy`` plus the average per-instance
    preprocessing time to ``path["data"]``.

    Args:
        path: dict with keys "instance", "result_xi" and "data".
        train_test_split: fraction of solved instances used for training.

    Raises:
        ValueError: if no instance has a solved representative scenario.
    """
    instance = load_pickle(path["instance"])
    result_xi = load_pickle(path["result_xi"])
    total_time = 0

    # Find problems for which we have a representative scenario.
    # (The original used `v["solved_xi"] and solved.append(k)`, a boolean
    # expression evaluated purely for its side effect.)
    solved = [k for k, v in result_xi.items() if v["solved_xi"]]
    if not solved:
        # Without this guard we would write empty files and then crash
        # with ZeroDivisionError in the final average below.
        raise ValueError("no instance has a solved representative scenario")

    # Normalize cost to [-1, 1] using the known generation bounds.
    cost_normalized = defaultdict(dict)
    start_time = time.time()
    for idx in solved:
        cost_normalized[idx]['c_f'] = (((instance[idx]['c_f'] - MIN_C_F) / (MAX_C_F - MIN_C_F)) * 2) - 1
        cost_normalized[idx]['c_v'] = (((instance[idx]['c_v'] - MIN_C_V) / (MAX_C_V - MIN_C_V)) * 2) - 1
    total_time += (time.time() - start_time)

    # Shuffle and split into train and test.
    random.shuffle(solved)
    n_train = int(train_test_split * len(solved))
    train_idxs, test_idxs = solved[:n_train], solved[n_train:]

    # Normalize scenarios with bounds computed on the training split only,
    # so no test information leaks into the scaling.
    train_scenarios = fetch_scenario(train_idxs, instance)
    test_scenarios = fetch_scenario(test_idxs, instance)
    start_time = time.time()
    MAX_SCE = np.max(train_scenarios, axis=0)
    MIN_SCE = np.min(train_scenarios, axis=0)
    train_scenarios_normalized = normalize_scenario(train_scenarios, MIN_SCE, MAX_SCE)
    test_scenarios_normalized = normalize_scenario(test_scenarios, MIN_SCE, MAX_SCE)
    total_time += (time.time() - start_time)

    # Prepare training and test samples.
    result = create_model_input(train_idxs, instance, cost_normalized, train_scenarios_normalized)
    x_train, total_time_train = result["input"], result["total_time"]

    result = create_model_input(test_idxs, instance, cost_normalized, test_scenarios_normalized)
    x_test, total_time_test = result["input"], result["total_time"]

    total_time += (total_time_train + total_time_test)

    # Targets: the representative scenario found for each problem id.
    y_train = np.asarray([{"pid": pid, "xi_hat": result_xi[pid]["xi_hat"]}
                          for pid in train_idxs])
    y_test = np.asarray([{"pid": pid, "xi_hat": result_xi[pid]["xi_hat"]}
                         for pid in test_idxs])

    np.save(path["data"] / "x_train_raw.npy", x_train)
    np.save(path["data"] / "y_train_raw.npy", y_train)
    np.save(path["data"] / "x_test_raw.npy", x_test)
    np.save(path["data"] / "y_test_raw.npy", y_test)
    # Average preprocessing time per solved instance.
    np.save(path["data"] / "preprocessing_time.npy", [total_time / len(solved)])

0 commit comments

Comments
 (0)