Skip to content

Commit e00e901

Browse files
macos
1 parent 09609b3 commit e00e901

File tree

13 files changed

+207
-98
lines changed

13 files changed

+207
-98
lines changed

.github/workflows/build_wheels.yml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,19 @@
1-
name: Build_for_Linux_and_Windows
2-
1+
name: Build_wheels
32
on: [workflow_dispatch]
4-
53
jobs:
64
build_wheels:
75
name: Build wheels on ${{ matrix.os }}
86
runs-on: ${{ matrix.os }}
97
strategy:
108
matrix:
11-
os: [ubuntu-20.04, windows-2019]
12-
9+
os: [ubuntu-latest, windows-latest, macos-13, macos-14]
1310
steps:
1411
- uses: actions/checkout@v4
15-
1612
- name: Build wheels
17-
uses: pypa/cibuildwheel@v2.16.5
13+
uses: pypa/cibuildwheel@v2.18.1
1814
env:
1915
CIBW_SKIP: "*musllinux*"
20-
16+
CIBW_ENVIRONMENT: MACOSX_DEPLOYMENT_TARGET=10.14
2117
- uses: actions/upload-artifact@v4
2218
with:
2319
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,4 +165,9 @@ A string specifying the label of the category.
165165

166166
## Method: get_unique_term_affiliations()
167167

168-
***Returns a list of strings containing unique predictor affiliations for terms.***
168+
***Returns a list of strings containing unique predictor affiliations for terms.***
169+
170+
171+
## Method: get_base_predictors_in_each_unique_term_affiliation()
172+
173+
***Returns a list of integer lists. The first list contains indexes for the unique base predictors used in the first unique term affiliation. The second list contains indexes for the unique base predictors used in the second unique term affiliation, and so on.***

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,11 @@ A numpy matrix with predictor values.
277277
***Returns a list of strings containing unique predictor affiliations for terms.***
278278

279279

280+
## Method: get_base_predictors_in_each_unique_term_affiliation()
281+
282+
***Returns a list of integer lists. The first list contains indexes for the unique base predictors used in the first unique term affiliation. The second list contains indexes for the unique base predictors used in the second unique term affiliation, and so on.***
283+
284+
280285
## Method: get_term_coefficients()
281286

282287
***Returns a numpy vector containing term regression coefficients.***

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Build predictive and interpretable parametric regression or classification machi
88
***pip install aplr***
99

1010
# Availability
11-
Currently available for Windows and most Linux distributions.
11+
Available for Windows, most Linux distributions, and macOS.
1212

1313
# How to use
1414
Please see the two example Python scripts [here](https://github.com/ottenbreit-data-science/aplr/tree/main/examples). They cover common use cases, but not all of the functionality in this package.

aplr/aplr.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ def get_term_affiliations(self) -> List[str]:
256256
def get_unique_term_affiliations(self) -> List[str]:
257257
return self.APLRRegressor.get_unique_term_affiliations()
258258

259+
def get_base_predictors_in_each_unique_term_affiliation(self) -> List[List[int]]:
    """Return the base predictor indexes used in each unique term affiliation.

    The result is a list of integer lists: the first inner list holds the
    indexes of the unique base predictors used in the first unique term
    affiliation, the second inner list those of the second, and so on.
    """
    return self.APLRRegressor.get_base_predictors_in_each_unique_term_affiliation()
261+
259262
def get_term_coefficients(self) -> npt.ArrayLike:
260263
return self.APLRRegressor.get_term_coefficients()
261264

@@ -477,6 +480,9 @@ def get_feature_importance(self) -> npt.ArrayLike:
477480

478481
def get_unique_term_affiliations(self) -> List[str]:
479482
return self.APLRClassifier.get_unique_term_affiliations()
483+
484+
def get_base_predictors_in_each_unique_term_affiliation(self) -> List[List[int]]:
    """Return the base predictor indexes used in each unique term affiliation.

    The result is a list of integer lists: the first inner list holds the
    indexes of the unique base predictors used in the first unique term
    affiliation, the second inner list those of the second, and so on.
    """
    return self.APLRClassifier.get_base_predictors_in_each_unique_term_affiliation()
480486

481487
# For sklearn
482488
def get_params(self, deep=True):

cpp/APLRClassifier.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class APLRClassifier
5252
size_t max_terms;
5353
std::vector<std::string> unique_term_affiliations;
5454
std::map<std::string, size_t> unique_term_affiliation_map;
55+
std::vector<std::vector<size_t>> base_predictors_in_each_unique_term_affiliation;
5556

5657
APLRClassifier(size_t m = 3000, double v = 0.1, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), size_t n_jobs = 0,
5758
size_t cv_folds = 5, size_t reserved_terms_times_num_x = 100, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
@@ -76,6 +77,7 @@ class APLRClassifier
7677
double get_cv_error();
7778
VectorXd get_feature_importance();
7879
std::vector<std::string> get_unique_term_affiliations();
80+
std::vector<std::vector<size_t>> get_base_predictors_in_each_unique_term_affiliation();
7981
};
8082

8183
APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, size_t n_jobs, size_t cv_folds,
@@ -109,7 +111,8 @@ APLRClassifier::APLRClassifier(const APLRClassifier &other)
109111
num_first_steps_with_linear_effects_only{other.num_first_steps_with_linear_effects_only},
110112
penalty_for_non_linearity{other.penalty_for_non_linearity}, penalty_for_interactions{other.penalty_for_interactions},
111113
max_terms{other.max_terms}, unique_term_affiliations{other.unique_term_affiliations},
112-
unique_term_affiliation_map{other.unique_term_affiliation_map}
114+
unique_term_affiliation_map{other.unique_term_affiliation_map},
115+
base_predictors_in_each_unique_term_affiliation{other.base_predictors_in_each_unique_term_affiliation}
113116
{
114117
}
115118

@@ -256,6 +259,20 @@ void APLRClassifier::calculate_unique_term_affiliations()
256259
{
257260
unique_term_affiliation_map[unique_term_affiliations[i]] = i;
258261
}
262+
base_predictors_in_each_unique_term_affiliation.resize(unique_term_affiliation_map.size());
263+
std::vector<std::set<size_t>> base_predictors_in_each_unique_term_affiliation_set(unique_term_affiliation_map.size());
264+
for (std::string &category : categories)
265+
{
266+
for (auto &term : logit_models[category].terms)
267+
{
268+
std::vector<size_t> unique_base_terms_for_this_term{term.get_unique_base_terms_used_in_this_term()};
269+
base_predictors_in_each_unique_term_affiliation_set[unique_term_affiliation_map[term.predictor_affiliation]].insert(unique_base_terms_for_this_term.begin(), unique_base_terms_for_this_term.end());
270+
}
271+
}
272+
for (size_t i = 0; i < base_predictors_in_each_unique_term_affiliation_set.size(); ++i)
273+
{
274+
base_predictors_in_each_unique_term_affiliation[i] = std::vector<size_t>(base_predictors_in_each_unique_term_affiliation_set[i].begin(), base_predictors_in_each_unique_term_affiliation_set[i].end());
275+
}
259276
}
260277

261278
void APLRClassifier::calculate_validation_metrics()
@@ -374,4 +391,9 @@ VectorXd APLRClassifier::get_feature_importance()
374391
std::vector<std::string> APLRClassifier::get_unique_term_affiliations()
375392
{
376393
return unique_term_affiliations;
394+
}
395+
396+
std::vector<std::vector<size_t>> APLRClassifier::get_base_predictors_in_each_unique_term_affiliation()
397+
{
398+
return base_predictors_in_each_unique_term_affiliation;
377399
}

cpp/APLRRegressor.h

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include <future>
55
#include <random>
66
#include <vector>
7-
#include <omp.h>
7+
#include <thread>
88
#include "../dependencies/eigen-3.4.0/Eigen/Dense"
99
#include "functions.h"
1010
#include "term.h"
@@ -79,6 +79,7 @@ class APLRRegressor
7979
bool max_terms_reached;
8080
bool round_robin_update_of_existing_terms;
8181
size_t term_to_update_in_this_boosting_step;
82+
size_t cores_to_use;
8283

8384
void validate_input_to_fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight, const std::vector<std::string> &X_names,
8485
const MatrixXi &cv_observations, const std::vector<size_t> &prioritized_predictors_indexes,
@@ -215,6 +216,7 @@ class APLRRegressor
215216
size_t number_of_unique_term_affiliations;
216217
std::vector<std::string> unique_term_affiliations;
217218
std::map<std::string, size_t> unique_term_affiliation_map;
219+
std::vector<std::vector<size_t>> base_predictors_in_each_unique_term_affiliation;
218220
VectorXd feature_importance;
219221
VectorXd term_importance;
220222
double dispersion_parameter;
@@ -276,6 +278,7 @@ class APLRRegressor
276278
std::vector<std::string> get_term_names();
277279
std::vector<std::string> get_term_affiliations();
278280
std::vector<std::string> get_unique_term_affiliations();
281+
std::vector<std::vector<size_t>> get_base_predictors_in_each_unique_term_affiliation();
279282
VectorXd get_term_coefficients();
280283
MatrixXd get_validation_error_steps();
281284
VectorXd get_feature_importance();
@@ -351,7 +354,8 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other)
351354
penalty_for_non_linearity{other.penalty_for_non_linearity}, penalty_for_interactions{other.penalty_for_interactions},
352355
max_terms{other.max_terms}, min_predictor_values_in_training{other.min_predictor_values_in_training},
353356
max_predictor_values_in_training{other.max_predictor_values_in_training}, unique_term_affiliations{other.unique_term_affiliations},
354-
unique_term_affiliation_map{other.unique_term_affiliation_map}
357+
unique_term_affiliation_map{other.unique_term_affiliation_map},
358+
base_predictors_in_each_unique_term_affiliation{other.base_predictors_in_each_unique_term_affiliation}
355359
{
356360
}
357361

@@ -410,12 +414,10 @@ void APLRRegressor::preprocess_prioritized_predictors_and_interaction_constraint
410414
void APLRRegressor::initialize_multithreading()
411415
{
412416
size_t available_cores{static_cast<size_t>(std::thread::hardware_concurrency())};
413-
size_t cores_to_use;
414417
if (n_jobs == 0)
415418
cores_to_use = available_cores;
416419
else
417420
cores_to_use = std::min(n_jobs, available_cores);
418-
omp_set_num_threads(cores_to_use);
419421
}
420422

421423
void APLRRegressor::preprocess_penalties()
@@ -1299,14 +1301,47 @@ std::vector<size_t> APLRRegressor::find_terms_eligible_current_indexes_for_a_bas
12991301
void APLRRegressor::estimate_split_point_for_each_term(std::vector<Term> &terms, std::vector<size_t> &terms_indexes)
13001302
{
13011303
bool multithreading{n_jobs != 1 && terms_indexes.size() > 1};
1302-
#pragma omp parallel for schedule(guided) if (multithreading)
1303-
for (size_t i = 0; i < terms_indexes.size(); ++i)
1304+
1305+
if (multithreading)
13041306
{
1305-
terms[terms_indexes[i]].estimate_split_point(X_train, neg_gradient_current, sample_weight_train, bins,
1306-
predictor_learning_rates[terms[terms_indexes[i]].base_term],
1307-
min_observations_in_split, linear_effects_only_in_this_boosting_step,
1308-
predictor_penalties_for_non_linearity[terms[terms_indexes[i]].base_term],
1309-
predictor_penalties_for_interactions[terms[terms_indexes[i]].base_term]);
1307+
size_t num_threads{std::min(cores_to_use, terms_indexes.size())};
1308+
std::vector<std::thread> threads;
1309+
size_t chunk_size{(terms_indexes.size() + num_threads - 1) / num_threads};
1310+
1311+
for (size_t t = 0; t < num_threads; ++t)
1312+
{
1313+
threads.emplace_back([&, t]()
1314+
{
1315+
size_t start = t * chunk_size;
1316+
size_t end = std::min(start + chunk_size, terms_indexes.size());
1317+
for (size_t i = start; i < end; ++i)
1318+
{
1319+
terms[terms_indexes[i]].estimate_split_point(X_train, neg_gradient_current, sample_weight_train, bins,
1320+
predictor_learning_rates[terms[terms_indexes[i]].base_term],
1321+
min_observations_in_split, linear_effects_only_in_this_boosting_step,
1322+
predictor_penalties_for_non_linearity[terms[terms_indexes[i]].base_term],
1323+
predictor_penalties_for_interactions[terms[terms_indexes[i]].base_term]);
1324+
} });
1325+
}
1326+
1327+
for (auto &thread : threads)
1328+
{
1329+
if (thread.joinable())
1330+
{
1331+
thread.join();
1332+
}
1333+
}
1334+
}
1335+
else
1336+
{
1337+
for (size_t i = 0; i < terms_indexes.size(); ++i)
1338+
{
1339+
terms[terms_indexes[i]].estimate_split_point(X_train, neg_gradient_current, sample_weight_train, bins,
1340+
predictor_learning_rates[terms[terms_indexes[i]].base_term],
1341+
min_observations_in_split, linear_effects_only_in_this_boosting_step,
1342+
predictor_penalties_for_non_linearity[terms[terms_indexes[i]].base_term],
1343+
predictor_penalties_for_interactions[terms[terms_indexes[i]].base_term]);
1344+
}
13101345
}
13111346
}
13121347

@@ -2282,6 +2317,17 @@ void APLRRegressor::correct_term_names_coefficients_and_affiliations()
22822317
{
22832318
unique_term_affiliation_map[unique_term_affiliations[i]] = i;
22842319
}
2320+
base_predictors_in_each_unique_term_affiliation.resize(unique_term_affiliation_map.size());
2321+
std::vector<std::set<size_t>> base_predictors_in_each_unique_term_affiliation_set(unique_term_affiliation_map.size());
2322+
for (auto &term : terms)
2323+
{
2324+
std::vector<size_t> unique_base_terms_for_this_term{term.get_unique_base_terms_used_in_this_term()};
2325+
base_predictors_in_each_unique_term_affiliation_set[unique_term_affiliation_map[term.predictor_affiliation]].insert(unique_base_terms_for_this_term.begin(), unique_base_terms_for_this_term.end());
2326+
}
2327+
for (size_t i = 0; i < base_predictors_in_each_unique_term_affiliation_set.size(); ++i)
2328+
{
2329+
base_predictors_in_each_unique_term_affiliation[i] = std::vector<size_t>(base_predictors_in_each_unique_term_affiliation_set[i].begin(), base_predictors_in_each_unique_term_affiliation_set[i].end());
2330+
}
22852331
}
22862332

22872333
void APLRRegressor::additional_cleanup_after_creating_final_model()
@@ -2402,6 +2448,11 @@ std::vector<std::string> APLRRegressor::get_unique_term_affiliations()
24022448
return unique_term_affiliations;
24032449
}
24042450

2451+
std::vector<std::vector<size_t>> APLRRegressor::get_base_predictors_in_each_unique_term_affiliation()
2452+
{
2453+
return base_predictors_in_each_unique_term_affiliation;
2454+
}
2455+
24052456
VectorXd APLRRegressor::get_term_coefficients()
24062457
{
24072458
return term_coefficients;

0 commit comments

Comments
 (0)