|
34 | 34 | RollingSpearmanOfReturns, |
35 | 35 | SimpleBeta, |
36 | 36 | ) |
37 | | -from zipline.pipeline.factors.statistical import vectorized_beta |
| 37 | +from zipline.pipeline.factors.statistical import ( |
| 38 | + vectorized_beta, |
| 39 | + vectorized_pearson_r, |
| 40 | +) |
38 | 41 | from zipline.pipeline.loaders.frame import DataFrameLoader |
39 | 42 | from zipline.pipeline.sentinels import NotSpecified |
40 | 43 | from zipline.testing import ( |
@@ -1059,3 +1062,84 @@ def test_allowed_missing_doesnt_double_count(self): |
1059 | 1062 | result5 = vectorized_beta(dependents, independent, allowed_missing=5) |
1060 | 1063 | assert_equal(np.isnan(result5), |
1061 | 1064 | np.array([False, False, False, False, False])) |
| 1065 | + |
| 1066 | + |
class VectorizedCorrelationTestCase(ZiplineTestCase):
    """Check ``vectorized_pearson_r`` against a column-by-column reference.

    The reference implementation strips missing rows from each pair of
    columns and hands the remainder to a pairwise correlation function
    (``pearsonr`` / ``spearmanr``, imported elsewhere in this module --
    presumably from ``scipy.stats``).
    """

    def naive_columnwise_func(self, func, left, right):
        """Apply a pairwise correlation function to each pair of columns.

        Parameters
        ----------
        func : callable
            Called as ``func(left_col, right_col)`` on 1-D arrays from which
            rows with a NaN on either side have been removed. Must return a
            ``(statistic, pvalue)`` pair; only the statistic is kept.
        left, right : np.ndarray
            2-D arrays of identical shape.

        Returns
        -------
        out : np.ndarray
            1-D float64 array holding one statistic per column.
        """
        # Validate first so that a shape mismatch is reported before any
        # other work is attempted.
        self.assertEqual(left.shape, right.shape)

        # Always allocate floats. Inheriting the dtype of ``left`` would
        # silently truncate the coefficients if integer arrays were passed.
        num_columns = left.shape[1]
        out = np.empty(num_columns, dtype=np.float64)

        for col in range(num_columns):
            left_col = left[:, col]
            right_col = right[:, col]
            # A row only counts if it is present on both sides.
            present = ~(np.isnan(left_col) | np.isnan(right_col))
            r, _ = func(left_col[present], right_col[present])
            out[col] = r

        return out

    def naive_columnwise_pearson(self, left, right):
        """Column-by-column ``pearsonr`` of ``left`` against ``right``."""
        return self.naive_columnwise_func(pearsonr, left, right)

    def naive_columnwise_spearman(self, left, right):
        """Column-by-column ``spearmanr`` of ``left`` against ``right``."""
        return self.naive_columnwise_func(spearmanr, left, right)

    @parameter_space(
        seed=[1, 2, 42],
        nan_offset=[-1, 0, 1],
        nans=['dependent', 'independent', 'both'],
        __fail_fast=True,
    )
    def test_produce_nans_when_too_much_missing_data(self,
                                                     seed,
                                                     nans,
                                                     nan_offset):
        """Columns with more than ``allowed_missing`` NaNs must yield NaN.

        Column ``i`` of the inputs is given ``i + 1`` missing rows, so for
        every ``allowed_missing`` value the expected outcome per column is
        either NaN (too many missing rows) or the naive Pearson correlation
        computed over the rows that are present.
        """
        rand = np.random.RandomState(seed)

        betas = np.array([-0.5, 0.0, 0.5, 1.0, 1.5])
        independents = as_column(np.linspace(-5., 5., 30)) + np.arange(5)
        noise = as_column(rand.uniform(-2, 2, 30))
        dependents = 1.0 + betas * independents + noise

        # Write nans in a triangular pattern into the middle of the dependent
        # and/or independent array, as selected by ``nans``. Column i of the
        # grid contains i + 1 True entries.
        nan_grid = np.array([[1, 1, 1, 1, 1],
                             [0, 1, 1, 1, 1],
                             [0, 0, 1, 1, 1],
                             [0, 0, 0, 1, 1],
                             [0, 0, 0, 0, 1]], dtype=bool)

        # Basic slicing yields a view, so the masked assignments below write
        # through to the underlying arrays.
        window = slice(10 + nan_offset, 15 + nan_offset)
        if nans in ('dependent', 'both'):
            dependents[window][nan_grid] = np.nan
        if nans in ('independent', 'both'):
            independents[window][nan_grid] = np.nan

        expected = self.naive_columnwise_pearson(dependents, independents)
        for allowed_missing in list(range(7)) + [10000]:
            results = vectorized_pearson_r(
                dependents, independents, allowed_missing
            )
            for i, result in enumerate(results):
                # column i has i + 1 missing values.
                if i + 1 > allowed_missing:
                    self.assertTrue(np.isnan(result))
                else:
                    assert_equal(result, expected[i])

    def test_broadcasting(self):
        """An N x 1 independent must behave like its N x 3 tiled form."""
        _independent = as_column(np.array([1, 2, 3, 4, 5]))
        dependent = _independent * [2.5, 1.0, -3.5]

        def do_check(independent):
            result = vectorized_pearson_r(
                dependent, independent, allowed_missing=0
            )
            # Each dependent column is an exact multiple of the independent,
            # so the correlation is +/-1 according to the multiplier's sign.
            assert_equal(result, np.array([1.0, 1.0, -1.0]))

        # We should get the same result from passing a N x 1 array or an N x 3
        # array with the column tiled 3 times.
        do_check(_independent)
        do_check(np.tile(_independent, 3))
0 commit comments