Skip to content

Commit d8c7858

Browse files
committed
Remove factor option
1 parent 0ad0ff0 commit d8c7858

File tree

5 files changed

+36
-62
lines changed

5 files changed

+36
-62
lines changed

cpp/src/arrow/compute/api_vector.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,7 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
154154
DataMember("tiebreaker", &RankOptions::tiebreaker));
155155
static auto kRankQuantileOptionsType = GetFunctionOptionsType<RankQuantileOptions>(
156156
DataMember("sort_keys", &RankQuantileOptions::sort_keys),
157-
DataMember("null_placement", &RankQuantileOptions::null_placement),
158-
DataMember("factor", &RankQuantileOptions::factor));
157+
DataMember("null_placement", &RankQuantileOptions::null_placement));
159158
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
160159
DataMember("periods", &PairwiseOptions::periods));
161160
static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>(
@@ -234,11 +233,10 @@ RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_plac
234233
constexpr char RankOptions::kTypeName[];
235234

236235
RankQuantileOptions::RankQuantileOptions(std::vector<SortKey> sort_keys,
237-
NullPlacement null_placement, double factor)
236+
NullPlacement null_placement)
238237
: FunctionOptions(internal::kRankQuantileOptionsType),
239238
sort_keys(std::move(sort_keys)),
240-
null_placement(null_placement),
241-
factor(factor) {}
239+
null_placement(null_placement) {}
242240
constexpr char RankQuantileOptions::kTypeName[];
243241

244242
PairwiseOptions::PairwiseOptions(int64_t periods)

cpp/src/arrow/compute/api_vector.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,11 @@ class ARROW_EXPORT RankOptions : public FunctionOptions {
199199
class ARROW_EXPORT RankQuantileOptions : public FunctionOptions {
200200
public:
201201
explicit RankQuantileOptions(std::vector<SortKey> sort_keys = {},
202-
NullPlacement null_placement = NullPlacement::AtEnd,
203-
double factor = 1.0);
202+
NullPlacement null_placement = NullPlacement::AtEnd);
204203
/// Convenience constructor for array inputs
205204
explicit RankQuantileOptions(SortOrder order,
206-
NullPlacement null_placement = NullPlacement::AtEnd,
207-
double factor = 1.0)
208-
: RankQuantileOptions({SortKey("", order)}, null_placement, factor) {}
205+
NullPlacement null_placement = NullPlacement::AtEnd)
206+
: RankQuantileOptions({SortKey("", order)}, null_placement) {}
209207

210208
static constexpr char const kTypeName[] = "RankQuantileOptions";
211209
static RankQuantileOptions Defaults() { return RankQuantileOptions(); }
@@ -214,9 +212,6 @@ class ARROW_EXPORT RankQuantileOptions : public FunctionOptions {
214212
std::vector<SortKey> sort_keys;
215213
/// Whether nulls and NaNs are placed at the start or at the end
216214
NullPlacement null_placement;
217-
/// Factor to apply to the output.
218-
/// Use 1.0 for results in (0, 1), 100.0 for percentages, etc.
219-
double factor;
220215
};
221216

222217
/// \brief Partitioning options for NthToIndices

cpp/src/arrow/compute/kernels/vector_rank.cc

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,6 @@ class SortAndMarkDuplicate : public TypeVisitor {
166166

167167
// A helper class that emits rankings for the "rank_quantile" function
168168
struct QuantileRanker {
169-
explicit QuantileRanker(double factor) : factor_(factor) {}
170-
171169
Result<Datum> CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) {
172170
const int64_t length = sorted.overall_end() - sorted.overall_begin();
173171
ARROW_ASSIGN_OR_RAISE(auto rankings,
@@ -189,7 +187,7 @@ struct QuantileRanker {
189187
}
190188
// The run length, i.e. the frequency of the current value
191189
int64_t freq = run_end - it;
192-
double quantile = (cum_freq + 0.5 * freq) * factor_ / static_cast<double>(length);
190+
double quantile = (cum_freq + 0.5 * freq) / static_cast<double>(length);
193191
// Output quantile rank values
194192
for (; it < run_end; ++it) {
195193
out_begin[original_index(*it)] = quantile;
@@ -199,9 +197,6 @@ struct QuantileRanker {
199197
DCHECK_EQ(cum_freq, length);
200198
return Datum(rankings);
201199
}
202-
203-
private:
204-
const double factor_;
205200
};
206201

207202
// A helper class that emits rankings for the "rank" function
@@ -292,10 +287,11 @@ const FunctionDoc rank_quantile_doc(
292287
"are therefore sorted at the end of the input. For floating-point types,\n"
293288
"NaNs are considered greater than any other non-null value, but smaller\n"
294289
"than null values.\n"
295-
"Results are computed as in https://en.wikipedia.org/wiki/Quantile_rank\n"
290+
"The results are real values strictly between 0 and 1. They are\n"
291+
"computed as in https://en.wikipedia.org/wiki/Quantile_rank\n"
292+
"but without multiplying by 100.\n"
296293
"\n"
297-
"The handling of nulls and NaNs, and the constant factor can be changed\n"
298-
"in RankQuantileOptions."),
294+
"The handling of nulls and NaNs can be changed in RankQuantileOptions."),
299295
{"input"}, "RankQuantileOptions");
300296

301297
template <typename Derived>
@@ -375,9 +371,7 @@ class RankQuantileMetaFunction : public RankMetaFunctionBase<RankQuantileMetaFun
375371

376372
static bool NeedsDuplicates(const RankQuantileOptions&) { return true; }
377373

378-
static RankerType GetRanker(const RankQuantileOptions& options) {
379-
return RankerType(options.factor);
380-
}
374+
// The ranker is default-constructed and reads nothing from the options, so the
// parameter is left unnamed (as in NeedsDuplicates) to avoid unused-parameter warnings.
static RankerType GetRanker(const RankQuantileOptions&) { return RankerType(); }
381375

382376
RankQuantileMetaFunction()
383377
: RankMetaFunctionBase("rank_quantile", Arity::Unary(), rank_quantile_doc,

cpp/src/arrow/compute/kernels/vector_sort_test.cc

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2472,10 +2472,10 @@ TEST_F(TestRank, EmptyChunks) {
24722472
class TestRankQuantile : public BaseTestRank {
24732473
public:
24742474
void AssertRankQuantile(const DatumVector& datums, SortOrder order,
2475-
NullPlacement null_placement, double factor,
2475+
NullPlacement null_placement,
24762476
const std::shared_ptr<Array>& expected) {
24772477
const std::vector<SortKey> sort_keys{SortKey("foo", order)};
2478-
RankQuantileOptions options(sort_keys, null_placement, factor);
2478+
RankQuantileOptions options(sort_keys, null_placement);
24792479
ARROW_SCOPED_TRACE("options = ", options.ToString());
24802480
for (const auto& datum : datums) {
24812481
ASSERT_OK_AND_ASSIGN(auto actual, CallFunction("rank_quantile", {datum}, &options));
@@ -2485,59 +2485,52 @@ class TestRankQuantile : public BaseTestRank {
24852485
}
24862486

24872487
void AssertRankQuantile(const DatumVector& datums, SortOrder order,
2488-
NullPlacement null_placement, double factor,
2489-
const std::string& expected) {
2490-
AssertRankQuantile(datums, order, null_placement, factor,
2491-
ArrayFromJSON(float64(), expected));
2488+
NullPlacement null_placement, const std::string& expected) {
2489+
AssertRankQuantile(datums, order, null_placement, ArrayFromJSON(float64(), expected));
24922490
}
24932491

2494-
void AssertRankQuantile(SortOrder order, NullPlacement null_placement, double factor,
2492+
void AssertRankQuantile(SortOrder order, NullPlacement null_placement,
24952493
const std::shared_ptr<Array>& expected) {
2496-
AssertRankQuantile(datums_, order, null_placement, factor, expected);
2494+
AssertRankQuantile(datums_, order, null_placement, expected);
24972495
}
24982496

2499-
void AssertRankQuantile(SortOrder order, NullPlacement null_placement, double factor,
2497+
void AssertRankQuantile(SortOrder order, NullPlacement null_placement,
25002498
const std::string& expected) {
2501-
AssertRankQuantile(datums_, order, null_placement, factor,
2499+
AssertRankQuantile(datums_, order, null_placement,
25022500
ArrayFromJSON(float64(), expected));
25032501
}
25042502

25052503
void AssertRankQuantileEmpty(std::shared_ptr<DataType> type) {
25062504
for (auto null_placement : AllNullPlacements()) {
25072505
for (auto order : AllOrders()) {
2508-
AssertRankQuantile({ArrayFromJSON(type, "[]")}, order, null_placement,
2509-
/*factor=*/1.0, "[]");
2506+
AssertRankQuantile({ArrayFromJSON(type, "[]")}, order, null_placement, "[]");
25102507
AssertRankQuantile({ArrayFromJSON(type, "[null]")}, order, null_placement,
2511-
/*factor=*/1.0, "[0.5]");
2512-
AssertRankQuantile({ArrayFromJSON(type, "[null]")}, order, null_placement,
2513-
/*factor=*/10.0, "[5]");
2514-
AssertRankQuantile({ArrayFromJSON(type, "[null, null, null]")}, order,
2515-
null_placement, /*factor=*/1.0, "[0.5, 0.5, 0.5]");
2508+
"[0.5]");
25162509
AssertRankQuantile({ArrayFromJSON(type, "[null, null, null]")}, order,
2517-
null_placement, /*factor=*/100.0, "[50, 50, 50]");
2510+
null_placement, "[0.5, 0.5, 0.5]");
25182511
}
25192512
}
25202513
}
25212514

25222515
// Expecting an input ordered like [1, 2, 1, 2, 1]
25232516
void AssertRankQuantile_12121() {
25242517
for (auto null_placement : AllNullPlacements()) {
2525-
AssertRankQuantile(SortOrder::Ascending, null_placement, /*factor=*/100.0,
2526-
"[30.0, 80.0, 30.0, 80.0, 30.0]");
2527-
AssertRankQuantile(SortOrder::Descending, null_placement, /*factor=*/100.0,
2528-
"[70.0, 20.0, 70.0, 20.0, 70.0]");
2518+
AssertRankQuantile(SortOrder::Ascending, null_placement,
2519+
"[0.3, 0.8, 0.3, 0.8, 0.3]");
2520+
AssertRankQuantile(SortOrder::Descending, null_placement,
2521+
"[0.7, 0.2, 0.7, 0.2, 0.7]");
25292522
}
25302523
}
25312524

25322525
// Expecting an input ordered like [null, 1, null, 2, null]
25332526
void AssertRankQuantile_N1N2N() {
2534-
AssertRankQuantile(SortOrder::Ascending, NullPlacement::AtStart, /*factor=*/1.0,
2527+
AssertRankQuantile(SortOrder::Ascending, NullPlacement::AtStart,
25352528
"[0.3, 0.7, 0.3, 0.9, 0.3]");
2536-
AssertRankQuantile(SortOrder::Ascending, NullPlacement::AtEnd, /*factor=*/1.0,
2529+
AssertRankQuantile(SortOrder::Ascending, NullPlacement::AtEnd,
25372530
"[0.7, 0.1, 0.7, 0.3, 0.7]");
2538-
AssertRankQuantile(SortOrder::Descending, NullPlacement::AtStart, /*factor=*/1.0,
2531+
AssertRankQuantile(SortOrder::Descending, NullPlacement::AtStart,
25392532
"[0.3, 0.9, 0.3, 0.7, 0.3]");
2540-
AssertRankQuantile(SortOrder::Descending, NullPlacement::AtEnd, /*factor=*/1.0,
2533+
AssertRankQuantile(SortOrder::Descending, NullPlacement::AtEnd,
25412534
"[0.7, 0.3, 0.7, 0.1, 0.7]");
25422535
}
25432536

@@ -2548,14 +2541,10 @@ class TestRankQuantile : public BaseTestRank {
25482541
// Reproduce the example from https://en.wikipedia.org/wiki/Percentile_rank
25492542
SetInput(ArrayFromJSON(type, "[7, 5, 5, 4, 4, 3, 3, 3, 2, 1]"));
25502543
for (auto null_placement : AllNullPlacements()) {
2551-
AssertRankQuantile(SortOrder::Ascending, null_placement, /*factor=*/10.0,
2552-
"[9.5, 8.0, 8.0, 6.0, 6.0, 3.5, 3.5, 3.5, 1.5, 0.5]");
2553-
AssertRankQuantile(SortOrder::Ascending, null_placement, /*factor=*/100.0,
2554-
"[95, 80, 80, 60, 60, 35, 35, 35, 15, 5]");
2555-
AssertRankQuantile(SortOrder::Descending, null_placement, /*factor=*/10.0,
2556-
"[0.5, 2.0, 2.0, 4.0, 4.0, 6.5, 6.5, 6.5, 8.5, 9.5]");
2557-
AssertRankQuantile(SortOrder::Descending, null_placement, /*factor=*/100.0,
2558-
"[5, 20, 20, 40, 40, 65, 65, 65, 85, 95]");
2544+
AssertRankQuantile(SortOrder::Ascending, null_placement,
2545+
"[0.95, 0.8, 0.8, 0.6, 0.6, 0.35, 0.35, 0.35, 0.15, 0.05]");
2546+
AssertRankQuantile(SortOrder::Descending, null_placement,
2547+
"[0.05, 0.2, 0.2, 0.4, 0.4, 0.65, 0.65, 0.65, 0.85, 0.95]");
25592548
}
25602549

25612550
// With nulls

docs/source/cpp/compute.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,9 +1827,7 @@ in the respective option classes.
18271827

18281828
* \(4) The output is a one-based numerical array of ranks.
18291829

1830-
* \(5) The output is an array of quantiles between 0 and a constant *factor*.
1831-
The *factor* can be configured in :class:`RankQuantileOptions`
1832-
(use 100.0 for a percentile rank).
1830+
* \(5) The output is an array of quantiles strictly between 0 and 1.
18331831

18341832
* \(6) The input can be an array, chunked array, record batch or
18351833
table. If the input is a record batch or table, one or more sort

0 commit comments

Comments
 (0)