22#include <ruby/util.h>
33#include <ruby/version.h>
44#include <assert.h>
5+ #include <math.h>
56
67#if RUBY_API_VERSION_CODE >= 20400
78/* for 2.4.0 or higher */
1718# undef HAVE_RB_RATIONAL_PLUS
1819#endif
1920
21+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
22+ # define HAVE_ARITHMETIC_SEQUENCE
23+ #else
24+ # undef HAVE_ARITHMETIC_SEQUENCE
25+ #endif
26+
2027#ifndef RB_INTEGER_TYPE_P
2128# define RB_INTEGER_TYPE_P (obj ) enum_stat_integer_type_p(obj)
2229static inline int
@@ -88,7 +95,11 @@ static VALUE half_in_rational;
8895
8996static ID idPow , idPLUS , idMINUS , idSTAR , idDIV , idGE ;
9097static ID id_eqeq_p , id_idiv , id_negate , id_to_f , id_cmp , id_nan_p ;
91- static ID id_each , id_real_p , id_sum , id_population ;
98+ static ID id_each , id_real_p , id_sum , id_population , id_closed , id_edge ;
99+
100+ static VALUE sym_left , sym_right ;
101+
102+ static VALUE cHistogram ;
92103
93104inline static VALUE
94105f_add (VALUE x , VALUE y )
@@ -2051,9 +2062,253 @@ hash_value_counts(int argc, VALUE* argv, VALUE hash)
20512062 return any_value_counts (argc , argv , hash , hash_value_counts_without_sort );
20522063}
20532064
2065+ static long
2066+ histogram_edge_bin_index (VALUE edge , VALUE rb_x , int left_p )
2067+ {
2068+ double x , y ;
2069+ long lo , hi , mid ;
2070+
2071+ x = NUM2DBL (rb_x );
2072+
2073+ lo = -1 ;
2074+ hi = RARRAY_LEN (edge );
2075+
2076+ if (left_p ) {
2077+ while (hi - lo > 1 ) {
2078+ mid = lo + (hi - lo )/2 ;
2079+ y = NUM2DBL (RARRAY_AREF (edge , mid ));
2080+ if (y <= x ) {
2081+ lo = mid ;
2082+ }
2083+ else {
2084+ hi = mid ;
2085+ }
2086+ }
2087+ return lo ;
2088+ }
2089+ else {
2090+ while (hi - lo > 1 ) {
2091+ mid = lo + (hi - lo )/2 ;
2092+ y = NUM2DBL (RARRAY_AREF (edge , mid ));
2093+ if (y < x ) {
2094+ lo = mid ;
2095+ }
2096+ else {
2097+ hi = mid ;
2098+ }
2099+ }
2100+ return hi - 1 ;
2101+ }
2102+ }
2103+
2104+ static void
2105+ histogram_weights_push_values (VALUE weights , VALUE edge , VALUE values , int left_p )
2106+ {
2107+ VALUE x , cur ;
2108+ long i , n , bi ;
2109+
2110+ n = RARRAY_LEN (values );
2111+ for (i = 0 ; i < n ; ++ i ) {
2112+ x = RARRAY_AREF (values , i );
2113+
2114+ bi = histogram_edge_bin_index (edge , x , left_p );
2115+
2116+ cur = rb_ary_entry (weights , bi );
2117+ if (NIL_P (cur )) {
2118+ cur = INT2FIX (1 );
2119+ }
2120+ else {
2121+ cur = rb_funcall (cur , idPLUS , 1 , INT2FIX (1 ));
2122+ }
2123+
2124+ rb_ary_store (weights , bi , cur );
2125+ }
2126+ }
2127+
2128+ static int
2129+ opt_closed_left_p (VALUE opts )
2130+ {
2131+ int left_p = 1 ;
2132+
2133+ if (!NIL_P (opts )) {
2134+ VALUE closed ;
2135+ #ifdef HAVE_RB_GET_KWARGS
2136+ ID kwargs = id_closed ;
2137+ rb_get_kwargs (opts , & kwargs , 0 , 1 , & closed );
2138+ #else
2139+ closed = rb_hash_lookup2 (opts , ID2SYM (id_closed ), sym_left );
2140+ #endif
2141+ left_p = (closed != sym_right );
2142+ if (left_p && closed != sym_left ) {
2143+ rb_raise (rb_eArgError , "invalid value for :closed keyword "
2144+ "(%" PRIsVALUE " for :left or :right)" , closed );
2145+ }
2146+ }
2147+
2148+ return left_p ;
2149+ }
2150+
2151+ static inline long
2152+ sturges (long n )
2153+ {
2154+ if (n == 0 ) return 1L ;
2155+ return (long )(ceil (log2 (n )) + 1 );
2156+ }
2157+
2158+ static VALUE
2159+ ary_histogram_calculate_edge_lo_hi (const double lo , const double hi , const long nbins , const int left_p )
2160+ {
2161+ VALUE edge ;
2162+ double bw , lbw , start , step , divisor , r ;
2163+ long i , len ;
2164+
2165+ if (hi == lo ) {
2166+ start = hi ;
2167+ step = 1 ;
2168+ divisor = 1 ;
2169+ len = 1 ;
2170+ }
2171+ else {
2172+ bw = (hi - lo ) / nbins ;
2173+ lbw = log10 (bw );
2174+ if (lbw >= 0 ) {
2175+ step = pow (10 , floor (lbw ));
2176+ r = bw / step ;
2177+ if (r <= 1.1 ) {
2178+ /* do nothing */
2179+ }
2180+ else if (r <= 2.2 ) {
2181+ step *= 2 ;
2182+ }
2183+ else if (r <= 5.5 ) {
2184+ step *= 5 ;
2185+ }
2186+ else {
2187+ step *= 10 ;
2188+ }
2189+ divisor = 1.0 ;
2190+ start = step * floor (lo / step );
2191+ len = (long )ceil ((hi - start ) / step );
2192+ }
2193+ else {
2194+ divisor = pow (10 , - floor (lbw ));
2195+ r = bw * divisor ;
2196+ if (r <= 1.1 ) {
2197+ /* do nothing */
2198+ }
2199+ else if (r <= 2.2 ) {
2200+ divisor /= 2 ;
2201+ }
2202+ else if (r <= 5.5 ) {
2203+ divisor /= 5 ;
2204+ }
2205+ else {
2206+ divisor /= 10 ;
2207+ }
2208+ step = 1.0 ;
2209+ start = floor (lo * divisor );
2210+ len = (long )ceil (hi * divisor - start );
2211+ }
2212+ }
2213+
2214+ if (left_p ) {
2215+ while (lo < start /divisor ) {
2216+ start -= step ;
2217+ }
2218+ while ((start + (len - 1 )* step )/divisor <= hi ) {
2219+ ++ len ;
2220+ }
2221+ }
2222+ else {
2223+ while (lo <= start /divisor ) {
2224+ start -= step ;
2225+ }
2226+ while ((start + (len - 1 )* step )/divisor < hi ) {
2227+ ++ len ;
2228+ }
2229+ }
2230+
2231+ edge = rb_ary_new_capa (len );
2232+ for (i = 0 ; i < len ; ++ i ) {
2233+ rb_ary_push (edge , DBL2NUM (start /divisor ));
2234+ start += step ;
2235+ }
2236+
2237+ return edge ;
2238+ }
2239+
2240+ static VALUE
2241+ ary_histogram_calculate_edge (VALUE ary , const long nbins , const int left_p )
2242+ {
2243+ long n ;
2244+ VALUE minmax ;
2245+ VALUE edge = Qnil ;
2246+ double lo , hi ;
2247+
2248+ Check_Type (ary , T_ARRAY );
2249+ n = RARRAY_LEN (ary );
2250+
2251+ if (n == 0 && nbins < 0 ) {
2252+ rb_raise (rb_eArgError , "nbins must be >= 0 for an empty array, got %ld" , nbins );
2253+ }
2254+ else if (n > 0 && nbins < 1 ) {
2255+ rb_raise (rb_eArgError , "nbins must be >= 1 for a non-empty array, got %ld" , nbins );
2256+ }
2257+ else if (n == 0 ) {
2258+ edge = rb_ary_new_capa (1 );
2259+ rb_ary_push (edge , DBL2NUM (0.0 ));
2260+ return edge ;
2261+ }
2262+
2263+ minmax = rb_funcall (ary , rb_intern ("minmax" ), 0 );
2264+ lo = NUM2DBL (RARRAY_AREF (minmax , 0 ));
2265+ hi = NUM2DBL (RARRAY_AREF (minmax , 1 ));
2266+
2267+ edge = ary_histogram_calculate_edge_lo_hi (lo , hi , nbins , left_p );
2268+
2269+ return edge ;
2270+ }
2271+
2272+ /* call-seq:
2273+ * ary.histogram(nbins=:auto, closed: :left)
2274+ *
2275+ * @param [Integer] nbins The approximate number of bins
2276+ * @param [:left, :right] closed
2277+ * If :left (the default), the bin interval are left-closed.
2278+ * If :right, the bin interval are right-closed.
2279+ *
2280+ * @return [EnumerableStatistics::Histogram] The histogram struct.
2281+ */
2282+ static VALUE
2283+ ary_histogram (int argc , VALUE * argv , VALUE ary )
2284+ {
2285+ VALUE arg0 , opts , edge , weights ;
2286+ int left_p ;
2287+ long nbins ;
2288+
2289+ rb_scan_args (argc , argv , "01:" , & arg0 , & opts );
2290+ if (NIL_P (arg0 )) {
2291+ nbins = sturges (RARRAY_LEN (ary ));
2292+ }
2293+ else {
2294+ nbins = NUM2LONG (arg0 );
2295+ }
2296+ left_p = opt_closed_left_p (opts );
2297+
2298+ edge = ary_histogram_calculate_edge (ary , nbins , left_p );
2299+ weights = rb_ary_new_capa (RARRAY_LEN (edge ) - 1 );
2300+ histogram_weights_push_values (weights , edge , ary , left_p );
2301+
2302+ return rb_struct_new (cHistogram , edge , weights ,
2303+ left_p ? sym_left : sym_right ,
2304+ Qfalse );
2305+ }
2306+
20542307void
20552308Init_extension (void )
20562309{
2310+ VALUE mEnumerableStatistics ;
2311+
20572312#ifndef HAVE_ENUM_SUM
20582313 rb_define_method (rb_mEnumerable , "sum" , enum_sum , -1 );
20592314#endif
@@ -2082,6 +2337,11 @@ Init_extension(void)
20822337 half_in_rational = nurat_s_new_internal (rb_cRational , INT2FIX (1 ), INT2FIX (2 ));
20832338 rb_gc_register_mark_object (half_in_rational );
20842339
2340+ mEnumerableStatistics = rb_const_get_at (rb_cObject , rb_intern ("EnumerableStatistics" ));
2341+ cHistogram = rb_const_get_at (mEnumerableStatistics , rb_intern ("Histogram" ));
2342+
2343+ rb_define_method (rb_cArray , "histogram" , ary_histogram , -1 );
2344+
20852345 idPLUS = '+' ;
20862346 idMINUS = '-' ;
20872347 idSTAR = '*' ;
@@ -2098,4 +2358,9 @@ Init_extension(void)
20982358 id_real_p = rb_intern ("real?" );
20992359 id_sum = rb_intern ("sum" );
21002360 id_population = rb_intern ("population" );
2361+ id_closed = rb_intern ("closed" );
2362+ id_edge = rb_intern ("edge" );
2363+
2364+ sym_left = ID2SYM (rb_intern ("left" ));
2365+ sym_right = ID2SYM (rb_intern ("right" ));
21012366}
0 commit comments