Skip to content

Commit 1593bf9

Browse files
authored
Merge pull request #38 from dieghernan/box
Implement box style
2 parents b829f10 + d20d47d commit 1593bf9

File tree

4 files changed

+85
-6
lines changed

4 files changed

+85
-6
lines changed

R/classInt.R

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ classIntervals <- function(var, n, style="quantile", rtimes=3, ..., intervalClos
105105
nobs <- length(unique(var))
106106
if (nobs == 1) stop("single unique value")
107107
# Fix 22: Diego Hernangómez
108-
needn <- !(style %in% c("dpih", "headtails"))
108+
needn <- !(style %in% c("dpih", "headtails", "box"))
109109

110110
if (missing(n)) n <- nclass.Sturges(var)
111111
if (n < 2 & needn) stop("n less than 2")
@@ -349,6 +349,42 @@ classIntervals <- function(var, n, style="quantile", rtimes=3, ..., intervalClos
349349
brks <- c(min(x_sort), rowSums(m) / 2, max(x_sort))
350350

351351

352+
} else if (style == "box"){
353+
# 2022-09-22 Diego Hernangomez, see:
354+
# https://github.com/r-spatial/classInt/issues/18
355+
# Adapted from:
356+
# https://spatialanalysis.github.io/lab_tutorials/4_R_Mapping.html#box-map
357+
358+
dots <- list(...)
359+
iqr_mult <- ifelse(is.null(dots$iqr_mult), 1.5, dots$iqr_mult)
360+
361+
qv <- unname(quantile(var))
362+
iqr <- iqr_mult * (qv[4] - qv[2])
363+
upfence <- qv[4] + iqr
364+
lofence <- qv[2] - iqr
365+
366+
# initialize break points vector
367+
bb <- vector(mode="numeric",length=7)
368+
369+
# logic for lower and upper fences
370+
if (lofence < qv[1]) { # no lower outliers
371+
bb[1] <- lofence
372+
bb[2] <- floor(qv[1])
373+
} else {
374+
bb[2] <- lofence
375+
bb[1] <- qv[1]
376+
}
377+
if (upfence > qv[5]) { # no upper outliers
378+
bb[7] <- upfence
379+
bb[6] <- ceiling(qv[5])
380+
} else {
381+
bb[6] <- upfence
382+
bb[7] <- qv[5]
383+
}
384+
bb[3:5] <- qv[2:4]
385+
386+
brks <- bb
387+
352388
} else stop(paste(style, "unknown"))
353389
}
354390
if (is.null(brks)) stop("Null breaks")

man/classIntervals.Rd

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ classIntervals2shingle(x)
6969
The "headtails" style uses the algorithm proposed by Bin Jiang (2013), in order to find groupings or hierarchy for data with a heavy-tailed distribution. This classification scheme partitions all of the data values around the mean into two parts and continues the process iteratively for the values (above the mean) in the head until the head part values are no longer heavy-tailed distributed. Thus, the number of classes and the class intervals are both naturally determined. By default the algorithm uses \code{thr = 0.4}, meaning that when the head represents more than 40\% of the observations the distribution is not considered heavy-tailed. The threshold argument \code{thr} may be modified through \code{\dots} (see Examples).
7070
7171
The "maximum" style uses the Maximum Breaks method of classification finding the k - 1 largest differences in \code{var}. The mean of the values that generated the largest splits is used as the interval boundary.
72+
73+
The "box" style generate 7 breaks (therefore 6 categories) based on a box-and-whisker plot. First and last categories
74+
includes the data values considered as outliers, and the four remaining categories are defined by the percentiles 25,
75+
50 and 75 of the data distribution. By default, the identification of outliers is based on the interquantile range
76+
(IQR), so values lower than percentile 25 - 1.5 * IQR or higher than percentile 75 + 1.5 * IQR are considered as outliers.
77+
The multiplier applied to the IQR \code{iqr_mult = 1.5} may be modified through \code{\dots}.
78+
7279
}
7380
7481
\value{
@@ -140,7 +147,13 @@ plot(classIntervals(jenks71$jenks71, n=5, style="jenks"), pal=pal1,
140147
main="dpih method")
141148
plot(classIntervals(jenks71$jenks71, style="headtails", thr = 1), pal=pal1,
142149
main="Head Tails method")
143-
par(opar)
150+
}
151+
if (run) {
152+
plot(classIntervals(jenks71$jenks71, style="maximum"), pal=pal1,
153+
main="Maximum method")
154+
plot(classIntervals(jenks71$jenks71, style="box"), pal=pal1,
155+
main="Box method")
156+
par(opar)
144157
}
145158
if (run) {
146159
print(classIntervals(jenks71$jenks71, n=5, style="fixed",
@@ -203,6 +216,15 @@ if (run) {
203216
if (run) {
204217
print(classIntervals(jenks71$jenks71, style="headtails", thr = .45))
205218
}
219+
if (run) {
220+
print(classIntervals(jenks71$jenks71, style="maximum"))
221+
}
222+
if (run) {
223+
print(classIntervals(jenks71$jenks71, style="box"))
224+
}
225+
if (run) {
226+
print(classIntervals(jenks71$jenks71, style="box", iqr_mult = 0.25))
227+
}
206228
x <- c(0, 0, 0, 1, 2, 50)
207229
print(classIntervals(x, n=3, style="fisher"))
208230
print(classIntervals(x, n=3, style="jenks"))
@@ -262,6 +284,9 @@ classIntervals(x_units, n=5, style="fisher")
262284
if (have_units) {
263285
classIntervals(x_units, style="headtails")
264286
}
287+
if (have_units) {
288+
classIntervals(x_units, style="box")
289+
}
265290
\dontrun{
266291
st <- Sys.time()
267292
x_POSIXt <- sample(st+((0:500)*3600), 100)
@@ -274,6 +299,8 @@ classIntervals(x_POSIXt, n=5, style="quantile")
274299
classIntervals(x_POSIXt, n=5, style="kmeans")
275300
classIntervals(x_POSIXt, n=5, style="fisher")
276301
classIntervals(x_POSIXt, style="headtails")
302+
classIntervals(x_POSIXt, style="maximum")
303+
classIntervals(x_POSIXt, style="box")
277304
}
278305
# see vignette for further details
279306
\dontrun{

tests/test_Unique.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ print(classIntervals(data_censored, n=5, style="jenks"), unique=TRUE)
3939
print(classIntervals(data_censored, style="headtails"), unique=TRUE)
4040
print(classIntervals(data_censored, style="headtails", thr = 1))
4141
print(classIntervals(data_censored, style="headtails", thr = 0))
42+
print(classIntervals(data_censored, style="box", iqr_mult = 0))
43+
print(classIntervals(data_censored, style="box"))
4244
x <- c(0, 0, 0, 1, 2, 50)
4345
print(classIntervals(x, n=3, style="fisher"), unique=TRUE)
4446
print(classIntervals(x, n=3, style="jenks"), unique=TRUE)

tests/test_Unique.Rout.save

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

2-
R version 3.5.3 (2020-03-20) -- "Great Truth"
3-
Copyright (C) 2019 The R Foundation for Statistical Computing
4-
Platform: x86_64-pc-linux-gnu (64-bit)
2+
R version 4.2.1 (2022-06-23 ucrt) -- "Funny-Looking Kid"
3+
Copyright (C) 2022 The R Foundation for Statistical Computing
4+
Platform: x86_64-w64-mingw32/x64 (64-bit)
55

66
R is free software and comes with ABSOLUTELY NO WARRANTY.
77
You are welcome to redistribute it under certain conditions.
@@ -223,6 +223,20 @@ style: headtails
223223
one of 101 possible partitions of this variable into 2 classes
224224
[0,18.92407) [18.92407,26]
225225
20 100
226+
> print(classIntervals(data_censored, style="box", iqr_mult = 0))
227+
style: box
228+
one of 79,208,745 possible partitions of this variable into 6 classes
229+
[0,19.38567) [19.38567,19.38567) [19.38567,20.11391) [20.11391,20.77193)
230+
30 0 30 30
231+
[20.77193,20.77193) [20.77193,26]
232+
0 30
233+
> print(classIntervals(data_censored, style="box"))
234+
style: box
235+
one of 79,208,745 possible partitions of this variable into 6 classes
236+
[0,17.30627) [17.30627,19.38567) [19.38567,20.11391) [20.11391,20.77193)
237+
10 20 30 30
238+
[20.77193,22.85133) [22.85133,26]
239+
20 10
226240
> x <- c(0, 0, 0, 1, 2, 50)
227241
> print(classIntervals(x, n=3, style="fisher"), unique=TRUE)
228242
style: fisher
@@ -276,4 +290,4 @@ Class found with one single (possibly repeated) value: changed label
276290
>
277291
> proc.time()
278292
user system elapsed
279-
0.164 0.028 0.183
293+
0.37 0.06 0.68

0 commit comments

Comments
 (0)