Skip to content

Commit 37fb4b5

Browse files
authored
Merge pull request #19 from nrennie/duplicate-columns
Add `duplicate_columns()` function
2 parents 174f61f + 248ebf0 commit 37fb4b5

File tree

5 files changed

+76
-1
lines changed

5 files changed

+76
-1
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: messy
22
Title: Create Messy Data from Clean Data Frames
3-
Version: 0.1.0.9001
3+
Version: 0.1.0.9002
44
Authors@R: c(
55
person(given = "Nicola", family = "Rennie", role = c("aut", "cre", "cph"),
66
email = "nrennie35@gmail.com", comment = c(ORCID = "0000-0003-4797-557X")))

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export(add_special_chars)
44
export(add_whitespace)
55
export(change_case)
66
export(change_separators)
7+
export(duplicate_columns)
78
export(duplicate_rows)
89
export(make_missing)
910
export(messy)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# messy (development version)
22

33
* Add `change_separators()` function
4+
* Add `duplicate_columns()` function
45

56
# messy 0.1.0
67

R/duplicate_columns.R

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#' Duplicate columns at random and append them to the dataframe
#'
#' Each column of `data` is considered in turn and, with probability
#' `messiness`, a copy of it is appended to the end of the dataframe. The
#' copy is named by pasting together a column name, `name_sep`, and a random
#' number. A name that is already present in the dataframe is never reused,
#' so existing columns are not overwritten.
#'
#' @param data input dataframe
#' @param messiness Probability that each column is duplicated. Must be
#' between 0 and 1. Default 0.1.
#' @param random Whether duplicated column names should be randomly selected
#' from the original column names, or maintain the original. Default `TRUE`.
#' @param name_sep Separator to use for adding numbers to end of names.
#' Default `""`.
#' @return A dataframe with duplicated columns appended
#' @author Jordi Rosell
#' @export
#' @examples
#' duplicate_columns(mtcars, messiness = 0.1)
duplicate_columns <- function(
    data,
    messiness = 0.1,
    random = TRUE,
    name_sep = "") {
  # Require a single, non-missing number so that the range check below
  # cannot fail with an unrelated "missing value" / "length > 1" error.
  if (!is.numeric(messiness) || length(messiness) != 1L ||
        is.na(messiness) || messiness < 0 || messiness > 1) {
    stop("'messiness' must be between 0 and 1")
  }
  # `is.logical(NA)` is TRUE, so test for exactly TRUE or FALSE instead.
  if (!(isTRUE(random) || isFALSE(random))) {
    stop("'random' must be either 'TRUE' or 'FALSE'")
  }

  original_names <- colnames(data)
  n <- ncol(data)
  for (i in seq_len(n)) {
    if (stats::runif(1) < messiness) {
      if (random) {
        base_name <- sample(original_names, 1)
      } else {
        base_name <- original_names[i]
      }
      # Redraw the numeric suffix until the name is unused: assigning to an
      # existing name would overwrite that column instead of adding a copy.
      repeat {
        new_col_name <- paste0(base_name, name_sep, sample(100 * n, 1))
        if (!(new_col_name %in% colnames(data))) {
          break
        }
      }
      data[[new_col_name]] <- data[[i]]
    }
  }

  data
}

man/duplicate_columns.Rd

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)