|  | 
|  | 1 | +\name{penguins} | 
|  | 2 | +\encoding{UTF-8} | 
|  | 3 | +\docType{data} | 
|  | 4 | +\title{Measurements of Penguins near Palmer Station, Antarctica} | 
|  | 5 | +\alias{penguins} | 
|  | 6 | +\alias{penguins_raw} | 
|  | 7 | +\description{ | 
|  | 8 | +  Data on adult penguins covering three species found on three islands in the  | 
|  | 9 | +  Palmer Archipelago, Antarctica, including their size  | 
|  | 10 | +  (flipper length, body mass, bill dimensions), and sex. | 
|  | 11 | + | 
|  | 12 | +  The columns of \code{penguins} are a subset of the more extensive | 
|  | 13 | +  \code{penguins_raw} data frame, which includes nesting observations  | 
|  | 14 | +  and blood isotope data. There are differences in the column names  | 
|  | 15 | +  and data types. See the \sQuote{Format} section for details.  | 
|  | 16 | +} | 
|  | 17 | +\usage{ | 
|  | 18 | +penguins | 
|  | 19 | +penguins_raw | 
|  | 20 | +} | 
|  | 21 | +\format{ | 
|  | 22 | +  \code{penguins} is a data frame with 344 rows and 8 variables: | 
|  | 23 | +  \describe{ | 
|  | 24 | +    \item{\code{species}}{\code{\link{factor}}, with levels  | 
|  | 25 | +      \code{Adelie}, \code{Chinstrap}, and \code{Gentoo}} | 
|  | 26 | +    \item{\code{island}}{\code{factor},  | 
|  | 27 | +      with levels \code{Biscoe}, \code{Dream}, and \code{Torgersen}} | 
|  | 28 | +    \item{\code{bill_len}}{\code{\link{numeric}}, bill length (millimeters)} | 
|  | 29 | +    \item{\code{bill_dep}}{\code{numeric}, bill depth (millimeters)} | 
|  | 30 | +    \item{\code{flipper_len}}{\code{\link{integer}}, flipper length (millimeters)} | 
|  | 31 | +    \item{\code{body_mass}}{\code{integer}, body mass (grams)} | 
|  | 32 | +    \item{\code{sex}}{\code{factor}, with levels \code{female} and \code{male}} | 
|  | 33 | +    \item{\code{year}}{\code{integer}, study year: 2007, 2008, or 2009} | 
|  | 34 | +  } | 
|  | 35 | + | 
|  | 36 | +  \code{penguins_raw} is a data frame with 344 rows and 17 variables.  | 
|  | 37 | +  8 columns correspond to columns in \code{penguins},  | 
|  | 38 | +  though with different variable names and/or classes: | 
|  | 39 | +  \describe{ | 
|  | 40 | +    \item{\code{Species}}{\code{character}} | 
|  | 41 | +    \item{\code{Island}}{\code{character}} | 
|  | 42 | +    \item{\code{Culmen Length (mm)}}{\code{numeric}, bill length} | 
|  | 43 | +    \item{\code{Culmen Depth (mm)}}{\code{numeric}, bill depth} | 
|  | 44 | +    \item{\code{Flipper Length (mm)}}{\code{numeric}, flipper length} | 
|  | 45 | +    \item{\code{Body Mass (g)}}{\code{numeric}, body mass} | 
|  | 46 | +    \item{\code{Sex}}{\code{character}} | 
|  | 47 | +    \item{\code{Date Egg}}{\code{\link{Date}}, date the study nest was observed with 1 egg. | 
|  | 48 | +      The year component is the \code{year} column in \code{penguins}} | 
|  | 49 | +  } | 
|  | 50 | + | 
|  | 51 | +  There are 9 further columns in \code{penguins_raw}: | 
|  | 52 | +  \describe{ | 
|  | 53 | +    \item{\code{studyName}}{\code{character}, expedition during which the data was collected} | 
|  | 54 | +    \item{\code{Sample Number}}{\code{numeric}, continuous numbering sequence for each sample} | 
|  | 55 | +    \item{\code{Region}}{\code{character}, the region of the Palmer LTER sampling grid} | 
|  | 56 | +    \item{\code{Stage}}{\code{character}, denoting reproductive stage at sampling} | 
|  | 57 | +    \item{\code{Individual ID}}{\code{character}, unique ID for each individual in the dataset} | 
|  | 58 | +    \item{\code{Clutch Completion}}{\code{character},  | 
|  | 59 | +      if the study nest was observed with a full clutch, i.e., 2 eggs} | 
|  | 60 | +    \item{\code{Delta 15 N (o/oo)}}{\code{numeric}, the ratio of stable isotopes 15N:14N} | 
|  | 61 | +    \item{\code{Delta 13 C (o/oo)}}{\code{numeric}, the ratio of stable isotopes 13C:12C} | 
|  | 62 | +    \item{\code{Comments}}{\code{character}, additional relevant information} | 
|  | 63 | +  } | 
|  | 64 | +} | 
|  | 65 | +\source{ | 
|  | 66 | +  \describe{ | 
|  | 67 | +    \item{\enc{Adélie}{Adelie} penguins:}{Palmer Station Antarctica LTER and K. Gorman (2020). | 
|  | 68 | +      Structural size measurements and isotopic signatures of foraging | 
|  | 69 | +      among adult male and female \enc{Adélie}{Adelie} penguins (Pygoscelis adeliae) | 
|  | 70 | +      nesting along the Palmer Archipelago near Palmer Station, 2007-2009 | 
|  | 71 | +      ver 5. Environmental Data Initiative, \doi{10.6073/pasta/98b16d7d563f265cb52372c8ca99e60f}.} | 
|  | 72 | + | 
|  | 73 | +    \item{Gentoo penguins:}{Palmer Station Antarctica LTER and K. Gorman (2020). | 
|  | 74 | +      \doi{10.6073/pasta/7fca67fb28d56ee2ffa3d9370ebda689}.} | 
|  | 75 | + | 
|  | 76 | +    \item{Chinstrap penguins:}{Palmer Station Antarctica LTER and K. Gorman (2020). | 
|  | 77 | +      \doi{10.6073/pasta/c14dfcfada8ea13a17536e73eb6fbe9e}.} | 
|  | 78 | +  } | 
|  | 79 | + | 
|  | 80 | +  The title naming convention for the source for the Gentoo and Chinstrap | 
|  | 81 | +  data is the same as for \enc{Adélie}{Adelie} penguins. | 
|  | 82 | +} | 
|  | 83 | +\references{ | 
|  | 84 | +  Gorman, K. B., Williams, T. D. and Fraser, W. R. (2014) | 
|  | 85 | +  Ecological Sexual Dimorphism and Environmental Variability within a | 
|  | 86 | +  Community of Antarctic Penguins (Genus Pygoscelis). | 
|  | 87 | +  \emph{PLoS ONE} \bold{9}, 3, e90081; \doi{10.1371/journal.pone.0090081}. | 
|  | 88 | + | 
|  | 89 | +  Horst, A. M., Hill, A. P. and Gorman, K. B. (2022)  | 
|  | 90 | +  Palmer Archipelago Penguins Data in the palmerpenguins R Package  | 
|  | 91 | +  - An Alternative to Anderson's Irises. | 
|  | 92 | +  \emph{R Journal} \bold{14}, 1; \doi{10.32614/RJ-2022-020}. | 
|  | 93 | +
 | 
|  | 94 | +  Kaye, E., Turner, H., Gorman, K. B., Horst, A. M. and Hill, A. P. (2025) | 
|  | 95 | +  Preparing the Palmer Penguins Data for the \pkg{datasets} Package in R.  | 
|  | 96 | +  \doi{10.5281/zenodo.14902740}. | 
|  | 97 | +} | 
|  | 98 | +\details{ | 
|  | 99 | +  \bibcite{Gorman \abbr{et al.}\sspace(2014)} | 
|  | 100 | +  used the data to study sex dimorphism separately for the three species. | 
|  | 101 | +
 | 
|  | 102 | +  \bibcite{Horst \abbr{et al.}\sspace(2022)} popularized the data as an illustration  | 
|  | 103 | +  for different statistical methods, as an alternative to the \code{\link{iris}} data. | 
|  | 104 | +
 | 
|  | 105 | +  \bibcite{Kaye \abbr{et al.}\sspace(2025)} provide the scripts used to create | 
|  | 106 | +  these data sets from the original source data,  | 
|  | 107 | +  and a notebook reproducing results from \bibcite{Gorman \abbr{et al.}\sspace(2014)}. | 
|  | 108 | +} | 
|  | 109 | +\note{ | 
|  | 110 | +  These data sets are also available in the \CRANpkg{palmerpenguins} package.   | 
|  | 111 | +  See the \href{https://allisonhorst.github.io/palmerpenguins/}{package website}  | 
|  | 112 | +  for further details and resources. | 
|  | 113 | +
 | 
|  | 114 | +  The \code{penguins} data has some shorter variable names than the \pkg{palmerpenguins} version,  | 
|  | 115 | +  for compact code and data display.  | 
|  | 116 | +} | 
|  | 117 | +\examples{ | 
|  | 118 | +## view summaries | 
|  | 119 | +summary(penguins) | 
|  | 120 | +summary(penguins_raw) # not useful for character vectors | 
|  | 121 | +## convert character vectors to factors first | 
|  | 122 | +dFactor <- function(dat) { | 
|  | 123 | +  dat[] <- lapply(dat, \(.) if (is.character(.)) as.factor(.) else .) | 
|  | 124 | +  dat | 
|  | 125 | +} | 
|  | 126 | +summary(dFactor(penguins_raw)) | 
|  | 127 | +
 | 
|  | 128 | +## visualise distribution across factors | 
|  | 129 | +plot(island ~ species, data = penguins) | 
|  | 130 | +plot(sex ~ interaction(island, species, sep = "\n"), data = penguins) | 
|  | 131 | +
 | 
|  | 132 | +## bill depth vs. length by species (color) and sex (symbol): | 
|  | 133 | +## positive correlations for all species, males tend to have bigger bills | 
|  | 134 | +sym <- c(1, 16) | 
|  | 135 | +pal <- c("darkorange","purple","cyan4") | 
|  | 136 | +plot(bill_dep ~ bill_len, data = penguins, pch = sym[sex], col = pal[species]) | 
|  | 137 | +
 | 
|  | 138 | +## simplified sex dimorphism analysis for Adelie species: | 
|  | 139 | +## proportion of males increases with several size measurements | 
|  | 140 | +adelie <- subset(penguins, species == "Adelie") | 
|  | 141 | +plot(sex ~ bill_len, data = adelie) | 
|  | 142 | +plot(sex ~ bill_dep, data = adelie) | 
|  | 143 | +plot(sex ~ body_mass, data = adelie) | 
|  | 144 | +m <- glm(sex ~ bill_len + bill_dep + body_mass, data = adelie, family = binomial) | 
|  | 145 | +summary(m) | 
|  | 146 | +
 | 
|  | 147 | +## Produce the long variable names as from {palmerpenguins} pkg: | 
|  | 148 | +long_nms <- sub("len", "length_mm", | 
|  | 149 | +                sub("dep","depth_mm", | 
|  | 150 | +                    sub("mass", "mass_g", colnames(penguins)))) | 
|  | 151 | +## compare long and short names: | 
|  | 152 | +noquote(rbind(long_nms, nms = colnames(penguins))) | 
|  | 153 | +
 | 
|  | 154 | +\dontrun{ # << keeping shorter 'penguins' names in this example: | 
|  | 155 | +    colnames(penguins) <- long_nms | 
|  | 156 | +} | 
|  | 157 | +} | 
|  | 158 | +\keyword{datasets} | 
0 commit comments