======================================
===== Álvaro A. Gutiérrez-Vargas =====
======================================

Using aggregate in R

R

1 Using aggregate in R

QWERTYUIOP

A few examples illustrating the usage of the aggregate() function.

1.1 Example data

# Example data: eight people with an affiliation, an age and a salary.
# Row 5 has a missing affiliation and a missing salary.
df <- data.frame(
  id = c(1, 2, 3, 4, 5, 6, 7, 8),
  name = c(
    "Shigeo Kageyama",
    "Ritsu Kageyama",
    "Arataka Reigen",
    "Tsubomi Takane",
    "Sho Suzuki",
    "Ichi Mezato",
    "Ryohei Shimura",
    "Tenga Onigawara"
  ),
  affiliation = c(
    "Body Improvement Club",
    "Salt Middle School",
    "Spirits and Such Consultation Office",
    "Salt Middle School",
    NA,
    "Psycho Helmet Cult",
    "Body Improvement Club",
    "Body Improvement Club"
  ),
  age = c(12, 13, 27, 14, 13, 14, 14, 14),
  # NA_real_ keeps the salary column numeric (double).
  salary = c(10, 0, 500, 100, NA_real_, 20, 200, 200)
)
##   id            name                          affiliation age salary
## 1  1 Shigeo Kageyama                Body Improvement Club  12     10
## 2  2  Ritsu Kageyama                   Salt Middle School  13      0
## 3  3  Arataka Reigen Spirits and Such Consultation Office  27    500
## 4  4  Tsubomi Takane                   Salt Middle School  14    100
## 5  5      Sho Suzuki                                 <NA>  13     NA
## 6  6     Ichi Mezato                   Psycho Helmet Cult  14     20
## 7  7  Ryohei Shimura                Body Improvement Club  14    200
## 8  8 Tenga Onigawara                Body Improvement Club  14    200

1.2 Mean of the age by affiliation

# Average age within each affiliation.
# The inputs are passed unnamed, so the result carries aggregate()'s
# default column names (Group.1 and x in the printed output).
mean_age <- aggregate(
  x = df[["age"]],
  by = list(df[["affiliation"]]),
  FUN = mean
)

1.3 Small improvement: name the columns on the fly…

##                                Group.1        x
## 1                Body Improvement Club 13.33333
## 2                   Psycho Helmet Cult 14.00000
## 3                   Salt Middle School 13.50000
## 4 Spirits and Such Consultation Office 27.00000
# Same aggregation, but naming the list elements passed to `x` and `by`
# gives the result meaningful column names (affiliation, mean_age).
mean_age_names <- aggregate(
  x = list(mean_age = df[["age"]]),
  by = list(affiliation = df[["affiliation"]]),
  FUN = mean
)
##                            affiliation mean_age
## 1                Body Improvement Club 13.33333
## 2                   Psycho Helmet Cult 14.00000
## 3                   Salt Middle School 13.50000
## 4 Spirits and Such Consultation Office 27.00000

1.4 Combining aggregate with user-defined functions: Count integrants (members)

# Count the number of integrants (members) in a group.
#
# x: the values belonging to one group.
# Returns NROW(x): the number of elements of a vector, or the number of
# rows of a matrix or data frame.
count <- function(x) {
  NROW(x)
}

# Number of integrants (members) per affiliation, obtained by applying
# the user-defined count() to the ids of each group.
number_integrants <- aggregate(
  x = list(integrants = df[["id"]]),
  by = list(affiliation = df[["affiliation"]]),
  FUN = count
)
##                            affiliation integrants
## 1                Body Improvement Club          3
## 2                   Psycho Helmet Cult          1
## 3                   Salt Middle School          2
## 4 Spirits and Such Consultation Office          1

1.5 Using a slightly more elaborate function: The second integrant (member) by affiliation.

# Pick the "second" value of a group.
#
# x: the values belonging to one group.
# Returns x unchanged when it holds exactly one element; otherwise the
# element in position 2 after sorting x in decreasing order (NA when the
# sorted vector has fewer than two elements).
second <- function(x) {
  if (length(x) != 1) {
    ranked <- sort(x, decreasing = TRUE)
    return(ranked[2])
  }
  x
}

# For each affiliation, the name selected by the user-defined second():
# the only member of a single-person group, otherwise the second name in
# decreasing sort order.
second_int <- aggregate(
  x = list(second_member = df[["name"]]),
  by = list(affiliation = df[["affiliation"]]),
  FUN = second
)
##                            affiliation   second_member
## 1                Body Improvement Club Shigeo Kageyama
## 2                   Psycho Helmet Cult     Ichi Mezato
## 3                   Salt Middle School  Ritsu Kageyama
## 4 Spirits and Such Consultation Office  Arataka Reigen