Skip to content

Relabel factor levels with a function, collapsing as necessary

Usage

fct_relabel(.f, .fun, ...)

Arguments

.f

A factor (or character vector).

.fun

A function to be applied to each level. Must accept one character argument and return a character vector of the same length as its input.

You can also use ~ to create a function as shorthand (in the style of purrr): ~ paste(., "x") is equivalent to function(.) paste(., "x").

...

Additional arguments passed on to .fun.

Examples

# Count observations per level of partyid before any relabelling.
gss_cat$partyid %>% fct_count()
#> # A tibble: 10 × 2
#>    f                      n
#>    <fct>              <int>
#>  1 No answer            154
#>  2 Don't know             1
#>  3 Other party          393
#>  4 Strong republican   2314
#>  5 Not str republican  3032
#>  6 Ind,near rep        1791
#>  7 Independent         4119
#>  8 Ind,near dem        2499
#>  9 Not str democrat    3690
#> 10 Strong democrat     3490
# Relabel with a ~ shorthand function: replace every "," in a level label
# with ", " (comma followed by a space), then count again.
gss_cat$partyid %>%
  fct_relabel(~ gsub(",", ", ", .x)) %>%
  fct_count()
#> # A tibble: 10 × 2
#>    f                      n
#>    <fct>              <int>
#>  1 No answer            154
#>  2 Don't know             1
#>  3 Other party          393
#>  4 Strong republican   2314
#>  5 Not str republican  3032
#>  6 Ind, near rep       1791
#>  7 Independent         4119
#>  8 Ind, near dem       2499
#>  9 Not str democrat    3690
#> 10 Strong democrat     3490

# Collapse income-bracket labels into coarser "Gt $<amount>" labels.
#
# x: character vector of labels (for example, the levels of a factor).
# Returns: a character vector the same length as `x`. Labels that start with
#   an optional "Lt " prefix followed by "$" and digits are replaced by
#   "Gt $" plus that leading amount rounded down to a multiple of 5000.
#   All other labels are returned unchanged.
convert_income <- function(x) {
  # Optional "Lt " prefix, a literal "$", then the leading amount (captured).
  regex <- "^(?:Lt |)[$]([0-9]+).*$"
  is_range <- grepl(regex, x)
  num_income <- as.numeric(gsub(regex, "\\1", x[is_range]))
  # Round down to the nearest multiple of 5000.
  num_income <- trunc(num_income / 5000) * 5000
  # Format explicitly without scientific notation: pasting the bare number
  # would render 100000 as "1e+05". `trim = TRUE` prevents width padding.
  x[is_range] <- paste0(
    "Gt $",
    format(num_income, scientific = FALSE, trim = TRUE)
  )
  x
}
# Count observations per level of rincome before relabelling.
fct_count(gss_cat$rincome)
#> # A tibble: 16 × 2
#>    f                  n
#>    <fct>          <int>
#>  1 No answer        183
#>  2 Don't know       267
#>  3 Refused          975
#>  4 $25000 or more  7363
#>  5 $20000 - 24999  1283
#>  6 $15000 - 19999  1048
#>  7 $10000 - 14999  1168
#>  8 $8000 to 9999    340
#>  9 $7000 to 7999    188
#> 10 $6000 to 6999    215
#> 11 $5000 to 5999    227
#> 12 $4000 to 4999    226
#> 13 $3000 to 3999    276
#> 14 $1000 to 2999    395
#> 15 Lt $1000         286
#> 16 Not applicable  7043
# Preview the new labels: several original levels map to the same label.
convert_income(levels(gss_cat$rincome))
#>  [1] "No answer"      "Don't know"     "Refused"        "Gt $25000"     
#>  [5] "Gt $20000"      "Gt $15000"      "Gt $10000"      "Gt $5000"      
#>  [9] "Gt $5000"       "Gt $5000"       "Gt $5000"       "Gt $0"         
#> [13] "Gt $0"          "Gt $0"          "Gt $0"          "Not applicable"
# Relabel the factor with convert_income(); levels that receive the same new
# label are collapsed into a single level, so the counts are combined.
rincome2 <- fct_relabel(gss_cat$rincome, convert_income)
fct_count(rincome2)
#> # A tibble: 10 × 2
#>    f                  n
#>    <fct>          <int>
#>  1 No answer        183
#>  2 Don't know       267
#>  3 Refused          975
#>  4 Gt $25000       7363
#>  5 Gt $20000       1283
#>  6 Gt $15000       1048
#>  7 Gt $10000       1168
#>  8 Gt $5000         970
#>  9 Gt $0           1183
#> 10 Not applicable  7043