Relabel factor levels with a function, collapsing as necessary
Arguments
- .f
A factor (or character vector).
- .fun
A function to be applied to each level. Must accept one character argument and return a character vector of the same length as its input.
You can also use `~` to create a function as shorthand (in the style of purrr): `~ paste(., "x")` is equivalent to `function(.) paste(., "x")`.
- ...
Additional arguments to `.fun`.
Examples
# Tabulate the levels of partyid before any relabelling.
gss_cat$partyid %>% fct_count()
#> # A tibble: 10 × 2
#> f n
#> <fct> <int>
#> 1 No answer 154
#> 2 Don't know 1
#> 3 Other party 393
#> 4 Strong republican 2314
#> 5 Not str republican 3032
#> 6 Ind,near rep 1791
#> 7 Independent 4119
#> 8 Ind,near dem 2499
#> 9 Not str democrat 3690
#> 10 Strong democrat 3490
# Relabel with a formula shorthand: replace each "," in a level name with ", ".
gss_cat$partyid %>%
fct_relabel(~ gsub(",", ", ", .x)) %>%
fct_count()
#> # A tibble: 10 × 2
#> f n
#> <fct> <int>
#> 1 No answer 154
#> 2 Don't know 1
#> 3 Other party 393
#> 4 Strong republican 2314
#> 5 Not str republican 3032
#> 6 Ind, near rep 1791
#> 7 Independent 4119
#> 8 Ind, near dem 2499
#> 9 Not str democrat 3690
#> 10 Strong democrat 3490
# Collapse dollar-denominated income labels into $5000-wide "Gt $<floor>" bands.
#
# x: character vector of labels (e.g. factor levels).
# Returns a character vector of the same length as x. Labels that start with
# an optional "Lt " followed by "$<digits>" are replaced by "Gt $N", where N is
# the leading dollar amount rounded down to a multiple of 5000; every other
# label is returned unchanged.
convert_income <- function(x) {
  regex <- "^(?:Lt |)[$]([0-9]+).*$"
  is_range <- grepl(regex, x)
  num_income <- as.numeric(gsub(regex, "\\1", x[is_range]))
  num_income <- trunc(num_income / 5000) * 5000
  # Format explicitly: the default numeric-to-character conversion used by
  # paste0() renders values such as 100000 as "1e+05".
  x[is_range] <- paste0("Gt $", sprintf("%.0f", num_income))
  x
}
# Tabulate the levels of rincome before conversion.
fct_count(gss_cat$rincome)
#> # A tibble: 16 × 2
#> f n
#> <fct> <int>
#> 1 No answer 183
#> 2 Don't know 267
#> 3 Refused 975
#> 4 $25000 or more 7363
#> 5 $20000 - 24999 1283
#> 6 $15000 - 19999 1048
#> 7 $10000 - 14999 1168
#> 8 $8000 to 9999 340
#> 9 $7000 to 7999 188
#> 10 $6000 to 6999 215
#> 11 $5000 to 5999 227
#> 12 $4000 to 4999 226
#> 13 $3000 to 3999 276
#> 14 $1000 to 2999 395
#> 15 Lt $1000 286
#> 16 Not applicable 7043
# Apply convert_income() to the level names directly to preview the new labels.
convert_income(levels(gss_cat$rincome))
#> [1] "No answer" "Don't know" "Refused" "Gt $25000"
#> [5] "Gt $20000" "Gt $15000" "Gt $10000" "Gt $5000"
#> [9] "Gt $5000" "Gt $5000" "Gt $5000" "Gt $0"
#> [13] "Gt $0" "Gt $0" "Gt $0" "Not applicable"
# Relabel the factor with convert_income(); levels that receive the same new
# label are collapsed into one, so the count table has fewer rows.
rincome2 <- fct_relabel(gss_cat$rincome, convert_income)
fct_count(rincome2)
#> # A tibble: 10 × 2
#> f n
#> <fct> <int>
#> 1 No answer 183
#> 2 Don't know 267
#> 3 Refused 975
#> 4 Gt $25000 7363
#> 5 Gt $20000 1283
#> 6 Gt $15000 1048
#> 7 Gt $10000 1168
#> 8 Gt $5000 970
#> 9 Gt $0 1183
#> 10 Not applicable 7043