Summarise all continuous variables by group. Non-numeric variables will be dropped.
Usage
summ_by(d, cols, ..., pct = c(0.25, 0.75), xname = "")
Examples
d = mtcars |> dplyr::mutate(vs=factor(vs), am=factor(am))
d |> summ_by()
#> NB: Non-numeric variables are dropped.
#> Dropped: vs am
#> Adding missing grouping variables: `name`
#> # A tibble: 9 × 10
#> name n nNA Mean SD Min P25 Med P75 Max
#> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 carb 32 0 2.81 1.62 1 2 2 4 8
#> 2 cyl 32 0 6.19 1.79 4 4 6 8 8
#> 3 disp 32 0 231. 124. 71.1 121. 196. 326 472
#> 4 drat 32 0 3.60 0.535 2.76 3.08 3.70 3.92 4.93
#> 5 gear 32 0 3.69 0.738 3 3 4 4 5
#> 6 hp 32 0 147. 68.6 52 96.5 123 180 335
#> 7 mpg 32 0 20.1 6.03 10.4 15.4 19.2 22.8 33.9
#> 8 qsec 32 0 17.8 1.79 14.5 16.9 17.7 18.9 22.9
#> 9 wt 32 0 3.22 0.978 1.51 2.58 3.32 3.61 5.42
d |> summ_by(pct=c(0.1,0.9))
#> NB: Non-numeric variables are dropped.
#> Dropped: vs am
#> Adding missing grouping variables: `name`
#> # A tibble: 9 × 10
#> name n nNA Mean SD Min P10 Med P90 Max
#> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 carb 32 0 2.81 1.62 1 1 2 4 8
#> 2 cyl 32 0 6.19 1.79 4 4 6 8 8
#> 3 disp 32 0 231. 124. 71.1 80.6 196. 396 472
#> 4 drat 32 0 3.60 0.535 2.76 3.01 3.70 4.21 4.93
#> 5 gear 32 0 3.69 0.738 3 3 4 5 5
#> 6 hp 32 0 147. 68.6 52 66 123 244. 335
#> 7 mpg 32 0 20.1 6.03 10.4 14.3 19.2 30.1 33.9
#> 8 qsec 32 0 17.8 1.79 14.5 15.5 17.7 20.0 22.9
#> 9 wt 32 0 3.22 0.978 1.51 1.96 3.32 4.05 5.42
d |> summ_by(mpg)
#> NB: Non-numeric variables are dropped.
#> Dropped:
#> Adding missing grouping variables: `name`
#> # A tibble: 1 × 10
#> name n nNA Mean SD Min P25 Med P75 Max
#> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 mpg 32 0 20.1 6.03 10.4 15.4 19.2 22.8 33.9
d |> summ_by(mpg,vs)
#> Adding missing grouping variables: `vs`
#> # A tibble: 2 × 10
#> vs n nNA Mean SD Min P25 Med P75 Max
#> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 18 0 16.6 3.86 10.4 14.8 15.6 19.1 26
#> 2 1 14 0 24.6 5.38 17.8 21.4 22.8 29.6 33.9
d |> summ_by(mpg,vs,am)
#> Adding missing grouping variables: `vs`, `am`
#> # A tibble: 4 × 11
#> # Groups: vs [2]
#> vs am n nNA Mean SD Min P25 Med P75 Max
#> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 0 12 0 15.0 2.77 10.4 14.0 15.2 16.6 19.2
#> 2 0 1 6 0 19.8 4.01 15 16.8 20.4 21 26
#> 3 1 0 7 0 20.7 2.47 17.8 18.6 21.4 22.2 24.4
#> 4 1 1 7 0 28.4 4.76 21.4 25.0 30.4 31.4 33.9
d |> summ_by(c(mpg,disp))
#> NB: Non-numeric variables are dropped.
#> Dropped:
#> Adding missing grouping variables: `name`
#> # A tibble: 2 × 10
#> name n nNA Mean SD Min P25 Med P75 Max
#> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 disp 32 0 231. 124. 71.1 121. 196. 326 472
#> 2 mpg 32 0 20.1 6.03 10.4 15.4 19.2 22.8 33.9
d |> summ_by(c(mpg,disp),vs)
#> Adding missing grouping variables: `vs`
#> # A tibble: 2 × 19
#> vs mpg_n mpg_nNA mpg_Mean mpg_SD mpg_Min mpg_P25 mpg_Med mpg_P75 mpg_Max
#> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 18 0 16.6 3.86 10.4 14.8 15.6 19.1 26
#> 2 1 14 0 24.6 5.38 17.8 21.4 22.8 29.6 33.9
#> # ℹ 9 more variables: disp_n <int>, disp_nNA <int>, disp_Mean <dbl>,
#> # disp_SD <dbl>, disp_Min <dbl>, disp_P25 <dbl>, disp_Med <dbl>,
#> # disp_P75 <dbl>, disp_Max <dbl>
d |> summ_by(c(mpg,disp),vs,xname="mpg_")
#> Adding missing grouping variables: `vs`
#> # A tibble: 2 × 19
#> vs n nNA Mean SD Min P25 Med P75 Max disp_n disp_nNA
#> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 0 18 0 16.6 3.86 10.4 14.8 15.6 19.1 26 18 0
#> 2 1 14 0 24.6 5.38 17.8 21.4 22.8 29.6 33.9 14 0
#> # ℹ 7 more variables: disp_Mean <dbl>, disp_SD <dbl>, disp_Min <dbl>,
#> # disp_P25 <dbl>, disp_Med <dbl>, disp_P75 <dbl>, disp_Max <dbl>
# Grouping without column selection is possible but rarely useful in large datasets
d |> summ_by(,vs)
#> Adding missing grouping variables: `vs`
#> # A tibble: 2 × 82
#> vs mpg_n mpg_nNA mpg_Mean mpg_SD mpg_Min mpg_P25 mpg_Med mpg_P75 mpg_Max
#> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 18 0 16.6 3.86 10.4 14.8 15.6 19.1 26
#> 2 1 14 0 24.6 5.38 17.8 21.4 22.8 29.6 33.9
#> # ℹ 72 more variables: cyl_n <int>, cyl_nNA <int>, cyl_Mean <dbl>,
#> # cyl_SD <dbl>, cyl_Min <dbl>, cyl_P25 <dbl>, cyl_Med <dbl>, cyl_P75 <dbl>,
#> # cyl_Max <dbl>, disp_n <int>, disp_nNA <int>, disp_Mean <dbl>,
#> # disp_SD <dbl>, disp_Min <dbl>, disp_P25 <dbl>, disp_Med <dbl>,
#> # disp_P75 <dbl>, disp_Max <dbl>, hp_n <int>, hp_nNA <int>, hp_Mean <dbl>,
#> # hp_SD <dbl>, hp_Min <dbl>, hp_P25 <dbl>, hp_Med <dbl>, hp_P75 <dbl>,
#> # hp_Max <dbl>, drat_n <int>, drat_nNA <int>, drat_Mean <dbl>, …