Surveyexplorer

Visualize and tabulate single-choice, multiple-choice, and matrix-style questions from survey data. Includes the ability to group cross-tabulations, frequency distributions, and plots by categorical variables and to integrate survey weights. Ideal for quickly uncovering descriptive patterns in survey data.

Installation

# install.packages("devtools"), if not already downloaded
devtools::install_github("liamhaller/surveyexplorer")

Examples

library(surveyexplorer)

The data used in the following examples is from the berlinbears dataset, a fictional survey of bears in Berlin, that is included in the surveyexplorer package.

Single-choice questions

#Basic table
single_table(berlinbears, 
             question = income)

Question: income
n freq
<1000 82 16.40%
1000-2000 50 10.00%
2000-3000 177 35.40%
3000-4000 109 21.80%
5000+ 57 11.40%
No answer 22 4.40%
NA 3 0.60%
Column Total 500 1

Use group_by = to partition the question into several groups

single_table(berlinbears,
             question = income,
             group_by = gender)

Question: income
grouped by: gender
female male NA Rowwise Total
Frequency Count Frequency Count Frequency Count Frequency Count
<1000 16.74% 39 15.73% 39 21.05% 4 16.40% 82
1000-2000 9.87% 23 9.68% 24 15.79% 3 10.00% 50
2000-3000 35.62% 83 35.89% 89 26.32% 5 35.40% 177
3000-4000 21.89% 51 22.18% 55 15.79% 3 21.80% 109
5000+ 11.59% 27 10.89% 27 15.79% 3 11.40% 57
No answer 3.86% 9 4.84% 12 5.26% 1 4.40% 22
NA 0.43% 1 0.81% 2 0.00% 0 0.60% 3
Columnwise Total 46.60% 233 49.60% 248 3.80% 19 100.00% 500

Ignore unwanted subgroups with subgroups_to_exclude

single_table(berlinbears,
             question = income, 
             group_by = gender, 
             subgroups_to_exclude = NA) 

Question: income
grouped by: gender
female male Rowwise Total
Frequency Count Frequency Count Frequency Count
<1000 16.74% 39 15.73% 39 16.22% 78
1000-2000 9.87% 23 9.68% 24 9.77% 47
2000-3000 35.62% 83 35.89% 89 35.76% 172
3000-4000 21.89% 51 22.18% 55 22.04% 106
5000+ 11.59% 27 10.89% 27 11.23% 54
No answer 3.86% 9 4.84% 12 4.37% 21
NA 0.43% 1 0.81% 2 0.62% 3
Columnwise Total 48.44% 233 51.56% 248 100.00% 481

Remove NAs from the question variable with na.rm

single_table(berlinbears,
             question = income, 
             group_by = gender, 
             subgroups_to_exclude = NA,
             na.rm = TRUE)

Question: income
grouped by: gender
female male Rowwise Total
Frequency Count Frequency Count Frequency Count
<1000 16.81% 39 15.85% 39 16.32% 78
1000-2000 9.91% 23 9.76% 24 9.83% 47
2000-3000 35.78% 83 36.18% 89 35.98% 172
3000-4000 21.98% 51 22.36% 55 22.18% 106
5000+ 11.64% 27 10.98% 27 11.30% 54
No answer 3.88% 9 4.88% 12 4.39% 21
Columnwise Total 48.54% 232 51.46% 246 100.00% 478

Finally, you can specify survey weights using the weight option

single_table(berlinbears,
             question = income, 
             group_by = gender, 
             subgroups_to_exclude = NA,
             na.rm = TRUE,
             weights = weights)
Question: income
grouped by: gender
female male Rowwise Total
Frequency Count Frequency Count Frequency Count
<1000 15.96% 59.6 17.21% 75.2 16.63% 134.8
1000-2000 10.46% 39.1 10.19% 44.5 10.31% 83.6
2000-3000 33.79% 126.3 33.88% 148.0 33.84% 274.3
3000-4000 25.08% 93.7 25.34% 110.7 25.22% 204.4
5000+ 9.82% 36.7 8.68% 37.9 9.21% 74.6
No answer 4.90% 18.3 4.70% 20.5 4.79% 38.8
Columnwise Total 46.10% 373.6 53.90% 436.9 100.00% 810.5
Frequencies and counts are weighted

The same syntax can be applied to the single_freq function to plot frequencies of the question optionally partitioned by subgroups.

single_freq(berlinbears,
             question = income, 
             group_by = gender, 
             subgroups_to_exclude = NA,
             na.rm = TRUE,
             weights = weights)

Multiple-choice questions

The options and syntax for the multiple-choice table function multi_table and the graph function multi_freq are the same as for single-choice questions. The only difference is that the question input also accommodates tidyselect syntax to select the several columns that hold each answer option. For example, the question “will_eat” has five answer options, each stored in a column prefixed by “will_eat”

berlinbears |> 
  dplyr::select(starts_with('will_eat')) |> 
  head()
#>   will_eat.SQ001 will_eat.SQ002 will_eat.SQ003 will_eat.SQ004 will_eat.SQ005
#> 1              0              1              0              1              1
#> 2              0              1              1              1              1
#> 3              1              1              0              1              1
#> 4              0              0              0              1              0
#> 5              0              0              0              1              1
#> 6              0              0              0              1              0

The same syntax can be used to select the question for the multiple choice tables and graphs

multi_table(berlinbears, 
            question = dplyr::starts_with('will_eat'), 
            group_by = genus, 
            subgroups_to_exclude = NA,
            na.rm = TRUE)

Question: dplyr::starts_with(“will_eat”)
grouped by: genus
Ailuropoda Ursus Rowwise Total
Frequency Count Frequency Count Frequency Count
will_eat.SQ001 25.10% 61 26.83% 44 10.82% 105
will_eat.SQ002 58.02% 141 63.41% 104 25.26% 245
will_eat.SQ003 9.05% 22 10.98% 18 4.12% 40
will_eat.SQ004 97.53% 237 92.07% 151 40.00% 388
will_eat.SQ005 46.09% 112 48.78% 80 19.79% 192
Columnwise Total 59.07% 573 40.93% 397 100.00% 970

For graphing, the multi_freq function creates an UpSet plot to visualize the frequencies of the intersecting sets for each answer combination and also includes the ability to specify weights.

multi_freq(berlinbears, 
            question = dplyr::starts_with('will_eat'), 
            na.rm = TRUE,
            weights = weights)
#> Estimes are only preciese to one significant digit, weights may have been rounded

The graphs can also be grouped

multi_freq(berlinbears, 
            question = dplyr::starts_with('will_eat'), 
            group_by = genus,
            subgroups_to_exclude = NA,
            na.rm = FALSE,
            weights = weights)
#> Estimes are only preciese to one significant digit, weights may have been rounded

Matrix Questions

matrix_table has the same syntax as above and works with array or categorical questions

matrix_table(berlinbears, 
             dplyr::starts_with('p_'),
             group_by = is_parent)
matrix_table(berlinbears, 
             dplyr::starts_with('c_'),
             group_by = is_parent)

Question: dplyr::starts_with(“p_”)
grouped by: is_parent
1 2 3 4 5 NA
0
p_eatstrash 24.4% (81) 12.95% (43) 39.16% (130) 10.54% (35) 12.95% (43) NA
p_hibernates 6.93% (23) 3.31% (11) 20.78% (69) 31.93% (106) 37.05% (123) NA
p_likes_zoo 62.05% (206) 16.27% (54) 5.42% (18) 6.33% (21) 6.33% (21) 3.61% (12)
p_likeshoney 0.3% (1) 0.6% (2) 2.71% (9) 9.94% (33) 80.42% (267) 6.02% (20)
p_likespine 65.96% (219) 19.28% (64) 6.33% (21) 4.22% (14) 4.22% (14) NA
p_swims 11.14% (37) 8.73% (29) 47.59% (158) 13.86% (46) 12.35% (41) 6.33% (21)
1
p_eatstrash 26.19% (44) 17.26% (29) 31.55% (53) 12.5% (21) 12.5% (21) NA
p_hibernates 4.76% (8) 8.33% (14) 17.26% (29) 35.12% (59) 34.52% (58) NA
p_likes_zoo 62.5% (105) 13.69% (23) 3.57% (6) 5.95% (10) 5.95% (10) 8.33% (14)
p_likeshoney NA 0.6% (1) 6.55% (11) 10.71% (18) 80.36% (135) 1.79% (3)
p_likespine 71.43% (120) 11.31% (19) 7.14% (12) 4.76% (8) 5.36% (9) NA
p_swims 7.74% (13) 14.29% (24) 48.21% (81) 14.88% (25) 9.52% (16) 5.36% (9)

Question: dplyr::starts_with(“c_”)
grouped by: is_parent
high low medium NA
0
c_diet 6.02% (20) 71.99% (239) 16.57% (55) 5.42% (18)
c_exercise 25% (83) 27.71% (92) 24.1% (80) 23.19% (77)
1
c_diet 3.57% (6) 75% (126) 17.26% (29) 4.17% (7)
c_exercise 19.05% (32) 27.38% (46) 23.81% (40) 29.76% (50)

matrix_freq visualizes the frequencies of responses

matrix_freq(berlinbears, 
             dplyr::starts_with('p_'))

matrix_freq(berlinbears, 
             dplyr::starts_with('c_'),
             group_by = is_parent)

For array/matrix-style questions that are numeric, matrix_mean plots the mean values and confidence intervals

matrix_mean(berlinbears, 
             question = dplyr::starts_with('p_'),
             na.rm = TRUE)

#Can also apply grouping
matrix_mean(berlinbears, 
            question = dplyr::starts_with('p_'),
            na.rm = TRUE,
            group_by = species, 
            subgroups_to_exclude = NA)

#with survey weights
matrix_mean(berlinbears, 
            question = dplyr::starts_with('p_'),
            na.rm = TRUE,
            group_by = species, 
            subgroups_to_exclude = NA, 
            weights = weights)

Finally, for Likert questions (scales of 3, 5, 7, 9…), matrix_likert provides a custom plot

matrix_likert(berlinbears, 
              question = dplyr::starts_with('p_'))

#you can specify custom labels with the `labels` argument
matrix_likert(berlinbears,
              question = dplyr::starts_with('p_'),
              labels = c('Strongly disagree', 'Disagree','Neutral','Agree','Strongly agree'))


#and pass colors using the colors option 
matrix_likert(berlinbears, 
              question = dplyr::starts_with('p_'),
              labels = c('Strongly disagree', 'Disagree','Neutral','Agree','Strongly agree'),
              colors = c("#E1AA28", "#1E5F46", "#7E8F75", "#EFCD83", "#E17832"))

#can also apply weights 
matrix_likert(berlinbears, 
              question = dplyr::starts_with('p_'),
              labels = c('Strongly disagree', 'Disagree','Neutral','Agree','Strongly agree'), 
              colors = c("#E1AA28", "#1E5F46", "#7E8F75", "#EFCD83", "#E17832"),
              weights = weights) 

Overview

Functions

*_table functions return a gt table of the cross tabulations and frequencies for each question while *_freq returns the same data but as a plot.

For matrix-style questions with numerical input, matrix_mean plots the mean value and ± two standard deviations. matrix_likert visualizes questions that accept Likert responses (strongly agree-strongly disagree) or questions with 3, 5, 7, 9… categories.

Syntax

Each function contains the following options