Skip to contents

The function actual_expected_bucketed() creates actual vs expected plots, bucketed by prediction percentile.

Pre-processing

A critical step for this package to work is to set all categorical predictors as factors.

library(dplyr)
library(prettyglm)
data("titanic")

# Categorical predictors must be stored as factors before modelling;
# list them once and convert them all in a single step below.
columns_to_factor <- c(
  "Pclass",
  "Sex",
  "Cabin",
  "Embarked",
  "Cabintype"
)

# Mean of the observed (non-missing) ages, used to impute missing Age values.
meanage <- base::mean(titanic$Age, na.rm = TRUE)

titanic <- titanic %>%
  # Convert every listed column to a factor, keeping the column names.
  dplyr::mutate(dplyr::across(dplyr::all_of(columns_to_factor), factor)) %>%
  # Fill missing ages with the mean age computed above.
  dplyr::mutate(Age = base::ifelse(is.na(Age), meanage, Age)) %>%
  # Bucket Age into a categorical predictor.
  # NOTE(review): presumably g = 10 requests ten quantile groups and
  # levels.mean = TRUE labels each group by its mean -- confirm in ?cut3.
  dplyr::mutate(Age_Cat = prettyglm::cut3(Age, levels.mean = TRUE, g = 10))

# Fit a baseline binomial GLM (logit link) for Survived on the listed
# predictors, using the prepared titanic data.
survival_model <- stats::glm(
  formula = Survived ~ Pclass + Sex + Fare + Age_Cat +
    Embarked + SibSp + Parch,
  family = binomial(link = "logit"),
  data = titanic
)

Actual vs Expected Bucketed By Prediction Percentile

# Actual vs expected plot for the fitted model over the full data set.
actual_expected_bucketed(
  target_variable = "Survived",
  model_object = survival_model,
  data_set = titanic
)

# Same plot with the additional facetby argument set to the Sex column.
# NOTE(review): presumably this draws one panel per level of Sex -- confirm
# against the actual_expected_bucketed documentation.
actual_expected_bucketed(
  target_variable = "Survived",
  model_object = survival_model,
  data_set = titanic,
  facetby = "Sex"
)