MyNixOS website logo
Description

A 'tidymodels' Engine and Other Extensions for the 'midr' Package.

Provides a 'parsnip' engine for the 'midr' package, enabling users to fit, tune, and evaluate Maximum Interpretation Decomposition (MID) models within the 'tidymodels' framework. Developed through research by the Moonlight Seminar 2025, a study group of actuaries from the Institute of Actuaries of Japan, to enhance practical applications of interpretable modeling. Detailed methodology is available in Asashiba et al. (2025) <doi:10.48550/arXiv.2506.08338>.

midnight

The ‘midnight’ package implements a ‘parsnip’ engine for the ‘midr’ package, allowing users to seamlessly fit, tune, and evaluate MID (Maximum Interpretation Decomposition) models with ‘tidymodels’ workflows. Development and augmentation of the package are driven by research from the “Moonlight Seminar 2025”, a collaborative study group of actuaries from the Institute of Actuaries of Japan focused on advancing the practical applications of interpretable models.

Installation

You can install the development version of midnight from GitHub with:

# install.packages("pak")
pak::pak("ryo-asashi/midnight")

Fit MID Models using ‘parsnip’

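The examples below use the Bikeshare dataset from 'ISLR2'. First, load the required packages, call nightfall() to activate 'midnight', and split the data into training and validation subsets: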

library(tidymodels)
library(midr)
library(midnight)
library(gridExtra)
library(ISLR2)

nightfall() # activate midnight
# split the dataset into training / validation subsets
set.seed(42)
usecol <- c("mnth", "hr", "workingday", "weathersit",
            "temp", "hum", "windspeed", "bikers")
all <- ISLR2::Bikeshare |>
  select(all_of(usecol)) |>
  mutate(workingday = as.factor(workingday))
holdout <- initial_split(all, prop = .5)
train <- training(holdout)
valid <- testing(holdout)

First-Order MID Model

# create a first-order mid surrogate model
mid_spec_1 <- mid_reg()
mid_spec_1
#> mid reg Model Specification (regression)
#> 
#> Computational engine: midr
# fit the model
mid_1 <- mid_spec_1 %>%
  fit(bikers ~ ., train)
mid_1
#> parsnip model object
#> 
#> 
#> Call:
#> interpret(formula = bikers ~ ., data = data, model = NULL, weights = NULL,
#>  k = NULL, k2 = NULL, lambda = NULL, terms = terms)
#> 
#> Intercept: 146.06
#> 
#> Main Effects:
#> 7 main effect terms
#> 
#> Uninterpreted Variation Ratio: 0.30041
# evaluate the model
augment(mid_1, new_data = valid) %>%
  rmse(truth = bikers, estimate = .pred)
#> # A tibble: 1 × 3
#>   .metric .estimator .estimate
#>   <chr>   <chr>          <dbl>
#> 1 rmse    standard        72.9
imp_1 <- mid.importance(mid_1$fit, data = train)
grid.arrange(nrow = 2,
 ggmid(imp_1),
 ggmid(mid_1$fit, "hr"),
 ggmid(mid_1$fit, "temp"),
 ggmid(mid_1$fit, "mnth")
)
par.midr(mar = c(1, 1, 0, 0))
persp(mid_1$fit, "temp:hr", theta = 50, phi = 20, shade = .5)

Second-Order MID Model

# create a second-order mid surrogate model via "custom formula"
mid_spec_2 <- mid_reg(
  penalty = 0.000001,
  terms = bikers ~ .^2 # add interaction terms
) |>
  set_engine(
    "midr",
    method = "llt" # for faster computation
  )
mid_spec_2
#> mid reg Model Specification (regression)
#> 
#> Main Arguments:
#>   penalty = 1e-06
#>   terms = bikers ~ .^2
#> 
#> Engine-Specific Arguments:
#>   method = llt
#> 
#> Computational engine: midr
# fit the model
mid_2 <- mid_spec_2 %>%
  fit(bikers ~ ., train)
mid_2
#> parsnip model object
#> 
#> 
#> Call:
#> interpret(formula = bikers ~ .^2, data = data, model = NULL,
#>  weights = NULL, k = NULL, k2 = NULL, lambda = 1e-06, terms = terms,
#>  method = "llt")
#> 
#> Intercept: 146.06
#> 
#> Main Effects:
#> 7 main effect terms
#> 
#> Interactions:
#> 21 interaction terms
#> 
#> Uninterpreted Variation Ratio: 0.069436
# evaluate the model
augment(mid_2, new_data = valid) %>%
  rmse(truth = bikers, estimate = .pred)
#> # A tibble: 1 × 3
#>   .metric .estimator .estimate
#>   <chr>   <chr>          <dbl>
#> 1 rmse    standard        44.3
imp_2 <- mid.importance(mid_2$fit, data = train)
grid.arrange(nrow = 2,
 ggmid(imp_2, max.nterms = 15),
 ggmid(mid_2$fit, "hr"),
 ggmid(mid_2$fit, "temp"),
 ggmid(mid_2$fit, "hr:workingday", type = "data",
       data = valid, main.effects = TRUE)
)
par.midr(mar = c(1, 1, 0, 0))
persp(mid_2$fit, "temp:hr", theta = 50, phi = 20, shade = .5)

Tune MID Surrogate Models using ‘tune’

# create a tunable second-order mid surrogate model via "custom formula"
# (arguments set to tune() are placeholders to be optimized below)
mid_spec_3 <- mid_reg(
  params_main = tune(),
  params_inter = tune(),
  penalty = tune(),
  terms = bikers ~ .^2
) |>
  set_engine(
    engine = "midr",
    method = "llt", # for faster computation
    verbosity = 0L, # for keeping console clean
    singular.ok = TRUE # for avoiding errors
  )
mid_spec_3
#> mid reg Model Specification (regression)
#> 
#> Main Arguments:
#>   penalty = tune()
#>   params_main = tune()
#>   params_inter = tune()
#>   terms = bikers ~ .^2
#> 
#> Engine-Specific Arguments:
#>   method = llt
#>   verbosity = 0
#>   singular.ok = TRUE
#> 
#> Computational engine: midr
# define a cross validation method
set.seed(42)
cv <- vfold_cv(train, v = 2)
# execute the parameter tuning
tune_res <- mid_spec_3 %>%
  tune_bayes(
    bikers ~ ., # pass original data on to interpret()
    resamples = cv,
    iter = 50
  )
tune_best <- select_best(tune_res, metric = "rmse")
tune_best
#> # A tibble: 1 × 4
#>   penalty params_main params_inter .config
#>     <dbl>       <int>        <int> <chr>  
#> 1   0.668          70            5 Iter23
# create a second-order mid surrogate model using the best hyperparameters found
mid_spec_4 <- mid_reg(
  params_main = tune_best$params_main,
  params_inter = tune_best$params_inter,
  penalty = tune_best$penalty,
  terms = bikers ~ .^2
)
mid_spec_4
#> mid reg Model Specification (regression)
#> 
#> Main Arguments:
#>   penalty = tune_best$penalty
#>   params_main = tune_best$params_main
#>   params_inter = tune_best$params_inter
#>   terms = bikers ~ .^2
#> 
#> Computational engine: midr
# fit the model
mid_tune <- mid_spec_4 %>%
  fit(bikers ~ ., train)
mid_tune
#> parsnip model object
#> 
#> 
#> Call:
#> interpret(formula = bikers ~ .^2, data = data, model = NULL,
#>  weights = NULL, k = 70L, k2 = 5L, lambda = 0.668346758138943,
#>  terms = terms)
#> 
#> Intercept: 146.06
#> 
#> Main Effects:
#> 7 main effect terms
#> 
#> Interactions:
#> 21 interaction terms
#> 
#> Uninterpreted Variation Ratio: 0.080807
# evaluate the model
augment(mid_tune, new_data = valid) %>%
  rmse(truth = bikers, estimate = .pred)
#> # A tibble: 1 × 3
#>   .metric .estimator .estimate
#>   <chr>   <chr>          <dbl>
#> 1 rmse    standard        43.0
imp_tune <- mid.importance(mid_tune$fit, data = train)
grid.arrange(nrow = 2,
 ggmid(imp_tune, max.nterms = 15),
 ggmid(mid_tune$fit, "hr"),
 ggmid(mid_tune$fit, "temp"),
 ggmid(mid_tune$fit, "hr:workingday", type = "data",
       data = valid, main.effects = TRUE)
)
par.midr(mar = c(1, 1, 0, 0))
persp(mid_tune$fit, "temp:hr", theta = 50, phi = 20, shade = .5)

Compare MID Models

# construct a midlist object
mids <- midlist(
  "1d" = mid_1$fit,
  "2d" = mid_2$fit,
  "tuned" = mid_tune$fit
)
grid.arrange(
  ggmid(mids, "hr"),
  ggmid(mids, "temp", linewidth = 3/4)
)
Metadata

Version

0.2.0

License

Unknown

Platforms (80)

    Darwin
    FreeBSD
    Genode
    GHCJS
    Linux
    MMIXware
    NetBSD
    none
    OpenBSD
    Redox
    Solaris
    uefi
    WASI
    Windows
Show all
  • aarch64-darwin
  • aarch64-freebsd
  • aarch64-genode
  • aarch64-linux
  • aarch64-netbsd
  • aarch64-none
  • aarch64-uefi
  • aarch64-windows
  • aarch64_be-none
  • arc-linux
  • arm-none
  • armv5tel-linux
  • armv6l-linux
  • armv6l-netbsd
  • armv6l-none
  • armv7a-linux
  • armv7a-netbsd
  • armv7l-linux
  • armv7l-netbsd
  • avr-none
  • i686-cygwin
  • i686-freebsd
  • i686-genode
  • i686-linux
  • i686-netbsd
  • i686-none
  • i686-openbsd
  • i686-windows
  • javascript-ghcjs
  • loongarch64-linux
  • m68k-linux
  • m68k-netbsd
  • m68k-none
  • microblaze-linux
  • microblaze-none
  • microblazeel-linux
  • microblazeel-none
  • mips-linux
  • mips-none
  • mips64-linux
  • mips64-none
  • mips64el-linux
  • mipsel-linux
  • mipsel-netbsd
  • mmix-mmixware
  • msp430-none
  • or1k-none
  • powerpc-linux
  • powerpc-netbsd
  • powerpc-none
  • powerpc64-linux
  • powerpc64le-linux
  • powerpcle-none
  • riscv32-linux
  • riscv32-netbsd
  • riscv32-none
  • riscv64-linux
  • riscv64-netbsd
  • riscv64-none
  • rx-none
  • s390-linux
  • s390-none
  • s390x-linux
  • s390x-none
  • sh4-linux
  • vc4-none
  • wasm32-wasi
  • wasm64-wasi
  • x86_64-cygwin
  • x86_64-darwin
  • x86_64-freebsd
  • x86_64-genode
  • x86_64-linux
  • x86_64-netbsd
  • x86_64-none
  • x86_64-openbsd
  • x86_64-redox
  • x86_64-solaris
  • x86_64-uefi
  • x86_64-windows