From 8ed8646513297e50628efad3c2b05ae71ea210f1 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:21:03 +0900 Subject: [PATCH 01/40] Add files via upload --- R/mice.impute.svm | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 R/mice.impute.svm diff --git a/R/mice.impute.svm b/R/mice.impute.svm new file mode 100644 index 00000000..df43bdea --- /dev/null +++ b/R/mice.impute.svm @@ -0,0 +1,80 @@ +#' Imputation by the Support Vector Machine +#' +#' Imputes binary variables using the Support Vector Machine (SVM) with a +#' bootstrap step to ensure proper multiple imputation. +#' +#' @inheritParams mice.impute.pmm +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' +#' @return Vector with imputed values, same length as \code{sum(wy)} +#' +#' @details +#' The method combines the bootstrap and SVM to generate multiple imputation +#' that is proper. It is particularly useful for high-dimensional data. +#' +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine +#' for High-Dimensional Data with General Missing Patterns in Causal Inference. +#' Journal of Statistical Computation and Simulation. +#' +#' @author Masayoshi Takahashi +#' @family univariate imputation functions +#' @keywords datagen +#' @export +#' @importFrom stats predict runif +mice.impute.svm <- function(y, ry, x, wy = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + # 1. Check for dependencies + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function to work. Please install it.", call. = FALSE) + } + + if (is.null(wy)) { + wy <- !ry + } + + # 2. Bootstrap for proper multiple imputation + # Resampling from observed data (yobs, xobs) + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + + dotx <- xobs[s, , drop = FALSE] + doty <- yobs[s] + + # 3. Training SVM model (using matrix interface) + # type = "C-svc" for classification. prob.model = TRUE allows us to compute probabilities. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 4. Compute predicted probabilities for missing values (wy) + # We assume that the probabilities for category [1] (or second level) are stored in column 2. + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 5. Sampling 0 or 1 based on the probabilities + # Standard stochastic imputation using runif + draw <- as.integer(runif(length(p)) <= p) + + # 6. Post-processing according to the type (such as factor) of the original y + if (is.factor(y)) { + # Transform to the factor type keeping the level of the original y + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From 06120fb0d14b92829a8ba0c0caa6b064aee0009b Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:21:57 +0900 Subject: [PATCH 02/40] Delete R/mice.impute.svm --- R/mice.impute.svm | 80 ----------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 R/mice.impute.svm diff --git a/R/mice.impute.svm b/R/mice.impute.svm deleted file mode 100644 index df43bdea..00000000 --- a/R/mice.impute.svm +++ /dev/null @@ -1,80 +0,0 @@ -#' Imputation by the Support Vector Machine -#' -#' Imputes binary variables using the Support Vector Machine (SVM) with a -#' bootstrap step to ensure proper multiple imputation. -#' -#' @inheritParams mice.impute.pmm -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' -#' @return Vector with imputed values, same length as \code{sum(wy)} -#' -#' @details -#' The method combines the bootstrap and SVM to generate multiple imputation -#' that is proper. It is particularly useful for high-dimensional data. -#' -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine -#' for High-Dimensional Data with General Missing Patterns in Causal Inference. -#' Journal of Statistical Computation and Simulation. -#' -#' @author Masayoshi Takahashi -#' @family univariate imputation functions -#' @keywords datagen -#' @export -#' @importFrom stats predict runif -mice.impute.svm <- function(y, ry, x, wy = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - # 1. Check for dependencies - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function to work. Please install it.", call. = FALSE) - } - - if (is.null(wy)) { - wy <- !ry - } - - # 2. Bootstrap for proper multiple imputation - # Resampling from observed data (yobs, xobs) - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - - dotx <- xobs[s, , drop = FALSE] - doty <- yobs[s] - - # 3. Training SVM model (using matrix interface) - # type = "C-svc" for classification. prob.model = TRUE allows us to compute probabilities. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 4. Compute predicted probabilities for missing values (wy) - # We assume that the probabilities for category [1] (or second level) are stored in column 2. - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 5. Sampling 0 or 1 based on the probabilities - # Standard stochastic imputation using runif - draw <- as.integer(runif(length(p)) <= p) - - # 6. Post-processing according to the type (such as factor) of the original y - if (is.factor(y)) { - # Transform to the factor type keeping the level of the original y - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From 6fc09cbaab37f35523c1e069a39fe0ad9c377fa2 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:24:56 +0900 Subject: [PATCH 03/40] Add files via upload --- R/miceimputesvm.R | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 R/miceimputesvm.R diff --git a/R/miceimputesvm.R b/R/miceimputesvm.R new file mode 100644 index 00000000..df43bdea --- /dev/null +++ b/R/miceimputesvm.R @@ -0,0 +1,80 @@ +#' Imputation by the Support Vector Machine +#' +#' Imputes binary variables using the Support Vector Machine (SVM) with a +#' bootstrap step to ensure proper multiple imputation. +#' +#' @inheritParams mice.impute.pmm +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' +#' @return Vector with imputed values, same length as \code{sum(wy)} +#' +#' @details +#' The method combines the bootstrap and SVM to generate multiple imputation +#' that is proper. It is particularly useful for high-dimensional data. +#' +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine +#' for High-Dimensional Data with General Missing Patterns in Causal Inference. +#' Journal of Statistical Computation and Simulation. +#' +#' @author Masayoshi Takahashi +#' @family univariate imputation functions +#' @keywords datagen +#' @export +#' @importFrom stats predict runif +mice.impute.svm <- function(y, ry, x, wy = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + # 1. Check for dependencies + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function to work. Please install it.", call. = FALSE) + } + + if (is.null(wy)) { + wy <- !ry + } + + # 2. Bootstrap for proper multiple imputation + # Resampling from observed data (yobs, xobs) + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + + dotx <- xobs[s, , drop = FALSE] + doty <- yobs[s] + + # 3. Training SVM model (using matrix interface) + # type = "C-svc" for classification. prob.model = TRUE allows us to compute probabilities. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 4. Compute predicted probabilities for missing values (wy) + # We assume that the probabilities for category [1] (or second level) are stored in column 2. + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 5. Sampling 0 or 1 based on the probabilities + # Standard stochastic imputation using runif + draw <- as.integer(runif(length(p)) <= p) + + # 6. Post-processing according to the type (such as factor) of the original y + if (is.factor(y)) { + # Transform to the factor type keeping the level of the original y + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From 722fac3e579953eec2b40742e127f52731ec3fb3 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:25:52 +0900 Subject: [PATCH 04/40] Delete R/miceimputesvm.R --- R/miceimputesvm.R | 80 ----------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 R/miceimputesvm.R diff --git a/R/miceimputesvm.R b/R/miceimputesvm.R deleted file mode 100644 index df43bdea..00000000 --- a/R/miceimputesvm.R +++ /dev/null @@ -1,80 +0,0 @@ -#' Imputation by the Support Vector Machine -#' -#' Imputes binary variables using the Support Vector Machine (SVM) with a -#' bootstrap step to ensure proper multiple imputation. -#' -#' @inheritParams mice.impute.pmm -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' -#' @return Vector with imputed values, same length as \code{sum(wy)} -#' -#' @details -#' The method combines the bootstrap and SVM to generate multiple imputation -#' that is proper. It is particularly useful for high-dimensional data. -#' -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine -#' for High-Dimensional Data with General Missing Patterns in Causal Inference. -#' Journal of Statistical Computation and Simulation. -#' -#' @author Masayoshi Takahashi -#' @family univariate imputation functions -#' @keywords datagen -#' @export -#' @importFrom stats predict runif -mice.impute.svm <- function(y, ry, x, wy = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - # 1. Check for dependencies - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function to work. Please install it.", call. = FALSE) - } - - if (is.null(wy)) { - wy <- !ry - } - - # 2. Bootstrap for proper multiple imputation - # Resampling from observed data (yobs, xobs) - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - - dotx <- xobs[s, , drop = FALSE] - doty <- yobs[s] - - # 3. Training SVM model (using matrix interface) - # type = "C-svc" for classification. prob.model = TRUE allows us to compute probabilities. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 4. Compute predicted probabilities for missing values (wy) - # We assume that the probabilities for category [1] (or second level) are stored in column 2. - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 5. Sampling 0 or 1 based on the probabilities - # Standard stochastic imputation using runif - draw <- as.integer(runif(length(p)) <= p) - - # 6. Post-processing according to the type (such as factor) of the original y - if (is.factor(y)) { - # Transform to the factor type keeping the level of the original y - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From e35d7d5cde54f60e07473595d3a47a1f4dfc6928 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:32:20 +0900 Subject: [PATCH 05/40] Add files via upload --- R/mice.impute.svm.R | 80 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..d2c1e4f1 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,80 @@ +#' Imputation by the Support Vector Machine +#' +#' Imputes binary variables using the Support Vector Machine (SVM) with a +#' bootstrap step to ensure proper multiple imputation. +#' +#' @inheritParams mice.impute.pmm +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' +#' @return Vector with imputed values, same length as \code{sum(wy)} +#' +#' @details +#' The method combines the bootstrap and SVM to generate multiple imputation +#' that is proper. It is particularly useful for high-dimensional data. +#' +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine +#' for High-Dimensional Data with General Missing Patterns in Causal Inference. +#' Journal of Statistical Computation and Simulation. +#' +#' @author Masayoshi Takahashi +#' @family univariate imputation functions +#' @keywords datagen +#' @export +#' @importFrom stats predict runif +mice.impute.svm <- function(y, ry, x, wy = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + # 1. Check for dependencies + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function to work. Please install it.", call. = FALSE) + } + + if (is.null(wy)) { + wy <- !ry + } + + # 2. Bootstrap for proper multiple imputation + # Resampling from observed data (yobs, xobs) + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + + dotx <- xobs[s, , drop = FALSE] + doty <- yobs[s] + + # 3. Training SVM model (using matrix interface) + # type = "C-svc" for classification. prob.model = TRUE allows us to compute probabilities. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 4. Compute predicted probabilities for missing values (wy) + # We assume that the probabilities for category [1] (or second level) are stored in column 2. + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 5. Sampling 0 or 1 based on the probabilities + # Standard stochastic imputation using runif + draw <- as.integer(runif(length(p)) <= p) + + # 6. Post-processing according to the type (such as factor) of the original y + if (is.factor(y)) { + # Transform to the factor type keeping the level of the original y + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From 2777bbf58625d036536a08b26b74e73d9a95d289 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:36:15 +0900 Subject: [PATCH 06/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 80 --------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index d2c1e4f1..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,80 +0,0 @@ -#' Imputation by the Support Vector Machine -#' -#' Imputes binary variables using the Support Vector Machine (SVM) with a -#' bootstrap step to ensure proper multiple imputation. -#' -#' @inheritParams mice.impute.pmm -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' -#' @return Vector with imputed values, same length as \code{sum(wy)} -#' -#' @details -#' The method combines the bootstrap and SVM to generate multiple imputation -#' that is proper. It is particularly useful for high-dimensional data. -#' -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine -#' for High-Dimensional Data with General Missing Patterns in Causal Inference. -#' Journal of Statistical Computation and Simulation. -#' -#' @author Masayoshi Takahashi -#' @family univariate imputation functions -#' @keywords datagen -#' @export -#' @importFrom stats predict runif -mice.impute.svm <- function(y, ry, x, wy = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - # 1. Check for dependencies - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function to work. Please install it.", call. = FALSE) - } - - if (is.null(wy)) { - wy <- !ry - } - - # 2. Bootstrap for proper multiple imputation - # Resampling from observed data (yobs, xobs) - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - - dotx <- xobs[s, , drop = FALSE] - doty <- yobs[s] - - # 3. Training SVM model (using matrix interface) - # type = "C-svc" for classification. prob.model = TRUE allows us to compute probabilities. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 4. Compute predicted probabilities for missing values (wy) - # We assume that the probabilities for category [1] (or second level) are stored in column 2. - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 5. Sampling 0 or 1 based on the probabilities - # Standard stochastic imputation using runif - draw <- as.integer(runif(length(p)) <= p) - - # 6. Post-processing according to the type (such as factor) of the original y - if (is.factor(y)) { - # Transform to the factor type keeping the level of the original y - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From 7736c17682e97105c1984411ecd579b4b4333ae1 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:36:37 +0900 Subject: [PATCH 07/40] Add files via upload --- R/mice.impute.svm.R | 70 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..b921f9d6 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,70 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + # To make the imputation 'proper', we draw a bootstrap sample from the observed data + # to account for the variability in the estimation of the SVM decision boundary. + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- yobs[s] + + # 2. SVM Model Training + # We use C-svc for classification. The 'type' argument here is explicitly + # set to avoid mismatch with the 'type' vector passed by the mice package. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, # Required for Platt scaling (predicted probabilities) + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Instead of deterministic classification, we calculate probabilities to allow + # for stochastic draws, satisfying the requirements for proper multiple imputation. + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 4. Stochastic drawing + # Generate imputed values based on the predicted probabilities. + draw <- as.integer(runif(length(p)) <= p) + + # 5. Type adjustment + # Ensure the output matches the original class of the target variable. + if (is.factor(y)) { + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From b4db8767daea5bfa93465ac03e594cf451e21fe7 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:46:05 +0900 Subject: [PATCH 08/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 70 --------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index b921f9d6..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,70 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - # To make the imputation 'proper', we draw a bootstrap sample from the observed data - # to account for the variability in the estimation of the SVM decision boundary. - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- yobs[s] - - # 2. SVM Model Training - # We use C-svc for classification. The 'type' argument here is explicitly - # set to avoid mismatch with the 'type' vector passed by the mice package. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, # Required for Platt scaling (predicted probabilities) - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Instead of deterministic classification, we calculate probabilities to allow - # for stochastic draws, satisfying the requirements for proper multiple imputation. - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 4. Stochastic drawing - # Generate imputed values based on the predicted probabilities. - draw <- as.integer(runif(length(p)) <= p) - - # 5. Type adjustment - # Ensure the output matches the original class of the target variable. - if (is.factor(y)) { - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From ec5caaf2994ffe25bd3587fd1df602658324e83c Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:46:23 +0900 Subject: [PATCH 09/40] Add files via upload --- R/mice.impute.svm.R | 70 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..b921f9d6 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,70 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + # To make the imputation 'proper', we draw a bootstrap sample from the observed data + # to account for the variability in the estimation of the SVM decision boundary. + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- yobs[s] + + # 2. SVM Model Training + # We use C-svc for classification. The 'type' argument here is explicitly + # set to avoid mismatch with the 'type' vector passed by the mice package. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, # Required for Platt scaling (predicted probabilities) + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Instead of deterministic classification, we calculate probabilities to allow + # for stochastic draws, satisfying the requirements for proper multiple imputation. + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 4. Stochastic drawing + # Generate imputed values based on the predicted probabilities. + draw <- as.integer(runif(length(p)) <= p) + + # 5. Type adjustment + # Ensure the output matches the original class of the target variable. + if (is.factor(y)) { + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From 60778cce2be1c4bb6e1d5a147ca8e67f62be1ba3 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:52:41 +0900 Subject: [PATCH 10/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 70 --------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index b921f9d6..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,70 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - # To make the imputation 'proper', we draw a bootstrap sample from the observed data - # to account for the variability in the estimation of the SVM decision boundary. - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- yobs[s] - - # 2. SVM Model Training - # We use C-svc for classification. The 'type' argument here is explicitly - # set to avoid mismatch with the 'type' vector passed by the mice package. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, # Required for Platt scaling (predicted probabilities) - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Instead of deterministic classification, we calculate probabilities to allow - # for stochastic draws, satisfying the requirements for proper multiple imputation. - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 4. Stochastic drawing - # Generate imputed values based on the predicted probabilities. - draw <- as.integer(runif(length(p)) <= p) - - # 5. Type adjustment - # Ensure the output matches the original class of the target variable. - if (is.factor(y)) { - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From 8633f3587408cb3a7e2118bb1ef92c189591702d Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:53:22 +0900 Subject: [PATCH 11/40] Add files via upload --- R/mice.impute.svm.R | 70 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..b921f9d6 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,70 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + # To make the imputation 'proper', we draw a bootstrap sample from the observed data + # to account for the variability in the estimation of the SVM decision boundary. + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- yobs[s] + + # 2. SVM Model Training + # We use C-svc for classification. The 'type' argument here is explicitly + # set to avoid mismatch with the 'type' vector passed by the mice package. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, # Required for Platt scaling (predicted probabilities) + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Instead of deterministic classification, we calculate probabilities to allow + # for stochastic draws, satisfying the requirements for proper multiple imputation. + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 4. Stochastic drawing + # Generate imputed values based on the predicted probabilities. + draw <- as.integer(runif(length(p)) <= p) + + # 5. Type adjustment + # Ensure the output matches the original class of the target variable. + if (is.factor(y)) { + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From 9b99621e35c1f743089cd2edee3c3822d47c802b Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:57:23 +0900 Subject: [PATCH 12/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 70 --------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index b921f9d6..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,70 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - # To make the imputation 'proper', we draw a bootstrap sample from the observed data - # to account for the variability in the estimation of the SVM decision boundary. - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- yobs[s] - - # 2. SVM Model Training - # We use C-svc for classification. The 'type' argument here is explicitly - # set to avoid mismatch with the 'type' vector passed by the mice package. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, # Required for Platt scaling (predicted probabilities) - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Instead of deterministic classification, we calculate probabilities to allow - # for stochastic draws, satisfying the requirements for proper multiple imputation. - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 4. Stochastic drawing - # Generate imputed values based on the predicted probabilities. - draw <- as.integer(runif(length(p)) <= p) - - # 5. Type adjustment - # Ensure the output matches the original class of the target variable. - if (is.factor(y)) { - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From adaf0e2c44ebe8f0905568575ec15316b54a2845 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:57:42 +0900 Subject: [PATCH 13/40] Add files via upload --- R/mice.impute.svm.R | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..307ccc93 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,66 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- as.factor(yobs[s]) + + # 2. SVM Model Training + # By forcing doty to be a factor above, ksvm correctly enters classification mode. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Column 2 usually corresponds to the second level of the factor + p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 4. Stochastic drawing + draw <- as.integer(runif(length(p)) <= p) + + # 5. Type adjustment to match the original 'y' + if (is.factor(y)) { + # If original y was a factor, restore its levels and labels + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + # If original y was numeric 0/1, return as integer/numeric + res <- draw + } + + return(res) +} \ No newline at end of file From b0623a78f7fd6938ff9719f29b0764a8ef1a0040 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 11:54:34 +0900 Subject: [PATCH 14/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 66 --------------------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index 307ccc93..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,66 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- as.factor(yobs[s]) - - # 2. SVM Model Training - # By forcing doty to be a factor above, ksvm correctly enters classification mode. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Column 2 usually corresponds to the second level of the factor - p <- predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 4. Stochastic drawing - draw <- as.integer(runif(length(p)) <= p) - - # 5. Type adjustment to match the original 'y' - if (is.factor(y)) { - # If original y was a factor, restore its levels and labels - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - # If original y was numeric 0/1, return as integer/numeric - res <- draw - } - - return(res) -} \ No newline at end of file From 4092907d4df38818d4c60be6171c2aa07542825b Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 11:54:55 +0900 Subject: [PATCH 15/40] Add files via upload --- R/mice.impute.svm.R | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..94361c84 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,66 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- as.factor(yobs[s]) + + # 2. SVM Model Training + # By forcing doty to be a factor above, ksvm correctly enters classification mode. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Column 2 usually corresponds to the second level of the factor + p <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 4. Stochastic drawing + draw <- as.integer(runif(length(p)) <= p) + + # 5. Type adjustment to match the original 'y' + if (is.factor(y)) { + # If original y was a factor, restore its levels and labels + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + # If original y was numeric 0/1, return as integer/numeric + res <- draw + } + + return(res) +} \ No newline at end of file From e6560c4baecf30f7006981d8222fad1074287129 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:07:46 +0900 Subject: [PATCH 16/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 66 --------------------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index 94361c84..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,66 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- as.factor(yobs[s]) - - # 2. SVM Model Training - # By forcing doty to be a factor above, ksvm correctly enters classification mode. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Column 2 usually corresponds to the second level of the factor - p <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 4. Stochastic drawing - draw <- as.integer(runif(length(p)) <= p) - - # 5. Type adjustment to match the original 'y' - if (is.factor(y)) { - # If original y was a factor, restore its levels and labels - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - # If original y was numeric 0/1, return as integer/numeric - res <- draw - } - - return(res) -} \ No newline at end of file From d8cca8bb6db498400449a95d57a6960d661d1b60 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:08:08 +0900 Subject: [PATCH 17/40] Add files via upload --- R/mice.impute.svm.R | 74 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..54482afc --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,74 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- as.factor(yobs[s]) + + # --- Check if both classes exist --- + if (length(unique(doty)) < 2) { + # If only one class exists in the bootstrap sample, + # we cannot fit an SVM. We return the majority (only) class. + single_val <- as.integer(as.character(doty[1])) + draw <- rep(single_val, sum(wy)) + } else { + + # 2. SVM Model Training + # By forcing doty to be a factor above, ksvm correctly enters classification mode. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Column 2 usually corresponds to the second level of the factor + p <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] + + # 4. Stochastic drawing + draw <- as.integer(runif(length(p)) <= p) + } + # 5. Type adjustment to match the original 'y' + if (is.factor(y)) { + # If original y was a factor, restore its levels and labels + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + # If original y was numeric 0/1, return as integer/numeric + res <- draw + } + + return(res) +} \ No newline at end of file From 19c23841440a776f67b6e16a1adc4e4e52c9700f Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:15:57 +0900 Subject: [PATCH 18/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 74 --------------------------------------------- 1 file changed, 74 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index 54482afc..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,74 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- as.factor(yobs[s]) - - # --- Check if both classes exist --- - if (length(unique(doty)) < 2) { - # If only one class exists in the bootstrap sample, - # we cannot fit an SVM. We return the majority (only) class. - single_val <- as.integer(as.character(doty[1])) - draw <- rep(single_val, sum(wy)) - } else { - - # 2. SVM Model Training - # By forcing doty to be a factor above, ksvm correctly enters classification mode. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Column 2 usually corresponds to the second level of the factor - p <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities")[, 2] - - # 4. Stochastic drawing - draw <- as.integer(runif(length(p)) <= p) - } - # 5. Type adjustment to match the original 'y' - if (is.factor(y)) { - # If original y was a factor, restore its levels and labels - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - # If original y was numeric 0/1, return as integer/numeric - res <- draw - } - - return(res) -} \ No newline at end of file From 50faea15d46be7d1e2bc40597f395f24c197f1a9 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:16:16 +0900 Subject: [PATCH 19/40] Add files via upload --- R/mice.impute.svm.R | 75 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..f0c78b1c --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,75 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + + # 1. Bootstrap for estimation uncertainty + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + dotx <- xobs[s, , drop = FALSE] + doty <- as.factor(yobs[s]) + + # --- Check if both classes exist --- + if (length(unique(doty)) < 2) { + # If only one class exists in the bootstrap sample, + # we cannot fit an SVM. We return the majority (only) class. + single_val <- as.integer(as.character(doty[1])) + draw <- rep(single_val, sum(wy)) + } else { + + # 2. SVM Model Training + # By forcing doty to be a factor above, ksvm correctly enters classification mode. + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + # Column 2 usually corresponds to the second level of the factor + p_mat <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities") + p <- p_mat[, 2] + + # 4. Stochastic drawing + draw <- as.integer(runif(length(p)) <= p) + } + # 5. Type adjustment to match the original 'y' + if (is.factor(y)) { + # If original y was a factor, restore its levels and labels + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + # If original y was numeric 0/1, return as integer/numeric + res <- draw + } + + return(res) +} \ No newline at end of file From a2581abb258ba3f035b6cb1ef0222fc8ce638201 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:28:36 +0900 Subject: [PATCH 20/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 75 --------------------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index f0c78b1c..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,75 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - - # 1. Bootstrap for estimation uncertainty - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - dotx <- xobs[s, , drop = FALSE] - doty <- as.factor(yobs[s]) - - # --- Check if both classes exist --- - if (length(unique(doty)) < 2) { - # If only one class exists in the bootstrap sample, - # we cannot fit an SVM. We return the majority (only) class. - single_val <- as.integer(as.character(doty[1])) - draw <- rep(single_val, sum(wy)) - } else { - - # 2. SVM Model Training - # By forcing doty to be a factor above, ksvm correctly enters classification mode. - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... - ) - - # 3. Predict probabilities for fundamental uncertainty - # Column 2 usually corresponds to the second level of the factor - p_mat <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities") - p <- p_mat[, 2] - - # 4. Stochastic drawing - draw <- as.integer(runif(length(p)) <= p) - } - # 5. Type adjustment to match the original 'y' - if (is.factor(y)) { - # If original y was a factor, restore its levels and labels - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - # If original y was numeric 0/1, return as integer/numeric - res <- draw - } - - return(res) -} \ No newline at end of file From cc41ecb94e13588fb5610e8d52015be4127d2bf2 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:28:55 +0900 Subject: [PATCH 21/40] Add files via upload --- R/mice.impute.svm.R | 97 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..7b403eab --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,97 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + n_target <- sum(wy) + + # 1. Bootstrap for estimation uncertainty + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n_obs <- sum(ry) + s <- sample(n_obs, n_obs, replace = TRUE) + + doty <- as.factor(yobs[s]) + dotx <- xobs[s, , drop = FALSE] + + # Initialize draw with NAs + draw <- rep(NA, n_target) + + # --- ULTIMATE SAFEGUARD --- + # Only attempt SVM if we have 2 classes AND enough samples + if (length(unique(doty)) == 2) { + + # Use tryCatch to prevent the "indexes[[j]]" error if SVM/Predict fails + result <- tryCatch({ + # 2. SVM Model Training + fit <- kernlab::ksvm( + x = as.matrix(dotx), + y = doty, + type = "C-svc", + kernel = kernel, + C = C, + scaled = scaled, + prob.model = TRUE, + ... + ) + + # 3. Predict probabilities for fundamental uncertainty + p_mat <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities") + + # 4. Stochastic drawing + # Robustly extract probabilities for the class '1' + # (Check if it's a matrix and has at least 2 columns) + if (is.matrix(p_mat) && ncol(p_mat) >= 2) { + # Assuming the second column is the probability for class '1' + p <- p_mat[, 2] + # Replace any potential NAs in probability with 0.5 (random guess) + p[is.na(p)] <- 0.5 + as.integer(runif(length(p)) <= p) + } else { + NULL # Trigger fallback + } + }, error = function(e) { + NULL # Trigger fallback on any error + }) + + if (!is.null(result)) { + draw <- result + } + } + + # --- FALLBACK: If SVM failed or only 1 class existed --- + if (any(is.na(draw))) { + # Simple random draw from observed data as a backup + # This ensures mice always receives a valid vector + draw[is.na(draw)] <- sample(as.integer(as.character(yobs)), sum(is.na(draw)), replace = TRUE) + } + + # 5. Type adjustment + if (is.factor(y)) { + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} From 0ac55e0ab8061a49dc9d90718516fb48c0ad640d Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Sat, 18 Apr 2026 17:34:25 +0900 Subject: [PATCH 22/40] Add files via upload --- R/mice.impute.svm.R | 109 +++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R index 7b403eab..b7df4012 100644 --- a/R/mice.impute.svm.R +++ b/R/mice.impute.svm.R @@ -3,6 +3,10 @@ #' This function performs proper multiple imputation using the Support Vector Machine (SVM) #' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically #' designed for binary variables in high-dimensional data settings. +#' +#' Note: When using this method in high-dimensional settings, it is recommended +#' to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} +#' from removing predictors before they reach the SVM model. #' #' @inheritParams mice.impute.pmm #' @param type A vector of length \code{ncol(x)} identifying the predictors. @@ -10,13 +14,16 @@ #' @param C Cost of constraints violation (default = 1). #' @param scaled A logical vector indicating the variables to be scaled. #' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param tol Tolerance of termination criterion (default = 0.001). +#' @param kpar List of hyper-parameters for the kernel function (default = list()). #' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. #' @return A vector of length \code{sum(!ry)} with imputed values. #' @references #' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' Journal of Statistical Computation and Simulation. #' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", ...) { +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", tol = 0.001, kpar = list(), ...) { if (!requireNamespace("kernlab", quietly = TRUE)) { stop("Package 'kernlab' is needed for this function. Please install it.") @@ -25,53 +32,62 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR if (is.null(wy)) wy <- !ry n_target <- sum(wy) - # 1. Bootstrap for estimation uncertainty + # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 2.2, Step 1) + # Keeping the original x as in mice.impute.svm.R to maintain simulation performance. xobs <- x[ry, , drop = FALSE] yobs <- y[ry] - n_obs <- sum(ry) - s <- sample(n_obs, n_obs, replace = TRUE) + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) - doty <- as.factor(yobs[s]) - dotx <- xobs[s, , drop = FALSE] + # The model must be trained on the bootstrapped observed data (y*, X*) + y_star <- yobs[s] + x_star <- xobs[s, , drop = FALSE] # Initialize draw with NAs draw <- rep(NA, n_target) - # --- ULTIMATE SAFEGUARD --- - # Only attempt SVM if we have 2 classes AND enough samples - if (length(unique(doty)) == 2) { - - # Use tryCatch to prevent the "indexes[[j]]" error if SVM/Predict fails + # --- SVM Execution based on Takahashi (2026) --- + if (length(unique(y_star)) == 2) { result <- tryCatch({ - # 2. SVM Model Training - fit <- kernlab::ksvm( - x = as.matrix(dotx), - y = doty, - type = "C-svc", - kernel = kernel, - C = C, - scaled = scaled, - prob.model = TRUE, - ... + # 2. SVM Model Training (Takahashi, 2026, Section 2.2, Step 2) + # Using formula interface to match the validated simulation logic. + svm.model <- NULL + utils::capture.output( + svm.model <- suppressWarnings( + suppressMessages( + kernlab::ksvm( + y_star ~ x_star, + type = "C-svc", + kernel = kernel, + cross = 0, + C = C, + scaled = scaled, + prob.model = TRUE, + tol = tol, + kpar = kpar, + ... + ) + ) + ) + ) + + # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 2.2, Step 3) + p_mat <- NULL + utils::capture.output( + p_mat <- suppressWarnings( + suppressMessages( + kernlab::predict(svm.model, x[wy, , drop = FALSE], type = "probabilities") + ) + ) ) - # 3. Predict probabilities for fundamental uncertainty - p_mat <- kernlab::predict(fit, as.matrix(x[wy, , drop = FALSE]), type = "probabilities") + # 4. Stochastic drawing (Takahashi, 2026, Section 2.2, Step 4) + # Extract probabilities for the positive class (assumed to be the 2nd column). + p <- p_mat[, 2] + as.integer(runif(length(p)) <= p) - # 4. Stochastic drawing - # Robustly extract probabilities for the class '1' - # (Check if it's a matrix and has at least 2 columns) - if (is.matrix(p_mat) && ncol(p_mat) >= 2) { - # Assuming the second column is the probability for class '1' - p <- p_mat[, 2] - # Replace any potential NAs in probability with 0.5 (random guess) - p[is.na(p)] <- 0.5 - as.integer(runif(length(p)) <= p) - } else { - NULL # Trigger fallback - } }, error = function(e) { - NULL # Trigger fallback on any error + NULL # Return NULL to trigger fallback on numerical or logical errors }) if (!is.null(result)) { @@ -79,14 +95,21 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR } } - # --- FALLBACK: If SVM failed or only 1 class existed --- + # --- FALLBACK: If SVM failed or bootstrap sample had only 1 class --- if (any(is.na(draw))) { - # Simple random draw from observed data as a backup - # This ensures mice always receives a valid vector - draw[is.na(draw)] <- sample(as.integer(as.character(yobs)), sum(is.na(draw)), replace = TRUE) + n_miss <- sum(is.na(draw)) + # Standard fallback: simple random sampling from observed values. + y_fill <- sample(yobs, n_miss, replace = TRUE) + + if (is.factor(y)) { + # Align levels with the original factor coding (0-based for integer drawing). + draw[is.na(draw)] <- as.integer(factor(y_fill, levels = levels(y))) - 1 + } else { + draw[is.na(draw)] <- y_fill + } } - # 5. Type adjustment + # 5. Final type adjustment for the mice environment. if (is.factor(y)) { res <- factor(draw, levels = c(0, 1), labels = levels(y)) } else { @@ -94,4 +117,4 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR } return(res) -} +} \ No newline at end of file From 12b65af085e040ca270fe3ef6fe04779d33346bb Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Sat, 18 Apr 2026 22:08:40 +0900 Subject: [PATCH 23/40] Delete R/mice.impute.svm.R --- R/mice.impute.svm.R | 120 -------------------------------------------- 1 file changed, 120 deletions(-) delete mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R deleted file mode 100644 index b7df4012..00000000 --- a/R/mice.impute.svm.R +++ /dev/null @@ -1,120 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' Note: When using this method in high-dimensional settings, it is recommended -#' to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} -#' from removing predictors before they reach the SVM model. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param tol Tolerance of termination criterion (default = 0.001). -#' @param kpar List of hyper-parameters for the kernel function (default = list()). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' Journal of Statistical Computation and Simulation. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", tol = 0.001, kpar = list(), ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - n_target <- sum(wy) - - # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 2.2, Step 1) - # Keeping the original x as in mice.impute.svm.R to maintain simulation performance. - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - # The model must be trained on the bootstrapped observed data (y*, X*) - y_star <- yobs[s] - x_star <- xobs[s, , drop = FALSE] - - # Initialize draw with NAs - draw <- rep(NA, n_target) - - # --- SVM Execution based on Takahashi (2026) --- - if (length(unique(y_star)) == 2) { - result <- tryCatch({ - # 2. SVM Model Training (Takahashi, 2026, Section 2.2, Step 2) - # Using formula interface to match the validated simulation logic. - svm.model <- NULL - utils::capture.output( - svm.model <- suppressWarnings( - suppressMessages( - kernlab::ksvm( - y_star ~ x_star, - type = "C-svc", - kernel = kernel, - cross = 0, - C = C, - scaled = scaled, - prob.model = TRUE, - tol = tol, - kpar = kpar, - ... - ) - ) - ) - ) - - # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 2.2, Step 3) - p_mat <- NULL - utils::capture.output( - p_mat <- suppressWarnings( - suppressMessages( - kernlab::predict(svm.model, x[wy, , drop = FALSE], type = "probabilities") - ) - ) - ) - - # 4. Stochastic drawing (Takahashi, 2026, Section 2.2, Step 4) - # Extract probabilities for the positive class (assumed to be the 2nd column). - p <- p_mat[, 2] - as.integer(runif(length(p)) <= p) - - }, error = function(e) { - NULL # Return NULL to trigger fallback on numerical or logical errors - }) - - if (!is.null(result)) { - draw <- result - } - } - - # --- FALLBACK: If SVM failed or bootstrap sample had only 1 class --- - if (any(is.na(draw))) { - n_miss <- sum(is.na(draw)) - # Standard fallback: simple random sampling from observed values. - y_fill <- sample(yobs, n_miss, replace = TRUE) - - if (is.factor(y)) { - # Align levels with the original factor coding (0-based for integer drawing). - draw[is.na(draw)] <- as.integer(factor(y_fill, levels = levels(y))) - 1 - } else { - draw[is.na(draw)] <- y_fill - } - } - - # 5. Final type adjustment for the mice environment. - if (is.factor(y)) { - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From 29c364a5e94dd151bcbf9728dabd48fd70346bea Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Sat, 18 Apr 2026 22:09:03 +0900 Subject: [PATCH 24/40] Add files via upload --- R/mice.impute.svm.R | 117 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 R/mice.impute.svm.R diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..b9a23b01 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,117 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' Note: When using this method in high-dimensional settings, it is recommended +#' to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} +#' from removing predictors before they reach the SVM model. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param tol Tolerance of termination criterion (default = 0.001). +#' @param kpar List of hyper-parameters for the kernel function (default = list()). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' Journal of Statistical Computation and Simulation. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", tol = 0.001, kpar = list(), ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + n_target <- sum(wy) + + # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 2.2, Step 1) + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + # The model must be trained on the bootstrapped observed data (y*, X*) + y_star <- yobs[s] + x_star <- xobs[s, , drop = FALSE] + + # Initialize draw with NAs + draw <- rep(NA, n_target) + + # 2. SVM Model Training (Takahashi, 2026, Section 2.2, Step 2) + if (length(unique(y_star)) == 2) { + result <- tryCatch({ + svm.model <- NULL + utils::capture.output( + svm.model <- suppressWarnings( + suppressMessages( + kernlab::ksvm( + y_star ~ x_star, + type = "C-svc", + kernel = kernel, + cross = 0, + C = C, + scaled = scaled, + prob.model = TRUE, + tol = tol, + kpar = kpar, + ... + ) + ) + ) + ) + + # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 2.2, Step 3) + p_mat <- NULL + utils::capture.output( + p_mat <- suppressWarnings( + suppressMessages( + kernlab::predict(svm.model, x[wy, , drop = FALSE], type = "probabilities") + ) + ) + ) + + # 4. Stochastic drawing (Takahashi, 2026, Section 2.2, Step 4) + # Extract probabilities for the positive class (assumed to be the 2nd column). + p <- p_mat[, 2] + as.integer(runif(length(p)) <= p) + + }, error = function(e) { + NULL # Return NULL to trigger fallback on numerical or logical errors + }) + + if (!is.null(result)) { + draw <- result + } + } + + # --- FALLBACK: If SVM failed or bootstrap sample had only 1 class --- + if (any(is.na(draw))) { + n_miss <- sum(is.na(draw)) + # Standard fallback: simple random sampling from observed values. + y_fill <- sample(yobs, n_miss, replace = TRUE) + + if (is.factor(y)) { + # Align levels with the original factor coding (0-based for integer drawing). + draw[is.na(draw)] <- as.integer(factor(y_fill, levels = levels(y))) - 1 + } else { + draw[is.na(draw)] <- y_fill + } + } + + # 5. Final type adjustment for the mice environment. + if (is.factor(y)) { + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file From 086ffff37f85c20334be93d3fd928b990041d830 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Sun, 19 Apr 2026 12:00:54 +0900 Subject: [PATCH 25/40] Update mice.impute.svm.R --- R/mice.impute.svm.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R index b9a23b01..1d5f85a2 100644 --- a/R/mice.impute.svm.R +++ b/R/mice.impute.svm.R @@ -32,7 +32,7 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR if (is.null(wy)) wy <- !ry n_target <- sum(wy) - # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 2.2, Step 1) + # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 3.3, Steps 1-2) xobs <- x[ry, , drop = FALSE] yobs <- y[ry] n1 <- sum(ry) @@ -45,7 +45,7 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR # Initialize draw with NAs draw <- rep(NA, n_target) - # 2. SVM Model Training (Takahashi, 2026, Section 2.2, Step 2) + # 2. SVM Model Training (Takahashi, 2026, Section 3.3, Step 3) if (length(unique(y_star)) == 2) { result <- tryCatch({ svm.model <- NULL @@ -68,7 +68,7 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR ) ) - # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 2.2, Step 3) + # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 3.3, Step 4) p_mat <- NULL utils::capture.output( p_mat <- suppressWarnings( @@ -78,7 +78,7 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR ) ) - # 4. Stochastic drawing (Takahashi, 2026, Section 2.2, Step 4) + # 4. Stochastic drawing (Takahashi, 2026, Section 3.3, Step 5) # Extract probabilities for the positive class (assumed to be the 2nd column). p <- p_mat[, 2] as.integer(runif(length(p)) <= p) @@ -114,4 +114,4 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR } return(res) -} \ No newline at end of file +} From 3bd6d82f1bdf95a911b14a69de912801e044bd16 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 13:54:38 +0900 Subject: [PATCH 26/40] Add files via upload --- DESCRIPTION | 215 +++++++++--------- NAMESPACE | 551 +++++++++++++++++++++++---------------------- filter.mids.Rd | 77 +++++++ mcar.Rd | 135 +++++++++++ mice.impute.svm.R | 117 ++++++++++ mice.impute.svm.Rd | 67 ++++++ 6 files changed, 780 insertions(+), 382 deletions(-) create mode 100644 filter.mids.Rd create mode 100644 mcar.Rd create mode 100644 mice.impute.svm.R create mode 100644 mice.impute.svm.Rd diff --git a/DESCRIPTION b/DESCRIPTION index cd300dbb..d4461dab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,107 +1,108 @@ -Package: mice -Type: Package -Version: 3.19.5 -Title: Multivariate Imputation by Chained Equations -Date: 2026-05-04 -Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"), - email = "stef.vanbuuren@tno.nl"), - person("Karin", "Groothuis-Oudshoorn", role = "aut", - email = "c.g.m.oudshoorn@utwente.nl"), - person("Gerko","Vink", role = "ctb", - email = "g.vink@uu.nl"), - person("Rianne","Schouten", role = "ctb", - email = "R.M.Schouten@uu.nl"), - person("Alexander", "Robitzsch", role = "ctb", - email = "robitzsch@ipn.uni-kiel.de"), - person("Patrick", "Rockenschaub", role = "ctb", - email = "rockenschaub.patrick@gmail.com"), - person("Lisa","Doove", role = "ctb", - email = "lisa.doove@ppw.kuleuven.be"), - person("Shahab","Jolani", role = "ctb", - email = "s.jolani@maastrichtuniversity.nl"), - person("Margarita","Moreno-Betancur", role="ctb", - email = "margarita.moreno@mcri.edu.au"), - person("Ian", "White", role="ctb", - email = "ian.white@ucl.ac.uk"), - person("Philipp","Gaffert", role = "ctb", - email = "philipp.gaffert@gfk.com"), - person("Florian","Meinfelder", role = "ctb", - email = "florian.meinfelder@uni-bamberg.de"), - person("Bernie","Gray", role = "ctb", - email = "bfgray3@gmail.com"), - person("Vincent", "Arel-Bundock", role = "ctb", - email = "vincent.arel-bundock@umontreal.ca"), - person("Mingyang", "Cai", role = "ctb", - email = "m.cai@uu.nl"), - person("Thom", "Volker", role = "ctb", - email = "t.b.volker@uu.nl"), - person("Edoardo", "Costantini", role = "ctb", - email = "e.costantini@tilburguniversity.edu"), - person("Caspar", "van Lissa", role = "ctb", - email = "c.j.vanlissa@uu.nl"), - person("Hanne", "Oberman", role = "ctb", - email = "h.i.oberman@uu.nl"), - person("Stephen", "Wade", role = "ctb", - email = "stephematician@gmail.com"), - person("Florian", "van Leeuwen", role = "ctb", - email = "f.d.vanleeuwen@uu.nl"), - person("Frederik", "Fabricius-Bjerre", role = "ctb", - email = "frederik@fabriciusbjerre.dk")) -Maintainer: Stef van Buuren -Depends: - R (>= 2.10.0) -Imports: - broom, - dplyr, - glmnet, - graphics, - grDevices, - lattice, - mitml, - nnet, - Rcpp, - rpart, - stats, - tidyr, - utils -Suggests: - broom.mixed, - future, - furrr, - haven, - knitr, - literanger, - lme4, - MASS, - miceadds, - pan, - parallelly, - purrr, - ranger, - randomForest, - rmarkdown, - rstan, - survival, - testthat -Description: Multiple imputation using Fully Conditional Specification (FCS) - implemented by the MICE algorithm as described in Van Buuren and - Groothuis-Oudshoorn (2011) . Each variable has - its own imputation model. Built-in imputation models are provided for - continuous data (predictive mean matching, normal), binary data (logistic - regression), unordered categorical data (polytomous logistic regression) - and ordered categorical data (proportional odds). MICE can also impute - continuous two-level data (normal model, pan, second-level variables). - Passive imputation can be used to maintain consistency between variables. - Various diagnostic plots are available to inspect the quality of the - imputations. -Encoding: UTF-8 -LazyLoad: yes -LazyData: yes -URL: https://github.com/amices/mice, - https://amices.org/mice/, - https://stefvanbuuren.name/fimd/ -BugReports: https://github.com/amices/mice/issues -LinkingTo: cpp11, Rcpp -License: GPL (>= 2) -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 +Package: mice +Type: Package +Version: 3.19.3 +Title: Multivariate Imputation by Chained Equations +Date: 2026-02-24 +Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"), + email = "stef.vanbuuren@tno.nl"), + person("Karin", "Groothuis-Oudshoorn", role = "aut", + email = "c.g.m.oudshoorn@utwente.nl"), + person("Gerko","Vink", role = "ctb", + email = "g.vink@uu.nl"), + person("Rianne","Schouten", role = "ctb", + email = "R.M.Schouten@uu.nl"), + person("Alexander", "Robitzsch", role = "ctb", + email = "robitzsch@ipn.uni-kiel.de"), + person("Patrick", "Rockenschaub", role = "ctb", + email = "rockenschaub.patrick@gmail.com"), + person("Lisa","Doove", role = "ctb", + email = "lisa.doove@ppw.kuleuven.be"), + person("Shahab","Jolani", role = "ctb", + email = "s.jolani@maastrichtuniversity.nl"), + person("Margarita","Moreno-Betancur", role="ctb", + email = "margarita.moreno@mcri.edu.au"), + person("Ian", "White", role="ctb", + email = "ian.white@ucl.ac.uk"), + person("Philipp","Gaffert", role = "ctb", + email = "philipp.gaffert@gfk.com"), + person("Florian","Meinfelder", role = "ctb", + email = "florian.meinfelder@uni-bamberg.de"), + person("Bernie","Gray", role = "ctb", + email = "bfgray3@gmail.com"), + person("Vincent", "Arel-Bundock", role = "ctb", + email = "vincent.arel-bundock@umontreal.ca"), + person("Mingyang", "Cai", role = "ctb", + email = "m.cai@uu.nl"), + person("Thom", "Volker", role = "ctb", + email = "t.b.volker@uu.nl"), + person("Edoardo", "Costantini", role = "ctb", + email = "e.costantini@tilburguniversity.edu"), + person("Caspar", "van Lissa", role = "ctb", + email = "c.j.vanlissa@uu.nl"), + person("Hanne", "Oberman", role = "ctb", + email = "h.i.oberman@uu.nl"), + person("Stephen", "Wade", role = "ctb", + email = "stephematician@gmail.com"), + person("Florian", "van Leeuwen", role = "ctb", + email = "f.d.vanleeuwen@uu.nl"), + person("Frederik", "Fabricius-Bjerre", role = "ctb", + email = "frederik@fabriciusbjerre.dk")) +Maintainer: Stef van Buuren +Depends: + R (>= 2.10.0) +Imports: + broom, + dplyr, + glmnet, + graphics, + grDevices, + lattice, + mitml, + nnet, + Rcpp, + rpart, + stats, + tidyr, + utils +Suggests: + broom.mixed, + future, + furrr, + haven, + knitr, + literanger, + lme4, + MASS, + miceadds, + pan, + parallelly, + purrr, + ranger, + randomForest, + rmarkdown, + rstan, + survival, + testthat, + kernlab +Description: Multiple imputation using Fully Conditional Specification (FCS) + implemented by the MICE algorithm as described in Van Buuren and + Groothuis-Oudshoorn (2011) . Each variable has + its own imputation model. Built-in imputation models are provided for + continuous data (predictive mean matching, normal), binary data (logistic + regression), unordered categorical data (polytomous logistic regression) + and ordered categorical data (proportional odds). MICE can also impute + continuous two-level data (normal model, pan, second-level variables). + Passive imputation can be used to maintain consistency between variables. + Various diagnostic plots are available to inspect the quality of the + imputations. +Encoding: UTF-8 +LazyLoad: yes +LazyData: yes +URL: https://github.com/amices/mice, + https://amices.org/mice/, + https://stefvanbuuren.name/fimd/ +BugReports: https://github.com/amices/mice/issues +LinkingTo: cpp11, Rcpp +License: GPL (>= 2) +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.3.3 diff --git a/NAMESPACE b/NAMESPACE index 0fbc635d..f2e0b9e7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,275 +1,276 @@ -# Generated by roxygen2: do not edit by hand - -S3method(anova,mira) -S3method(bwplot,mads) -S3method(bwplot,mids) -S3method(cc,data.frame) -S3method(cc,default) -S3method(cc,matrix) -S3method(cc,mids) -S3method(cci,default) -S3method(cci,mids) -S3method(complete,mids) -S3method(densityplot,mids) -S3method(df.residual,lme) -S3method(df.residual,mer) -S3method(df.residual,mira) -S3method(df.residual,multinom) -S3method(filter,mids) -S3method(glance,mipo) -S3method(ic,data.frame) -S3method(ic,default) -S3method(ic,matrix) -S3method(ic,mids) -S3method(ici,default) -S3method(ici,mids) -S3method(is.nan,data.frame) -S3method(mcar,data.frame) -S3method(plot,mcar_object) -S3method(plot,md.pattern) -S3method(plot,mids) -S3method(predict_mi,list) -S3method(predict_mi,lm) -S3method(predict_mi,mira) -S3method(print,mads) -S3method(print,mcar_object) -S3method(print,mice.anova) -S3method(print,mice.anova.summary) -S3method(print,mids) -S3method(print,mipo) -S3method(print,mipo.summary) -S3method(print,mira) -S3method(stripplot,mids) -S3method(summary,mads) -S3method(summary,mice.anova) -S3method(summary,mids) -S3method(summary,mipo) -S3method(summary,mira) -S3method(tidy,mipo) -S3method(with,mids) -S3method(xyplot,mads) -S3method(xyplot,mids) -export(.norm.draw) -export(.pmm.match) -export(D1) -export(D2) -export(D3) -export(ampute) -export(ampute.default.freq) -export(ampute.default.odds) -export(ampute.default.type) -export(ampute.default.weights) -export(appendbreak) -export(as.mids) -export(as.mira) -export(as.mitml.result) -export(bwplot) -export(cbind) -export(cc) -export(cci) -export(complete) -export(construct.blocks) -export(convergence) -export(densityplot) -export(estimice) -export(extractBS) -export(fico) -export(filter) -export(fix.coef) -export(flux) -export(fluxplot) -export(futuremice) -export(getfit) -export(getqbar) -export(glance) -export(glm.mids) -export(ibind) -export(ic) -export(ici) -export(is.mads) -export(is.mids) -export(is.mipo) -export(is.mira) -export(is.mitml.result) -export(lm.mids) -export(mads) -export(make.blocks) -export(make.blots) -export(make.calltype) -export(make.formulas) -export(make.method) -export(make.post) -export(make.predictorMatrix) -export(make.visitSequence) -export(make.where) -export(matchindex) -export(mcar) -export(md.pairs) -export(md.pattern) -export(mdc) -export(mice) -export(mice.impute.2l.bin) -export(mice.impute.2l.lmer) -export(mice.impute.2l.norm) -export(mice.impute.2l.pan) -export(mice.impute.2lonly.mean) -export(mice.impute.2lonly.norm) -export(mice.impute.2lonly.pmm) -export(mice.impute.cart) -export(mice.impute.jomoImpute) -export(mice.impute.lasso.logreg) -export(mice.impute.lasso.norm) -export(mice.impute.lasso.select.logreg) -export(mice.impute.lasso.select.norm) -export(mice.impute.lda) -export(mice.impute.logreg) -export(mice.impute.logreg.boot) -export(mice.impute.mean) -export(mice.impute.midastouch) -export(mice.impute.mnar.logreg) -export(mice.impute.mnar.norm) -export(mice.impute.mpmm) -export(mice.impute.norm) -export(mice.impute.norm.boot) -export(mice.impute.norm.nob) -export(mice.impute.norm.predict) -export(mice.impute.panImpute) -export(mice.impute.passive) -export(mice.impute.pmm) -export(mice.impute.polr) -export(mice.impute.polyreg) -export(mice.impute.quadratic) -export(mice.impute.rf) -export(mice.impute.ri) -export(mice.impute.sample) -export(mice.mids) -export(mice.theme) -export(mids) -export(mids2mplus) -export(mids2spss) -export(mipo) -export(mira) -export(name.blocks) -export(name.formulas) -export(ncc) -export(nelsonaalen) -export(nic) -export(nimp) -export(norm.draw) -export(parlmice) -export(pool) -export(pool.compare) -export(pool.r.squared) -export(pool.scalar) -export(pool.scalar.syn) -export(pool.syn) -export(pool.table) -export(predict_mi) -export(quickpred) -export(rbind) -export(squeeze) -export(stripplot) -export(supports.transparent) -export(tidy) -export(version) -export(xyplot) -importFrom(Rcpp,evalCpp) -importFrom(broom,glance) -importFrom(broom,tidy) -importFrom(dplyr,"%>%") -importFrom(dplyr,.data) -importFrom(dplyr,any_of) -importFrom(dplyr,bind_cols) -importFrom(dplyr,bind_rows) -importFrom(dplyr,filter) -importFrom(dplyr,group_by) -importFrom(dplyr,lead) -importFrom(dplyr,mutate) -importFrom(dplyr,n) -importFrom(dplyr,pull) -importFrom(dplyr,relocate) -importFrom(dplyr,row_number) -importFrom(dplyr,select) -importFrom(dplyr,summarize) -importFrom(glmnet,cv.glmnet) -importFrom(grDevices,dev.off) -importFrom(graphics,abline) -importFrom(graphics,axis) -importFrom(graphics,box) -importFrom(graphics,hist) -importFrom(graphics,par) -importFrom(graphics,plot) -importFrom(graphics,plot.new) -importFrom(graphics,plot.window) -importFrom(graphics,points) -importFrom(graphics,rect) -importFrom(graphics,text) -importFrom(lattice,bwplot) -importFrom(lattice,densityplot) -importFrom(lattice,stripplot) -importFrom(lattice,xyplot) -importFrom(mitml,jomoImpute) -importFrom(mitml,mitmlComplete) -importFrom(mitml,panImpute) -importFrom(mitml,testModels) -importFrom(nnet,multinom) -importFrom(rpart,rpart) -importFrom(rpart,rpart.control) -importFrom(stats,C) -importFrom(stats,aggregate) -importFrom(stats,as.formula) -importFrom(stats,binomial) -importFrom(stats,cancor) -importFrom(stats,coef) -importFrom(stats,complete.cases) -importFrom(stats,confint) -importFrom(stats,contr.treatment) -importFrom(stats,cor) -importFrom(stats,cov) -importFrom(stats,df.residual) -importFrom(stats,fitted) -importFrom(stats,formula) -importFrom(stats,gaussian) -importFrom(stats,getCall) -importFrom(stats,glm) -importFrom(stats,is.empty.model) -importFrom(stats,lm) -importFrom(stats,lm.fit) -importFrom(stats,median) -importFrom(stats,model.frame) -importFrom(stats,model.matrix) -importFrom(stats,na.exclude) -importFrom(stats,na.omit) -importFrom(stats,na.pass) -importFrom(stats,pchisq) -importFrom(stats,pf) -importFrom(stats,predict) -importFrom(stats,pt) -importFrom(stats,qt) -importFrom(stats,quantile) -importFrom(stats,quasibinomial) -importFrom(stats,rbinom) -importFrom(stats,rchisq) -importFrom(stats,reformulate) -importFrom(stats,rgamma) -importFrom(stats,rnorm) -importFrom(stats,runif) -importFrom(stats,sd) -importFrom(stats,spline) -importFrom(stats,summary.glm) -importFrom(stats,terms) -importFrom(stats,update) -importFrom(stats,var) -importFrom(stats,vcov) -importFrom(tidyr,complete) -importFrom(utils,askYesNo) -importFrom(utils,flush.console) -importFrom(utils,hasName) -importFrom(utils,head) -importFrom(utils,install.packages) -importFrom(utils,methods) -importFrom(utils,packageDescription) -importFrom(utils,packageVersion) -importFrom(utils,tail) -importFrom(utils,write.table) -useDynLib(mice, .registration = TRUE) +# Generated by roxygen2: do not edit by hand + +S3method(anova,mira) +S3method(bwplot,mads) +S3method(bwplot,mids) +S3method(cc,data.frame) +S3method(cc,default) +S3method(cc,matrix) +S3method(cc,mids) +S3method(cci,default) +S3method(cci,mids) +S3method(complete,mids) +S3method(densityplot,mids) +S3method(df.residual,lme) +S3method(df.residual,mer) +S3method(df.residual,mira) +S3method(df.residual,multinom) +S3method(filter,mids) +S3method(glance,mipo) +S3method(ic,data.frame) +S3method(ic,default) +S3method(ic,matrix) +S3method(ic,mids) +S3method(ici,default) +S3method(ici,mids) +S3method(is.nan,data.frame) +S3method(mcar,data.frame) +S3method(plot,mcar_object) +S3method(plot,md.pattern) +S3method(plot,mids) +S3method(predict_mi,list) +S3method(predict_mi,lm) +S3method(predict_mi,mira) +S3method(print,mads) +S3method(print,mcar_object) +S3method(print,mice.anova) +S3method(print,mice.anova.summary) +S3method(print,mids) +S3method(print,mipo) +S3method(print,mipo.summary) +S3method(print,mira) +S3method(stripplot,mids) +S3method(summary,mads) +S3method(summary,mice.anova) +S3method(summary,mids) +S3method(summary,mipo) +S3method(summary,mira) +S3method(tidy,mipo) +S3method(with,mids) +S3method(xyplot,mads) +S3method(xyplot,mids) +export(.norm.draw) +export(.pmm.match) +export(D1) +export(D2) +export(D3) +export(ampute) +export(ampute.default.freq) +export(ampute.default.odds) +export(ampute.default.type) +export(ampute.default.weights) +export(appendbreak) +export(as.mids) +export(as.mira) +export(as.mitml.result) +export(bwplot) +export(cbind) +export(cc) +export(cci) +export(complete) +export(construct.blocks) +export(convergence) +export(densityplot) +export(estimice) +export(extractBS) +export(fico) +export(filter) +export(fix.coef) +export(flux) +export(fluxplot) +export(futuremice) +export(getfit) +export(getqbar) +export(glance) +export(glm.mids) +export(ibind) +export(ic) +export(ici) +export(is.mads) +export(is.mids) +export(is.mipo) +export(is.mira) +export(is.mitml.result) +export(lm.mids) +export(mads) +export(make.blocks) +export(make.blots) +export(make.calltype) +export(make.formulas) +export(make.method) +export(make.post) +export(make.predictorMatrix) +export(make.visitSequence) +export(make.where) +export(matchindex) +export(mcar) +export(md.pairs) +export(md.pattern) +export(mdc) +export(mice) +export(mice.impute.2l.bin) +export(mice.impute.2l.lmer) +export(mice.impute.2l.norm) +export(mice.impute.2l.pan) +export(mice.impute.2lonly.mean) +export(mice.impute.2lonly.norm) +export(mice.impute.2lonly.pmm) +export(mice.impute.cart) +export(mice.impute.jomoImpute) +export(mice.impute.lasso.logreg) +export(mice.impute.lasso.norm) +export(mice.impute.lasso.select.logreg) +export(mice.impute.lasso.select.norm) +export(mice.impute.lda) +export(mice.impute.logreg) +export(mice.impute.logreg.boot) +export(mice.impute.mean) +export(mice.impute.midastouch) +export(mice.impute.mnar.logreg) +export(mice.impute.mnar.norm) +export(mice.impute.mpmm) +export(mice.impute.norm) +export(mice.impute.norm.boot) +export(mice.impute.norm.nob) +export(mice.impute.norm.predict) +export(mice.impute.panImpute) +export(mice.impute.passive) +export(mice.impute.pmm) +export(mice.impute.polr) +export(mice.impute.polyreg) +export(mice.impute.quadratic) +export(mice.impute.rf) +export(mice.impute.ri) +export(mice.impute.sample) +export(mice.impute.svm) +export(mice.mids) +export(mice.theme) +export(mids) +export(mids2mplus) +export(mids2spss) +export(mipo) +export(mira) +export(name.blocks) +export(name.formulas) +export(ncc) +export(nelsonaalen) +export(nic) +export(nimp) +export(norm.draw) +export(parlmice) +export(pool) +export(pool.compare) +export(pool.r.squared) +export(pool.scalar) +export(pool.scalar.syn) +export(pool.syn) +export(pool.table) +export(predict_mi) +export(quickpred) +export(rbind) +export(squeeze) +export(stripplot) +export(supports.transparent) +export(tidy) +export(version) +export(xyplot) +importFrom(Rcpp,evalCpp) +importFrom(broom,glance) +importFrom(broom,tidy) +importFrom(dplyr,"%>%") +importFrom(dplyr,.data) +importFrom(dplyr,any_of) +importFrom(dplyr,bind_cols) +importFrom(dplyr,bind_rows) +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,lead) +importFrom(dplyr,mutate) +importFrom(dplyr,n) +importFrom(dplyr,pull) +importFrom(dplyr,relocate) +importFrom(dplyr,row_number) +importFrom(dplyr,select) +importFrom(dplyr,summarize) +importFrom(glmnet,cv.glmnet) +importFrom(grDevices,dev.off) +importFrom(graphics,abline) +importFrom(graphics,axis) +importFrom(graphics,box) +importFrom(graphics,hist) +importFrom(graphics,par) +importFrom(graphics,plot) +importFrom(graphics,plot.new) +importFrom(graphics,plot.window) +importFrom(graphics,points) +importFrom(graphics,rect) +importFrom(graphics,text) +importFrom(lattice,bwplot) +importFrom(lattice,densityplot) +importFrom(lattice,stripplot) +importFrom(lattice,xyplot) +importFrom(mitml,jomoImpute) +importFrom(mitml,mitmlComplete) +importFrom(mitml,panImpute) +importFrom(mitml,testModels) +importFrom(nnet,multinom) +importFrom(rpart,rpart) +importFrom(rpart,rpart.control) +importFrom(stats,C) +importFrom(stats,aggregate) +importFrom(stats,as.formula) +importFrom(stats,binomial) +importFrom(stats,cancor) +importFrom(stats,coef) +importFrom(stats,complete.cases) +importFrom(stats,confint) +importFrom(stats,contr.treatment) +importFrom(stats,cor) +importFrom(stats,cov) +importFrom(stats,df.residual) +importFrom(stats,fitted) +importFrom(stats,formula) +importFrom(stats,gaussian) +importFrom(stats,getCall) +importFrom(stats,glm) +importFrom(stats,is.empty.model) +importFrom(stats,lm) +importFrom(stats,lm.fit) +importFrom(stats,median) +importFrom(stats,model.frame) +importFrom(stats,model.matrix) +importFrom(stats,na.exclude) +importFrom(stats,na.omit) +importFrom(stats,na.pass) +importFrom(stats,pchisq) +importFrom(stats,pf) +importFrom(stats,predict) +importFrom(stats,pt) +importFrom(stats,qt) +importFrom(stats,quantile) +importFrom(stats,quasibinomial) +importFrom(stats,rbinom) +importFrom(stats,rchisq) +importFrom(stats,reformulate) +importFrom(stats,rgamma) +importFrom(stats,rnorm) +importFrom(stats,runif) +importFrom(stats,sd) +importFrom(stats,spline) +importFrom(stats,summary.glm) +importFrom(stats,terms) +importFrom(stats,update) +importFrom(stats,var) +importFrom(stats,vcov) +importFrom(tidyr,complete) +importFrom(utils,askYesNo) +importFrom(utils,flush.console) +importFrom(utils,hasName) +importFrom(utils,head) +importFrom(utils,install.packages) +importFrom(utils,methods) +importFrom(utils,packageDescription) +importFrom(utils,packageVersion) +importFrom(utils,tail) +importFrom(utils,write.table) +useDynLib(mice, .registration = TRUE) diff --git a/filter.mids.Rd b/filter.mids.Rd new file mode 100644 index 00000000..3ae563cb --- /dev/null +++ b/filter.mids.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter.R +\name{filter.mids} +\alias{filter.mids} +\title{Subset rows of a \code{mids} object} +\usage{ +\method{filter}{mids}(.data, ..., .preserve = FALSE) +} +\arguments{ +\item{.data}{A \code{mids} object.} + +\item{...}{Expressions that return a +logical value, and are defined in terms of the variables in \code{.data$data}. +If multiple expressions are specified, they are combined with the \code{&} operator. +Only rows for which all conditions evaluate to \code{TRUE} are kept.} + +\item{.preserve}{Relevant when the \code{.data} input is grouped. +If \code{.preserve = FALSE} (the default), the grouping structure +is recalculated based on the resulting data, otherwise the grouping is kept as is.} +} +\value{ +An S3 object of class \code{mids} +} +\description{ +This function takes a \code{mids} object and returns a new +\code{mids} object that pertains to the subset of the data +identified by the expression in \dots. The expression may use +column values from the incomplete data in \code{.data$data}. +} +\note{ +The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. +The function constructs the elements of the filtered \code{mids} object as follows: +\tabular{ll}{ +\code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr +\code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr +\code{m} \tab Equals \code{.data$m}\cr +\code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr +\code{blocks} \tab Equals \code{.data$blocks}\cr +\code{call} \tab Equals \code{.data$call}\cr +\code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr +\code{method} \tab Equals \code{.data$method}\cr +\code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr +\code{visitSequence} \tab Equals \code{.data$visitSequence}\cr +\code{formulas} \tab Equals \code{.data$formulas}\cr +\code{post} \tab Equals \code{.data$post}\cr +\code{blots} \tab Equals \code{.data$blots}\cr +\code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr +\code{seed} \tab Equals \code{.data$seed}\cr +\code{iteration} \tab Equals \code{.data$iteration}\cr +\code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr +\code{chainMean} \tab Set to \code{NULL}\cr +\code{chainVar} \tab Set to \code{NULL}\cr +\code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr +\code{version} \tab Replaced with current version\cr +\code{date} \tab Replaced with current date +} +} +\examples{ +imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) + +# example with external logical vector +imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) + +nrow(complete(imp)) +nrow(complete(imp_f)) + +# example with calculated include vector +imp_f2 <- filter(imp, age >= 2 & hyp == 1) +nrow(complete(imp_f2)) # should be 5 +} +\seealso{ +\code{\link[dplyr]{filter}} +} +\author{ +Patrick Rockenschaub +} +\keyword{manip} diff --git a/mcar.Rd b/mcar.Rd new file mode 100644 index 00000000..141b5c95 --- /dev/null +++ b/mcar.Rd @@ -0,0 +1,135 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mcar.R +\name{mcar} +\alias{mcar} +\title{Jamshidian and Jalal's Non-Parametric MCAR Test} +\usage{ +mcar( + x, + imputed = mice(x, method = "norm"), + min_n = 6, + method = "auto", + replications = 10000, + use_chisq = 30, + alpha = 0.05 +) +} +\arguments{ +\item{x}{An object for which a method exists; usually a \code{data.frame}.} + +\item{imputed}{Either an object of class \code{mids}, as returned by +\code{\link[=mice]{mice()}}, or a list of \code{data.frame}s.} + +\item{min_n}{Atomic numeric, must be greater than 1. When there are missing +data patterns with fewer than \code{min_n} cases, all cases with that pattern will +be removed from \code{x} and \code{imputed}.} + +\item{method}{Atomic character. If it is known (or assumed) that data are +either multivariate normally distributed or not, then use either +\code{method = "hawkins"} or \code{method = "nonparametric"}, respectively. +The default argument \code{method = "auto"} follows the procedure outlined in the +Details section, and in Figure 7 of Jamshidian and Jalal (2010).} + +\item{replications}{Number of replications used to simulate the Neyman +distribution when performing Hawkins' test. As this method is based on random +sampling, use a high number of \code{replications} (and optionally, +\code{\link[=set.seed]{set.seed()}}) to minimize Monte Carlo error and ensure reproducibility.} + +\item{use_chisq}{Atomic integer, indicating the minimum number of cases +within a group \emph{k} that triggers the use of asymptotic Chi-square +distribution instead of the emprical distribution in the Neyman uniformity +test, which is performed as part of Hawkins' test.} + +\item{alpha}{Atomic numeric, indicating the significance level of tests.} +} +\value{ +An object of class \code{mcar_object}. +} +\description{ +Test whether missingness is contingent upon the observed variables, +according to the methodology developed by Jamshidian and Jalal (2010) (see +Details). +} +\details{ +Three types of missingness have been distinguished in the literature +(Rubin, 1976): +Missing completely at random (MCAR), which means that missingness is random; +missing at random (MAR), which means that missingness is contingent on the +\emph{observed}; +and missing not at random (MNAR), which means that missingness is related to +unobserved data. + +Jamshidian and Jalal's non-parametric MCAR test assumes that the missing data +are either MCAR or MAR, and tests whether the missingness is independent of +the observed values. If so, the covariance matrices of the imputed data will +be equal accross groups with different patterns of missingness. This test +consists of the following procedure: +\enumerate{ +\item Data are imputed. +\item The imputed data are split into \emph{k} groups according to the +\emph{k} missing data patterns in the original data (see +\code{\link[=md.pattern]{md.pattern()}}). +\item Perform Hawkins' test for equality of covariances across the \emph{k} +groups. +\item If the test is \emph{not significant}, conclude that there is no evidence +against multivariate normality of the data, nor against MCAR. +\item If the test \emph{is significant}, and multivariate normality of the data +can be assumed, then it can be concluded that missingness is MAR. +\item If multivariate normality cannot be assumed, then perform the +Anderson-Darling non-parametric test for equality of covariances across the +\emph{k} groups. +\item If the Anderson-Darling test is \emph{not significant}, this is evidence +against multivariate normality - but no evidence against MCAR. +\item If the Anderson-Darling test \emph{is significant}, this is evidence +it can be concluded that missingness is MAR. +} + +Note that, despite its name in common parlance, an MCAR test can only +indicate whether missingness is MCAR or MAR. The procedure cannot distinguish +MCAR from MNAR, so a non-significant result does not rule out MNAR. + +This is a re-implementation of the function \code{TestMCARNormality}, which was +originally published in the R-packgage \code{MissMech}, which has been removed +from CRAN. This new implementation is faster, as its backend is written in +C++. It also enhances the functionality of the original: +\itemize{ +\item Multiply imputed data can now be used; the median p-value and test +statistic across replications is then reported, as suggested by +Eekhout, Wiel, and Heymans (2017). +\item The printing method for an \code{mcar_object} gives a warning when at +least one p-value of either test was significant. In this case, it is +recommended to inspect the range of p-values, and consider potential +violations of MCAR. +\item A plotting method for an \code{mcar_object} is provided. +\item A plotting method for the \verb{$md.pattern} element of an \code{mcar_object} +is provided. +} +} +\examples{ +res <- mcar(nhanes) +# Examine test results +res +# Plot p-values across imputed data sets +plot(res) +# Plot md patterns used for the test +plot(res, type = "md.pattern") +# Note difference with the raw md.patterns: +md.pattern(nhanes) +} +\references{ +Rubin, D. B. (1976). Inference and Missing Data. Biometrika, Vol. 63, No. 3, +pp. 581-592. \doi{10.2307/2335739} + +Eekhout, I., M. A. Wiel, & M. W. Heymans (2017). Methods for Significance +Testing of Categorical Covariates in Logistic Regression Models After +Multiple Imputation: Power and Applicability Analysis. BMC Medical Research +Methodology 17 (1): 129. + +Jamshidian, M., & Jalal, S. (2010). Tests of homoscedasticity, normality, and +missing completely at random for incomplete multivariate data. Psychometrika, +75(4), 649–674. \doi{10.1007/s11336-010-9175-3} +} +\author{ +Caspar J. Van Lissa +} +\keyword{internal} diff --git a/mice.impute.svm.R b/mice.impute.svm.R new file mode 100644 index 00000000..075f3ca3 --- /dev/null +++ b/mice.impute.svm.R @@ -0,0 +1,117 @@ +#' Imputation by the Support Vector Machine (mice-SVM) +#' +#' This function performs proper multiple imputation using the Support Vector Machine (SVM) +#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +#' designed for binary variables in high-dimensional data settings. +#' +#' Note: When using this method in high-dimensional settings, it is recommended +#' to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} +#' from removing predictors before they reach the SVM model. +#' +#' @inheritParams mice.impute.pmm +#' @param type A vector of length \code{ncol(x)} identifying the predictors. +#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. +#' @param C Cost of constraints violation (default = 1). +#' @param scaled A logical vector indicating the variables to be scaled. +#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). +#' @param tol Tolerance of termination criterion (default = 0.001). +#' @param kpar List of hyper-parameters for the kernel function (default = list()). +#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. +#' @return A vector of length \code{sum(!ry)} with imputed values. +#' @references +#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +#' High-Dimensional Data with General Missing Patterns in Causal Inference. +#' Journal of Statistical Computation and Simulation. +#' @export +mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", tol = 0.001, kpar = list(), ...) { + + if (!requireNamespace("kernlab", quietly = TRUE)) { + stop("Package 'kernlab' is needed for this function. Please install it.") + } + + if (is.null(wy)) wy <- !ry + n_target <- sum(wy) + + # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 3.3, Steps 1-2) + xobs <- x[ry, , drop = FALSE] + yobs <- y[ry] + n1 <- sum(ry) + s <- sample(n1, n1, replace = TRUE) + + # The model must be trained on the bootstrapped observed data (y*, X*) + y_star <- yobs[s] + x_star <- xobs[s, , drop = FALSE] + + # Initialize draw with NAs + draw <- rep(NA, n_target) + + # 2. SVM Model Training (Takahashi, 2026, Section 3.3, Step 3) + if (length(unique(y_star)) == 2) { + result <- tryCatch({ + svm.model <- NULL + utils::capture.output( + svm.model <- suppressWarnings( + suppressMessages( + kernlab::ksvm( + y_star ~ x_star, + type = "C-svc", + kernel = kernel, + cross = 0, + C = C, + scaled = scaled, + prob.model = TRUE, + tol = tol, + kpar = kpar, + ... + ) + ) + ) + ) + + # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 3.3, Step 4) + p_mat <- NULL + utils::capture.output( + p_mat <- suppressWarnings( + suppressMessages( + kernlab::predict(svm.model, x[wy, , drop = FALSE], type = "probabilities") + ) + ) + ) + + # 4. Stochastic drawing (Takahashi, 2026, Section 3.3, Step 5) + # Extract probabilities for the positive class (assumed to be the 2nd column). + p <- p_mat[, 2] + as.integer(runif(length(p)) <= p) + + }, error = function(e) { + NULL # Return NULL to trigger fallback on numerical or logical errors + }) + + if (!is.null(result)) { + draw <- result + } + } + + # --- FALLBACK: If SVM failed or bootstrap sample had only 1 class --- + if (any(is.na(draw))) { + n_miss <- sum(is.na(draw)) + # Standard fallback: simple random sampling from observed values. + y_fill <- sample(yobs, n_miss, replace = TRUE) + + if (is.factor(y)) { + # Align levels with the original factor coding (0-based for integer drawing). + draw[is.na(draw)] <- as.integer(factor(y_fill, levels = levels(y))) - 1 + } else { + draw[is.na(draw)] <- y_fill + } + } + + # 5. Final type adjustment for the mice environment. + if (is.factor(y)) { + res <- factor(draw, levels = c(0, 1), labels = levels(y)) + } else { + res <- draw + } + + return(res) +} \ No newline at end of file diff --git a/mice.impute.svm.Rd b/mice.impute.svm.Rd new file mode 100644 index 00000000..4a48c10e --- /dev/null +++ b/mice.impute.svm.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mice.impute.svm.R +\name{mice.impute.svm} +\alias{mice.impute.svm} +\title{Imputation by the Support Vector Machine (mice-SVM)} +\usage{ +mice.impute.svm( + y, + ry, + x, + wy = NULL, + type = NULL, + C = 1, + scaled = TRUE, + kernel = "vanilladot", + tol = 0.001, + kpar = list(), + ... +) +} +\arguments{ +\item{y}{Vector to be imputed} + +\item{ry}{Logical vector of length \code{length(y)} indicating the +the subset \code{y[ry]} of elements in \code{y} to which the imputation +model is fitted. The \code{ry} generally distinguishes the observed +(\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} + +\item{x}{Numeric design matrix with \code{length(y)} rows with predictors for +\code{y}. Matrix \code{x} may have no missing values.} + +\item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value +indicates locations in \code{y} for which imputations are created.} + +\item{type}{A vector of length \code{ncol(x)} identifying the predictors. +Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls.} + +\item{C}{Cost of constraints violation (default = 1).} + +\item{scaled}{A logical vector indicating the variables to be scaled.} + +\item{kernel}{The kernel function used in training and predicting (default = "vanilladot").} + +\item{tol}{Tolerance of termination criterion (default = 0.001).} + +\item{kpar}{List of hyper-parameters for the kernel function (default = list()).} + +\item{...}{Other named arguments to be passed to \code{kernlab::ksvm()}.} +} +\value{ +A vector of length \code{sum(!ry)} with imputed values. +} +\description{ +This function performs proper multiple imputation using the Support Vector Machine (SVM) +combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +designed for binary variables in high-dimensional data settings. +} +\details{ +Note: When using this method in high-dimensional settings, it is recommended +to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} +from removing predictors before they reach the SVM model. +} +\references{ +Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +High-Dimensional Data with General Missing Patterns in Causal Inference. +Journal of Statistical Computation and Simulation. +} From bc41dbca67ad0e9d80d15d20460ac4eea6843ff9 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 13:58:23 +0900 Subject: [PATCH 27/40] Delete filter.mids.Rd --- filter.mids.Rd | 77 -------------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 filter.mids.Rd diff --git a/filter.mids.Rd b/filter.mids.Rd deleted file mode 100644 index 3ae563cb..00000000 --- a/filter.mids.Rd +++ /dev/null @@ -1,77 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/filter.R -\name{filter.mids} -\alias{filter.mids} -\title{Subset rows of a \code{mids} object} -\usage{ -\method{filter}{mids}(.data, ..., .preserve = FALSE) -} -\arguments{ -\item{.data}{A \code{mids} object.} - -\item{...}{Expressions that return a -logical value, and are defined in terms of the variables in \code{.data$data}. -If multiple expressions are specified, they are combined with the \code{&} operator. -Only rows for which all conditions evaluate to \code{TRUE} are kept.} - -\item{.preserve}{Relevant when the \code{.data} input is grouped. -If \code{.preserve = FALSE} (the default), the grouping structure -is recalculated based on the resulting data, otherwise the grouping is kept as is.} -} -\value{ -An S3 object of class \code{mids} -} -\description{ -This function takes a \code{mids} object and returns a new -\code{mids} object that pertains to the subset of the data -identified by the expression in \dots. The expression may use -column values from the incomplete data in \code{.data$data}. -} -\note{ -The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. -The function constructs the elements of the filtered \code{mids} object as follows: -\tabular{ll}{ -\code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr -\code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr -\code{m} \tab Equals \code{.data$m}\cr -\code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr -\code{blocks} \tab Equals \code{.data$blocks}\cr -\code{call} \tab Equals \code{.data$call}\cr -\code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr -\code{method} \tab Equals \code{.data$method}\cr -\code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr -\code{visitSequence} \tab Equals \code{.data$visitSequence}\cr -\code{formulas} \tab Equals \code{.data$formulas}\cr -\code{post} \tab Equals \code{.data$post}\cr -\code{blots} \tab Equals \code{.data$blots}\cr -\code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr -\code{seed} \tab Equals \code{.data$seed}\cr -\code{iteration} \tab Equals \code{.data$iteration}\cr -\code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr -\code{chainMean} \tab Set to \code{NULL}\cr -\code{chainVar} \tab Set to \code{NULL}\cr -\code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr -\code{version} \tab Replaced with current version\cr -\code{date} \tab Replaced with current date -} -} -\examples{ -imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) - -# example with external logical vector -imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) - -nrow(complete(imp)) -nrow(complete(imp_f)) - -# example with calculated include vector -imp_f2 <- filter(imp, age >= 2 & hyp == 1) -nrow(complete(imp_f2)) # should be 5 -} -\seealso{ -\code{\link[dplyr]{filter}} -} -\author{ -Patrick Rockenschaub -} -\keyword{manip} From c61a2736366ca7503fcdb5768a2343c7c3a1bade Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 13:58:51 +0900 Subject: [PATCH 28/40] Delete mcar.Rd --- mcar.Rd | 135 -------------------------------------------------------- 1 file changed, 135 deletions(-) delete mode 100644 mcar.Rd diff --git a/mcar.Rd b/mcar.Rd deleted file mode 100644 index 141b5c95..00000000 --- a/mcar.Rd +++ /dev/null @@ -1,135 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcar.R -\name{mcar} -\alias{mcar} -\title{Jamshidian and Jalal's Non-Parametric MCAR Test} -\usage{ -mcar( - x, - imputed = mice(x, method = "norm"), - min_n = 6, - method = "auto", - replications = 10000, - use_chisq = 30, - alpha = 0.05 -) -} -\arguments{ -\item{x}{An object for which a method exists; usually a \code{data.frame}.} - -\item{imputed}{Either an object of class \code{mids}, as returned by -\code{\link[=mice]{mice()}}, or a list of \code{data.frame}s.} - -\item{min_n}{Atomic numeric, must be greater than 1. When there are missing -data patterns with fewer than \code{min_n} cases, all cases with that pattern will -be removed from \code{x} and \code{imputed}.} - -\item{method}{Atomic character. If it is known (or assumed) that data are -either multivariate normally distributed or not, then use either -\code{method = "hawkins"} or \code{method = "nonparametric"}, respectively. -The default argument \code{method = "auto"} follows the procedure outlined in the -Details section, and in Figure 7 of Jamshidian and Jalal (2010).} - -\item{replications}{Number of replications used to simulate the Neyman -distribution when performing Hawkins' test. As this method is based on random -sampling, use a high number of \code{replications} (and optionally, -\code{\link[=set.seed]{set.seed()}}) to minimize Monte Carlo error and ensure reproducibility.} - -\item{use_chisq}{Atomic integer, indicating the minimum number of cases -within a group \emph{k} that triggers the use of asymptotic Chi-square -distribution instead of the emprical distribution in the Neyman uniformity -test, which is performed as part of Hawkins' test.} - -\item{alpha}{Atomic numeric, indicating the significance level of tests.} -} -\value{ -An object of class \code{mcar_object}. -} -\description{ -Test whether missingness is contingent upon the observed variables, -according to the methodology developed by Jamshidian and Jalal (2010) (see -Details). -} -\details{ -Three types of missingness have been distinguished in the literature -(Rubin, 1976): -Missing completely at random (MCAR), which means that missingness is random; -missing at random (MAR), which means that missingness is contingent on the -\emph{observed}; -and missing not at random (MNAR), which means that missingness is related to -unobserved data. - -Jamshidian and Jalal's non-parametric MCAR test assumes that the missing data -are either MCAR or MAR, and tests whether the missingness is independent of -the observed values. If so, the covariance matrices of the imputed data will -be equal accross groups with different patterns of missingness. This test -consists of the following procedure: -\enumerate{ -\item Data are imputed. -\item The imputed data are split into \emph{k} groups according to the -\emph{k} missing data patterns in the original data (see -\code{\link[=md.pattern]{md.pattern()}}). -\item Perform Hawkins' test for equality of covariances across the \emph{k} -groups. -\item If the test is \emph{not significant}, conclude that there is no evidence -against multivariate normality of the data, nor against MCAR. -\item If the test \emph{is significant}, and multivariate normality of the data -can be assumed, then it can be concluded that missingness is MAR. -\item If multivariate normality cannot be assumed, then perform the -Anderson-Darling non-parametric test for equality of covariances across the -\emph{k} groups. -\item If the Anderson-Darling test is \emph{not significant}, this is evidence -against multivariate normality - but no evidence against MCAR. -\item If the Anderson-Darling test \emph{is significant}, this is evidence -it can be concluded that missingness is MAR. -} - -Note that, despite its name in common parlance, an MCAR test can only -indicate whether missingness is MCAR or MAR. The procedure cannot distinguish -MCAR from MNAR, so a non-significant result does not rule out MNAR. - -This is a re-implementation of the function \code{TestMCARNormality}, which was -originally published in the R-packgage \code{MissMech}, which has been removed -from CRAN. This new implementation is faster, as its backend is written in -C++. It also enhances the functionality of the original: -\itemize{ -\item Multiply imputed data can now be used; the median p-value and test -statistic across replications is then reported, as suggested by -Eekhout, Wiel, and Heymans (2017). -\item The printing method for an \code{mcar_object} gives a warning when at -least one p-value of either test was significant. In this case, it is -recommended to inspect the range of p-values, and consider potential -violations of MCAR. -\item A plotting method for an \code{mcar_object} is provided. -\item A plotting method for the \verb{$md.pattern} element of an \code{mcar_object} -is provided. -} -} -\examples{ -res <- mcar(nhanes) -# Examine test results -res -# Plot p-values across imputed data sets -plot(res) -# Plot md patterns used for the test -plot(res, type = "md.pattern") -# Note difference with the raw md.patterns: -md.pattern(nhanes) -} -\references{ -Rubin, D. B. (1976). Inference and Missing Data. Biometrika, Vol. 63, No. 3, -pp. 581-592. \doi{10.2307/2335739} - -Eekhout, I., M. A. Wiel, & M. W. Heymans (2017). Methods for Significance -Testing of Categorical Covariates in Logistic Regression Models After -Multiple Imputation: Power and Applicability Analysis. BMC Medical Research -Methodology 17 (1): 129. - -Jamshidian, M., & Jalal, S. (2010). Tests of homoscedasticity, normality, and -missing completely at random for incomplete multivariate data. Psychometrika, -75(4), 649–674. \doi{10.1007/s11336-010-9175-3} -} -\author{ -Caspar J. Van Lissa -} -\keyword{internal} From aabca2b9e9c32a03fe2c61c984271aba877be14f Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 13:59:06 +0900 Subject: [PATCH 29/40] Delete mice.impute.svm.R --- mice.impute.svm.R | 117 ---------------------------------------------- 1 file changed, 117 deletions(-) delete mode 100644 mice.impute.svm.R diff --git a/mice.impute.svm.R b/mice.impute.svm.R deleted file mode 100644 index 075f3ca3..00000000 --- a/mice.impute.svm.R +++ /dev/null @@ -1,117 +0,0 @@ -#' Imputation by the Support Vector Machine (mice-SVM) -#' -#' This function performs proper multiple imputation using the Support Vector Machine (SVM) -#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -#' designed for binary variables in high-dimensional data settings. -#' -#' Note: When using this method in high-dimensional settings, it is recommended -#' to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} -#' from removing predictors before they reach the SVM model. -#' -#' @inheritParams mice.impute.pmm -#' @param type A vector of length \code{ncol(x)} identifying the predictors. -#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls. -#' @param C Cost of constraints violation (default = 1). -#' @param scaled A logical vector indicating the variables to be scaled. -#' @param kernel The kernel function used in training and predicting (default = "vanilladot"). -#' @param tol Tolerance of termination criterion (default = 0.001). -#' @param kpar List of hyper-parameters for the kernel function (default = list()). -#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. -#' @return A vector of length \code{sum(!ry)} with imputed values. -#' @references -#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -#' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' Journal of Statistical Computation and Simulation. -#' @export -mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", tol = 0.001, kpar = list(), ...) { - - if (!requireNamespace("kernlab", quietly = TRUE)) { - stop("Package 'kernlab' is needed for this function. Please install it.") - } - - if (is.null(wy)) wy <- !ry - n_target <- sum(wy) - - # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 3.3, Steps 1-2) - xobs <- x[ry, , drop = FALSE] - yobs <- y[ry] - n1 <- sum(ry) - s <- sample(n1, n1, replace = TRUE) - - # The model must be trained on the bootstrapped observed data (y*, X*) - y_star <- yobs[s] - x_star <- xobs[s, , drop = FALSE] - - # Initialize draw with NAs - draw <- rep(NA, n_target) - - # 2. SVM Model Training (Takahashi, 2026, Section 3.3, Step 3) - if (length(unique(y_star)) == 2) { - result <- tryCatch({ - svm.model <- NULL - utils::capture.output( - svm.model <- suppressWarnings( - suppressMessages( - kernlab::ksvm( - y_star ~ x_star, - type = "C-svc", - kernel = kernel, - cross = 0, - C = C, - scaled = scaled, - prob.model = TRUE, - tol = tol, - kpar = kpar, - ... - ) - ) - ) - ) - - # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 3.3, Step 4) - p_mat <- NULL - utils::capture.output( - p_mat <- suppressWarnings( - suppressMessages( - kernlab::predict(svm.model, x[wy, , drop = FALSE], type = "probabilities") - ) - ) - ) - - # 4. Stochastic drawing (Takahashi, 2026, Section 3.3, Step 5) - # Extract probabilities for the positive class (assumed to be the 2nd column). - p <- p_mat[, 2] - as.integer(runif(length(p)) <= p) - - }, error = function(e) { - NULL # Return NULL to trigger fallback on numerical or logical errors - }) - - if (!is.null(result)) { - draw <- result - } - } - - # --- FALLBACK: If SVM failed or bootstrap sample had only 1 class --- - if (any(is.na(draw))) { - n_miss <- sum(is.na(draw)) - # Standard fallback: simple random sampling from observed values. - y_fill <- sample(yobs, n_miss, replace = TRUE) - - if (is.factor(y)) { - # Align levels with the original factor coding (0-based for integer drawing). - draw[is.na(draw)] <- as.integer(factor(y_fill, levels = levels(y))) - 1 - } else { - draw[is.na(draw)] <- y_fill - } - } - - # 5. Final type adjustment for the mice environment. - if (is.factor(y)) { - res <- factor(draw, levels = c(0, 1), labels = levels(y)) - } else { - res <- draw - } - - return(res) -} \ No newline at end of file From 3de2c7d0b93d36c609228ce602fc51325fc17b30 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 13:59:16 +0900 Subject: [PATCH 30/40] Delete mice.impute.svm.Rd --- mice.impute.svm.Rd | 67 ---------------------------------------------- 1 file changed, 67 deletions(-) delete mode 100644 mice.impute.svm.Rd diff --git a/mice.impute.svm.Rd b/mice.impute.svm.Rd deleted file mode 100644 index 4a48c10e..00000000 --- a/mice.impute.svm.Rd +++ /dev/null @@ -1,67 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mice.impute.svm.R -\name{mice.impute.svm} -\alias{mice.impute.svm} -\title{Imputation by the Support Vector Machine (mice-SVM)} -\usage{ -mice.impute.svm( - y, - ry, - x, - wy = NULL, - type = NULL, - C = 1, - scaled = TRUE, - kernel = "vanilladot", - tol = 0.001, - kpar = list(), - ... -) -} -\arguments{ -\item{y}{Vector to be imputed} - -\item{ry}{Logical vector of length \code{length(y)} indicating the -the subset \code{y[ry]} of elements in \code{y} to which the imputation -model is fitted. The \code{ry} generally distinguishes the observed -(\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} - -\item{x}{Numeric design matrix with \code{length(y)} rows with predictors for -\code{y}. Matrix \code{x} may have no missing values.} - -\item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value -indicates locations in \code{y} for which imputations are created.} - -\item{type}{A vector of length \code{ncol(x)} identifying the predictors. -Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls.} - -\item{C}{Cost of constraints violation (default = 1).} - -\item{scaled}{A logical vector indicating the variables to be scaled.} - -\item{kernel}{The kernel function used in training and predicting (default = "vanilladot").} - -\item{tol}{Tolerance of termination criterion (default = 0.001).} - -\item{kpar}{List of hyper-parameters for the kernel function (default = list()).} - -\item{...}{Other named arguments to be passed to \code{kernlab::ksvm()}.} -} -\value{ -A vector of length \code{sum(!ry)} with imputed values. -} -\description{ -This function performs proper multiple imputation using the Support Vector Machine (SVM) -combined with bootstrapping, as proposed by Takahashi (2026). It is specifically -designed for binary variables in high-dimensional data settings. -} -\details{ -Note: When using this method in high-dimensional settings, it is recommended -to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} -from removing predictors before they reach the SVM model. -} -\references{ -Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for -High-Dimensional Data with General Missing Patterns in Causal Inference. -Journal of Statistical Computation and Simulation. -} From ba3f6dc6ec85241a42d9bce507b4fb1d783a5171 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 14:00:01 +0900 Subject: [PATCH 31/40] Add files via upload --- R/mice.impute.svm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R index 1d5f85a2..075f3ca3 100644 --- a/R/mice.impute.svm.R +++ b/R/mice.impute.svm.R @@ -114,4 +114,4 @@ mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TR } return(res) -} +} \ No newline at end of file From f0f99bf136a017b0b7df0002c80b615fbe29f734 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 14:01:08 +0900 Subject: [PATCH 32/40] Add files via upload --- man/filter.mids.Rd | 153 +++++++++++++++++++++-------------------- man/mcar.Rd | 135 ++++++++++++++++++++++++++++++++++++ man/mice.impute.svm.Rd | 67 ++++++++++++++++++ 3 files changed, 279 insertions(+), 76 deletions(-) create mode 100644 man/mcar.Rd create mode 100644 man/mice.impute.svm.Rd diff --git a/man/filter.mids.Rd b/man/filter.mids.Rd index e1ee1af3..3ae563cb 100644 --- a/man/filter.mids.Rd +++ b/man/filter.mids.Rd @@ -1,76 +1,77 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/filter.R -\name{filter.mids} -\alias{filter.mids} -\title{Subset rows of a \code{mids} object} -\usage{ -\method{filter}{mids}(.data, ..., .preserve = FALSE) -} -\arguments{ -\item{.data}{A \code{mids} object.} - -\item{...}{Expressions that return a -logical value, and are defined in terms of the variables in \code{.data$data}. -If multiple expressions are specified, they are combined with the \code{&} operator. -Only rows for which all conditions evaluate to \code{TRUE} are kept.} - -\item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the -resulting data, otherwise the grouping is kept as is.} -} -\value{ -An S3 object of class \code{mids} -} -\description{ -This function takes a \code{mids} object and returns a new -\code{mids} object that pertains to the subset of the data -identified by the expression in \dots. The expression may use -column values from the incomplete data in \code{.data$data}. -} -\note{ -The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. -The function constructs the elements of the filtered \code{mids} object as follows: -\tabular{ll}{ -\code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr -\code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr -\code{m} \tab Equals \code{.data$m}\cr -\code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr -\code{blocks} \tab Equals \code{.data$blocks}\cr -\code{call} \tab Equals \code{.data$call}\cr -\code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr -\code{method} \tab Equals \code{.data$method}\cr -\code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr -\code{visitSequence} \tab Equals \code{.data$visitSequence}\cr -\code{formulas} \tab Equals \code{.data$formulas}\cr -\code{post} \tab Equals \code{.data$post}\cr -\code{blots} \tab Equals \code{.data$blots}\cr -\code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr -\code{seed} \tab Equals \code{.data$seed}\cr -\code{iteration} \tab Equals \code{.data$iteration}\cr -\code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr -\code{chainMean} \tab Set to \code{NULL}\cr -\code{chainVar} \tab Set to \code{NULL}\cr -\code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr -\code{version} \tab Replaced with current version\cr -\code{date} \tab Replaced with current date -} -} -\examples{ -imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) - -# example with external logical vector -imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) - -nrow(complete(imp)) -nrow(complete(imp_f)) - -# example with calculated include vector -imp_f2 <- filter(imp, age >= 2 & hyp == 1) -nrow(complete(imp_f2)) # should be 5 -} -\seealso{ -\code{\link[dplyr]{filter}} -} -\author{ -Patrick Rockenschaub -} -\keyword{manip} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter.R +\name{filter.mids} +\alias{filter.mids} +\title{Subset rows of a \code{mids} object} +\usage{ +\method{filter}{mids}(.data, ..., .preserve = FALSE) +} +\arguments{ +\item{.data}{A \code{mids} object.} + +\item{...}{Expressions that return a +logical value, and are defined in terms of the variables in \code{.data$data}. +If multiple expressions are specified, they are combined with the \code{&} operator. +Only rows for which all conditions evaluate to \code{TRUE} are kept.} + +\item{.preserve}{Relevant when the \code{.data} input is grouped. +If \code{.preserve = FALSE} (the default), the grouping structure +is recalculated based on the resulting data, otherwise the grouping is kept as is.} +} +\value{ +An S3 object of class \code{mids} +} +\description{ +This function takes a \code{mids} object and returns a new +\code{mids} object that pertains to the subset of the data +identified by the expression in \dots. The expression may use +column values from the incomplete data in \code{.data$data}. +} +\note{ +The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. +The function constructs the elements of the filtered \code{mids} object as follows: +\tabular{ll}{ +\code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr +\code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr +\code{m} \tab Equals \code{.data$m}\cr +\code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr +\code{blocks} \tab Equals \code{.data$blocks}\cr +\code{call} \tab Equals \code{.data$call}\cr +\code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr +\code{method} \tab Equals \code{.data$method}\cr +\code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr +\code{visitSequence} \tab Equals \code{.data$visitSequence}\cr +\code{formulas} \tab Equals \code{.data$formulas}\cr +\code{post} \tab Equals \code{.data$post}\cr +\code{blots} \tab Equals \code{.data$blots}\cr +\code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr +\code{seed} \tab Equals \code{.data$seed}\cr +\code{iteration} \tab Equals \code{.data$iteration}\cr +\code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr +\code{chainMean} \tab Set to \code{NULL}\cr +\code{chainVar} \tab Set to \code{NULL}\cr +\code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr +\code{version} \tab Replaced with current version\cr +\code{date} \tab Replaced with current date +} +} +\examples{ +imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) + +# example with external logical vector +imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) + +nrow(complete(imp)) +nrow(complete(imp_f)) + +# example with calculated include vector +imp_f2 <- filter(imp, age >= 2 & hyp == 1) +nrow(complete(imp_f2)) # should be 5 +} +\seealso{ +\code{\link[dplyr]{filter}} +} +\author{ +Patrick Rockenschaub +} +\keyword{manip} diff --git a/man/mcar.Rd b/man/mcar.Rd new file mode 100644 index 00000000..141b5c95 --- /dev/null +++ b/man/mcar.Rd @@ -0,0 +1,135 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mcar.R +\name{mcar} +\alias{mcar} +\title{Jamshidian and Jalal's Non-Parametric MCAR Test} +\usage{ +mcar( + x, + imputed = mice(x, method = "norm"), + min_n = 6, + method = "auto", + replications = 10000, + use_chisq = 30, + alpha = 0.05 +) +} +\arguments{ +\item{x}{An object for which a method exists; usually a \code{data.frame}.} + +\item{imputed}{Either an object of class \code{mids}, as returned by +\code{\link[=mice]{mice()}}, or a list of \code{data.frame}s.} + +\item{min_n}{Atomic numeric, must be greater than 1. When there are missing +data patterns with fewer than \code{min_n} cases, all cases with that pattern will +be removed from \code{x} and \code{imputed}.} + +\item{method}{Atomic character. If it is known (or assumed) that data are +either multivariate normally distributed or not, then use either +\code{method = "hawkins"} or \code{method = "nonparametric"}, respectively. +The default argument \code{method = "auto"} follows the procedure outlined in the +Details section, and in Figure 7 of Jamshidian and Jalal (2010).} + +\item{replications}{Number of replications used to simulate the Neyman +distribution when performing Hawkins' test. As this method is based on random +sampling, use a high number of \code{replications} (and optionally, +\code{\link[=set.seed]{set.seed()}}) to minimize Monte Carlo error and ensure reproducibility.} + +\item{use_chisq}{Atomic integer, indicating the minimum number of cases +within a group \emph{k} that triggers the use of asymptotic Chi-square +distribution instead of the emprical distribution in the Neyman uniformity +test, which is performed as part of Hawkins' test.} + +\item{alpha}{Atomic numeric, indicating the significance level of tests.} +} +\value{ +An object of class \code{mcar_object}. +} +\description{ +Test whether missingness is contingent upon the observed variables, +according to the methodology developed by Jamshidian and Jalal (2010) (see +Details). +} +\details{ +Three types of missingness have been distinguished in the literature +(Rubin, 1976): +Missing completely at random (MCAR), which means that missingness is random; +missing at random (MAR), which means that missingness is contingent on the +\emph{observed}; +and missing not at random (MNAR), which means that missingness is related to +unobserved data. + +Jamshidian and Jalal's non-parametric MCAR test assumes that the missing data +are either MCAR or MAR, and tests whether the missingness is independent of +the observed values. If so, the covariance matrices of the imputed data will +be equal accross groups with different patterns of missingness. This test +consists of the following procedure: +\enumerate{ +\item Data are imputed. +\item The imputed data are split into \emph{k} groups according to the +\emph{k} missing data patterns in the original data (see +\code{\link[=md.pattern]{md.pattern()}}). +\item Perform Hawkins' test for equality of covariances across the \emph{k} +groups. +\item If the test is \emph{not significant}, conclude that there is no evidence +against multivariate normality of the data, nor against MCAR. +\item If the test \emph{is significant}, and multivariate normality of the data +can be assumed, then it can be concluded that missingness is MAR. +\item If multivariate normality cannot be assumed, then perform the +Anderson-Darling non-parametric test for equality of covariances across the +\emph{k} groups. +\item If the Anderson-Darling test is \emph{not significant}, this is evidence +against multivariate normality - but no evidence against MCAR. +\item If the Anderson-Darling test \emph{is significant}, this is evidence +it can be concluded that missingness is MAR. +} + +Note that, despite its name in common parlance, an MCAR test can only +indicate whether missingness is MCAR or MAR. The procedure cannot distinguish +MCAR from MNAR, so a non-significant result does not rule out MNAR. + +This is a re-implementation of the function \code{TestMCARNormality}, which was +originally published in the R-packgage \code{MissMech}, which has been removed +from CRAN. This new implementation is faster, as its backend is written in +C++. It also enhances the functionality of the original: +\itemize{ +\item Multiply imputed data can now be used; the median p-value and test +statistic across replications is then reported, as suggested by +Eekhout, Wiel, and Heymans (2017). +\item The printing method for an \code{mcar_object} gives a warning when at +least one p-value of either test was significant. In this case, it is +recommended to inspect the range of p-values, and consider potential +violations of MCAR. +\item A plotting method for an \code{mcar_object} is provided. +\item A plotting method for the \verb{$md.pattern} element of an \code{mcar_object} +is provided. +} +} +\examples{ +res <- mcar(nhanes) +# Examine test results +res +# Plot p-values across imputed data sets +plot(res) +# Plot md patterns used for the test +plot(res, type = "md.pattern") +# Note difference with the raw md.patterns: +md.pattern(nhanes) +} +\references{ +Rubin, D. B. (1976). Inference and Missing Data. Biometrika, Vol. 63, No. 3, +pp. 581-592. \doi{10.2307/2335739} + +Eekhout, I., M. A. Wiel, & M. W. Heymans (2017). Methods for Significance +Testing of Categorical Covariates in Logistic Regression Models After +Multiple Imputation: Power and Applicability Analysis. BMC Medical Research +Methodology 17 (1): 129. + +Jamshidian, M., & Jalal, S. (2010). Tests of homoscedasticity, normality, and +missing completely at random for incomplete multivariate data. Psychometrika, +75(4), 649–674. \doi{10.1007/s11336-010-9175-3} +} +\author{ +Caspar J. Van Lissa +} +\keyword{internal} diff --git a/man/mice.impute.svm.Rd b/man/mice.impute.svm.Rd new file mode 100644 index 00000000..4a48c10e --- /dev/null +++ b/man/mice.impute.svm.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mice.impute.svm.R +\name{mice.impute.svm} +\alias{mice.impute.svm} +\title{Imputation by the Support Vector Machine (mice-SVM)} +\usage{ +mice.impute.svm( + y, + ry, + x, + wy = NULL, + type = NULL, + C = 1, + scaled = TRUE, + kernel = "vanilladot", + tol = 0.001, + kpar = list(), + ... +) +} +\arguments{ +\item{y}{Vector to be imputed} + +\item{ry}{Logical vector of length \code{length(y)} indicating the +the subset \code{y[ry]} of elements in \code{y} to which the imputation +model is fitted. The \code{ry} generally distinguishes the observed +(\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} + +\item{x}{Numeric design matrix with \code{length(y)} rows with predictors for +\code{y}. Matrix \code{x} may have no missing values.} + +\item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value +indicates locations in \code{y} for which imputations are created.} + +\item{type}{A vector of length \code{ncol(x)} identifying the predictors. +Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls.} + +\item{C}{Cost of constraints violation (default = 1).} + +\item{scaled}{A logical vector indicating the variables to be scaled.} + +\item{kernel}{The kernel function used in training and predicting (default = "vanilladot").} + +\item{tol}{Tolerance of termination criterion (default = 0.001).} + +\item{kpar}{List of hyper-parameters for the kernel function (default = list()).} + +\item{...}{Other named arguments to be passed to \code{kernlab::ksvm()}.} +} +\value{ +A vector of length \code{sum(!ry)} with imputed values. +} +\description{ +This function performs proper multiple imputation using the Support Vector Machine (SVM) +combined with bootstrapping, as proposed by Takahashi (2026). It is specifically +designed for binary variables in high-dimensional data settings. +} +\details{ +Note: When using this method in high-dimensional settings, it is recommended +to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} +from removing predictors before they reach the SVM model. +} +\references{ +Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +High-Dimensional Data with General Missing Patterns in Causal Inference. +Journal of Statistical Computation and Simulation. +} From 7051c93ecee7d364c55d5e4e548e929f05ffd358 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Thu, 21 May 2026 22:38:51 +0900 Subject: [PATCH 33/40] Add files via upload --- R/mice.impute.svm.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R index 075f3ca3..3f62ba3d 100644 --- a/R/mice.impute.svm.R +++ b/R/mice.impute.svm.R @@ -21,7 +21,8 @@ #' @references #' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for #' High-Dimensional Data with General Missing Patterns in Causal Inference. -#' Journal of Statistical Computation and Simulation. +#' \emph{Journal of Statistical Computation and Simulation}. +#' \doi{10.1080/00949655.2026.2660865} #' @export mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1, scaled = TRUE, kernel = "vanilladot", tol = 0.001, kpar = list(), ...) { From 89485ec69a2aba5001c676c84535ecea7830fa90 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 22 May 2026 10:03:11 +0900 Subject: [PATCH 34/40] Add files via upload From e32f64287bab2e24aed693779965cd926c16bb15 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 22 May 2026 10:04:01 +0900 Subject: [PATCH 35/40] Add files via upload --- man/mice.impute.svm.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/mice.impute.svm.Rd b/man/mice.impute.svm.Rd index 4a48c10e..a6564465 100644 --- a/man/mice.impute.svm.Rd +++ b/man/mice.impute.svm.Rd @@ -63,5 +63,6 @@ from removing predictors before they reach the SVM model. \references{ Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for High-Dimensional Data with General Missing Patterns in Causal Inference. -Journal of Statistical Computation and Simulation. +\emph{Journal of Statistical Computation and Simulation}. +\doi{10.1080/00949655.2026.2660865} } From 60c2d5ec56b87beb46dd674f0c0e874096148925 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 22 May 2026 16:05:51 +0900 Subject: [PATCH 36/40] Add files via upload --- R/mice.impute.svm.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/mice.impute.svm.R b/R/mice.impute.svm.R index 3f62ba3d..76570e56 100644 --- a/R/mice.impute.svm.R +++ b/R/mice.impute.svm.R @@ -18,6 +18,7 @@ #' @param kpar List of hyper-parameters for the kernel function (default = list()). #' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}. #' @return A vector of length \code{sum(!ry)} with imputed values. +#' @author Masayoshi Takahashi, 2026 #' @references #' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for #' High-Dimensional Data with General Missing Patterns in Causal Inference. From 531a8512d00cd6ea59f3f5d367b09cb444efa573 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 22 May 2026 16:06:34 +0900 Subject: [PATCH 37/40] Add files via upload --- man/mice.impute.svm.Rd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/man/mice.impute.svm.Rd b/man/mice.impute.svm.Rd index a6564465..a5e8be9c 100644 --- a/man/mice.impute.svm.Rd +++ b/man/mice.impute.svm.Rd @@ -66,3 +66,6 @@ High-Dimensional Data with General Missing Patterns in Causal Inference. \emph{Journal of Statistical Computation and Simulation}. \doi{10.1080/00949655.2026.2660865} } +\author{ +Masayoshi Takahashi, 2026 +} From 667a1c6f15f3cec3c020c6e6ff107a80b4c16e20 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Fri, 29 May 2026 11:10:35 +0900 Subject: [PATCH 38/40] Delete man/mcar.Rd --- man/mcar.Rd | 135 ---------------------------------------------------- 1 file changed, 135 deletions(-) delete mode 100644 man/mcar.Rd diff --git a/man/mcar.Rd b/man/mcar.Rd deleted file mode 100644 index 141b5c95..00000000 --- a/man/mcar.Rd +++ /dev/null @@ -1,135 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcar.R -\name{mcar} -\alias{mcar} -\title{Jamshidian and Jalal's Non-Parametric MCAR Test} -\usage{ -mcar( - x, - imputed = mice(x, method = "norm"), - min_n = 6, - method = "auto", - replications = 10000, - use_chisq = 30, - alpha = 0.05 -) -} -\arguments{ -\item{x}{An object for which a method exists; usually a \code{data.frame}.} - -\item{imputed}{Either an object of class \code{mids}, as returned by -\code{\link[=mice]{mice()}}, or a list of \code{data.frame}s.} - -\item{min_n}{Atomic numeric, must be greater than 1. When there are missing -data patterns with fewer than \code{min_n} cases, all cases with that pattern will -be removed from \code{x} and \code{imputed}.} - -\item{method}{Atomic character. If it is known (or assumed) that data are -either multivariate normally distributed or not, then use either -\code{method = "hawkins"} or \code{method = "nonparametric"}, respectively. -The default argument \code{method = "auto"} follows the procedure outlined in the -Details section, and in Figure 7 of Jamshidian and Jalal (2010).} - -\item{replications}{Number of replications used to simulate the Neyman -distribution when performing Hawkins' test. As this method is based on random -sampling, use a high number of \code{replications} (and optionally, -\code{\link[=set.seed]{set.seed()}}) to minimize Monte Carlo error and ensure reproducibility.} - -\item{use_chisq}{Atomic integer, indicating the minimum number of cases -within a group \emph{k} that triggers the use of asymptotic Chi-square -distribution instead of the emprical distribution in the Neyman uniformity -test, which is performed as part of Hawkins' test.} - -\item{alpha}{Atomic numeric, indicating the significance level of tests.} -} -\value{ -An object of class \code{mcar_object}. -} -\description{ -Test whether missingness is contingent upon the observed variables, -according to the methodology developed by Jamshidian and Jalal (2010) (see -Details). -} -\details{ -Three types of missingness have been distinguished in the literature -(Rubin, 1976): -Missing completely at random (MCAR), which means that missingness is random; -missing at random (MAR), which means that missingness is contingent on the -\emph{observed}; -and missing not at random (MNAR), which means that missingness is related to -unobserved data. - -Jamshidian and Jalal's non-parametric MCAR test assumes that the missing data -are either MCAR or MAR, and tests whether the missingness is independent of -the observed values. If so, the covariance matrices of the imputed data will -be equal accross groups with different patterns of missingness. This test -consists of the following procedure: -\enumerate{ -\item Data are imputed. -\item The imputed data are split into \emph{k} groups according to the -\emph{k} missing data patterns in the original data (see -\code{\link[=md.pattern]{md.pattern()}}). -\item Perform Hawkins' test for equality of covariances across the \emph{k} -groups. -\item If the test is \emph{not significant}, conclude that there is no evidence -against multivariate normality of the data, nor against MCAR. -\item If the test \emph{is significant}, and multivariate normality of the data -can be assumed, then it can be concluded that missingness is MAR. -\item If multivariate normality cannot be assumed, then perform the -Anderson-Darling non-parametric test for equality of covariances across the -\emph{k} groups. -\item If the Anderson-Darling test is \emph{not significant}, this is evidence -against multivariate normality - but no evidence against MCAR. -\item If the Anderson-Darling test \emph{is significant}, this is evidence -it can be concluded that missingness is MAR. -} - -Note that, despite its name in common parlance, an MCAR test can only -indicate whether missingness is MCAR or MAR. The procedure cannot distinguish -MCAR from MNAR, so a non-significant result does not rule out MNAR. - -This is a re-implementation of the function \code{TestMCARNormality}, which was -originally published in the R-packgage \code{MissMech}, which has been removed -from CRAN. This new implementation is faster, as its backend is written in -C++. It also enhances the functionality of the original: -\itemize{ -\item Multiply imputed data can now be used; the median p-value and test -statistic across replications is then reported, as suggested by -Eekhout, Wiel, and Heymans (2017). -\item The printing method for an \code{mcar_object} gives a warning when at -least one p-value of either test was significant. In this case, it is -recommended to inspect the range of p-values, and consider potential -violations of MCAR. -\item A plotting method for an \code{mcar_object} is provided. -\item A plotting method for the \verb{$md.pattern} element of an \code{mcar_object} -is provided. -} -} -\examples{ -res <- mcar(nhanes) -# Examine test results -res -# Plot p-values across imputed data sets -plot(res) -# Plot md patterns used for the test -plot(res, type = "md.pattern") -# Note difference with the raw md.patterns: -md.pattern(nhanes) -} -\references{ -Rubin, D. B. (1976). Inference and Missing Data. Biometrika, Vol. 63, No. 3, -pp. 581-592. \doi{10.2307/2335739} - -Eekhout, I., M. A. Wiel, & M. W. Heymans (2017). Methods for Significance -Testing of Categorical Covariates in Logistic Regression Models After -Multiple Imputation: Power and Applicability Analysis. BMC Medical Research -Methodology 17 (1): 129. - -Jamshidian, M., & Jalal, S. (2010). Tests of homoscedasticity, normality, and -missing completely at random for incomplete multivariate data. Psychometrika, -75(4), 649–674. \doi{10.1007/s11336-010-9175-3} -} -\author{ -Caspar J. Van Lissa -} -\keyword{internal} From 992e015c342c083271e68097de3d7bad2983d2e3 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Sun, 31 May 2026 08:43:02 +0900 Subject: [PATCH 39/40] Update DESCRIPTION --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index a4220ddf..c3cf8a30 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,6 +69,7 @@ Suggests: future, furrr, haven, + kernlab, knitr, literanger, lme4, From 361d586f257b1ed1582498bcedf19f5fd446e5c9 Mon Sep 17 00:00:00 2001 From: Masayoshi Takahashi <75410724+mtakahashi123@users.noreply.github.com> Date: Mon, 1 Jun 2026 08:45:12 +0900 Subject: [PATCH 40/40] Update NAMESPACE --- NAMESPACE | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 23982ab0..178b994e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -143,6 +143,7 @@ export(mice.impute.quadratic) export(mice.impute.rf) export(mice.impute.ri) export(mice.impute.sample) +export(mice.impute.svm) export(mice.mids) export(mice.theme) export(mids) @@ -273,4 +274,4 @@ importFrom(utils,packageDescription) importFrom(utils,packageVersion) importFrom(utils,tail) importFrom(utils,write.table) -useDynLib(mice, .registration = TRUE) \ No newline at end of file +useDynLib(mice, .registration = TRUE)