diff --git a/DESCRIPTION b/DESCRIPTION index 17d1a2ff..8b1f8843 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,107 +1,108 @@ -Package: mice -Type: Package -Version: 3.19.8 -Title: Multivariate Imputation by Chained Equations -Date: 2026-06-01 -Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"), - email = "stef.vanbuuren@tno.nl"), - person("Karin", "Groothuis-Oudshoorn", role = "aut", - email = "c.g.m.oudshoorn@utwente.nl"), - person("Gerko","Vink", role = "ctb", - email = "g.vink@uu.nl"), - person("Rianne","Schouten", role = "ctb", - email = "R.M.Schouten@uu.nl"), - person("Alexander", "Robitzsch", role = "ctb", - email = "robitzsch@ipn.uni-kiel.de"), - person("Patrick", "Rockenschaub", role = "ctb", - email = "rockenschaub.patrick@gmail.com"), - person("Lisa","Doove", role = "ctb", - email = "lisa.doove@ppw.kuleuven.be"), - person("Shahab","Jolani", role = "ctb", - email = "s.jolani@maastrichtuniversity.nl"), - person("Margarita","Moreno-Betancur", role="ctb", - email = "margarita.moreno@mcri.edu.au"), - person("Ian", "White", role="ctb", - email = "ian.white@ucl.ac.uk"), - person("Philipp","Gaffert", role = "ctb", - email = "philipp.gaffert@gfk.com"), - person("Florian","Meinfelder", role = "ctb", - email = "florian.meinfelder@uni-bamberg.de"), - person("Bernie","Gray", role = "ctb", - email = "bfgray3@gmail.com"), - person("Vincent", "Arel-Bundock", role = "ctb", - email = "vincent.arel-bundock@umontreal.ca"), - person("Mingyang", "Cai", role = "ctb", - email = "m.cai@uu.nl"), - person("Thom", "Volker", role = "ctb", - email = "t.b.volker@uu.nl"), - person("Edoardo", "Costantini", role = "ctb", - email = "e.costantini@tilburguniversity.edu"), - person("Caspar", "van Lissa", role = "ctb", - email = "c.j.vanlissa@uu.nl"), - person("Hanne", "Oberman", role = "ctb", - email = "h.i.oberman@uu.nl"), - person("Stephen", "Wade", role = "ctb", - email = "stephematician@gmail.com"), - person("Florian", "van Leeuwen", role = "ctb", - email = 
"f.d.vanleeuwen@uu.nl"), - person("Frederik", "Fabricius-Bjerre", role = "ctb", - email = "frederik@fabriciusbjerre.dk")) -Maintainer: Stef van Buuren -Depends: - R (>= 2.10.0) -Imports: - broom, - dplyr, - glmnet, - graphics, - grDevices, - lattice, - mitml, - nnet, - Rcpp, - rpart, - stats, - tidyr, - utils -Suggests: - broom.mixed, - future, - furrr, - haven, - knitr, - literanger, - lme4, - MASS, - miceadds, - pan, - parallelly, - purrr, - ranger, - randomForest, - rmarkdown, - rstan, - survival, - testthat -Description: Multiple imputation using Fully Conditional Specification (FCS) - implemented by the MICE algorithm as described in Van Buuren and - Groothuis-Oudshoorn (2011) . Each variable has - its own imputation model. Built-in imputation models are provided for - continuous data (predictive mean matching, normal), binary data (logistic - regression), unordered categorical data (polytomous logistic regression) - and ordered categorical data (proportional odds). MICE can also impute - continuous two-level data (normal model, pan, second-level variables). - Passive imputation can be used to maintain consistency between variables. - Various diagnostic plots are available to inspect the quality of the - imputations. 
-Encoding: UTF-8 -LazyLoad: yes -LazyData: yes -URL: https://github.com/amices/mice, - https://amices.org/mice/, - https://stefvanbuuren.name/fimd/ -BugReports: https://github.com/amices/mice/issues -LinkingTo: cpp11, Rcpp -License: GPL (>= 2) -Roxygen: list(markdown = TRUE) -Config/roxygen2/version: 8.0.0 +Package: mice +Type: Package +Version: 3.19.8 +Title: Multivariate Imputation by Chained Equations +Date: 2026-06-01 +Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"), + email = "stef.vanbuuren@tno.nl"), + person("Karin", "Groothuis-Oudshoorn", role = "aut", + email = "c.g.m.oudshoorn@utwente.nl"), + person("Gerko","Vink", role = "ctb", + email = "g.vink@uu.nl"), + person("Rianne","Schouten", role = "ctb", + email = "R.M.Schouten@uu.nl"), + person("Alexander", "Robitzsch", role = "ctb", + email = "robitzsch@ipn.uni-kiel.de"), + person("Patrick", "Rockenschaub", role = "ctb", + email = "rockenschaub.patrick@gmail.com"), + person("Lisa","Doove", role = "ctb", + email = "lisa.doove@ppw.kuleuven.be"), + person("Shahab","Jolani", role = "ctb", + email = "s.jolani@maastrichtuniversity.nl"), + person("Margarita","Moreno-Betancur", role="ctb", + email = "margarita.moreno@mcri.edu.au"), + person("Ian", "White", role="ctb", + email = "ian.white@ucl.ac.uk"), + person("Philipp","Gaffert", role = "ctb", + email = "philipp.gaffert@gfk.com"), + person("Florian","Meinfelder", role = "ctb", + email = "florian.meinfelder@uni-bamberg.de"), + person("Bernie","Gray", role = "ctb", + email = "bfgray3@gmail.com"), + person("Vincent", "Arel-Bundock", role = "ctb", + email = "vincent.arel-bundock@umontreal.ca"), + person("Mingyang", "Cai", role = "ctb", + email = "m.cai@uu.nl"), + person("Thom", "Volker", role = "ctb", + email = "t.b.volker@uu.nl"), + person("Edoardo", "Costantini", role = "ctb", + email = "e.costantini@tilburguniversity.edu"), + person("Caspar", "van Lissa", role = "ctb", + email = "c.j.vanlissa@uu.nl"), + person("Hanne", "Oberman", role = "ctb", + email 
= "h.i.oberman@uu.nl"), + person("Stephen", "Wade", role = "ctb", + email = "stephematician@gmail.com"), + person("Florian", "van Leeuwen", role = "ctb", + email = "f.d.vanleeuwen@uu.nl"), + person("Frederik", "Fabricius-Bjerre", role = "ctb", + email = "frederik@fabriciusbjerre.dk")) +Maintainer: Stef van Buuren +Depends: + R (>= 2.10.0) +Imports: + broom, + dplyr, + glmnet, + graphics, + grDevices, + lattice, + mitml, + nnet, + Rcpp, + rpart, + stats, + tidyr, + utils +Suggests: + broom.mixed, + future, + furrr, + haven, + kernlab, + knitr, + literanger, + lme4, + MASS, + miceadds, + pan, + parallelly, + purrr, + ranger, + randomForest, + rmarkdown, + rstan, + survival, + testthat +Description: Multiple imputation using Fully Conditional Specification (FCS) + implemented by the MICE algorithm as described in Van Buuren and + Groothuis-Oudshoorn (2011) . Each variable has + its own imputation model. Built-in imputation models are provided for + continuous data (predictive mean matching, normal), binary data (logistic + regression), unordered categorical data (polytomous logistic regression) + and ordered categorical data (proportional odds). MICE can also impute + continuous two-level data (normal model, pan, second-level variables). + Passive imputation can be used to maintain consistency between variables. + Various diagnostic plots are available to inspect the quality of the + imputations. 
+Encoding: UTF-8 +LazyLoad: yes +LazyData: yes +URL: https://github.com/amices/mice, + https://amices.org/mice/, + https://stefvanbuuren.name/fimd/ +BugReports: https://github.com/amices/mice/issues +LinkingTo: cpp11, Rcpp +License: GPL (>= 2) +Roxygen: list(markdown = TRUE) +Config/roxygen2/version: 8.0.0 diff --git a/NAMESPACE b/NAMESPACE index f5f02c57..178b994e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,276 +1,277 @@ -# Generated by roxygen2: do not edit by hand - -S3method(anova,mira) -S3method(as.data.frame,mipo) -S3method(bwplot,mads) -S3method(bwplot,mids) -S3method(cc,data.frame) -S3method(cc,default) -S3method(cc,matrix) -S3method(cc,mids) -S3method(cci,default) -S3method(cci,mids) -S3method(complete,mids) -S3method(densityplot,mids) -S3method(df.residual,lme) -S3method(df.residual,mer) -S3method(df.residual,mira) -S3method(df.residual,multinom) -S3method(filter,mids) -S3method(glance,mipo) -S3method(ic,data.frame) -S3method(ic,default) -S3method(ic,matrix) -S3method(ic,mids) -S3method(ici,default) -S3method(ici,mids) -S3method(is.nan,data.frame) -S3method(mcar,data.frame) -S3method(plot,mcar_object) -S3method(plot,md.pattern) -S3method(plot,mids) -S3method(predict_mi,list) -S3method(predict_mi,lm) -S3method(predict_mi,mira) -S3method(print,mads) -S3method(print,mcar_object) -S3method(print,mice.anova) -S3method(print,mice.anova.summary) -S3method(print,mids) -S3method(print,mipo) -S3method(print,mipo.summary) -S3method(print,mira) -S3method(stripplot,mids) -S3method(summary,mads) -S3method(summary,mice.anova) -S3method(summary,mids) -S3method(summary,mipo) -S3method(summary,mira) -S3method(tidy,mipo) -S3method(with,mids) -S3method(xyplot,mads) -S3method(xyplot,mids) -export(.norm.draw) -export(.pmm.match) -export(D1) -export(D2) -export(D3) -export(ampute) -export(ampute.default.freq) -export(ampute.default.odds) -export(ampute.default.type) -export(ampute.default.weights) -export(appendbreak) -export(as.mids) -export(as.mira) -export(as.mitml.result) 
-export(bwplot) -export(cbind) -export(cc) -export(cci) -export(complete) -export(construct.blocks) -export(convergence) -export(densityplot) -export(estimice) -export(extractBS) -export(fico) -export(filter) -export(fix.coef) -export(flux) -export(fluxplot) -export(futuremice) -export(getfit) -export(getqbar) -export(glance) -export(glm.mids) -export(ibind) -export(ic) -export(ici) -export(is.mads) -export(is.mids) -export(is.mipo) -export(is.mira) -export(is.mitml.result) -export(lm.mids) -export(mads) -export(make.blocks) -export(make.blots) -export(make.calltype) -export(make.formulas) -export(make.method) -export(make.post) -export(make.predictorMatrix) -export(make.visitSequence) -export(make.where) -export(matchindex) -export(mcar) -export(md.pairs) -export(md.pattern) -export(mdc) -export(mice) -export(mice.impute.2l.bin) -export(mice.impute.2l.lmer) -export(mice.impute.2l.norm) -export(mice.impute.2l.pan) -export(mice.impute.2lonly.mean) -export(mice.impute.2lonly.norm) -export(mice.impute.2lonly.pmm) -export(mice.impute.cart) -export(mice.impute.jomoImpute) -export(mice.impute.lasso.logreg) -export(mice.impute.lasso.norm) -export(mice.impute.lasso.select.logreg) -export(mice.impute.lasso.select.norm) -export(mice.impute.lda) -export(mice.impute.logreg) -export(mice.impute.logreg.boot) -export(mice.impute.mean) -export(mice.impute.midastouch) -export(mice.impute.mnar.logreg) -export(mice.impute.mnar.norm) -export(mice.impute.mpmm) -export(mice.impute.norm) -export(mice.impute.norm.boot) -export(mice.impute.norm.nob) -export(mice.impute.norm.predict) -export(mice.impute.panImpute) -export(mice.impute.passive) -export(mice.impute.pmm) -export(mice.impute.polr) -export(mice.impute.polyreg) -export(mice.impute.quadratic) -export(mice.impute.rf) -export(mice.impute.ri) -export(mice.impute.sample) -export(mice.mids) -export(mice.theme) -export(mids) -export(mids2mplus) -export(mids2spss) -export(mipo) -export(mira) -export(name.blocks) -export(name.formulas) 
-export(ncc) -export(nelsonaalen) -export(nic) -export(nimp) -export(norm.draw) -export(parlmice) -export(pool) -export(pool.compare) -export(pool.r.squared) -export(pool.scalar) -export(pool.scalar.syn) -export(pool.syn) -export(pool.table) -export(predict_mi) -export(quickpred) -export(rbind) -export(squeeze) -export(stripplot) -export(supports.transparent) -export(tidy) -export(version) -export(xyplot) -importFrom(Rcpp,evalCpp) -importFrom(broom,glance) -importFrom(broom,tidy) -importFrom(dplyr,"%>%") -importFrom(dplyr,.data) -importFrom(dplyr,any_of) -importFrom(dplyr,bind_cols) -importFrom(dplyr,bind_rows) -importFrom(dplyr,filter) -importFrom(dplyr,group_by) -importFrom(dplyr,lead) -importFrom(dplyr,mutate) -importFrom(dplyr,n) -importFrom(dplyr,pull) -importFrom(dplyr,relocate) -importFrom(dplyr,row_number) -importFrom(dplyr,select) -importFrom(dplyr,summarize) -importFrom(glmnet,cv.glmnet) -importFrom(grDevices,dev.off) -importFrom(graphics,abline) -importFrom(graphics,axis) -importFrom(graphics,box) -importFrom(graphics,hist) -importFrom(graphics,par) -importFrom(graphics,plot) -importFrom(graphics,plot.new) -importFrom(graphics,plot.window) -importFrom(graphics,points) -importFrom(graphics,rect) -importFrom(graphics,text) -importFrom(lattice,bwplot) -importFrom(lattice,densityplot) -importFrom(lattice,stripplot) -importFrom(lattice,xyplot) -importFrom(mitml,jomoImpute) -importFrom(mitml,mitmlComplete) -importFrom(mitml,panImpute) -importFrom(mitml,testModels) -importFrom(nnet,multinom) -importFrom(rpart,rpart) -importFrom(rpart,rpart.control) -importFrom(stats,C) -importFrom(stats,aggregate) -importFrom(stats,as.formula) -importFrom(stats,binomial) -importFrom(stats,cancor) -importFrom(stats,coef) -importFrom(stats,complete.cases) -importFrom(stats,confint) -importFrom(stats,contr.treatment) -importFrom(stats,cor) -importFrom(stats,cov) -importFrom(stats,df.residual) -importFrom(stats,fitted) -importFrom(stats,formula) -importFrom(stats,gaussian) 
-importFrom(stats,getCall) -importFrom(stats,glm) -importFrom(stats,is.empty.model) -importFrom(stats,lm) -importFrom(stats,lm.fit) -importFrom(stats,median) -importFrom(stats,model.frame) -importFrom(stats,model.matrix) -importFrom(stats,na.exclude) -importFrom(stats,na.omit) -importFrom(stats,na.pass) -importFrom(stats,pchisq) -importFrom(stats,pf) -importFrom(stats,predict) -importFrom(stats,pt) -importFrom(stats,qt) -importFrom(stats,quantile) -importFrom(stats,quasibinomial) -importFrom(stats,rbinom) -importFrom(stats,rchisq) -importFrom(stats,reformulate) -importFrom(stats,rgamma) -importFrom(stats,rnorm) -importFrom(stats,runif) -importFrom(stats,sd) -importFrom(stats,spline) -importFrom(stats,summary.glm) -importFrom(stats,terms) -importFrom(stats,update) -importFrom(stats,var) -importFrom(stats,vcov) -importFrom(tidyr,complete) -importFrom(utils,askYesNo) -importFrom(utils,flush.console) -importFrom(utils,hasName) -importFrom(utils,head) -importFrom(utils,install.packages) -importFrom(utils,methods) -importFrom(utils,packageDescription) -importFrom(utils,packageVersion) -importFrom(utils,tail) -importFrom(utils,write.table) -useDynLib(mice, .registration = TRUE) +# Generated by roxygen2: do not edit by hand + +S3method(anova,mira) +S3method(as.data.frame,mipo) +S3method(bwplot,mads) +S3method(bwplot,mids) +S3method(cc,data.frame) +S3method(cc,default) +S3method(cc,matrix) +S3method(cc,mids) +S3method(cci,default) +S3method(cci,mids) +S3method(complete,mids) +S3method(densityplot,mids) +S3method(df.residual,lme) +S3method(df.residual,mer) +S3method(df.residual,mira) +S3method(df.residual,multinom) +S3method(filter,mids) +S3method(glance,mipo) +S3method(ic,data.frame) +S3method(ic,default) +S3method(ic,matrix) +S3method(ic,mids) +S3method(ici,default) +S3method(ici,mids) +S3method(is.nan,data.frame) +S3method(mcar,data.frame) +S3method(plot,mcar_object) +S3method(plot,md.pattern) +S3method(plot,mids) +S3method(predict_mi,list) +S3method(predict_mi,lm) 
+S3method(predict_mi,mira) +S3method(print,mads) +S3method(print,mcar_object) +S3method(print,mice.anova) +S3method(print,mice.anova.summary) +S3method(print,mids) +S3method(print,mipo) +S3method(print,mipo.summary) +S3method(print,mira) +S3method(stripplot,mids) +S3method(summary,mads) +S3method(summary,mice.anova) +S3method(summary,mids) +S3method(summary,mipo) +S3method(summary,mira) +S3method(tidy,mipo) +S3method(with,mids) +S3method(xyplot,mads) +S3method(xyplot,mids) +export(.norm.draw) +export(.pmm.match) +export(D1) +export(D2) +export(D3) +export(ampute) +export(ampute.default.freq) +export(ampute.default.odds) +export(ampute.default.type) +export(ampute.default.weights) +export(appendbreak) +export(as.mids) +export(as.mira) +export(as.mitml.result) +export(bwplot) +export(cbind) +export(cc) +export(cci) +export(complete) +export(construct.blocks) +export(convergence) +export(densityplot) +export(estimice) +export(extractBS) +export(fico) +export(filter) +export(fix.coef) +export(flux) +export(fluxplot) +export(futuremice) +export(getfit) +export(getqbar) +export(glance) +export(glm.mids) +export(ibind) +export(ic) +export(ici) +export(is.mads) +export(is.mids) +export(is.mipo) +export(is.mira) +export(is.mitml.result) +export(lm.mids) +export(mads) +export(make.blocks) +export(make.blots) +export(make.calltype) +export(make.formulas) +export(make.method) +export(make.post) +export(make.predictorMatrix) +export(make.visitSequence) +export(make.where) +export(matchindex) +export(mcar) +export(md.pairs) +export(md.pattern) +export(mdc) +export(mice) +export(mice.impute.2l.bin) +export(mice.impute.2l.lmer) +export(mice.impute.2l.norm) +export(mice.impute.2l.pan) +export(mice.impute.2lonly.mean) +export(mice.impute.2lonly.norm) +export(mice.impute.2lonly.pmm) +export(mice.impute.cart) +export(mice.impute.jomoImpute) +export(mice.impute.lasso.logreg) +export(mice.impute.lasso.norm) +export(mice.impute.lasso.select.logreg) +export(mice.impute.lasso.select.norm) 
+export(mice.impute.lda) +export(mice.impute.logreg) +export(mice.impute.logreg.boot) +export(mice.impute.mean) +export(mice.impute.midastouch) +export(mice.impute.mnar.logreg) +export(mice.impute.mnar.norm) +export(mice.impute.mpmm) +export(mice.impute.norm) +export(mice.impute.norm.boot) +export(mice.impute.norm.nob) +export(mice.impute.norm.predict) +export(mice.impute.panImpute) +export(mice.impute.passive) +export(mice.impute.pmm) +export(mice.impute.polr) +export(mice.impute.polyreg) +export(mice.impute.quadratic) +export(mice.impute.rf) +export(mice.impute.ri) +export(mice.impute.sample) +export(mice.impute.svm) +export(mice.mids) +export(mice.theme) +export(mids) +export(mids2mplus) +export(mids2spss) +export(mipo) +export(mira) +export(name.blocks) +export(name.formulas) +export(ncc) +export(nelsonaalen) +export(nic) +export(nimp) +export(norm.draw) +export(parlmice) +export(pool) +export(pool.compare) +export(pool.r.squared) +export(pool.scalar) +export(pool.scalar.syn) +export(pool.syn) +export(pool.table) +export(predict_mi) +export(quickpred) +export(rbind) +export(squeeze) +export(stripplot) +export(supports.transparent) +export(tidy) +export(version) +export(xyplot) +importFrom(Rcpp,evalCpp) +importFrom(broom,glance) +importFrom(broom,tidy) +importFrom(dplyr,"%>%") +importFrom(dplyr,.data) +importFrom(dplyr,any_of) +importFrom(dplyr,bind_cols) +importFrom(dplyr,bind_rows) +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,lead) +importFrom(dplyr,mutate) +importFrom(dplyr,n) +importFrom(dplyr,pull) +importFrom(dplyr,relocate) +importFrom(dplyr,row_number) +importFrom(dplyr,select) +importFrom(dplyr,summarize) +importFrom(glmnet,cv.glmnet) +importFrom(grDevices,dev.off) +importFrom(graphics,abline) +importFrom(graphics,axis) +importFrom(graphics,box) +importFrom(graphics,hist) +importFrom(graphics,par) +importFrom(graphics,plot) +importFrom(graphics,plot.new) +importFrom(graphics,plot.window) +importFrom(graphics,points) 
+importFrom(graphics,rect) +importFrom(graphics,text) +importFrom(lattice,bwplot) +importFrom(lattice,densityplot) +importFrom(lattice,stripplot) +importFrom(lattice,xyplot) +importFrom(mitml,jomoImpute) +importFrom(mitml,mitmlComplete) +importFrom(mitml,panImpute) +importFrom(mitml,testModels) +importFrom(nnet,multinom) +importFrom(rpart,rpart) +importFrom(rpart,rpart.control) +importFrom(stats,C) +importFrom(stats,aggregate) +importFrom(stats,as.formula) +importFrom(stats,binomial) +importFrom(stats,cancor) +importFrom(stats,coef) +importFrom(stats,complete.cases) +importFrom(stats,confint) +importFrom(stats,contr.treatment) +importFrom(stats,cor) +importFrom(stats,cov) +importFrom(stats,df.residual) +importFrom(stats,fitted) +importFrom(stats,formula) +importFrom(stats,gaussian) +importFrom(stats,getCall) +importFrom(stats,glm) +importFrom(stats,is.empty.model) +importFrom(stats,lm) +importFrom(stats,lm.fit) +importFrom(stats,median) +importFrom(stats,model.frame) +importFrom(stats,model.matrix) +importFrom(stats,na.exclude) +importFrom(stats,na.omit) +importFrom(stats,na.pass) +importFrom(stats,pchisq) +importFrom(stats,pf) +importFrom(stats,predict) +importFrom(stats,pt) +importFrom(stats,qt) +importFrom(stats,quantile) +importFrom(stats,quasibinomial) +importFrom(stats,rbinom) +importFrom(stats,rchisq) +importFrom(stats,reformulate) +importFrom(stats,rgamma) +importFrom(stats,rnorm) +importFrom(stats,runif) +importFrom(stats,sd) +importFrom(stats,spline) +importFrom(stats,summary.glm) +importFrom(stats,terms) +importFrom(stats,update) +importFrom(stats,var) +importFrom(stats,vcov) +importFrom(tidyr,complete) +importFrom(utils,askYesNo) +importFrom(utils,flush.console) +importFrom(utils,hasName) +importFrom(utils,head) +importFrom(utils,install.packages) +importFrom(utils,methods) +importFrom(utils,packageDescription) +importFrom(utils,packageVersion) +importFrom(utils,tail) +importFrom(utils,write.table) +useDynLib(mice, .registration = TRUE) diff --git 
a/R/mice.impute.svm.R b/R/mice.impute.svm.R new file mode 100644 index 00000000..76570e56 --- /dev/null +++ b/R/mice.impute.svm.R @@ -0,0 +1,146 @@
+#' Imputation by the Support Vector Machine (mice-SVM)
+#'
+#' This function performs proper multiple imputation using the Support Vector Machine (SVM)
+#' combined with bootstrapping, as proposed by Takahashi (2026). It is specifically
+#' designed for binary variables in high-dimensional data settings.
+#'
+#' Note: When using this method in high-dimensional settings, it is recommended
+#' to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()}
+#' from removing predictors before they reach the SVM model.
+#'
+#' SVM-based imputations are returned in the coding of \code{y}: the original
+#' factor levels, or the two observed numeric or logical values. If the
+#' bootstrap sample does not contain exactly two classes, or if the SVM cannot
+#' be fitted (a warning is issued in that case), the imputations are random
+#' draws from the observed values of \code{y}.
+#'
+#' @inheritParams mice.impute.pmm
+#' @param type A vector of length \code{ncol(x)} identifying the predictors.
+#' Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls.
+#' @param C Cost of constraints violation (default = 1).
+#' @param scaled A logical vector indicating the variables to be scaled.
+#' @param kernel The kernel function used in training and predicting (default = "vanilladot").
+#' @param tol Tolerance of termination criterion (default = 0.001).
+#' @param kpar List of hyper-parameters for the kernel function (default = list()).
+#' @param ... Other named arguments to be passed to \code{kernlab::ksvm()}.
+#' @return A vector of length \code{sum(wy)} with imputed values, of the same
+#' type as \code{y}.
+#' @author Masayoshi Takahashi, 2026
+#' @references
+#' Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for
+#' High-Dimensional Data with General Missing Patterns in Causal Inference.
+#' \emph{Journal of Statistical Computation and Simulation}.
+#' \doi{10.1080/00949655.2026.2660865}
+#' @export
+mice.impute.svm <- function(y, ry, x, wy = NULL, type = NULL, C = 1,
+                            scaled = TRUE, kernel = "vanilladot",
+                            tol = 0.001, kpar = list(), ...) {
+  if (!requireNamespace("kernlab", quietly = TRUE)) {
+    stop("Package 'kernlab' is needed for this function. Please install it.")
+  }
+
+  if (is.null(wy)) wy <- !ry
+  n_target <- sum(wy)
+  if (n_target == 0L) {
+    return(y[0L])
+  }
+  n1 <- sum(ry)
+  if (n1 == 0L) {
+    stop("mice.impute.svm: 'y' has no observed values to impute from.")
+  }
+
+  # 1. Bootstrap for estimation uncertainty (Takahashi, 2026, Section 3.3, Steps 1-2).
+  # The model is trained on the bootstrapped observed data (y*, X*).
+  xobs <- x[ry, , drop = FALSE]
+  yobs <- y[ry]
+  s <- sample.int(n1, n1, replace = TRUE)
+  y_star <- yobs[s]
+  x_star <- xobs[s, , drop = FALSE]
+
+  # The two classes present in the bootstrap sample, in a fixed order:
+  # factor level order for factors, sorted values otherwise. The SVM is trained
+  # on the class index (1 or 2) so that a draw maps back to the coding of y.
+  classes <- if (is.factor(y_star)) {
+    levels(droplevels(y_star))
+  } else {
+    sort(unique(y_star))
+  }
+
+  drawn <- NULL
+  if (length(classes) == 2L) {
+    y_fit <- factor(match(y_star, classes), levels = 1:2)
+    drawn <- tryCatch(
+      {
+        # 2. SVM model training (Takahashi, 2026, Section 3.3, Step 3).
+        # capture.output() and the suppress*() wrappers silence kernlab's console chatter.
+        svm_model <- NULL
+        utils::capture.output(
+          svm_model <- suppressWarnings(
+            suppressMessages(
+              kernlab::ksvm(
+                x = x_star,
+                y = y_fit,
+                type = "C-svc",
+                kernel = kernel,
+                cross = 0,
+                C = C,
+                scaled = scaled,
+                prob.model = TRUE,
+                tol = tol,
+                kpar = kpar,
+                ...
+              )
+            )
+          )
+        )
+
+        # 3. Predict probabilities for fundamental uncertainty (Takahashi, 2026, Section 3.3, Step 4).
+        p_mat <- NULL
+        utils::capture.output(
+          p_mat <- suppressWarnings(
+            suppressMessages(
+              kernlab::predict(svm_model, x[wy, , drop = FALSE], type = "probabilities")
+            )
+          )
+        )
+
+        # 4. Stochastic drawing (Takahashi, 2026, Section 3.3, Step 5).
+        # Locate the column of the second class by its label; fall back to
+        # column 2 if the probability matrix carries no usable column names.
+        pos <- match("2", colnames(p_mat))
+        if (is.na(pos)) pos <- 2L
+        p <- p_mat[, pos]
+        classes[1L + (stats::runif(length(p)) <= p)]
+      },
+      error = function(e) {
+        # Do not hide a failed fit: random-sample imputation ignores the predictors.
+        warning(
+          "mice.impute.svm: SVM imputation failed (", conditionMessage(e),
+          "); imputing by random draws from the observed values.",
+          call. = FALSE
+        )
+        NULL
+      }
+    )
+  }
+
+  # 5. Convert the draws to the type of y (typed NA vector if there are none).
+  if (is.null(drawn)) {
+    res <- yobs[rep(NA_integer_, n_target)]
+  } else if (is.factor(y)) {
+    res <- factor(drawn, levels = levels(y), ordered = is.ordered(y))
+  } else {
+    res <- drawn
+  }
+
+  # --- FALLBACK: if the SVM failed, the bootstrap sample did not have two
+  # classes, or a predicted probability was NA --- simple random sampling from
+  # the observed values. Indexing via sample.int() avoids sample()'s
+  # "single number means 1:n" behaviour when only one value is observed.
+  todo <- is.na(res)
+  if (any(todo)) {
+    res[todo] <- yobs[sample.int(n1, sum(todo), replace = TRUE)]
+  }
+
+  res
+}
\ No newline at end of file
diff --git a/man/filter.mids.Rd b/man/filter.mids.Rd index e1ee1af3..3ae563cb 100644 --- a/man/filter.mids.Rd +++ b/man/filter.mids.Rd @@ -1,76 +1,77 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/filter.R -\name{filter.mids} -\alias{filter.mids} -\title{Subset rows of a \code{mids} object} -\usage{ -\method{filter}{mids}(.data, ..., .preserve = FALSE) -} -\arguments{ -\item{.data}{A \code{mids} object.} - -\item{...}{Expressions that return a -logical value, and are defined in terms of the variables in \code{.data$data}. -If multiple expressions are specified, they are combined with the \code{&} operator. -Only rows for which all conditions evaluate to \code{TRUE} are kept.} - -\item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the -resulting data, otherwise the grouping is kept as is.} -} -\value{ -An S3 object of class \code{mids} -} -\description{ -This function takes a \code{mids} object and returns a new -\code{mids} object that pertains to the subset of the data -identified by the expression in \dots. The expression may use -column values from the incomplete data in \code{.data$data}. -} -\note{ -The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}.
-The function constructs the elements of the filtered \code{mids} object as follows: -\tabular{ll}{ -\code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr -\code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr -\code{m} \tab Equals \code{.data$m}\cr -\code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr -\code{blocks} \tab Equals \code{.data$blocks}\cr -\code{call} \tab Equals \code{.data$call}\cr -\code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr -\code{method} \tab Equals \code{.data$method}\cr -\code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr -\code{visitSequence} \tab Equals \code{.data$visitSequence}\cr -\code{formulas} \tab Equals \code{.data$formulas}\cr -\code{post} \tab Equals \code{.data$post}\cr -\code{blots} \tab Equals \code{.data$blots}\cr -\code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr -\code{seed} \tab Equals \code{.data$seed}\cr -\code{iteration} \tab Equals \code{.data$iteration}\cr -\code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr -\code{chainMean} \tab Set to \code{NULL}\cr -\code{chainVar} \tab Set to \code{NULL}\cr -\code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr -\code{version} \tab Replaced with current version\cr -\code{date} \tab Replaced with current date -} -} -\examples{ -imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) - -# example with external logical vector -imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) - -nrow(complete(imp)) -nrow(complete(imp_f)) - -# example with calculated include vector -imp_f2 <- filter(imp, age >= 2 & hyp == 1) -nrow(complete(imp_f2)) # should be 5 -} -\seealso{ -\code{\link[dplyr]{filter}} -} -\author{ -Patrick Rockenschaub -} -\keyword{manip} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter.R 
+\name{filter.mids} +\alias{filter.mids} +\title{Subset rows of a \code{mids} object} +\usage{ +\method{filter}{mids}(.data, ..., .preserve = FALSE) +} +\arguments{ +\item{.data}{A \code{mids} object.} + +\item{...}{Expressions that return a +logical value, and are defined in terms of the variables in \code{.data$data}. +If multiple expressions are specified, they are combined with the \code{&} operator. +Only rows for which all conditions evaluate to \code{TRUE} are kept.} + +\item{.preserve}{Relevant when the \code{.data} input is grouped. +If \code{.preserve = FALSE} (the default), the grouping structure +is recalculated based on the resulting data, otherwise the grouping is kept as is.} +} +\value{ +An S3 object of class \code{mids} +} +\description{ +This function takes a \code{mids} object and returns a new +\code{mids} object that pertains to the subset of the data +identified by the expression in \dots. The expression may use +column values from the incomplete data in \code{.data$data}. +} +\note{ +The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. 
+The function constructs the elements of the filtered \code{mids} object as follows: +\tabular{ll}{ +\code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr +\code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr +\code{m} \tab Equals \code{.data$m}\cr +\code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr +\code{blocks} \tab Equals \code{.data$blocks}\cr +\code{call} \tab Equals \code{.data$call}\cr +\code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr +\code{method} \tab Equals \code{.data$method}\cr +\code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr +\code{visitSequence} \tab Equals \code{.data$visitSequence}\cr +\code{formulas} \tab Equals \code{.data$formulas}\cr +\code{post} \tab Equals \code{.data$post}\cr +\code{blots} \tab Equals \code{.data$blots}\cr +\code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr +\code{seed} \tab Equals \code{.data$seed}\cr +\code{iteration} \tab Equals \code{.data$iteration}\cr +\code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr +\code{chainMean} \tab Set to \code{NULL}\cr +\code{chainVar} \tab Set to \code{NULL}\cr +\code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr +\code{version} \tab Replaced with current version\cr +\code{date} \tab Replaced with current date +} +} +\examples{ +imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) + +# example with external logical vector +imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) + +nrow(complete(imp)) +nrow(complete(imp_f)) + +# example with calculated include vector +imp_f2 <- filter(imp, age >= 2 & hyp == 1) +nrow(complete(imp_f2)) # should be 5 +} +\seealso{ +\code{\link[dplyr]{filter}} +} +\author{ +Patrick Rockenschaub +} +\keyword{manip} diff --git a/man/mice.impute.svm.Rd b/man/mice.impute.svm.Rd new file mode 100644 index 
00000000..a5e8be9c --- /dev/null +++ b/man/mice.impute.svm.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mice.impute.svm.R +\name{mice.impute.svm} +\alias{mice.impute.svm} +\title{Imputation by the Support Vector Machine (mice-SVM)} +\usage{ +mice.impute.svm( + y, + ry, + x, + wy = NULL, + type = NULL, + C = 1, + scaled = TRUE, + kernel = "vanilladot", + tol = 0.001, + kpar = list(), + ... +) +} +\arguments{ +\item{y}{Vector to be imputed} + +\item{ry}{Logical vector of length \code{length(y)} indicating the +the subset \code{y[ry]} of elements in \code{y} to which the imputation +model is fitted. The \code{ry} generally distinguishes the observed +(\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} + +\item{x}{Numeric design matrix with \code{length(y)} rows with predictors for +\code{y}. Matrix \code{x} may have no missing values.} + +\item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value +indicates locations in \code{y} for which imputations are created.} + +\item{type}{A vector of length \code{ncol(x)} identifying the predictors. +Captured here to avoid conflicts with the SVM 'type' argument from mice's internal calls.} + +\item{C}{Cost of constraints violation (default = 1).} + +\item{scaled}{A logical vector indicating the variables to be scaled.} + +\item{kernel}{The kernel function used in training and predicting (default = "vanilladot").} + +\item{tol}{Tolerance of termination criterion (default = 0.001).} + +\item{kpar}{List of hyper-parameters for the kernel function (default = list()).} + +\item{...}{Other named arguments to be passed to \code{kernlab::ksvm()}.} +} +\value{ +A vector of length \code{sum(!ry)} with imputed values. +} +\description{ +This function performs proper multiple imputation using the Support Vector Machine (SVM) +combined with bootstrapping, as proposed by Takahashi (2026). 
It is specifically +designed for binary variables in high-dimensional data settings. +} +\details{ +Note: When using this method in high-dimensional settings, it is recommended +to set \code{eps = 0} in the \code{mice()} call to prevent \code{remove.lindep()} +from removing predictors before they reach the SVM model. +} +\references{ +Takahashi, M. (2026). Multiple Imputation based on the Support Vector Machine for +High-Dimensional Data with General Missing Patterns in Causal Inference. +\emph{Journal of Statistical Computation and Simulation}. +\doi{10.1080/00949655.2026.2660865} +} +\author{ +Masayoshi Takahashi, 2026 +}