An artificial dataset, intended for presenting the extended features of dataMaid, which is a toolset for identifying potential errors in a dataset.

exampleData

Format

A data.frame with 300 observations on the following 6 variables.

addresses

a factor with fictitious US addresses

binomial

a numeric vector with a binomially distributed variable

poisson

a numeric vector with a Poisson distributed variable

gauss

a numeric vector with a Gaussian distributed variable

zigauss

a numeric vector with a zero-inflated Gaussian distributed variable

bpinteraction

a factor with interactions between binomial and poisson values

Source

Artificial data

Examples

# NOT RUN {
# Example: define custom check, summary and visual functions and plug them
# into a data report for exampleData.
# NOTE(review): summaryResult(), summaryFunction(), visualFunction(),
# checkFunction(), checkResult(), messageGenerator(), allClasses(), the
# default*() helpers, summarize(), makeDataReport() and the toyData /
# exampleData datasets are not defined here; presumably they come from the
# dataMaid package, which must be attached before running this example.

# Pre-check that flags variables which look like ID codes: every value has
# the same number of characters, and that number is greater than 10.
#
# v     Variable (vector) to inspect.
# nMax  Not used by this check.
# ...   Ignored.
#
# Returns a list with elements `problem` (logical) and `message` (character).
isID <- function(v, nMax = NULL, ...) {
  out <- list(problem = FALSE, message = "")
  supported <- c("character", "factor", "labelled", "numeric", "integer")

  # any() keeps the condition a single TRUE/FALSE even when `v` carries
  # several classes; a longer condition is an error in `if` on recent R.
  if (any(class(v) %in% supported)) {
    v <- as.character(v)
    lengths <- nchar(v)

    # isTRUE() guards against an NA condition (e.g. a variable that is
    # entirely NA); && is the scalar operator appropriate inside `if`.
    if (isTRUE(all(lengths > 10)) && length(unique(lengths)) == 1) {
      out$problem <- TRUE
      out$message <- "Warning: This variable seems to contain ID codes!"
    }
  }

  out
}

# Summary function: number of entries in `v` that are equal to 0
# (missing values are not counted). The count is passed to summaryResult().
countZeros <- function(v, ...) {
  res <- length(which(v == 0))
  summaryResult(list(feature = "No. zeros", result = res, value = res))
}
countZeros <- summaryFunction(
  countZeros,
  description = "Count number of zeros",
  classes = allClasses()
)

summarize(toyData, numericSummaries = c(defaultNumericSummaries()))

# Visual function: mosaic plot of the frequency table of `v`.
#
# v       Variable to plot.
# vnam    Used as the plot title (`main`).
# doEval  If TRUE the plotting call is evaluated; otherwise the call is
#         returned deparsed, as a character vector.
mosaicVisual <- function(v, vnam, doEval) {
  thisCall <- call("mosaicplot", table(v), main = vnam, xlab = "")
  if (doEval) {
    return(eval(thisCall))
  }
  deparse(thisCall)
}
mosaicVisual <- visualFunction(
  mosaicVisual,
  description = "Mosaic plots using graphics",
  classes = allClasses()
)

# Check function: report the unique non-missing values of `v` in which
# every match of the pattern below is found, i.e. values containing a colon
# directly between two characters of the [[:xdigit:]] class.
#
# v     Variable to inspect.
# nMax  Forwarded to messageGenerator().
# ...   Ignored.
#
# Returns the value of checkResult() applied to a list with `problem`,
# `message` and `problemValues`.
identifyColons <- function(v, nMax = Inf, ...) {
  v <- unique(na.omit(v))
  problemMessage <- "Note: The following values include colons:"

  # vapply() always returns a logical vector, also for empty input, where
  # sapply() would return list() and make the subsetting below fail.
  hasColon <- vapply(
    gregexpr("[[:xdigit:]]:[[:xdigit:]]", v),
    function(x) all(x != -1),
    logical(1)
  )
  problemValues <- v[hasColon]
  problem <- length(problemValues) > 0

  problemStatus <- list(problem = problem, problemValues = problemValues)
  outMessage <- messageGenerator(problemStatus, problemMessage, nMax)

  checkResult(list(
    problem = problem,
    message = outMessage,
    problemValues = problemValues
  ))
}
identifyColons <- checkFunction(
  identifyColons,
  description = "Identify non-suffixed nor -prefixed colons",
  classes = c("character", "factor", "labelled")
)

# Build the report using the custom functions alongside the defaults.
# NOTE(review): the last argument was spelled `labelledCheck` in the
# original; it is written `labelledChecks` here to match the sibling
# `characterChecks` / `factorChecks` arguments -- confirm against the
# makeDataReport() signature.
makeDataReport(
  exampleData,
  replace = TRUE,
  preChecks = c("isKey", "isEmpty", "isID"),
  allVisuals = "mosaicVisual",
  characterSummaries = c(defaultCharacterSummaries(), "countZeros"),
  factorSummaries = c(defaultFactorSummaries(), "countZeros"),
  labelledSummaries = c(defaultLabelledSummaries(), "countZeros"),
  numericSummaries = c(defaultNumericSummaries(), "countZeros"),
  integerSummaries = c(defaultIntegerSummaries(), "countZeros"),
  characterChecks = c(defaultCharacterChecks(), "identifyColons"),
  factorChecks = c(defaultFactorChecks(), "identifyColons"),
  labelledChecks = c(defaultLabelledChecks(), "identifyColons")
)
# }