#' Tabulate special values often representing missing data
#'
#' Generates a summary of counts and fractions of records from the
#' variables listed in `xVars` that are missing (using the standard R
#' designation NA), blank (0 length, common in character data),
#' spaces (one or more, also common in character data), and zeros
#' or negative values in numerical data (sometimes indicative of
#' range errors or disguised missing data).
#'
#' Blank and space-only values are counted for character and factor
#' variables only; zero and negative values are counted for numeric
#' variables only. Counts that do not apply to a variable's type are
#' reported as 0.
#'
#' @param DF data frame containing all variables in the `xVars` list
#' @param xVars character vector of the names of variables to be examined
#' (default NULL means characterize all variables in data frame `DF`)
#' @param subsetIndex index into record subset in `DF` to be examined
#' (default NULL means characterize all records in `DF`)
#' @param dgts number of digits in frequency results (default = 3)
#'
#' @return data frame with one row for each variable in `xVars` list and
#' these columns:
#'   * Variable an element of the `xVars` list
#'   * nMiss number of records exhibiting the missing value NA (or NaN)
#'   * fracMiss fraction of records represented by `nMiss`
#'   * nBlank number of records listing the value blank (0 length character string)
#'   * fracBlank fraction of records represented by `nBlank`
#'   * nSpaces number of records consisting only of one or more spaces
#'   * fracSpaces fraction of records represented by `nSpaces`
#'   * nZero number of records listing the numerical value zero
#'   * fracZero fraction of records represented by `nZero`
#'   * nNeg number of records listing a negative numerical value
#'   * fracNeg fraction of records represented by `nNeg`
#' @export
#'
#' @examples
#' FirstAnomalyDataFrame
#' TabulateSpecialValues(FirstAnomalyDataFrame)
TabulateSpecialValues <- function(DF, xVars = NULL, subsetIndex = NULL, dgts = 3){
  #
  stopifnot("DF must be a data frame"= is.data.frame(DF))
  if (is.null(xVars)){
    xVars <- colnames(DF)
  }  else {
    stopifnot("xVars not found in DF"= all(xVars %in% colnames(DF)))
  }
  nVar <- length(xVars)
  #
  # drop = FALSE keeps the selection a data frame even when only one
  # variable is requested; otherwise it would collapse to a bare vector
  if (is.null(subsetIndex)){
    dfSub <- DF[, xVars, drop = FALSE]
  } else {
    dfSub <- DF[subsetIndex, xVars, drop = FALSE]
  }
  n <- nrow(dfSub)
  stopifnot("Specified data subset is empty"= n > 0)
  stopifnot("dgts must be positive"= dgts > 0)
  #
  # Counts are stored as numeric (double) vectors, one element per variable
  nMiss <- vector("numeric", nVar)
  nBlank <- vector("numeric", nVar)
  nSpaces <- vector("numeric", nVar)
  nZero <- vector("numeric", nVar)
  nNeg <- vector("numeric", nVar)
  #
  # seq_len() yields an empty loop when no variables are selected
  for (i in seq_len(nVar)){
    # [[ ]] extracts the column itself for both data frames and tibbles
    x <- dfSub[[i]]
    nMiss[i] <- sum(is.na(x))
    # Only text-like columns can hold blank or space-only strings
    if (is.character(x) || is.factor(x)){
      nBlank[i] <- sum(x == "", na.rm = TRUE)
      nSpaces[i] <- sum(grepl("^[ ]+$", x))
    }
    # Zero and negative counts are only meaningful for numeric columns;
    # na.rm = TRUE keeps missing values out of these counts
    if (is.numeric(x)){
      nZero[i] <- sum(x == 0, na.rm = TRUE)
      nNeg[i] <- sum(x < 0, na.rm = TRUE)
    }
  }
  #
  fracMiss <- round(nMiss/n, digits = dgts)
  fracBlank <- round(nBlank/n, digits = dgts)
  fracSpaces <- round(nSpaces/n, digits = dgts)
  fracZero <- round(nZero/n, digits = dgts)
  fracNeg <- round(nNeg/n, digits = dgts)
  #
  data.frame(Variable = xVars, nMiss = nMiss, fracMiss = fracMiss,
             nBlank = nBlank, fracBlank = fracBlank, nSpaces = nSpaces,
             fracSpaces = fracSpaces, nZero = nZero, fracZero = fracZero,
             nNeg = nNeg, fracNeg = fracNeg)
}

