% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/format_columns.R
\name{format_columns}
\alias{format_columns}
\title{Format and standardize column names and data types of an occurrence dataset}
\usage{
format_columns(
  occ,
  metadata,
  extract_binomial = TRUE,
  binomial_from = NULL,
  include_subspecies = FALSE,
  include_variety = FALSE,
  check_numeric = TRUE,
  numeric_columns = NULL,
  check_encoding = TRUE,
  data_source = NULL,
  progress_bar = FALSE,
  verbose = FALSE
)
}
\arguments{
\item{occ}{(data.frame or data.table) a dataset with occurrence records,
preferably obtained from \code{import_gbif()}, \code{get_specieslink()}, \code{get_bien()},
or \code{get_idigbio()}.}

\item{metadata}{(character or data.frame) if a character, one of 'gbif',
'specieslink', 'bien', or 'idigbio', specifying which metadata template to
use (the corresponding data frames are available in
\code{RuHere::prepared_metadata}). If a data.frame is provided, it must have 21
columns (see \strong{Details}).}

\item{extract_binomial}{(logical) whether to create a column with the
binomial name of the species. If FALSE, it will create a column "species"
with the exact name stored in the scientificName column. Default is TRUE.}

\item{binomial_from}{(character) the column name in metadata from which to
extract the binomial name. Only applicable if \code{extract_binomial = TRUE}.
If \code{metadata} corresponds to one of the predefined sources ('gbif',
'specieslink', 'bien', or 'idigbio'), predefined columns will be used
automatically. Default is "scientificName".}

\item{include_subspecies}{(logical) whether to include subspecies in the
binomial name. Only applicable if \code{extract_binomial = TRUE}. If TRUE, the
function includes any infraspecific epithet after the pattern "subsp.".
Default is FALSE.}

\item{include_variety}{(logical) whether to include variety in the binomial
name. Only applicable if \code{extract_binomial = TRUE}. If TRUE, the function
includes any infraspecific epithet after the pattern "var.". Default is FALSE.}

\item{check_numeric}{(logical) whether to check and coerce the columns
specified in \code{numeric_columns} to numeric type. Default is TRUE.}

\item{numeric_columns}{(character) a vector of column names that must be
numeric. Default is NULL, meaning that if \code{check_numeric = TRUE}, the
following columns will be coerced: 'decimalLongitude', 'decimalLatitude',
'coordinateUncertaintyInMeters', 'elevation', and 'year'.}

\item{check_encoding}{(logical) whether to check and fix the encoding of
columns that typically contain special characters (see \strong{Details}). Default
is TRUE.}

\item{data_source}{(character) the source of the occurrence records. Default
is NULL, meaning it will use the same string provided in \code{metadata}. If
\code{metadata} is a user-defined data.frame, this argument must be specified.}

\item{progress_bar}{(logical) whether to display a progress bar during
processing. If TRUE, the 'pbapply' package must be installed. Default is
\code{FALSE}.}

\item{verbose}{(logical) whether to print messages about the progress.
Default is FALSE.}
}
\value{
A data.frame with standardized column names and data types according to the
specified metadata.
}
\description{
Format and standardize column names and data types of an occurrence dataset
}
\details{
If a user-defined metadata data.frame is provided, it must include the
following 21 columns:
'scientificName', 'collectionCode', 'catalogNumber', 'decimalLongitude',
'decimalLatitude', 'coordinateUncertaintyInMeters', 'elevation', 'country',
'stateProvince', 'municipality', 'locality', 'year', 'eventDate',
'recordedBy', 'identifiedBy', 'basisOfRecord', 'occurrenceRemarks', 'habitat',
'datasetName', 'datasetKey', and 'key'.

If \code{check_encoding = TRUE}, the function will inspect and, if necessary, fix
the encoding of these columns:
'collectionCode', 'catalogNumber', 'country', 'stateProvince',
'municipality', 'locality', 'eventDate', 'recordedBy', 'identifiedBy',
'basisOfRecord', and 'datasetName'.
}
\examples{
# Example with GBIF
data("occ_gbif", package = "RuHere") #Import data example
gbif_standardized <- format_columns(occ_gbif, metadata = "gbif")
# Example with SpeciesLink
data("occ_splink", package = "RuHere") #Import data example
splink_standardized <- format_columns(occ_splink, metadata = "specieslink")
# Example with BIEN
data("occ_bien", package = "RuHere") #Import data example
bien_standardized <- format_columns(occ_bien, metadata = "bien")
# Example with idigbio
data("occ_idig", package = "RuHere") #Import data example
idig_standardized <- format_columns(occ_idig, metadata = "idigbio")

}
