% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pnadc.R
\name{pnadc_data}
\alias{pnadc_data}
\title{Download PNADC microdata}
\usage{
pnadc_data(
  module,
  year = NULL,
  vars = NULL,
  as_survey = FALSE,
  cache_dir = NULL,
  refresh = FALSE,
  lazy = FALSE,
  backend = c("arrow", "duckdb")
)
}
\arguments{
\item{module}{Character. The module identifier. Use \code{\link{pnadc_modules}}
to see available modules. Required.}

\item{year}{Numeric vector. Year(s) to download. Use \code{NULL} for all
available years for the module. Default is \code{NULL}.}

\item{vars}{Character vector. Variables to select. Use NULL for all variables.
Survey design variables (UPA, Estrato, V1028) and key demographic variables
are always included. Default is NULL.}

\item{as_survey}{Logical. If TRUE, returns a survey design object (requires
the \pkg{srvyr} package). Default is FALSE.}

\item{cache_dir}{Character. Directory for caching downloaded files.
Default uses \code{tools::R_user_dir("healthbR", "cache")}.}

\item{refresh}{Logical. If TRUE, re-download even if file exists in cache.
Default is FALSE.}

\item{lazy}{Logical. If TRUE, returns a lazy query object instead of a
tibble. Requires the \pkg{arrow} package. The lazy object supports
dplyr verbs (filter, select, mutate, etc.) which are pushed down
to the query engine before collecting into memory. Call
\code{dplyr::collect()} to materialize the result. Default is FALSE.}

\item{backend}{Character. Backend for lazy evaluation: \code{"arrow"}
(default) or \code{"duckdb"}. Only used when \code{lazy = TRUE}.
DuckDB backend requires the \pkg{duckdb} package.}
}
\value{
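A tibble with PNADC microdata, or a \pkg{srvyr} survey design object
if \code{as_survey = TRUE}. If \code{lazy = TRUE}, a lazy query object is
returned instead of a tibble; call \code{dplyr::collect()} to materialize it.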
}
\description{
Downloads and returns PNADC microdata for the specified module and year(s)
from the IBGE FTP server. Data is cached locally to avoid repeated downloads.
When the \pkg{arrow} package is installed, data is cached in Parquet format
for faster subsequent reads.
}
\details{
PNAD Continua (Pesquisa Nacional por Amostra de Domicilios Continua) is a
quarterly household survey conducted by IBGE. This function provides access
to supplementary modules with health-related content.
\subsection{Available modules}{
\itemize{
\item \code{deficiencia}: Persons with disabilities (2019, 2022, 2024)
\item \code{habitacao}: Housing characteristics (2012-2019, 2022-2024)
\item \code{moradores}: General characteristics of residents (2012-2019, 2022-2024)
\item \code{aps}: Primary health care (2022)
}
}

\subsection{Survey design variables}{

For proper statistical analysis with complex survey design, the following
variables are always included:
\itemize{
\item \code{UPA}: Primary sampling unit
\item \code{Estrato}: Stratum
\item \code{V1028}: Survey weight
}

Use \code{as_survey = TRUE} to get a properly weighted survey design object
for analysis with the \pkg{srvyr} package.
}
}
\section{Data source}{

Data is downloaded from the IBGE FTP server:
\url{https://ftp.ibge.gov.br/Trabalho_e_Rendimento/Pesquisa_Nacional_por_Amostra_de_Domicilios_continua/}
}

\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# download deficiencia module for 2022
df <- pnadc_data(module = "deficiencia", year = 2022, cache_dir = tempdir())

# download with survey design
svy <- pnadc_data(
  module = "deficiencia",
  year = 2022,
  as_survey = TRUE,
  cache_dir = tempdir()
)

# select specific variables
df_subset <- pnadc_data(
  module = "deficiencia",
  year = 2022,
  vars = c("S11001", "S11002"),
  cache_dir = tempdir()
)
\dontshow{\}) # examplesIf}
}
