# R code extracted from a knitr document; each `## ----` line marks the start
# of one source chunk. Only the setup chunk below is live code. Every other
# chunk is kept commented out, consistent with `eval = FALSE` in the setup
# chunk, so sourcing this file does not run any of the examples.

## ----setup, include=FALSE-----------------------------------------------------
# Default chunk options for the document: show the code (echo), do not run it
# (eval = FALSE), merge code and output into one block (collapse), and prefix
# output lines with "#>".
knitr::opts_chunk$set(
  echo = TRUE,
  eval = FALSE,
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
# # Option 1: Set in your R session
# Sys.setenv(OPENAI_API_KEY = "sk-...")
#
# # Option 2: Add to your ~/.Renviron file (persists across sessions)
# # OPENAI_API_KEY=sk-...
#
# # Verify the key is set
# nzchar(Sys.getenv("OPENAI_API_KEY"))

## -----------------------------------------------------------------------------
# library(sdcMicro)
# data(testdata)
#
# # Step 1: AI-assisted variable classification
# sdc <- AI_createSdcObj(dat = testdata, policy = "open")
#
# # Step 2: AI-assisted anonymization
# sdc <- AI_applyAnonymization(sdc, k = 3)
#
# # Step 3: Extract the anonymized data
# anon_data <- extractManipData(sdc)
# head(anon_data)

## -----------------------------------------------------------------------------
# # OpenAI (default)
# query_llm(prompt, provider = "openai")
#
# # Anthropic (native Messages API)
# query_llm(prompt, provider = "anthropic")
#
# # Any OpenAI-compatible endpoint (Ollama, Azure, vLLM, etc.)
# query_llm(prompt, provider = "custom",
#           base_url = "http://localhost:11434/v1",
#           model = "llama3")

## -----------------------------------------------------------------------------
# sdc <- AI_createSdcObj(dat = testdata, policy = "open")

## -----------------------------------------------------------------------------
# library(sdcMicro)
# data(testdata)
# sdc <- AI_createSdcObj(dat = testdata, policy = "open")

## -----------------------------------------------------------------------------
# # Reject and modify
# roles <- AI_createSdcObj(dat = testdata, policy = "open")
# # User presses 'n' — roles is returned as a list
# roles$keyVars <- c(roles$keyVars, "age")  # Add age as key variable
# sdc <- createSdcObj(testdata,
#                     keyVars = roles$keyVars,
#                     numVars = roles$numVars,
#                     weightVar = roles$weightVar,
#                     hhId = roles$hhId)

## -----------------------------------------------------------------------------
# sdc <- AI_applyAnonymization(sdc, k = 3)

## -----------------------------------------------------------------------------
# library(sdcMicro)
# data(testdata)
#
# # Step 1: Create sdcObj with AI-assisted variable classification
# sdc <- AI_createSdcObj(dat = testdata, policy = "open")
#
# # Step 2: Apply AI-assisted anonymization
# sdc <- AI_applyAnonymization(sdc, k = 3, n_strategies = 3)

## -----------------------------------------------------------------------------
# # Extract anonymized data
# anon_data <- extractManipData(sdc)
#
# # Review risk and utility
# print(sdc, "risk")

## -----------------------------------------------------------------------------
# # Minimize suppressions (prefer recoding over suppression)
# sdc <- AI_applyAnonymization(sdc, k = 3,
#                              weights = c(0.6, 0.2, 0.2))
#
# # Preserve categorical diversity (for cross-tabulations)
# sdc <- AI_applyAnonymization(sdc, k = 3,
#                              weights = c(0.2, 0.6, 0.2))

## -----------------------------------------------------------------------------
# # OpenAI (default) — best strategy quality
# sdc <- AI_applyAnonymization(sdc, k = 3)
#
# # Anthropic Claude — comparable quality, different provider
# sdc <- AI_applyAnonymization(sdc, k = 3, provider = "anthropic")
#
# # Local Ollama instance — maximum privacy, no external communication
# sdc <- AI_applyAnonymization(sdc, k = 3,
#                              provider = "custom",
#                              base_url = "http://localhost:11434/v1",
#                              model = "llama3")