## ----include = FALSE----------------------------------------------------------
# Vignette-wide knitr chunk defaults: collapse each chunk's source and output
# into a single block, and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(RiskyCNV)

## ----file_paths---------------------------------------------------------------
# Locate the example data sets shipped in the package's extdata/ directory.
# mustWork = TRUE makes a missing file fail right here with a clear error;
# by default system.file() returns "" and the problem would only surface
# later, inside whichever call receives the empty path.
sample_file     <- system.file("extdata", "sample_data.csv",
                               package = "RiskyCNV", mustWork = TRUE)
cnv_file        <- system.file("extdata", "cnv_data.txt",
                               package = "RiskyCNV", mustWork = TRUE)
gene_file       <- system.file("extdata", "gene_annotation.csv",
                               package = "RiskyCNV", mustWork = TRUE)
annotated_file  <- system.file("extdata", "annotated_cnv.csv",
                               package = "RiskyCNV", mustWork = TRUE)
cnv_matrix_file <- system.file("extdata", "cnv_matrix.csv",
                               package = "RiskyCNV", mustWork = TRUE)
rna_file        <- system.file("extdata", "rna_data.csv",
                               package = "RiskyCNV", mustWork = TRUE)

# Preview the clinical sample data
head(read.csv(sample_file))

## ----grade_preset-------------------------------------------------------------
# Build grade groups from the "gleason_score" column using the "prostate"
# preset; output_dir points at the session temp directory.
grade_groups <- extract_metadata(
  file_path    = sample_file,
  column_name  = "gleason_score",
  disease_type = "prostate",
  output_dir   = tempdir()
)

print(names(grade_groups))
# Number of entries in each group. lengths() always returns an integer
# vector, whereas sapply(x, length) degrades to an empty list when the
# result contains no groups.
print(lengths(grade_groups))

## ----grade_auto---------------------------------------------------------------
# Same score column as the preset example, but with disease_type = "auto" and
# an explicit request for five groups of type "grade".
grade_groups_auto <- extract_metadata(
  file_path = sample_file, column_name = "gleason_score",
  disease_type = "auto", n_groups = 5, group_type = "grade",
  output_dir = tempdir()
)

# Show which groups came back.
print(names(grade_groups_auto))

## ----risk_preset--------------------------------------------------------------
# Classify samples into risk groups from the "gleason_score" column using the
# "prostate" preset; output_dir points at the session temp directory.
risk_groups <- classify_risk(
  file_path    = sample_file,
  column_name  = "gleason_score",
  disease_type = "prostate",
  output_dir   = tempdir()
)

print(names(risk_groups))
# Number of entries in each risk group. lengths() always returns an integer
# vector, whereas sapply(x, length) degrades to an empty list when the
# result contains no groups.
print(lengths(risk_groups))

## ----risk_auto----------------------------------------------------------------
# Two risk groups
# disease_type = "auto" with n_groups = 2: request two risk groups.
risk_2 <- classify_risk(
  file_path = sample_file, column_name = "gleason_score",
  disease_type = "auto", n_groups = 2,
  output_dir = tempdir()
)
print(names(risk_2))

# Same call with n_groups = 4: request four risk groups.
risk_4 <- classify_risk(
  file_path = sample_file, column_name = "gleason_score",
  disease_type = "auto", n_groups = 4,
  output_dir = tempdir()
)
print(names(risk_4))

## ----aberration---------------------------------------------------------------
# Call aberrant regions from the example CNV file with an effect size of 0.3.
aberrations <- aberration(
  cnv_data_file = cnv_file,
  effect_size   = 0.3
)

# Aberrant regions per chromosome. vapply() guarantees an integer vector and
# fails loudly if an element has no row count, whereas sapply() would silently
# return a list in that case.
# NOTE(review): assumes every element of `aberrations` is a data frame or
# matrix -- confirm against aberration()'s documented return value.
print(vapply(aberrations, nrow, integer(1)))

## ----recurrent, eval = FALSE--------------------------------------------------
# recurrent_file <- recurrent(
#   x             = risk_groups,
#   risk_level    = "high_risk",
#   cnv_data_file = cnv_file,
#   threshold     = 2
# )
# 
# recurrent_data <- read.csv(recurrent_file)
# head(recurrent_data)

## ----annotate, eval = FALSE---------------------------------------------------
# annotated <- annotate(
#   genes_file = gene_file,
#   risk_file  = recurrent_file,
#   output_dir = tempdir()
# )
# 
# head(annotated)

## ----cnv_matrix---------------------------------------------------------------
# Run create_CNVMatrix() from the session temp directory (presumably so that
# any files it writes land there -- confirm against the function's docs).
# setwd() returns the previous directory, and the `finally` clause restores it
# even when the call fails, so an error cannot leave the session stranded in
# tempdir(). The error itself still propagates unchanged.
old_wd <- setwd(tempdir())
cnv_matrix <- tryCatch(
  create_CNVMatrix(input_file = annotated_file),
  finally = setwd(old_wd)
)

print(dim(cnv_matrix))
# Preview at most the first five columns. seq_len() yields an empty index for
# a zero-column result (1:0 would count down to c(1, 0)), and drop = FALSE
# keeps the two-dimensional shape when only one column exists.
print(cnv_matrix[, seq_len(min(5, ncol(cnv_matrix))), drop = FALSE])

## ----correlations-------------------------------------------------------------
# Run correlate_with_expr() from the session temp directory (presumably so
# that any files it writes land there -- confirm against the function's docs).
# setwd() returns the previous directory, and the `finally` clause restores it
# even when the call fails, so an error cannot leave the session stranded in
# tempdir(). The error itself still propagates unchanged.
old_wd <- setwd(tempdir())
corr_results <- tryCatch(
  correlate_with_expr(
    cnv_file = cnv_matrix_file,
    rna_file = rna_file
  ),
  finally = setwd(old_wd)
)

# Print the three result components in turn, each under its own heading.
cat("All correlations:\n")
print(corr_results$all_correlations)

cat("\nSignificant correlations (p < 0.05):\n")
print(corr_results$significant)

cat("\nHigh-confidence CNV-driven genes (p < 0.05, r > 0.8):\n")
print(corr_results$high_correlation)

## ----generalised, eval = FALSE------------------------------------------------
# # Breast cancer with Nottingham scores
# breast_grades <- extract_metadata(
#   file_path    = "breast_samples.csv",
#   column_name  = "nottingham_score",
#   disease_type = "auto",
#   n_groups     = 3,
#   group_type   = "grade",
#   output_dir   = tempdir()
# )
# 
# # Lymphoma with two risk groups (limited vs. advanced)
# lymphoma_risk <- classify_risk(
#   file_path    = "lymphoma_samples.csv",
#   column_name  = "ann_arbor_stage",
#   disease_type = "auto",
#   n_groups     = 2,
#   output_dir   = tempdir()
# )