# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#'Convert ATC Code for each patients to the corresponding DFS number of the ATC tree 
#'
#' @param tree : ATC tree (we assume that there is a column 'ATCCode' )
#' @param patientATC : patients observations, for each patient we got a string 
#'containing taken medications (ATC code)
#' @examples
#'  ATC_code <- c('A01AA30 A01AB03', 'A10AC30')
#'  ATCtoNumeric(ATC_code, ATC_Tree_UpperBound_2024)
#'
#' @return a matrix of the same size as patientATC but containing integer 
#' that are the index of the corresponding ATC code.
#' @export
ATCtoNumeric <- function(patientATC, tree) {
  # Forward both arguments, in signature order, to the native entry point.
  .Call(
    `_emcAdr_ATCtoNumeric`,
    patientATC,
    tree
  )
}

#'Convert the histogram returned by the DistributionApproximation function, to a real number distribution
#'(that can be used in a test for example) 
#'
#'@param vec : distribution returned by the DistributionApproximation function
#'
#'@return A vector containing sampled risk during the MCMC algorithm 
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#'  DistributionApproximationResults = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy)
#'   histogramToDitribution(DistributionApproximationResults$ScoreDistribution)
#' }
#'@export
histogramToDitribution <- function(vec) {
  # Hand the histogram to the native entry point and return its result as-is.
  .Call(
    `_emcAdr_histogramToDitribution`,
    vec
  )
}

#' Output the outstanding score (Outstanding_score) outputed by the MCMC algorithm
#' in a special format
#' 
#' @param outstanding_score : Outstanding_score outputed by MCMC algorithm to be converted
#' to the ScoreDistribution format
#' @param max_score : max_score parameter used during the MCMC algorithm
#' 
#' @return outstanding_score in a format compatible with MCMC algorithm output
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  data("FAERS_myopathy")
#' 
#'   DistributionApproximationResults = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy)
#'   OutsandingScoreToDistribution(DistributionApproximationResults$Outstanding_score, max_score = 100)
#' }
#' @export
OutsandingScoreToDistribution <- function(outstanding_score, max_score) {
  # Both arguments are passed through unchanged to the native entry point.
  .Call(
    `_emcAdr_OutsandingScoreToDistribution`,
    outstanding_score,
    max_score
  )
}

#'Function used to compute the Relative Risk on a list of cocktails
#'
#'@param cocktails : A list containing cocktails in the form of vector of integers (ATC index)
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root)
#'@param observations : observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#' 
#'@return RR score among "cocktails" parameters
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' cocktails = list(c(561, 904),
#'                c(1902, 4585))
#' 
#' RR_of_cocktails = compute_RR_on_list(cocktails = cocktails,
#'                               ATCtree = ATC_Tree_UpperBound_2024, 
#'                               observations = FAERS_myopathy)
#'}
#'@export
compute_RR_on_list <- function(cocktails, ATCtree, observations, num_thread = 1L) {
  # Pass every argument through, in signature order, to the native entry point.
  .Call(
    `_emcAdr_compute_RR_on_list`,
    cocktails,
    ATCtree,
    observations,
    num_thread
  )
}

#'Function used to compute the Hypergeometric score on a list of cocktails
#'
#'@param cocktails : A list containing cocktails in the form of vector of integers (ATC index)
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root)
#'@param observations : observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#' 
#'@return Hypergeometric score among "cocktails" parameters
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' cocktails = list(c(561, 904),
#'                c(1902, 4585))
#' 
#' Hypergeom_of_cocktails = compute_hypergeom_on_list(cocktails = cocktails,
#'                               ATCtree = ATC_Tree_UpperBound_2024, 
#'                               observations = FAERS_myopathy)
#'}
#'@export
compute_hypergeom_on_list <- function(cocktails, ATCtree, observations, num_thread = 1L) {
  # Thin wrapper: the four arguments go to the native entry point untouched.
  .Call(
    `_emcAdr_compute_hypergeom_on_list`,
    cocktails,
    ATCtree,
    observations,
    num_thread
  )
}

#'Function used to compute the Hypergeometric score on a cocktail
#'
#'@param cocktail : A cocktail in the form of vector of integers (ATC index)
#'@param upperBounds : ATC tree  upper bound of the DFS (without the root)
#'@param ADRCount : number of patient experiencing ADR in dataset
#'@param observationsADR : observation of the ADR for each patients
#'(a vector containing the ADR on which we want to compute the risk distribution)
#'@param observationsMedication : observation of the drug intake for each patients
#' on which we want to compute the risk distribution
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#' 
#'@return Hypergeometric score of the "cocktail" parameter
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' ADRCount = sum(FAERS_myopathy$patientADR)
#' cocktail = c(561, 904)
#' 
#' Hypergeom_of_cocktail = compute_hypergeom_cocktail(cocktail = cocktail,
#'                               upperBounds = ATC_Tree_UpperBound_2024$upperBound,
#'                               ADRCount =  ADRCount,
#'                               observationsADR = FAERS_myopathy$patientADR,
#'                               observationsMedication = FAERS_myopathy$patientATC,
#'                               num_thread=8)
#'}
#'@export
compute_hypergeom_cocktail <- function(cocktail, upperBounds, ADRCount,
                                       observationsADR, observationsMedication,
                                       num_thread = 1L) {
  # Arguments are forwarded positionally, matching the signature order.
  .Call(
    `_emcAdr_compute_hypergeom_cocktail`,
    cocktail,
    upperBounds,
    ADRCount,
    observationsADR,
    observationsMedication,
    num_thread
  )
}

#' Used to add the p_value to each cocktail of a csv_file that is an
#' output of the genetic algorithm
#' @param distribution_outputs A list of distribution of cocktails of different sizes
#' in order to compute the p_value for multiple cocktail sizes
#' @param filename The file name of the .csv file containing the output
#' @param filtred_distribution Should the p-values be computed using the filtered distribution
#' (\code{TRUE}) or the normal distribution (\code{FALSE}, the default)
#' @param sep The separator used in the csv file (';' by default)
#' 
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  data("FAERS_myopathy")
#' 
#'   DistributionApproximationResults_size2 = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy, Smax = 2)
#'             
#'   DistributionApproximationResults_size3 = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy, Smax = 3)
#'             
#'   score_distribution_list = list(DistributionApproximationResults_size2,
#'                               DistributionApproximationResults_size3)
#'   p_value_csv_file(score_distribution_list, "path/to/output.csv")
#' }
#' @return A real valued number vector representing the p-value of the inputed
#' csv file filename, computed on the distribution_outputs List.
#' @export
p_value_csv_file <- function(distribution_outputs, filename,
                             filtred_distribution = FALSE, sep = ";") {
  # Call the native entry point, then return its result invisibly so that
  # nothing is auto-printed at the console.
  native_result <- .Call(
    `_emcAdr_p_value_csv_file`,
    distribution_outputs,
    filename,
    filtred_distribution,
    sep
  )
  invisible(native_result)
}

#' Used to add the p_value to each cocktail of an output of the genetic algorithm
#' @param distribution_outputs A list of distribution of cocktails of different sizes
#' in order to compute the p_value for multiple cocktail sizes
#' @param genetic_results outputs of the genetic algorithm
#' @param filtred_distribution Should the p-values be computed using the filtered distribution
#' (\code{TRUE}) or the normal distribution (\code{FALSE}, the default)
#' 
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  data("FAERS_myopathy")
#'   DistributionApproximationResults_size2 = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy, Smax = 2)
#'             
#'   DistributionApproximationResults_size3 = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy, Smax = 3)
#'             
#'   score_distribution_list = list(DistributionApproximationResults_size2,
#'                               DistributionApproximationResults_size3)
#'   genetic_results = GeneticAlgorithm(epochs = 10, nbIndividuals = 20, 
#'             ATCtree = ATC_Tree_UpperBound_2024,
#'             observations = FAERS_myopathy)
#'   p_value_genetic_results(score_distribution_list, genetic_results)
#' }
#' @return A real valued number vector representing the p-value of the inputed
#' genetic algorithm results (genetic_results) computed on the 
#' distribution_outputs List.
#' @export
p_value_genetic_results <- function(distribution_outputs, genetic_results,
                                    filtred_distribution = FALSE) {
  # Forward the three arguments unchanged to the native entry point.
  .Call(
    `_emcAdr_p_value_genetic_results`,
    distribution_outputs,
    genetic_results,
    filtred_distribution
  )
}

#' Used to add the p_value to each cocktail of cocktail list
#' @param distribution_outputs A list of distribution of cocktails of different sizes
#' in order to compute the p_value for multiple cocktail sizes
#' @param cocktails A list containing cocktails in the form of vector of integers (ATC index)
#' @param ATCtree ATC tree with upper bound of the DFS (without the root)
#' @param observations observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#' @param filtred_distribution Should the p-values be computed using the filtered distribution
#' (\code{TRUE}) or the normal distribution (\code{FALSE}, the default)
#' @param num_thread Number of thread to run in parallel if openMP is available, 1 by default
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  data("FAERS_myopathy")
#'  
#'   DistributionApproximationResults_size2 = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy, Smax = 2)
#'             
#'   DistributionApproximationResults_size3 = DistributionApproximation(epochs = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024, observations = FAERS_myopathy, Smax = 3)
#'             
#'   score_distribution_list = list(DistributionApproximationResults_size2,
#'                               DistributionApproximationResults_size3)
#' 
#'   cocktails = list(c(561, 904),
#'                c(1902, 4585))
#'  
#'   p_value_cocktails(score_distribution_list, cocktails, ATC_Tree_UpperBound_2024,
#'                     FAERS_myopathy)
#' }
#' @return A real valued number vector representing the p-value of the inputed
#' cocktails computed on the distribution_outputs List.
#' @export
p_value_cocktails <- function(distribution_outputs, cocktails, ATCtree,
                              observations, num_thread = 1L,
                              filtred_distribution = FALSE) {
  # Note the positional order: num_thread comes before filtred_distribution,
  # both in the signature and in the native call.
  .Call(
    `_emcAdr_p_value_cocktails`,
    distribution_outputs,
    cocktails,
    ATCtree,
    observations,
    num_thread,
    filtred_distribution
  )
}

#' Function used to convert your genetic algorithm results that are stored into 
#' a .csv file to a Data structure that can be used by the clustering algorithm
#' @param ATC_name the ATC_name column of the ATC tree
#' @param filename Name of the file where the results are located
#' @param sep the separator to use when opening the csv file (';' by default)
#' @return An R List that can be used by other algorithms (e.g. clustering algorithm)
#' @examples
#' \donttest{
#'   data("ATC_Tree_UpperBound_2024")
#'   genetic_results = csv_to_population(ATC_Tree_UpperBound_2024$Name,
#'                     "path/to/output.csv")
#' }
#' @export
csv_to_population <- function(ATC_name, filename, sep = ";") {
  # Pass the name column, file name and separator to the native entry point.
  .Call(
    `_emcAdr_csv_to_population`,
    ATC_name,
    filename,
    sep
  )
}

#' Function used to convert a string vector of drugs in form "drug1:drug2" to 
#' a vector of index of the ATC tree ex: c(ATC_index(drug1), ATC_index(drug2))
#' @param ATC_name the ATC_name column of the ATC tree
#' @param lines A string vector of drugs cocktail in the form "drug1:drug2:...:drug_n"
#' @param last_element A boolean to indicate whether we are matching the drug to 
#' the first matching occurrence in the tree or the last one. Default is false
#' @return An R List that can be used by other algorithms (e.g. clustering algorithm)
#' @examples
#' \donttest{
#'   data("ATC_Tree_UpperBound_2024")
#'   string_list = c('hmg coa reductase inhibitors:nervous system',
#'                   'metformin:prasugrel')
#'   string_list_to_int_cocktails(ATC_Tree_UpperBound_2024$Name,
#'                               string_list)
#' }
#' @export
string_list_to_int_cocktails <- function(ATC_name, lines, last_element = FALSE) {
  # Forward the three arguments unchanged to the native entry point.
  .Call(
    `_emcAdr_string_list_to_int_cocktails`,
    ATC_name,
    lines,
    last_element
  )
}

#' Function used to convert integer cocktails (like the one outputed by the distributionApproximation function)
#' to string cocktail in order to make them more readable
#' 
#' @param cocktails cocktails vector to be converted (index in the ATC tree)
#' @param ATC_name The ATC_name column of the ATC tree
#' 
#' @return The name of integer cocktails in cocktails
#' @examples
#' \donttest{
#'   data("ATC_Tree_UpperBound_2024")
#'   int_list = list(c(561, 904),
#'                c(1902, 4585))
#'   int_cocktail_to_string_cocktail(int_list, ATC_Tree_UpperBound_2024$Name)
#' }
#' @export
int_cocktail_to_string_cocktail <- function(cocktails, ATC_name) {
  # Both arguments go straight to the native entry point.
  .Call(
    `_emcAdr_int_cocktail_to_string_cocktail`,
    cocktails,
    ATC_name
  )
}

#' Filter out drug cocktails with high-level ATC classifications
#' 
#' This function iterates through a collection of drug combinations (cocktails) and filters out 
#' those that have a ratio of "high-level" nodes (ATC codes with length <= 3) exceeding 
#' the specified threshold. This is useful for removing overly generic drug categories 
#' from results.
#'
#' @param solutions A \code{Rcpp::DataFrame} containing the results to filter. Must include columns: 
#'   "score", "RR", "p_value", "n.patient.taking.C", "n.patient.taking.C.and.having.AE", and "Cocktail".
#' @param ATC_name A vector of strings containing the ATC codes/names used for mapping.
#' @param ATC_length An integer vector where each element represents the length (hierarchy level) 
#'   of the corresponding ATC code in \code{ATC_name}.
#' @param find_last_occurence Logical. If \code{TRUE} (default), the mapping logic will look for 
#'   the last occurrence of a drug name in the reference list.
#' @param max_height_ratio A double (default 0.5) representing the maximum allowable proportion 
#'   of high-level nodes (length <= 3) in a cocktail. Cocktails exceeding this ratio are removed.
#'
#' @return A \code{Rcpp::DataFrame} with the same columns as \code{solutions}, containing only 
#'   the cocktails that met the \code{max_height_ratio} criteria.
#' @export
remove_higher_cocktails <- function(solutions, ATC_name, ATC_length,
                                    find_last_occurence = TRUE,
                                    max_height_ratio = .5) {
  # All five arguments are forwarded positionally to the native entry point.
  .Call(
    `_emcAdr_remove_higher_cocktails`,
    solutions,
    ATC_name,
    ATC_length,
    find_last_occurence,
    max_height_ratio
  )
}

#'The MCMC method that runs the random walk on a single cocktail in order to estimate the distribution of score among cocktails of size Smax.
#'
#'@param epochs : number of steps for the MCMC algorithm
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root, also see on the github repo for an example)
#'@param observations : real observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#'
#'@param temperature : starting temperature, default = 1 (denoted T in the article)
#'@param nbResults : Number of returned solution (Cocktail of size Smax with the best observed score during the run), 5 by default
#'@param Smax : Size of the cocktail we approximate the distribution from
#'@param p_type1 : probability to operate type1 mutation. Note :
#'the probability to operate the type 2 mutation is then 1 - P_type1. P_type1 must be in [0;1]. Default is .01
#'@param beta : filter the minimum number of patients that must have taken the 
#'cocktail for his risk to be taken into account in the DistributionScoreBeta default is 4
#'@param max_score : maximum number the score can take. Score greater than this 
#'one would be added to the distribution as the value max_score. Default is 500
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#'@param verbose : Output summary (default is false)
#'
#'@return If no problem, return a List containing :
#' - ScoreDistribution : the distribution of the score as an array with each cells
#' representing the number of risks =  (index-1)/ 10
#' - Outstanding_score : An array of the score greater than max_score,
#' - Best_cocktails : the nbResults bests cocktails encountered during the run.
#' - Best_scores : Score corresponding to the bestCocktails.
#' - Filtered_score_distribution : Distribution containing score for cocktails taken by at
#' least beta patients.
#' - Best_cocktails_beta : the nbResults bests cocktails taken by at least beta patients
#' encountered during the run.
#' - Best_scores_beta : Score corresponding to the bestCocktailsBeta.
#' - cocktailSize : Smax parameter used during the run.
#'; Otherwise the list is empty
#'
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' estimation = DistributionApproximation(epochs = 10, ATCtree = ATC_Tree_UpperBound_2024,
#'             observations = FAERS_myopathy)
#'}
#'@export
DistributionApproximation <- function(epochs, ATCtree, observations,
                                      temperature = 1L, nbResults = 5L,
                                      Smax = 2L, p_type1 = .01, beta = 4L,
                                      max_score = 500L, num_thread = 1L,
                                      verbose = FALSE) {
  # Forward every argument, in signature order, to the native entry point.
  .Call(
    `_emcAdr_DistributionApproximation`,
    epochs,
    ATCtree,
    observations,
    temperature,
    nbResults,
    Smax,
    p_type1,
    beta,
    max_score,
    num_thread,
    verbose
  )
}

#'Genetic algorithm, trying to reach riskiest cocktails (the ones which maximize
#'the fitness function, Hypergeometric score in our case)
#'
#'@param epochs : number of steps of the algorithm 
#'@param nbIndividuals : size of the population
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root)
#'@param observations : real observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#'@param diversity : enable the diversity mechanism of the algorithm
#' (favor the diversity of cocktail in the population),  default is false
#'@param p_crossover : probability to operate a crossover on the crossover phase. Default is 80\%
#'@param p_mutation : probability to operate a mutation after the crossover phase. Default is 1\%
#'@param nbElite : number of best individual we keep from generation to generation. Default is 0
#'@param tournamentSize : size of the tournament (select the best individual 
#'between tournamentSize sampled individuals) 
#'@param alpha : when making a type 1 mutation you have (alpha / size of cocktail) chance to add a drug. 
#'@param summary : print the summary of population at each steps ? 
#'
#'@return If no problem, return a List :
#' - meanFitnesses : The mean score of the population at each epochs of the algorithm.
#' - BestFitnesses : The best score of the population at each epochs of the algorithm.
#' - FinalPopulation : The final population of the algorithm when finished (medications
#' and corresponding scores)
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' results = GeneticAlgorithm(epochs = 10, nbIndividuals = 10, 
#'             ATCtree = ATC_Tree_UpperBound_2024,
#'             observations = FAERS_myopathy)
#'}
#'@export
GeneticAlgorithm <- function(epochs, nbIndividuals, ATCtree, observations,
                             num_thread = 1L, diversity = FALSE,
                             p_crossover = .80, p_mutation = .01,
                             nbElite = 0L, tournamentSize = 2L, alpha = 1,
                             summary = TRUE) {
  # Forward every argument, in signature order, to the native entry point.
  .Call(
    `_emcAdr_GeneticAlgorithm`,
    epochs,
    nbIndividuals,
    ATCtree,
    observations,
    num_thread,
    diversity,
    p_crossover,
    p_mutation,
    nbElite,
    tournamentSize,
    alpha,
    summary
  )
}

#'The true distribution of the score among every single nodes of the ATC
#'
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root)
#'@param observations : observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#'@param beta : minimum number of person taking the cocktails in order to consider it
#'in the beta score distribution 
#'@param max_score : maximum number the score can take. Score greater than this 
#'one would be added to the distribution as the value max_score. Default is 1000
#'@param nbResults : Number of returned solution (Cocktail with the
#' best observed score during the run), 100 by default
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#'
#'@return Return a List containing :
#' - ScoreDistribution : the distribution of the score as an array with each cells
#' representing the number of risks =  (index-1)/ 10
#' - Filtered_score_distribution : Distribution containing score for cocktails taken by at
#' least beta patients.
#' - Outstanding_score : An array of the score greater than max_score,
#' - Best_cocktails : the nbResults bests cocktails encountered during the run.
#' - Best_cocktails_beta : the nbResults bests cocktails taken by at least beta patients
#' encountered during the run.
#' - Best_scores : Score corresponding to the Best_cocktails.
#' - Best_scores_beta : Score corresponding to the Best_cocktails_beta.
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' size_1_score_distribution = trueDistributionDrugs(ATCtree = ATC_Tree_UpperBound_2024,
#'             observations = FAERS_myopathy[1:100,], beta = 4)
#'}
#'@export
trueDistributionDrugs <- function(ATCtree, observations, beta,
                                  max_score = 1000L, nbResults = 100L,
                                  num_thread = 1L) {
  # Arguments are passed positionally, matching the signature order.
  .Call(
    `_emcAdr_trueDistributionDrugs`,
    ATCtree,
    observations,
    beta,
    max_score,
    nbResults,
    num_thread
  )
}

#'The true distribution of the score among every size-two cocktails
#'
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root)
#'@param observations : observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#'@param beta : minimum number of person taking the cocktails in order to consider it
#'in the beta score distribution 
#'@param max_score : maximum number the score can take. Score greater than this 
#'one would be added to the distribution as the value max_score. Default is 100
#'@param nbResults : Number of returned solution (Cocktail with the
#' best observed score during the run), 100 by default
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#'
#'@return Return a List containing :
#' - ScoreDistribution : the distribution of the score as an array with each cells
#' representing the number of risks =  (index-1)/ 10
#' - Filtered_score_distribution : Distribution containing score for cocktails taken by at
#' least beta patients.
#' - Outstanding_score : An array of the score greater than max_score,
#' - Best_cocktails : the nbResults bests cocktails encountered during the run.
#' - Best_cocktails_beta : the nbResults bests cocktails taken by at least beta patients
#' encountered during the run.
#' - Best_scores : Score corresponding to the Best_cocktails.
#' - Best_scores_beta : Score corresponding to the Best_cocktails_beta.
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' size_2_score_distribution = trueDistributionSizeTwoCocktail(ATCtree = ATC_Tree_UpperBound_2024,
#'             observations = FAERS_myopathy[1:100,], beta = 4)
#'}
#'@export
trueDistributionSizeTwoCocktail <- function(ATCtree, observations, beta,
                                            max_score = 100L,
                                            nbResults = 100L,
                                            num_thread = 1L) {
  # Arguments are passed positionally, matching the signature order.
  .Call(
    `_emcAdr_trueDistributionSizeTwoCocktail`,
    ATCtree,
    observations,
    beta,
    max_score,
    nbResults,
    num_thread
  )
}

#'Function used in the reference article to compare diverse Disproportionality Analysis metrics 
#'
#'@param CocktailList : A list of cocktails on which the Disproportionality analysis metrics should be computed
#'@param ATCtree : ATC tree with upper bound of the DFS (without the root)
#'@param observations : observation of the AE based on the medications of each patients
#'(a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#'@param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#'
#'@return Multiple DA metrics computed on CocktailList cocktails
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' cocktails = list(c(561, 904),
#'                c(1902, 4585)) # only size 2 cocktails allowed for this function
#' 
#' scores_of_cocktails = computeMetrics_size2(CocktailList = cocktails,
#'                               ATCtree = ATC_Tree_UpperBound_2024, 
#'                               observations = FAERS_myopathy[1:100,])
#'}
#'@export
computeMetrics_size2 <- function(CocktailList, ATCtree, observations, num_thread = 1L) {
  # Thin wrapper: the four arguments go to the native entry point untouched.
  .Call(
    `_emcAdr_computeMetrics_size2`,
    CocktailList,
    ATCtree,
    observations,
    num_thread
  )
}

#' This function can be used in order to try different set of parameters for the genetic
#' algorithm in a convenient way. This will run each combination of mutation_rate,
#' nb_elite and alphas possible nb_test_desired times. For each sets of parameters,
#' results will be saved in a file named according to the set of parameter. One
#' can regroup the results of each run in a csv file by using the print_csv function
#' specifying the names of each file that needs to be treated and the number of 
#' performed runs on each parameter set
#' 
#' @param epochs : the number of epochs for the genetic algorithm
#' @param nb_individuals : the size of the population in the genetic algorithm
#' @param ATCtree : ATC tree with upper bound of the DFS (without the root)
#' @param observations : observation of the AE based on the medications of each patients
#' (a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#' @param nb_test_desired : number of genetic algorithm runs on each sets of parameters
#' @param mutation_rate : a vector with each mutation_rate to be tested
#' @param nb_elite : a vector with each nb_elite to be tested
#' @param alphas : a vector with each alphas to be tested
#' @param path : the path where the resulting files should be written
#' @param num_thread : Number of thread to run in parallel if openMP is available, 1 by default
#' @return No return value, this function should output results of the runs of the 
#' genetic algorithm in a specific format supported by function print_csv
#' and p_value_csv_file. The files are outputed in path which is current 
#' directory by default.
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' data("FAERS_myopathy")
#' 
#' # different parameter to test for
#' mutation_rate = c(.1,.2,.3)
#' nb_elite = c(0,1,2)
#' alphas = c(0.5,1,2)
#' hyperparam_test_genetic_algorithm(epochs = 2, nb_individuals = 2,
#'                               ATCtree = ATC_Tree_UpperBound_2024, 
#'                               observations = FAERS_myopathy,
#'                               nb_test_desired = 5, mutation_rate = mutation_rate,
#'                               nb_elite = nb_elite, alphas = alphas)
#'}
hyperparam_test_genetic_algorithm <- function(epochs, nb_individuals, ATCtree,
                                              observations, nb_test_desired,
                                              mutation_rate, nb_elite, alphas,
                                              path = "./", num_thread = 1L) {
  # Call the native entry point, then return its result invisibly so that
  # nothing is auto-printed at the console.
  native_result <- .Call(
    `_emcAdr_hyperparam_test_genetic_algorithm`,
    epochs,
    nb_individuals,
    ATCtree,
    observations,
    nb_test_desired,
    mutation_rate,
    nb_elite,
    alphas,
    path,
    num_thread
  )
  invisible(native_result)
}

#'Print every cocktails found during the genetic algorithm when used with the 
#'hyperparam_test_genetic_algorithm function. This enables to condense the solutions 
#'found in each files by collapsing similar cocktail in a single row by cocktail.
#'
#'
#' @param input_filenames : A List containing filename of hyperparam_test_genetic_algorithm output file
#' @param observations : observation of the AE based on the medications of each patients
#' (a DataFrame containing the medication on the first column and the ADR (boolean) on the second)
#' on which we want to compute the risk distribution
#' @param repetition : The parameter nb_test_desired used in the hyperparam test function
#' @param ATCtree : ATC tree with upper bound of the DFS (without the root)
#' @param csv_filename : Name of the output file, "solutions.csv" by default
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  data("FAERS_myopathy")
#'  files = c('250e_700ind_0.2mr_0ne_2alpha.txt') # results of hyperparam_test_genetic_algorithm
#' 
#'  print_csv(input_filenames = files, observations = FAERS_myopathy,
#'           repetition = 5, ATCtree = ATC_Tree_UpperBound_2024)
#' }
#' @return No return value, should process the output of the genetic algorithm in 
#' files produced by hyperparam_test_genetic_algorithm and output a summary csv file.
#' The csv file is outputed in current directory and named after the csv_filename
#' variable (solutions.csv by default).
#' @export
print_csv <- function(input_filenames, observations, repetition, ATCtree,
                      csv_filename = "solutions.csv") {
  # Call the native entry point, then return its result invisibly so that
  # nothing is auto-printed at the console.
  native_result <- .Call(
    `_emcAdr_print_csv`,
    input_filenames,
    observations,
    repetition,
    ATCtree,
    csv_filename
  )
  invisible(native_result)
}

#' Recover the square matrix of distances between cocktails, where the entry (i,j)
#' of the matrix is the distance between cocktails i and j in the genetic_results
#' list. 
#' @param genetic_results the List returned by the genetic algorithm.
#' @param ATCtree : ATC tree with upper bound of the DFS (without the root)
#' @param normalization : Do we keep the distance between cocktail in the range [0;1] ? 
#' \code{TRUE} by default, like the other get_dissimilarity_from_* functions.
#' 
#' @return The square matrix of distances between cocktails
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  data("FAERS_myopathy")
#'  
#'  genetic_results = GeneticAlgorithm(epochs = 10, nbIndividuals = 10,
#'             ATCtree = ATC_Tree_UpperBound_2024,
#'             observations = FAERS_myopathy)
#'  distance_matrix = get_dissimilarity_from_genetic_results(genetic_results = genetic_results,
#'                         ATCtree = ATC_Tree_UpperBound_2024, normalization = TRUE)
#' }
#' @export
get_dissimilarity_from_genetic_results <- function(genetic_results, ATCtree, normalization = TRUE) {
  # `normalization` now defaults to TRUE, matching the sibling functions
  # get_dissimilarity_from_txt_file and get_dissimilarity_from_cocktail_list.
  # NOTE(review): this file is produced by Rcpp::compileAttributes(); the same
  # default presumably has to be declared on the C++ signature to survive
  # regeneration -- confirm against the C++ source.
  .Call(
    `_emcAdr_get_dissimilarity_from_genetic_results`,
    genetic_results,
    ATCtree,
    normalization
  )
}

#' Recover the square matrix of distance between cocktails where the index (i,j)
#' of the matrix is the distance between cocktails i and j in the csv file containing
#' results of genetic algorithm
#' 
#' @param filename : the name of the file returned by the print_csv function.
#' @param ATCtree : ATC tree with upper bound of the DFS (without the root)
#' @param normalization : Do we keep the distance between cocktail in the range [0;1] ? 
#' 
#' @return The square matrix of distances between cocktails
#' @examples
#' \donttest{
#'  data("ATC_Tree_UpperBound_2024")
#'  
#'  distance_matrix = get_dissimilarity_from_txt_file(filename = '250e_700ind_0.2mr_0ne_2alpha.txt',
#'                         ATCtree = ATC_Tree_UpperBound_2024, normalization = TRUE)
#' }
#' @export
get_dissimilarity_from_txt_file <- function(filename, ATCtree, normalization = TRUE) {
  # Forward the three arguments unchanged to the native entry point.
  .Call(
    `_emcAdr_get_dissimilarity_from_txt_file`,
    filename,
    ATCtree,
    normalization
  )
}

#' Recover the square matrix of distance between cocktails where the index (i,j)
#' of the matrix is the distance between cocktails i and j in an arbitrary
#' cocktail list
#' 
#' @param cocktails : A list of cocktails in the form of a vector of integer
#' @param ATCtree : ATC tree with upper bound of the DFS (without the root)
#' @param normalization : Do we keep the distance between cocktail in the range [0;1] ? 
#' 
#' @return The square matrix of distances between cocktails
#'@examples
#'\donttest{
#' data("ATC_Tree_UpperBound_2024")
#' 
#' cocktails = list(c(561, 904),
#'                c(1902, 4585)) # only size 2 cocktails allowed for this function
#' 
#' distance_matrix = get_dissimilarity_from_cocktail_list(cocktails = cocktails,
#'                               ATCtree = ATC_Tree_UpperBound_2024, 
#'                               normalization = TRUE)
#'}
get_dissimilarity_from_cocktail_list <- function(cocktails, ATCtree, normalization = TRUE) {
  # Forward the three arguments unchanged to the native entry point.
  .Call(
    `_emcAdr_get_dissimilarity_from_cocktail_list`,
    cocktails,
    ATCtree,
    normalization
  )
}

#' Generate Matrix for Drug Combinations
#' 
#' This function creates a logical data frame where each column represents a 
#' specific sub-combination of drugs derived from a given "cocktail." For each 
#' patient in the input data, it indicates (TRUE/FALSE) whether they were 
#' taking that specific combination based on the ATC hierarchy.
#'
#' @param cocktail An integer vector of drug indices representing the full 
#'   combination to be analyzed.
#' @param upperBound An integer vector defining the ATC tree hierarchy bounds.
#' @param data A \code{Rcpp::DataFrame} containing patient records. It must 
#'   include a column \code{"patientATC"} which is a list of integer vectors 
#'   representing the drugs each patient is taking.
#'
#' @return A \code{Rcpp::DataFrame} where:
#' \itemize{
#'   \item Each column corresponds to a sub-combination of the input \code{cocktail}.
#'   \item Each row corresponds to a patient in the input \code{data}.
#'   \item Values are boolean indicators (represented as integers/logicals in R).
#' }
#' 
#' @details 
#' The function first generates all possible non-empty power-set combinations of the 
#' \code{cocktail} (e.g., for \eqn{\{1, 2\}}, it generates \eqn{\{1\}, \{2\}, \{1, 2\}}). 
#'
#' @note The column names of the resulting data frame are strings of comma-separated 
#' drug indices (e.g., "888,659,").
#'
#' @export
combination_data_frame <- function(cocktail, upperBound, data) {
  # Pass the cocktail, the bounds and the patient data to the native entry point.
  .Call(
    `_emcAdr_combination_data_frame`,
    cocktail,
    upperBound,
    data
  )
}

