# Enhanced PubMed Search Strategy Comparison using searchAnalyzeR Package
# Comparing Two Complementary Search Approaches for COVID-19 Research
#
# This example demonstrates comparing different search strategies with better
# performance characteristics and clearer differences using searchAnalyzeR functions.

# Load required packages
library(searchAnalyzeR)
library(ggplot2)
library(patchwork)  # For combining plots

# Announce the analysis: title, topic and objective in one banner
cat(
  "=== searchAnalyzeR: Enhanced Search Strategy Comparison Example ===\n",
  "Topic: COVID-19 Research (Broad vs. Targeted Clinical Approaches)\n",
  "Objective: Compare broad vs. targeted search strategies for different research needs\n\n",
  sep = ""
)

# Strategy A: a handful of general single-word COVID-19 keywords
strategy_A <- list(
  name = "Broad COVID Research Strategy",
  terms = c("covid-19", "coronavirus", "sars-cov-2", "pandemic", "covid"),
  description = "Broad approach capturing general COVID-19 research across all domains",
  databases = c("PubMed"),
  date_range = as.Date(c("2020-01-01", "2024-12-31")),
  filters = list(
    language = "English",
    article_types = c("Journal Article", "Review", "Clinical Trial")
  ),
  search_date = Sys.time()
)

# Strategy B: multi-word clinical phrases. Database, date window and filters
# carry the same values as Strategy A, so they are taken from it directly.
strategy_B <- list(
  name = "Targeted Clinical COVID Strategy",
  terms = c(
    "covid-19 clinical trial",
    "covid-19 randomized controlled trial",
    "covid-19 systematic review",
    "covid-19 vaccine efficacy",
    "covid-19 treatment outcomes"
  ),
  description = "Targeted approach focusing on high-quality clinical evidence",
  databases = strategy_A$databases,
  date_range = strategy_A$date_range,
  filters = strategy_A$filters,
  search_date = Sys.time()
)

# Print a strategy's header line, its terms joined with " OR ", and its description
describe_strategy <- function(header, strategy) {
  cat(header)
  cat("Terms:", paste(strategy$terms, collapse = " OR "), "\n")
  cat("Description:", strategy$description, "\n\n")
}

describe_strategy("Strategy A (Broad COVID Research):\n", strategy_A)
describe_strategy("Strategy B (Targeted Clinical COVID):\n", strategy_B)

# Execute searches for both strategies.
# Each search_pubmed() call receives only the strategy's terms and date range,
# capped at 200 results; the `filters` and `databases` entries defined on the
# strategy lists are not passed to the search here.
cat("=== EXECUTING STRATEGY A: Broad COVID Research ===\n")
results_A <- search_pubmed(
  search_terms = strategy_A$terms,
  max_results = 200,
  date_range = strategy_A$date_range
)

cat("\nStrategy A completed. Retrieved", nrow(results_A), "articles.\n\n")

cat("=== EXECUTING STRATEGY B: Targeted Clinical COVID ===\n")
results_B <- search_pubmed(
  search_terms = strategy_B$terms,
  max_results = 200,
  date_range = strategy_B$date_range
)

cat("\nStrategy B completed. Retrieved", nrow(results_B), "articles.\n\n")

# Standardize both result sets (declared to the package as PubMed-format input)
cat("Standardizing search results...\n")
standardized_A <- std_search_results(results_A, source_format = "pubmed")
standardized_B <- std_search_results(results_B, source_format = "pubmed")

# Add strategy identifiers so each row records which search produced it
standardized_A$strategy <- "Broad_COVID_Research"
standardized_B$strategy <- "Targeted_Clinical_COVID"

# Detect duplicates within each strategy (exact matching).
# The reporting below relies on the result carrying a logical `duplicate` column.
dedup_A <- detect_dupes(standardized_A, method = "exact")
dedup_B <- detect_dupes(standardized_B, method = "exact")

cat("Strategy A - Total:", nrow(dedup_A), "Unique:", sum(!dedup_A$duplicate), "Duplicates:", sum(dedup_A$duplicate), "\n")
cat("Strategy B - Total:", nrow(dedup_B), "Unique:", sum(!dedup_B$duplicate), "Duplicates:", sum(dedup_B$duplicate), "\n\n")

# Create enhanced gold standard for better demonstration.
# NOTE(review): the gold standard is assembled only from articles that these two
# searches themselves retrieved (overlap plus text heuristics), so the metrics
# computed against it are illustrative rather than an independent evaluation.
cat("Creating enhanced gold standard for demonstration...\n")

# High-confidence terms that indicate quality COVID research.
# All entries are lower case because they are matched as literal substrings
# against lower-cased title + abstract text in count_patterns().
high_confidence_patterns <- c(
  "randomized", "controlled trial", "systematic review", "meta-analysis",
  "clinical trial", "vaccine efficacy", "treatment outcome", "placebo",
  "double-blind", "multicenter", "cohort study", "case-control"
)

# Filter out duplicates: keep only rows not flagged in the `duplicate` column
unique_A <- dedup_A[!dedup_A$duplicate, ]
unique_B <- dedup_B[!dedup_B$duplicate, ]

# Articles that appear in both strategies (high confidence due to overlap)
overlap_ids <- intersect(unique_A$id, unique_B$id)

# Count how many high-confidence patterns occur in each article's text.
#
# For every row of `data`, the title and abstract are joined with a space and
# lower-cased; each entry of `patterns` is then tested as a literal (fixed,
# non-regex) substring of that text.
#
# @param data Data frame (or list) with `title` and `abstract` columns.
# @param patterns Character vector of literal substrings; supply them in lower
#   case, since the text they are matched against is lower-cased.
# @return Unnamed integer vector with one count per row of `data`
#   (`integer(0)` when `data` has no rows).
count_patterns <- function(data, patterns) {
  combined_text <- tolower(paste(data$title, data$abstract, sep = " "))
  # vapply() pins the result type: zero rows yield integer(0) where sapply()
  # would return an empty list, and an empty `patterns` yields zeros instead of
  # failing inside sum(). USE.NAMES = FALSE avoids naming every count with the
  # article's entire title + abstract text.
  vapply(
    combined_text,
    function(text) {
      matched <- vapply(
        patterns,
        function(pattern) grepl(pattern, text, fixed = TRUE),
        logical(1),
        USE.NAMES = FALSE
      )
      sum(matched)
    },
    integer(1),
    USE.NAMES = FALSE
  )
}

# Number of high-confidence patterns found in each unique article (one count per row)
pattern_counts_A <- count_patterns(unique_A, high_confidence_patterns)
pattern_counts_B <- count_patterns(unique_B, high_confidence_patterns)

# Articles with 2+ high-confidence patterns
multi_pattern_A <- unique_A$id[pattern_counts_A >= 2]
multi_pattern_B <- unique_B$id[pattern_counts_B >= 2]

# Articles with "systematic review" or "meta-analysis" in title (highest confidence).
# Unlike the literal matching above, this pattern is a regex alternation and is
# applied to the lower-cased title only.
systematic_review_pattern <- "systematic review|meta-analysis"
systematic_A <- unique_A$id[grepl(systematic_review_pattern, tolower(unique_A$title))]
systematic_B <- unique_B$id[grepl(systematic_review_pattern, tolower(unique_B$title))]

# Combine for gold standard with different confidence levels.
# unique() collapses ids that qualify through more than one route, so the
# per-category counts printed below can sum to more than the total.
gold_standard_ids <- unique(c(
  overlap_ids,           # High confidence: found by both strategies
  systematic_A,          # Very high confidence: systematic reviews from A
  systematic_B,          # Very high confidence: systematic reviews from B
  multi_pattern_A,       # High confidence: multiple quality indicators from A
  multi_pattern_B        # High confidence: multiple quality indicators from B
))

cat("Gold standard created with", length(gold_standard_ids), "high-confidence relevant articles\n")
cat("- Overlap between strategies:", length(overlap_ids), "articles\n")
cat("- Systematic reviews Strategy A:", length(systematic_A), "articles\n")
cat("- Systematic reviews Strategy B:", length(systematic_B), "articles\n")
cat("- Strategy A multi-pattern matches:", length(multi_pattern_A), "articles\n")
cat("- Strategy B multi-pattern matches:", length(multi_pattern_B), "articles\n\n")

# Initialize analyzers for both strategies
cat("Initializing SearchAnalyzers for comparison...\n")

# Plain id vectors, used by the id-based comparison functions further down
unique_A_ids <- unique_A$id
unique_B_ids <- unique_B$id

# Both analyzers are scored against the same gold standard so their metrics
# are directly comparable
analyzer_A <- SearchAnalyzer$new(
  search_results = unique_A,
  gold_standard = gold_standard_ids,
  search_strategy = strategy_A
)

analyzer_B <- SearchAnalyzer$new(
  search_results = unique_B,
  gold_standard = gold_standard_ids,
  search_strategy = strategy_B
)

# Calculate comprehensive metrics for both strategies.
# Later code reads precision, recall, f1_score and number_needed_to_read from
# the `precision_recall` element of each result.
cat("Calculating performance metrics...\n")
metrics_A <- analyzer_A$calculate_metrics()
metrics_B <- analyzer_B$calculate_metrics()

# Compare strategies using the comparison framework (McNemar test requested).
# Strategy A is passed as strategy 1 and Strategy B as strategy 2.
cat("Performing statistical comparison...\n")
comparison_result <- compare_strategies(
  strategy1_results = unique_A_ids,
  strategy2_results = unique_B_ids,
  gold_standard = gold_standard_ids,
  test_type = "mcnemar"
)

# Additional performance metrics using the enhanced functions
cat("Calculating additional performance metrics...\n")

# Calculate strategy comparison metrics; later code reads its
# `overlap_analysis`, `complementarity` and `performance_comparison` elements
strategy_comparison <- calc_strategy_comparison(
  strategy1_results = unique_A_ids,
  strategy2_results = unique_B_ids,
  gold_standard = gold_standard_ids
)

# Calculate temporal coverage for both strategies against each strategy's own
# date window (the two windows hold the same dates)
temporal_A <- calc_temporal_coverage(unique_A, target_date_range = strategy_A$date_range)
temporal_B <- calc_temporal_coverage(unique_B, target_date_range = strategy_B$date_range)

# Display comprehensive comparison results
cat("\n=== COMPREHENSIVE STRATEGY COMPARISON RESULTS ===\n\n")

# Per-strategy metrics are printed only when a precision value is present;
# the article count is always printed.
cat("STRATEGY A (Broad COVID Research) PERFORMANCE:\n")
cat("Total Articles Retrieved:", nrow(unique_A), "\n")
if (!is.null(metrics_A$precision_recall$precision)) {
  cat("Precision:", round(metrics_A$precision_recall$precision, 3), "\n")
  cat("Recall:", round(metrics_A$precision_recall$recall, 3), "\n")
  cat("F1 Score:", round(metrics_A$precision_recall$f1_score, 3), "\n")
  cat("Number Needed to Read:", round(metrics_A$precision_recall$number_needed_to_read, 1), "\n")
}

cat("\nSTRATEGY B (Targeted Clinical COVID) PERFORMANCE:\n")
cat("Total Articles Retrieved:", nrow(unique_B), "\n")
if (!is.null(metrics_B$precision_recall$precision)) {
  cat("Precision:", round(metrics_B$precision_recall$precision, 3), "\n")
  cat("Recall:", round(metrics_B$precision_recall$recall, 3), "\n")
  cat("F1 Score:", round(metrics_B$precision_recall$f1_score, 3), "\n")
  cat("Number Needed to Read:", round(metrics_B$precision_recall$number_needed_to_read, 1), "\n")
}

cat("\nSTATISTICAL COMPARISON RESULTS:\n")
cat("Test Used:", comparison_result$test, "\n")
cat("P-value:", round(comparison_result$p_value, 4), "\n")
cat("Statistically Significant:", comparison_result$significant, "\n")

# NOTE(review): the "(B - A)" direction in the header is asserted by this
# script only; confirm it against how compare_strategies() computes `difference`.
if (!is.null(comparison_result$difference)) {
  cat("\nPERFORMANCE DIFFERENCES (B - A):\n")
  cat("Precision Difference:", round(comparison_result$difference$precision_diff, 3), "\n")
  cat("Recall Difference:", round(comparison_result$difference$recall_diff, 3), "\n")
  cat("F1 Score Difference:", round(comparison_result$difference$f1_diff, 3), "\n")
}

# Display enhanced overlap analysis (strategy 1 = A / Broad, strategy 2 = B / Targeted)
cat("\nENHANCED OVERLAP ANALYSIS:\n")
cat("Total Unique Articles (Combined):", strategy_comparison$overlap_analysis$total_unique, "\n")
cat("Overlap Between Strategies:", strategy_comparison$overlap_analysis$overlap_count, "\n")
cat("Unique to Strategy A (Broad):", strategy_comparison$overlap_analysis$unique_to_strategy1, "\n")
cat("Unique to Strategy B (Targeted):", strategy_comparison$overlap_analysis$unique_to_strategy2, "\n")
cat("Overlap Percentage:", round(strategy_comparison$overlap_analysis$overlap_percentage, 1), "%\n")

# Display complementarity analysis
cat("\nCOMPLEMENTARITY ANALYSIS:\n")
cat("Added Recall by Strategy A:", round(strategy_comparison$complementarity$added_recall_by_strategy1, 3), "\n")
cat("Added Recall by Strategy B:", round(strategy_comparison$complementarity$added_recall_by_strategy2, 3), "\n")
cat("Synergy Score:", round(strategy_comparison$complementarity$synergy_score, 3), "\n")

# Display temporal coverage results; coverage is multiplied by 100 for display
# as a percentage
cat("\nTEMPORAL COVERAGE ANALYSIS:\n")
cat("Strategy A - Target Period Coverage:", round(temporal_A$target_period_coverage * 100, 1), "%\n")
cat("Strategy B - Target Period Coverage:", round(temporal_B$target_period_coverage * 100, 1), "%\n")

# Peak years are listed only when at least one was reported
if (length(temporal_A$peak_years) > 0) {
  cat("Strategy A - Peak Publication Years:", paste(temporal_A$peak_years, collapse = ", "), "\n")
}
if (length(temporal_B$peak_years) > 0) {
  cat("Strategy B - Peak Publication Years:", paste(temporal_B$peak_years, collapse = ", "), "\n")
}

# Create enhanced visualizations for comparison
cat("\nGenerating comparative visualizations...\n")

# 1. Side-by-side performance overview.
# Both panels get the same 0-1 y-axis limits so bar heights compare directly.
overview_A <- analyzer_A$visualize_performance("overview") +
  ggtitle("Strategy A: Broad COVID Research") +
  theme(plot.title = element_text(size = 12)) +
  ylim(0, 1)

overview_B <- analyzer_B$visualize_performance("overview") +
  ggtitle("Strategy B: Targeted Clinical COVID") +
  theme(plot.title = element_text(size = 12)) +
  ylim(0, 1)

# `+` between two plots is patchwork's composition operator; plot_annotation()
# adds the shared title and subtitle
combined_overview <- overview_A + overview_B +
  plot_annotation(title = "COVID Search Strategy Performance Comparison",
                  subtitle = "Broad vs. Targeted Clinical Approaches - Demonstrating Clear Trade-offs")

print(combined_overview)

# 2. Temporal comparison (no shared axis limits are imposed on these panels)
temporal_A_plot <- analyzer_A$visualize_performance("temporal") +
  ggtitle("Strategy A: Temporal Distribution") +
  theme(plot.title = element_text(size = 12))

temporal_B_plot <- analyzer_B$visualize_performance("temporal") +
  ggtitle("Strategy B: Temporal Distribution") +
  theme(plot.title = element_text(size = 12))

combined_temporal <- temporal_A_plot + temporal_B_plot +
  plot_annotation(title = "Temporal Distribution Comparison: Broad vs. Targeted COVID Strategies")

print(combined_temporal)

# 3. Custom summary plot: precision, recall and F1 for both strategies
# Wide table first: one row per strategy, one column per metric
comparison_data <- local({
  pr_A <- metrics_A$precision_recall
  pr_B <- metrics_B$precision_recall
  data.frame(
    Strategy = c("Broad COVID Research", "Targeted Clinical COVID"),
    Precision = c(pr_A$precision, pr_B$precision),
    Recall = c(pr_A$recall, pr_B$recall),
    F1_Score = c(pr_A$f1_score, pr_B$f1_score),
    Articles_Retrieved = c(nrow(unique_A), nrow(unique_B)),
    stringsAsFactors = FALSE
  )
})

# Long table (base R): one Strategy/Metric/Value slice per metric column,
# stacked in the order Precision, Recall, F1_Score
comparison_long <- do.call(
  rbind,
  lapply(c("Precision", "Recall", "F1_Score"), function(metric_name) {
    data.frame(
      Strategy = comparison_data$Strategy,
      Metric = metric_name,
      Value = comparison_data[[metric_name]],
      stringsAsFactors = FALSE
    )
  })
)

# Dodged bars per metric; value labels use the same dodge width as the bars so
# each label sits over its own bar
comparison_plot <- ggplot(
  data = comparison_long,
  mapping = aes(x = Metric, y = Value, fill = Strategy)
) +
  geom_col(width = 0.7, alpha = 0.8, position = "dodge") +
  geom_text(
    mapping = aes(label = round(Value, 3)),
    vjust = -0.5,
    position = position_dodge(width = 0.7)
  ) +
  ylim(0, 1) +
  labs(
    title = "Direct Performance Metric Comparison",
    subtitle = "Demonstrating clear trade-offs: Broad strategy shows higher recall, Targeted shows higher precision",
    x = "Performance Metric",
    y = "Score"
  ) +
  scale_fill_manual(
    values = c(
      "Broad COVID Research" = "#2E86AB",
      "Targeted Clinical COVID" = "#A23B72"
    )
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray60")
  )

print(comparison_plot)

# 4. Overlap visualization: articles found only by A, by both, or only by B
cat("\nCreating enhanced overlap analysis visualization...\n")

# Counts come straight from the overlap analysis; each percentage is that
# count's share of all unique articles across both strategies
overlap_data <- local({
  overlap_stats <- strategy_comparison$overlap_analysis
  category_counts <- c(
    overlap_stats$unique_to_strategy1,
    overlap_stats$overlap_count,
    overlap_stats$unique_to_strategy2
  )
  data.frame(
    Category = c("Broad Strategy Only", "Overlap", "Targeted Strategy Only"),
    Count = category_counts,
    Percentage = category_counts / overlap_stats$total_unique * 100,
    stringsAsFactors = FALSE
  )
})

# One bar per category, labelled with the count and its percentage on two lines
overlap_plot <- ggplot(
  data = overlap_data,
  mapping = aes(x = Category, y = Count, fill = Category)
) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_text(
    mapping = aes(label = paste0(Count, "\n(", round(Percentage, 1), "%)")),
    size = 3.5,
    vjust = 0.5
  ) +
  labs(
    title = "Article Retrieval Overlap Analysis",
    subtitle = "Complementary nature of broad vs. targeted approaches",
    x = "Category",
    y = "Number of Articles"
  ) +
  scale_fill_manual(
    values = c(
      "Broad Strategy Only" = "#2E86AB",
      "Overlap" = "#F18F01",
      "Targeted Strategy Only" = "#A23B72"
    )
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray60")
  )

print(overlap_plot)

# 5. Strategy comparison performance plot: F1 score per strategy.
# Reads the `strategy` and `f1_score` columns of the performance comparison table.
# NOTE(review): the fill colours are keyed on the labels "Strategy 1",
# "Strategy 2" and "Combined"; confirm these match the values in `strategy`.
cat("\nCreating strategy comparison performance plot...\n")
performance_comparison_plot <- ggplot(strategy_comparison$performance_comparison,
                                      aes(x = strategy, y = f1_score, fill = strategy)) +
  geom_col(alpha = 0.8, width = 0.7) +
  geom_text(aes(label = round(f1_score, 3)), vjust = -0.5) +
  scale_fill_manual(values = c("Strategy 1" = "#2E86AB", "Strategy 2" = "#A23B72", "Combined" = "#F18F01")) +
  labs(title = "F1 Score Comparison Across Strategies",
       subtitle = "Higher F1 scores indicate better balanced performance",
       y = "F1 Score", x = "Strategy") +
  theme_minimal() +
  theme(legend.position = "none") +
  ylim(0, 1)

print(performance_comparison_plot)

# Analysis of individual term effectiveness
cat("\nAnalyzing individual term effectiveness...\n")

# Analyze terms from Strategy A: each term is evaluated against the title and
# abstract fields of A's unique results, using the shared gold standard
term_analysis_A <- term_effectiveness(
  terms = strategy_A$terms,
  search_results = unique_A,
  gold_standard = gold_standard_ids,
  text_fields = c("title", "abstract")
)

# Analyze terms from Strategy B (same settings, B's terms and results)
term_analysis_B <- term_effectiveness(
  terms = strategy_B$terms,
  search_results = unique_B,
  gold_standard = gold_standard_ids,
  text_fields = c("title", "abstract")
)

# Display term effectiveness results
cat("\nTerm Effectiveness for Strategy A (Broad COVID Research):\n")
print(term_analysis_A)

cat("\nTerm Effectiveness for Strategy B (Targeted Clinical COVID):\n")
print(term_analysis_B)

# Add strategy information (done after printing, so the tables above are shown
# without this extra column)
term_analysis_A$strategy <- "Broad_COVID_Research"
term_analysis_B$strategy <- "Targeted_Clinical_COVID"

# === ENHANCED INDIVIDUAL TERM EFFECTIVENESS PLOTS ===
cat("\n=== CREATING INDIVIDUAL TERM EFFECTIVENESS VISUALIZATIONS ===\n")

# Calculate TES scores for both strategies; the results replace the term tables
# and are read below through a `tes` column
term_analysis_A <- calc_tes(term_analysis_A)
term_analysis_B <- calc_tes(term_analysis_B)

# Find top terms for both strategies (top 3 each, without drawing a plot).
# The results are read through their `terms` and `data` elements.
top_results_A <- find_top_terms(term_analysis_A, n = 3, plot = FALSE)
top_results_B <- find_top_terms(term_analysis_B, n = 3, plot = FALSE)

cat("\nTop performing terms in Strategy A (Broad):", paste(top_results_A$terms, collapse = ", "), "\n")
cat("TES scores:", paste(round(top_results_A$data$tes, 3), collapse = ", "), "\n")

cat("\nTop performing terms in Strategy B (Targeted):", paste(top_results_B$terms, collapse = ", "), "\n")
cat("TES scores:", paste(round(top_results_B$data$tes, 3), collapse = ", "), "\n")

# ===== INDIVIDUAL TERM EFFECTIVENESS PLOTS =====
# Four plots per strategy (precision, coverage, counts, precision-vs-coverage),
# each printed on its own; several are reused later in side-by-side layouts.

# 1. Strategy A - Individual Plots
cat("\nGenerating individual plots for Strategy A (Broad COVID Research)...\n")

# Precision plot for Strategy A
precision_plot_A <- plot_term_effectiveness(
  term_analysis_A,
  plot_type = "precision_only",
  title_override = "Strategy A: Term Precision Analysis",
  show_values = TRUE
)
print(precision_plot_A)

# Coverage plot for Strategy A
coverage_plot_A <- plot_term_effectiveness(
  term_analysis_A,
  plot_type = "coverage_only",
  title_override = "Strategy A: Term Coverage Analysis",
  show_values = TRUE
)
print(coverage_plot_A)

# Count plot for Strategy A
counts_plot_A <- plot_term_effectiveness(
  term_analysis_A,
  plot_type = "counts",
  title_override = "Strategy A: Article Retrieval Counts",
  show_values = TRUE
)
print(counts_plot_A)

# Precision vs Coverage bubble plot for Strategy A
bubble_plot_A <- plot_term_effectiveness(
  term_analysis_A,
  plot_type = "precision_coverage",
  title_override = "Strategy A: Term Effectiveness (Precision vs Coverage)",
  show_values = FALSE  # Labels would be too cluttered with values
)
print(bubble_plot_A)

# 2. Strategy B - Individual Plots (same four plot types as Strategy A)
cat("\nGenerating individual plots for Strategy B (Targeted Clinical COVID)...\n")

# Precision plot for Strategy B
precision_plot_B <- plot_term_effectiveness(
  term_analysis_B,
  plot_type = "precision_only",
  title_override = "Strategy B: Term Precision Analysis",
  show_values = TRUE
)
print(precision_plot_B)

# Coverage plot for Strategy B
coverage_plot_B <- plot_term_effectiveness(
  term_analysis_B,
  plot_type = "coverage_only",
  title_override = "Strategy B: Term Coverage Analysis",
  show_values = TRUE
)
print(coverage_plot_B)

# Count plot for Strategy B
counts_plot_B <- plot_term_effectiveness(
  term_analysis_B,
  plot_type = "counts",
  title_override = "Strategy B: Article Retrieval Counts",
  show_values = TRUE
)
print(counts_plot_B)

# Precision vs Coverage bubble plot for Strategy B (values hidden, as for A)
bubble_plot_B <- plot_term_effectiveness(
  term_analysis_B,
  plot_type = "precision_coverage",
  title_override = "Strategy B: Term Effectiveness (Precision vs Coverage)",
  show_values = FALSE
)
print(bubble_plot_B)

# 3. Side-by-side Comparison Plots
cat("\nCreating side-by-side comparison plots...\n")

# NOTE(review): patchwork is already attached with library() at the top of the
# script and used unguarded for the earlier combined plots, so this
# requireNamespace() check cannot be FALSE by the time it runs.
if (requireNamespace("patchwork", quietly = TRUE)) {
  # Combined precision comparison
  precision_comparison <- precision_plot_A + precision_plot_B +
    plot_annotation(
      title = "Term Precision Comparison Across Strategies",
      subtitle = "Broad COVID vs Targeted Clinical approaches"
    )
  print(precision_comparison)

  # Combined coverage comparison
  coverage_comparison <- coverage_plot_A + coverage_plot_B +
    plot_annotation(
      title = "Term Coverage Comparison Across Strategies",
      subtitle = "Broad COVID vs Targeted Clinical approaches"
    )
  print(coverage_comparison)

  # Combined bubble plot comparison
  bubble_comparison <- bubble_plot_A + bubble_plot_B +
    plot_annotation(
      title = "Term Effectiveness Landscape Comparison",
      subtitle = "Precision vs Coverage analysis for both strategies"
    )
  print(bubble_comparison)
}

# Create highlighted precision plots for both strategies: same precision plot
# type as before, with each strategy's top-3 terms passed as `highlight_terms`
precision_highlight_A <- plot_term_effectiveness(
  term_analysis_A,
  plot_type = "precision_only",
  highlight_terms = top_results_A$terms,
  title_override = "Strategy A: Top-Performing Terms (Broad COVID Research)",
  show_values = TRUE
)
print(precision_highlight_A)

precision_highlight_B <- plot_term_effectiveness(
  term_analysis_B,
  plot_type = "precision_only",
  highlight_terms = top_results_B$terms,
  title_override = "Strategy B: Top-Performing Terms (Targeted Clinical COVID)",
  show_values = TRUE
)
print(precision_highlight_B)

# Cross-strategy term comparison.
# The list names ("Broad", "Targeted") are the labels the fill scale of the
# plot below is keyed on.
cat("\nPerforming cross-strategy term comparison...\n")
term_comparison <- compare_terms(
  list(
    "Broad" = term_analysis_A,
    "Targeted" = term_analysis_B
  ),
  top_n = 3
)

cat("\nCross-Strategy Term Effectiveness Comparison:\n")
print(term_comparison)

# Create TES comparison plot, restricted to terms that ranked in the top 3 of
# either strategy.
# NOTE(review): relies on `term_comparison` having `term`, `tes` and `strategy`
# columns; confirm against compare_terms().
top_terms_combined <- unique(c(top_results_A$terms, top_results_B$terms))
tes_comparison_data <- term_comparison[term_comparison$term %in% top_terms_combined, ]

tes_plot <- ggplot(tes_comparison_data, aes(x = term, y = tes, fill = strategy)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = round(tes, 3)),
            position = position_dodge(width = 0.7), vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("Broad" = "#2E86AB", "Targeted" = "#A23B72")) +
  labs(title = "Term Effectiveness Score (TES) Comparison",
       subtitle = "Top-performing terms across COVID search strategies",
       x = "Search Terms", y = "TES Score", fill = "Strategy") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray60"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  ) +
  ylim(0, 1)

print(tes_plot)

# ===== EXPORTS AND REPORTING =====

# Generate comprehensive reports and exports.
# Everything is written under the R session's temporary directory.
cat("\nExporting comprehensive analysis results...\n")
output_dir <- tempdir()

# Export individual strategy results (CSV and XLSX, with metadata)
export_files_A <- export_results(
  search_results = unique_A,
  file_path = file.path(output_dir, "strategy_A_broad_covid"),
  formats = c("csv", "xlsx"),
  include_metadata = TRUE
)

export_files_B <- export_results(
  search_results = unique_B,
  file_path = file.path(output_dir, "strategy_B_targeted_covid"),
  formats = c("csv", "xlsx"),
  include_metadata = TRUE
)

# Export enhanced comparison results as a two-column Metric/Value table.
# `Value` is built with c(), so the logical significance flag is coerced to a
# number alongside the other entries.
# NOTE(review): c() silently drops NULL elements, so a missing metric would
# shorten `Value` relative to the 15 `Metric` labels.
enhanced_comparison_summary <- data.frame(
  Metric = c("Total_Articles_A", "Total_Articles_B", "Precision_A", "Precision_B",
             "Recall_A", "Recall_B", "F1_Score_A", "F1_Score_B", "Statistical_Significance",
             "Overlap_Count", "Unique_to_A", "Unique_to_B", "Synergy_Score",
             "Temporal_Coverage_A", "Temporal_Coverage_B"),
  Value = c(nrow(unique_A), nrow(unique_B),
            metrics_A$precision_recall$precision, metrics_B$precision_recall$precision,
            metrics_A$precision_recall$recall, metrics_B$precision_recall$recall,
            metrics_A$precision_recall$f1_score, metrics_B$precision_recall$f1_score,
            comparison_result$significant, strategy_comparison$overlap_analysis$overlap_count,
            strategy_comparison$overlap_analysis$unique_to_strategy1,
            strategy_comparison$overlap_analysis$unique_to_strategy2,
            strategy_comparison$complementarity$synergy_score,
            temporal_A$target_period_coverage, temporal_B$target_period_coverage),
  stringsAsFactors = FALSE
)

write.csv(enhanced_comparison_summary, file.path(output_dir, "enhanced_strategy_comparison_summary.csv"), row.names = FALSE)

# Export term effectiveness results with TES scores
write.csv(term_analysis_A, file.path(output_dir, "term_effectiveness_strategy_A.csv"), row.names = FALSE)
write.csv(term_analysis_B, file.path(output_dir, "term_effectiveness_strategy_B.csv"), row.names = FALSE)

# Export cross-strategy term comparison if available.
# NOTE(review): `term_comparison` is assigned unconditionally earlier in this
# script, so this exists() check is always TRUE when the script runs top to bottom.
if (exists("term_comparison")) {
  write.csv(term_comparison, file.path(output_dir, "cross_strategy_term_comparison.csv"), row.names = FALSE)
}

# Export strategy comparison results
write.csv(strategy_comparison$performance_comparison, file.path(output_dir, "strategy_performance_comparison.csv"), row.names = FALSE)
write.csv(strategy_comparison$overlap_analysis, file.path(output_dir, "strategy_overlap_analysis.csv"), row.names = FALSE)

# Export temporal coverage results (the per-year coverage element of each result)
write.csv(temporal_A$coverage_by_year, file.path(output_dir, "temporal_coverage_strategy_A.csv"), row.names = FALSE)
write.csv(temporal_B$coverage_by_year, file.path(output_dir, "temporal_coverage_strategy_B.csv"), row.names = FALSE)

# Combined dataset with strategy labels (base R only, no dplyr)
ids_found_by_both <- intersect(unique_A_ids, unique_B_ids)

# Attach the strategy label plus gold-standard and found-by-both flags
label_results <- function(results, strategy_label) {
  results$search_strategy <- strategy_label
  results$in_gold_standard <- results$id %in% gold_standard_ids
  results$found_by_both <- results$id %in% ids_found_by_both
  results
}

combined_A <- label_results(unique_A, "Broad_COVID_Research")
combined_B <- label_results(unique_B, "Targeted_Clinical_COVID")

# Stack A above B, then keep the first row per id, so an article found by both
# strategies keeps Strategy A's label
combined_results <- rbind(combined_A, combined_B)
combined_results <- combined_results[!duplicated(combined_results$id), ]

write.csv(
  combined_results,
  file.path(output_dir, "combined_strategy_results.csv"),
  row.names = FALSE
)

# Bundle every analysis artefact into one list for the data package
enhanced_analysis_results <- list(
  metrics_A = metrics_A,
  metrics_B = metrics_B,
  comparison = comparison_result,
  strategy_comparison = strategy_comparison,
  temporal_A = temporal_A,
  temporal_B = temporal_B,
  term_effectiveness_A = term_analysis_A,
  term_effectiveness_B = term_analysis_B
)

package_dir <- create_data_package(
  search_results = combined_results,
  analysis_results = enhanced_analysis_results,
  output_dir = output_dir,
  package_name = "covid_search_strategy_comparison_enhanced"
)

# Display sample articles from each strategy for qualitative review
cat("\n=== SAMPLE ARTICLES FOR QUALITATIVE REVIEW ===\n")

# Order `articles` most recent first when a `date` column is present;
# otherwise return it unchanged. Checked per data frame, so one result set
# lacking `date` does not affect how the other is handled.
sort_recent_first <- function(articles) {
  if ("date" %in% names(articles)) {
    articles <- articles[order(articles$date, decreasing = TRUE), ]
  }
  articles
}

# Print title, journal, date and PMID for every row of `articles`.
# seq_len() makes an empty sample print a short notice; `1:nrow(x)` would
# iterate over c(1, 0) and print two bogus entries.
print_sample_articles <- function(articles) {
  if (nrow(articles) == 0) {
    cat("   (no gold-standard articles retrieved)\n")
  }
  for (i in seq_len(nrow(articles))) {
    article <- articles[i, ]
    cat("\n", i, ". ", article$title, "\n", sep = "")
    cat("   Journal:", article$source, "\n")
    cat("   Date:", as.character(article$date), "\n")
    # Strip the "PMID:" prefix so only the identifier itself is shown
    cat("   PMID:", gsub("PMID:", "", article$id), "\n")
  }
  invisible(articles)
}

# Get unique articles that are in gold standard, most recent first
gold_articles_A <- sort_recent_first(unique_A[unique_A$id %in% gold_standard_ids, ])
gold_articles_B <- sort_recent_first(unique_B[unique_B$id %in% gold_standard_ids, ])

# Take top 3 articles
sample_A <- head(gold_articles_A, 3)
sample_B <- head(gold_articles_B, 3)

cat("\nTop articles from Strategy A (Broad COVID Research):\n")
print_sample_articles(sample_A)

cat("\nTop articles from Strategy B (Targeted Clinical COVID):\n")
print_sample_articles(sample_B)

# Report where the generated files live. Nothing is deleted here; the files
# remain under the session's temporary directory.
final_files <- list.files(
  path = output_dir,
  pattern = "covid|strategy|term_effectiveness|enhanced",
  full.names = TRUE,
  recursive = TRUE
)
cat("\n=== FINAL OUTPUT LOCATIONS ===\n")
invisible(lapply(final_files, function(path) cat(path, "\n")))

# Final enhanced summary and recommendations
cat("\n=== ENHANCED ANALYSIS SUMMARY AND STRATEGIC INSIGHTS ===\n\n")

# Name the strategy with the higher score on one metric.
#
# Plain if/else replaces ifelse() because the inputs are scalars. Equal scores
# are reported as a tie (previously Strategy B was named the winner), and a
# missing, NA or non-scalar score is reported as undetermined (previously it
# printed as blank or NA).
#
# @param score_A Metric value for Strategy A (expected: a single number).
# @param score_B Metric value for Strategy B (expected: a single number).
# @return A single character label.
pick_winner <- function(score_A, score_B) {
  comparable <- length(score_A) == 1L && length(score_B) == 1L &&
    !is.na(score_A) && !is.na(score_B)
  if (!comparable) {
    return("Undetermined (metric unavailable)")
  }
  if (score_A > score_B) {
    "Broad COVID Research (A)"
  } else if (score_B > score_A) {
    "Targeted Clinical COVID (B)"
  } else {
    "Tie (A and B equal)"
  }
}

# Determine the better strategy for different purposes
precision_winner <- pick_winner(metrics_A$precision_recall$precision,
                                metrics_B$precision_recall$precision)
recall_winner <- pick_winner(metrics_A$precision_recall$recall,
                             metrics_B$precision_recall$recall)
f1_winner <- pick_winner(metrics_A$precision_recall$f1_score,
                         metrics_B$precision_recall$f1_score)

cat("PERFORMANCE WINNERS BY METRIC:\n")
cat("Best Precision:", precision_winner, "\n")
cat("Best Recall:", recall_winner, "\n")
cat("Best Overall F1 Score:", f1_winner, "\n\n")

# Key findings: fixed descriptive text interleaved with the computed article
# counts and precision/recall/F1 values for each strategy
cat("ENHANCED KEY FINDINGS:\n")
cat("1. Strategy A (Broad COVID Research):\n")
cat("   - Casts wide net across all COVID-19 research\n")
cat("   - Retrieved", nrow(unique_A), "unique articles\n")
cat("   - F1 Score:", round(metrics_A$precision_recall$f1_score, 3), "\n")
cat("   - Precision:", round(metrics_A$precision_recall$precision, 3), "\n")
cat("   - Recall:", round(metrics_A$precision_recall$recall, 3), "\n")
cat("   - Best for: Comprehensive literature reviews, scoping studies\n\n")

cat("2. Strategy B (Targeted Clinical COVID):\n")
cat("   - Focuses on high-quality clinical evidence\n")
cat("   - Retrieved", nrow(unique_B), "unique articles\n")
cat("   - F1 Score:", round(metrics_B$precision_recall$f1_score, 3), "\n")
cat("   - Precision:", round(metrics_B$precision_recall$precision, 3), "\n")
cat("   - Recall:", round(metrics_B$precision_recall$recall, 3), "\n")
cat("   - Best for: Clinical guidelines, systematic reviews of interventions\n\n")

# Overlap figures: strategy 1 is the Broad strategy (A), strategy 2 the
# Targeted strategy (B), matching the argument order used when the comparison
# was computed
cat("3. Strategic Complementarity Analysis:\n")
cat("   - Total unique articles when combined:", strategy_comparison$overlap_analysis$total_unique, "\n")
cat("   - Overlap between strategies:", strategy_comparison$overlap_analysis$overlap_count,
    "(", round(strategy_comparison$overlap_analysis$overlap_percentage, 1), "%)\n")
cat("   - Broad strategy contributed", strategy_comparison$overlap_analysis$unique_to_strategy1, "unique articles\n")
cat("   - Targeted strategy contributed", strategy_comparison$overlap_analysis$unique_to_strategy2, "unique articles\n")
cat("   - Synergy Score:", round(strategy_comparison$complementarity$synergy_score, 3), "\n\n")

# Strategic recommendations based on results
cat("STRATEGIC RECOMMENDATIONS FOR COVID-19 RESEARCH:\n\n")

# Each check uses isTRUE()/isFALSE() rather than a bare `if (x)`: a bare `if`
# aborts the script when the value is NA, NaN or NULL. That can happen here,
# e.g. the test result may be undefined when the two strategies never disagree,
# and the overlap percentage is undefined when no articles were retrieved.
# An unavailable value gets its own message instead of falling into either
# verdict.
if (isTRUE(comparison_result$significant)) {
  cat("✓ STATISTICAL SIGNIFICANCE: The strategies show statistically significant differences (p < 0.05)\n")
} else if (isFALSE(comparison_result$significant)) {
  cat("○ STATISTICAL SIGNIFICANCE: No significant difference detected (p ≥ 0.05)\n")
} else {
  cat("? STATISTICAL SIGNIFICANCE: Test result unavailable\n")
}

# Less than 60% overlap is treated as highly complementary
if (isTRUE(strategy_comparison$overlap_analysis$overlap_percentage < 60)) {
  cat("✓ HIGH COMPLEMENTARITY: Strategies are highly complementary - combining both is recommended\n")
} else if (isTRUE(strategy_comparison$overlap_analysis$overlap_percentage >= 60)) {
  cat("○ MODERATE OVERLAP: Some redundancy between strategies\n")
} else {
  cat("? OVERLAP: Overlap percentage unavailable\n")
}

# A synergy score above 0.15 is treated as strong
if (isTRUE(strategy_comparison$complementarity$synergy_score > 0.15)) {
  cat("✓ STRONG SYNERGY: Combining strategies provides substantial added value\n")
} else if (isTRUE(strategy_comparison$complementarity$synergy_score <= 0.15)) {
  cat("○ LIMITED SYNERGY: Minimal additional benefit from combining strategies\n")
} else {
  cat("? SYNERGY: Synergy score unavailable\n")
}

# Usage recommendations (static text)
cat(
  "\nUSAGE RECOMMENDATIONS:\n",
  "• For broad COVID-19 scoping reviews → Use Strategy A (Broad)\n",
  "• For clinical intervention reviews → Use Strategy B (Targeted)\n",
  "• For comprehensive systematic reviews → Combine both strategies\n",
  "• For rapid evidence synthesis → Start with Strategy B, expand with Strategy A if needed\n",
  sep = ""
)

# Closing banner: what the example demonstrated
cat(
  "\n=== DEMONSTRATION COMPLETE ===\n",
  "This enhanced example demonstrates:\n",
  "1. Clear performance differences between complementary search approaches\n",
  "2. Meaningful trade-offs (precision vs. recall) that inform strategy selection\n",
  "3. Statistical significance testing of strategy differences\n",
  "4. Enhanced term-level effectiveness analysis with actionable insights\n",
  "5. Strategic recommendations based on quantitative analysis\n",
  "6. Comprehensive visualization and reporting capabilities\n",
  "7. Real-world applicability for different research purposes\n",
  "8. Evidence-based optimization of search strategies\n\n",
  sep = ""
)

# Package features exercised by the script
cat(
  "Key Package Features Demonstrated:\n",
  "• Comparative strategy analysis with statistical testing\n",
  "• Individual term effectiveness scoring (TES)\n",
  "• Overlap and complementarity analysis\n",
  "• Temporal coverage assessment\n",
  "• Enhanced visualization suite\n",
  "• Strategic decision support\n",
  "• Comprehensive reporting and export capabilities\n",
  sep = ""
)

# Inventory of the generated plots and outputs, followed by a closing note
cat(
  "\n=== ENHANCED TERM EFFECTIVENESS VISUALIZATION SUMMARY ===\n",
  "The following individual plots were generated for enhanced term effectiveness analysis:\n",
  "1. Precision-only plots for each strategy (showing accuracy of each term)\n",
  "2. Coverage-only plots for each strategy (showing completeness of each term)\n",
  "3. Article count plots for each strategy (showing retrieval volume)\n",
  "4. Precision vs Coverage bubble plots (showing balanced effectiveness)\n",
  "5. Highlighted plots emphasizing top-performing terms\n",
  "6. Side-by-side comparisons using patchwork\n",
  "7. Enhanced performance comparison matrix visualization\n",
  "8. Strategy overlap and complementarity analysis plots\n",
  "9. Term Effectiveness Score (TES) comparison plots\n",
  "10. Cross-strategy term performance visualizations\n",
  "11. Sample articles for qualitative review\n",
  "12. Comprehensive export and reporting functionality\n\n",
  "These enhanced individual plots provide comprehensive, focused analysis\n",
  "with detailed synergy, complementarity, and TES metrics, making it easier to\n",
  "identify the most effective search terms and optimal strategy combinations.\n",
  "The TES metric provides a balanced view of term-level performance that\n",
  "complements traditional strategy-level F1 scores.\n",
  sep = ""
)
