stemangiola · stemangiola · Jul 6, 2025 · Jul 6, 2025 · Jul 6, 2025 · Jul 6, 2025
diff --git a/R/utilities.R b/R/utilities.R
@@ -592,11 +592,25 @@ get_x_y_annotation_columns = function(.data, .column, .row, .abundance){
     select_if(negate(is.list)) %>%
     ungroup() %>%
     {
-      # Rows
+      # Get column, row, and abundance column names
+      col_col <- quo_name(.column)
+      row_col <- quo_name(.row)
+      abundance_col <- quo_name(.abundance)
+
+      # Columns for column orientation
+      col_orient <- colnames(subset(., !!.column))
+      # Columns for row orientation
+      row_orient <- colnames(subset(., !!.row))
+
+      # Always include abundance column in both orientations
+      col_orient <- unique(c(col_orient, abundance_col))
+      row_orient <- unique(c(row_orient, abundance_col))
+
       bind_rows(
-        (.) %>% subset(!!.column) %>% colnames %>% as_tibble %>% rename(column = value) %>% gather(orientation, col_name),
-        (.) %>% subset(!!.row) %>% colnames %>% as_tibble %>% rename(row = value) %>% gather(orientation, col_name)
-      )
+        tibble(orientation = "column", col_name = col_orient),
+        tibble(orientation = "row", col_name = row_orient)
+      ) %>%
+        distinct()
     }
 }
 

diff --git a/tests/testthat/test-get_x_y_annotation_columns.R b/tests/testthat/test-get_x_y_annotation_columns.R
@@ -0,0 +1,319 @@
+context("tidyHeatmap:::get_x_y_annotation_columns tests")
+
+# Load required packages
+library(dplyr)
+library(tidyr)
+library(tibble)
+# Line removed as `rlang` is not used in this test file.
+
+# Create test data sets for different scenarios
+create_basic_test_data <- function() {
+  # Create rectangular data: all combinations of 2 samples and 2 genes
+  tidyr::expand_grid(
+    sample = c("S1", "S2"),
+    gene = c("G1", "G2")
+  ) %>%
+    dplyr::mutate(
+      count = c(10, 20, 30, 40),
+      sample_type = rep(c("A", "B"), each = 2),  # annotation for samples
+      gene_pathway = rep(c("P1", "P2"), times = 2),  # annotation for genes
+      batch = rep(c("B1", "B2"), each = 2),  # could be either sample or gene annotation
+      irrelevant_col = rep(c("X", "Y"), each = 2)  # not related to either dimension
+    )
+}
+
+create_complex_test_data <- function() {
+  # Create rectangular data: all combinations of 3 patients and 4 biomarkers
+  tidyr::expand_grid(
+    patient_id = c("P1", "P2", "P3"),
+    biomarker = c("BM1", "BM2", "BM3", "BM4")
+  ) %>%
+    dplyr::mutate(
+      expression = runif(12),
+      age = rep(c(25, 35, 45), each = 4),  # patient annotation
+      gender = rep(c("M", "F", "M"), each = 4),  # patient annotation
+      pathway = rep(c("Path1", "Path2", "Path1", "Path2"), times = 3),  # biomarker annotation
+      category = rep(c("Cat1", "Cat2", "Cat1", "Cat2"), times = 3),  # biomarker annotation
+      treatment = rep(c("T1", "T2", "T3"), each = 4),  # patient annotation
+      tissue_type = rep(c("Normal", "Tumor"), each = 6)  # could be either
+    )
+}
+
+# Test basic functionality
+test_that("tidyHeatmap:::get_x_y_annotation_columns works with basic data", {
+  test_data <- create_basic_test_data()
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  # Should be a tibble with orientation and col_name columns
+  expect_s3_class(result, "tbl_df")
+  expect_equal(names(result), c("orientation", "col_name"))
+
+  # Should have entries for both orientations
+  expect_true("column" %in% result$orientation)
+  expect_true("row" %in% result$orientation)
+
+  # Check that sample-related columns are marked as column orientation
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  expect_true("sample" %in% column_cols)
+  expect_true("sample_type" %in% column_cols)
+
+  # Check that gene-related columns are marked as row orientation
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+  expect_true("gene" %in% row_cols)
+  expect_true("gene_pathway" %in% row_cols)
+
+  # Count should be in both orientations (as it's the abundance measure)
+  expect_true("count" %in% column_cols)
+  expect_true("count" %in% row_cols)
+})
+
+# Test with more complex data structure
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles complex data correctly", {
+  test_data <- create_complex_test_data()
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, patient_id, biomarker, expression)
+
+  # Should identify patient-specific annotations
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  expect_true("patient_id" %in% column_cols)
+  expect_true("age" %in% column_cols)
+  expect_true("gender" %in% column_cols)
+  expect_true("treatment" %in% column_cols)
+
+  # Should identify biomarker-specific annotations
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+  expect_true("biomarker" %in% row_cols)
+  expect_true("pathway" %in% row_cols)
+  expect_true("category" %in% row_cols)
+})
+
+# Test with empty data
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles empty data", {
+  empty_data <- tibble(
+    sample = character(0),
+    gene = character(0),
+    count = numeric(0)
+  )
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(empty_data, sample, gene, count)
+
+  expect_s3_class(result, "tbl_df")
+  expect_equal(names(result), c("orientation", "col_name"))
+  # With empty data, we should still get the basic structure
+  expect_true(nrow(result) >= 0)
+})
+
+# Test with single row/column
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles single row/column data", {
+  # Single sample and single gene (still rectangular)
+  single_data <- tibble(
+    sample = "S1",
+    gene = "G1",
+    count = 100,
+    sample_annotation = "A",
+    gene_annotation = "X"
+  )
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(single_data, sample, gene, count)
+
+  expect_s3_class(result, "tbl_df")
+  expect_equal(names(result), c("orientation", "col_name"))
+
+  # Should still categorize annotations correctly
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+
+  expect_true("sample" %in% column_cols)
+  expect_true("gene" %in% row_cols)
+})
+
+# Test with grouped data
+test_that("tidyHeatmap:::get_x_y_annotation_columns works with grouped data", {
+  test_data <- create_basic_test_data() %>%
+    group_by(sample_type)
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  # Should still work correctly and ungroup the data
+  expect_s3_class(result, "tbl_df")
+  expect_equal(names(result), c("orientation", "col_name"))
+
+  # Should still categorize correctly
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+
+  expect_true("sample" %in% column_cols)
+  expect_true("gene" %in% row_cols)
+})
+
+# Test with list columns (should be filtered out)
+test_that("tidyHeatmap:::get_x_y_annotation_columns filters out list columns", {
+  test_data <- create_basic_test_data() %>%
+    mutate(list_col = list(c(1, 2), c(3, 4), c(5, 6), c(7, 8)))
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  # list_col should not appear in the result
+  all_cols <- result %>% pull(col_name)
+  expect_false("list_col" %in% all_cols)
+
+  # Other columns should still be present
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+
+  expect_true("sample" %in% column_cols)
+  expect_true("gene" %in% row_cols)
+})
+
+# Test with factor columns
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles factor columns", {
+  test_data <- create_basic_test_data() %>%
+    mutate(
+      sample_type = factor(sample_type),
+      gene_pathway = factor(gene_pathway)
+    )
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  # Should handle factors correctly
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+
+  expect_true("sample_type" %in% column_cols)
+  expect_true("gene_pathway" %in% row_cols)
+})
+
+# Test with numeric annotation columns
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles numeric annotations", {
+  # Create rectangular data: all combinations of 2 samples and 2 genes
+  test_data <- tidyr::expand_grid(
+    sample = c("S1", "S2"),
+    gene = c("G1", "G2")
+  ) %>%
+    dplyr::mutate(
+      count = c(10, 20, 30, 40),
+      sample_score = rep(c(1.5, 2.5), each = 2),  # numeric annotation for samples
+      gene_weight = rep(c(10.1, 20.2), times = 2)  # numeric annotation for genes
+    )
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+
+  expect_true("sample_score" %in% column_cols)
+  expect_true("gene_weight" %in% row_cols)
+})
+
+# Test with real-world-like data (using structure similar to N52)
+test_that("tidyHeatmap:::get_x_y_annotation_columns works with N52-like structure", {
+  # Create rectangular data: all combinations of 4 samples and 3 genes
+  n52_like_data <- tidyr::expand_grid(
+    symbol_ct = c("G1", "G2", "G3"),
+    UBR = c("S1", "S2", "S3", "S4")
+  ) %>%
+    dplyr::mutate(
+      `read count normalised log` = runif(12),
+      Category = rep(c("Cat1", "Cat2", "Cat1"), each = 4),  # gene annotation
+      `Cell type` = rep(c("TypeA", "TypeB", "TypeA", "TypeB"), times = 3),  # sample annotation
+      CAPRA_TOTAL = rep(c(1, 2, 3, 4), times = 3),  # sample annotation
+      inflection = rep(c(10, 20, 30), each = 4)  # gene annotation
+    )
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(
+    n52_like_data, 
+    UBR, 
+    symbol_ct, 
+    `read count normalised log`
+  )
+
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+
+  # UBR should be in column orientation
+  expect_true("UBR" %in% column_cols)
+  # symbol_ct should be in row orientation  
+  expect_true("symbol_ct" %in% row_cols)
+
+  # Cell type and CAPRA_TOTAL should be sample (column) annotations
+  expect_true("Cell type" %in% column_cols)
+  expect_true("CAPRA_TOTAL" %in% column_cols)
+
+  # Category and inflection should be gene (row) annotations
+  expect_true("Category" %in% row_cols)
+  expect_true("inflection" %in% row_cols)
+
+  # count (abundance) should be in both orientations
+  expect_true("count" %in% column_cols)
+  expect_true("count" %in% row_cols)
+})
+
+# Test with missing abundance column specified
+# Removed due to redundancy with the N52-like structure test.
+# Test result structure and types
+test_that("tidyHeatmap:::get_x_y_annotation_columns returns correct structure", {
+  test_data <- create_basic_test_data()
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  # Check return type and structure
+  expect_s3_class(result, "tbl_df")
+  expect_equal(ncol(result), 2)
+  expect_equal(names(result), c("orientation", "col_name"))
+
+  # Check column types
+  expect_type(result$orientation, "character")
+  expect_type(result$col_name, "character")
+
+  # Check that orientation only contains expected values
+  unique_orientations <- unique(result$orientation)
+  expect_true(all(unique_orientations %in% c("column", "row")))
+
+  # Check that col_name contains actual column names from the data
+  expect_true(all(result$col_name %in% names(test_data)))
+})
+
+# Test with duplicate column names handling
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles edge cases", {
+  # Test with minimal rectangular data
+  minimal_data <- tidyr::expand_grid(
+    x = c("A", "B"),
+    y = c("1", "2")
+  ) %>%
+    dplyr::mutate(z = c(100, 200, 300, 400))
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(minimal_data, x, y, z)
+
+  expect_s3_class(result, "tbl_df")
+  expect_equal(names(result), c("orientation", "col_name"))
+
+  # All columns should appear in the result
+  all_cols <- result %>% pull(col_name)
+  expect_true("x" %in% all_cols)
+  expect_true("y" %in% all_cols)
+  expect_true("z" %in% all_cols)
+})
+
+# Test with boolean column specific to row values in a rectangular matrix
+
+test_that("tidyHeatmap:::get_x_y_annotation_columns handles boolean row-specific annotation in rectangular data", {
+  # Create all combinations of 2 samples and 2 genes (rectangular)
+  test_data <- tidyr::expand_grid(
+    sample = c("S1", "S2"),
+    gene = c("G1", "G2")
+  ) %>%
+    dplyr::mutate(
+      count = c(10, 20, 30, 40),
+      is_special_gene = gene == "G2" # TRUE only for G2 rows
+    )
+
+  result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count)
+
+  # Should classify is_special_gene as a row annotation
+  row_cols <- result %>% filter(orientation == "row") %>% pull(col_name)
+  column_cols <- result %>% filter(orientation == "column") %>% pull(col_name)
+
+  expect_true("is_special_gene" %in% row_cols)
+  expect_false("is_special_gene" %in% column_cols)
+})