diff --git a/DESCRIPTION b/DESCRIPTION index e108588..57f69ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidySingleCellExperiment Title: Brings SingleCellExperiment to the Tidyverse -Version: 1.19.1 +Version: 1.19.2 Authors@R: c(person("Stefano", "Mangiola", comment=c(ORCID="0000-0001-7474-836X"), email="mangiolastefano@gmail.com", diff --git a/NAMESPACE b/NAMESPACE index c8e6f8e..a84701d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(add_count,SingleCellExperiment) S3method(anti_join,SingleCellExperiment) +S3method(append_samples,SingleCellExperiment) S3method(arrange,SingleCellExperiment) S3method(as_tibble,SingleCellExperiment) S3method(bind_cols,SingleCellExperiment) @@ -158,6 +159,7 @@ importFrom(tidyr,unnest) importFrom(tidyselect,all_of) importFrom(tidyselect,eval_select) importFrom(ttservice,aggregate_cells) +importFrom(ttservice,append_samples) importFrom(ttservice,bind_cols) importFrom(ttservice,bind_rows) importFrom(ttservice,join_features) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index baec5fc..f8500d1 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -29,14 +29,7 @@ arrange.SingleCellExperiment <- function(.data, ..., .by_group=FALSE) { #' @name bind_rows #' @rdname bind_rows #' @inherit ttservice::bind_rows -#' -#' @examples -#' data(pbmc_small) -#' tt <- pbmc_small -#' bind_rows(tt, tt) -#' -#' tt_bind <- tt |> select(nCount_RNA, nFeature_RNA) -#' tt |> bind_cols(tt_bind) +#' @noRd #' #' @references #' Hutchison, W.J., Keyes, T.J., The tidyomics Consortium. et al. The tidyomics ecosystem: enhancing omic data analyses. Nat Methods 21, 1166–1170 (2024). 
https://doi.org/10.1038/s41592-024-02299-2 @@ -48,6 +41,13 @@ arrange.SingleCellExperiment <- function(.data, ..., .by_group=FALSE) { #' @importFrom SingleCellExperiment cbind #' @export bind_rows.SingleCellExperiment <- function(..., .id=NULL, add.cell.ids=NULL) { + lifecycle::deprecate_warn( + when = "1.19.2", + what = "bind_rows()", + with = "append_samples()", + details = "bind_rows is not a generic method in dplyr and may cause conflicts. Use append_samples." + ) + tts <- flatten_if(dots_values(...), is_spliced) new_obj <- SingleCellExperiment::cbind(tts[[1]], tts[[2]]) @@ -62,6 +62,45 @@ bind_rows.SingleCellExperiment <- function(..., .id=NULL, add.cell.ids=NULL) { new_obj } +#' @name append_samples +#' @rdname append_samples +#' @title Append samples from multiple SingleCellExperiment objects +#' +#' @description +#' Append samples from multiple SingleCellExperiment objects by column-binding them. +#' This function is equivalent to `cbind` but provides a tidyverse-like interface. +#' +#' @param x First SingleCellExperiment object to combine +#' @param ... 
Additional SingleCellExperiment objects to combine by samples
+#' @param .id Object identifier (currently not used)
+#'
+#' @return A combined SingleCellExperiment object
+#'
+#' @examples
+#' data(pbmc_small)
+#' append_samples(pbmc_small, pbmc_small)
+#'
+#' @importFrom ttservice append_samples
+#' @importFrom rlang flatten_if
+#' @importFrom rlang is_spliced
+#' @importFrom SingleCellExperiment cbind
+#' @export
+append_samples.SingleCellExperiment <- function(x, ..., .id = NULL) {
+  # Gather `x` and `...` into one list, flattening any spliced elements
+  tts <- flatten_if(list(x, ...), is_spliced)
+  new_obj <- do.call(cbind, tts)
+
+  # Binding can repeat cell (column) names; rename repeats so they are unique
+  if (anyDuplicated(colnames(new_obj)) > 0) {
+    warning("tidySingleCellExperiment says:",
+      " you have duplicated cell names, they will be made unique.",
+      call. = FALSE)
+    colnames(new_obj) <- make.unique(colnames(new_obj), sep = "_")
+  }
+
+  new_obj
+}
+
 #' @importFrom rlang flatten_if
 #' @importFrom rlang is_spliced
 #' @importFrom rlang dots_values
diff --git a/README.Rmd b/README.Rmd
index c3d990f..228cdf0 100755
--- a/README.Rmd
+++ b/README.Rmd
@@ -461,7 +461,7 @@ pbmc_small_nested_interactions <-
       cell_signaling(genes=rownames(data), cluster=cluster) |>
       inter_network(data=data, signal=_, genes=rownames(data), cluster=cluster) %$%
       `individual-networks` |>
-      map_dfr(~ bind_rows(as_tibble(.x)))
+      map_dfr(~ as_tibble(.x))
   }))
 
 pbmc_small_nested_interactions |>
diff --git a/README.md b/README.md
index 9edd55f..b766b72 100755
--- a/README.md
+++ b/README.md
@@ -314,10 +314,6 @@ pbmc_small_pca <-
     ## TRUE, : You're computing too large a percentage of total singular values, use a
     ## standard svd instead.
 
-    ## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
-    ## TRUE, : did not converge--results might be invalid!; try increasing work or
-    ## maxit
-
 ``` r
 pbmc_small_pca
 ```
@@ -742,7 +738,7 @@ pbmc_small_nested_interactions <-
       cell_signaling(genes=rownames(data), cluster=cluster) |>
       inter_network(data=data, signal=_, genes=rownames(data), cluster=cluster) %$%
       `individual-networks` |>
-      map_dfr(~ bind_rows(as_tibble(.x)))
+      map_dfr(~ as_tibble(.x))
   }))
 
 pbmc_small_nested_interactions |>
diff --git a/inst/NEWS.rd b/inst/NEWS.rd
index c4c99f7..cce925b 100644
--- a/inst/NEWS.rd
+++ b/inst/NEWS.rd
@@ -1,6 +1,14 @@
 \name{NEWS}
 \title{News for Package \pkg{tidySingleCellExperiment}}
 
+\section{Changes in version 1.19.2, Bioconductor 3.22 Release}{
+\itemize{
+    \item Soft deprecated \code{bind_rows()} in favor of \code{append_samples()} from ttservice.
+    \item Added \code{append_samples()} method for SingleCellExperiment objects.
+    \item \code{bind_rows()} is not a generic method in dplyr and may cause conflicts.
+    \item Users are encouraged to use \code{append_samples()} instead.
+}}
+
 \section{Changes in version 1.4.0, Bioconductor 3.14 Release}{
 \itemize{
     \item Improved sample_n, and sample_frac functions.
diff --git a/man/append_samples.Rd b/man/append_samples.Rd new file mode 100644 index 0000000..dc54941 --- /dev/null +++ b/man/append_samples.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{append_samples} +\alias{append_samples} +\alias{append_samples.SingleCellExperiment} +\title{Append samples from multiple SingleCellExperiment objects} +\usage{ +\method{append_samples}{SingleCellExperiment}(x, ..., .id = NULL) +} +\arguments{ +\item{x}{First SingleCellExperiment object to combine} + +\item{...}{Additional SingleCellExperiment objects to combine by samples} + +\item{.id}{Object identifier (currently not used)} +} +\value{ +A combined SingleCellExperiment object +} +\description{ +Append samples from multiple SingleCellExperiment objects by column-binding them. +This function is equivalent to `cbind` but provides a tidyverse-like interface. +} +\examples{ +data(pbmc_small) +append_samples(pbmc_small, pbmc_small) + +} diff --git a/man/bind_rows.Rd b/man/bind_rows.Rd deleted file mode 100644 index ed34bb5..0000000 --- a/man/bind_rows.Rd +++ /dev/null @@ -1,72 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dplyr_methods.R -\name{bind_rows} -\alias{bind_rows} -\alias{bind_rows.SingleCellExperiment} -\alias{bind_cols.SingleCellExperiment} -\alias{bind_cols} -\title{Efficiently bind multiple data frames by row and column} -\usage{ -\method{bind_rows}{SingleCellExperiment}(..., .id = NULL, add.cell.ids = NULL) - -\method{bind_cols}{SingleCellExperiment}(..., .id = NULL) -} -\arguments{ -\item{...}{Data frames to combine. - - Each argument can either be a data frame, a list that could be a data - frame, or a list of data frames. - - When row-binding, columns are matched by name, and any missing - columns will be filled with NA. - - When column-binding, rows are matched by position, so all data - frames must have the same number of rows. 
To match by value, not - position, see mutate-joins.} - -\item{.id}{Data frame identifier. - - When `.id` is supplied, a new column of identifiers is - created to link each row to its original data frame. The labels - are taken from the named arguments to `bind_rows()`. When a - list of data frames is supplied, the labels are taken from the - names of the list. If no names are found a numeric sequence is - used instead.} - -\item{add.cell.ids}{from Seurat 3.0 A character vector of length(x = c(x, y)). Appends the corresponding values to the start of each objects' cell names.} -} -\value{ -`bind_rows()` and `bind_cols()` return the same type as - the first input, either a data frame, `tbl_df`, or `grouped_df`. - -`bind_rows()` and `bind_cols()` return the same type as - the first input, either a data frame, `tbl_df`, or `grouped_df`. -} -\description{ -This is an efficient implementation of the common pattern of -`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many -data frames into one. - -This is an efficient implementation of the common pattern of -`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many -data frames into one. -} -\details{ -The output of `bind_rows()` will contain a column if that column -appears in any of the inputs. - -The output of `bind_rows()` will contain a column if that column -appears in any of the inputs. -} -\examples{ -data(pbmc_small) -tt <- pbmc_small -bind_rows(tt, tt) - -tt_bind <- tt |> select(nCount_RNA, nFeature_RNA) -tt |> bind_cols(tt_bind) - -} -\references{ -Hutchison, W.J., Keyes, T.J., The tidyomics Consortium. et al. The tidyomics ecosystem: enhancing omic data analyses. Nat Methods 21, 1166–1170 (2024). 
https://doi.org/10.1038/s41592-024-02299-2
-}
diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R
index 46cb83f..755b475 100755
--- a/tests/testthat/test-dplyr_methods.R
+++ b/tests/testthat/test-dplyr_methods.R
@@ -24,9 +24,9 @@ df$factor <- sample(
 #     expect_identical(fd, df)
 # })
 
-test_that("bind_rows()", {
+test_that("append_samples()", {
     # warn about duplicated cells names
-    expect_warning(fd <- bind_rows(df, df))
+    expect_warning(fd <- append_samples(df, df), "duplicated cell names")
     # cell names should be unique after binding
     expect_true(!any(duplicated(pull(fd, .cell))))
 })
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
index 53230ce..d35c039 100755
--- a/vignettes/introduction.Rmd
+++ b/vignettes/introduction.Rmd
@@ -273,6 +273,24 @@ pbmc_small_cluster %>%
         .value=.abundance_counts, scale="column")
 ```
 
+# Combining datasets
+
+We can use `append_samples()` to combine multiple SingleCellExperiment objects by samples.
+This is useful when you have multiple datasets that you want to analyze together.
+
+```{r}
+# Create two subsets of the data
+pbmc_subset1 <- pbmc_small_cluster %>%
+  filter(groups == "g1")
+
+pbmc_subset2 <- pbmc_small_cluster %>%
+  filter(groups == "g2")
+
+# Combine them using append_samples
+combined_data <- append_samples(pbmc_subset1, pbmc_subset2)
+combined_data
+```
+
 # Reduce dimensions
 
 We can calculate the first 3 UMAP dimensions using `r BiocStyle::Biocpkg("scater")`.
@@ -454,7 +472,7 @@ pbmc_small_nested_interactions <-
       cell_signaling(genes=rownames(data), cluster=cluster) %>%
       inter_network(data=data, signal=., genes=rownames(data), cluster=cluster) %$%
       `individual-networks` %>%
-      map_dfr(~ bind_rows(as_tibble(.x)))
+      map_dfr(~ as_tibble(.x))
   }))
 
 pbmc_small_nested_interactions %>%