From ddac2c3b44eefb465bdd95ac7e4701ea604519e5 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 24 Jul 2020 16:00:46 -0500 Subject: [PATCH 01/18] don't force an id on a Category and make Categories fill in missing ids --- R/AllClasses.R | 22 +++++++++++++++++++++- R/category.R | 2 +- tests/testthat/test-categories.R | 12 ++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/R/AllClasses.R b/R/AllClasses.R index 55b702f53..d8ed9535a 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -314,7 +314,27 @@ setClass("Categories", contains = "AbstractCategories") #' @rdname Categories #' @export -Categories <- GenericConstructor("Categories") +Categories <- function(..., data = NULL) { + # Fill in ids if missing + if (is.null(data)) data <- list(...) + + # use try because we haven't validated that they're category-like yet + used_ids <- try(vapply(data, function(x) x$id %||% NA, numeric(1)), silent = TRUE) + if (!inherits(used_ids, "try-error") && any(is.na(used_ids))) { + all_ids <- used_ids + all_ids[is.na(used_ids)] <- setdiff( + seq_along(data), + used_ids + )[seq_len(sum(is.na(used_ids)))] + + data <- mapply(function(cat, used_id, all_id) { + if (is.na(used_id)) cat$id <- all_id + cat + }, data, used_ids, all_ids, SIMPLIFY = FALSE) + } + + new("Categories", data) +} #' @rdname Categories #' @export diff --git a/R/category.R b/R/category.R index 2b1e174ac..34355ac44 100644 --- a/R/category.R +++ b/R/category.R @@ -1,7 +1,7 @@ is.category <- function(x) inherits(x, "Category") setValidity("Category", function(object) { - is.cat <- all(c("id", "name") %in% names(object)) + is.cat <- all(c("name") %in% names(object)) if (!all(is.cat)) { val <- "Not a category" } else { diff --git a/tests/testthat/test-categories.R b/tests/testthat/test-categories.R index 8b229fd7f..2563f13de 100644 --- a/tests/testthat/test-categories.R +++ b/tests/testthat/test-categories.R @@ -41,6 +41,18 @@ with_mock_crunch({ ), "Invalid category names: must be unique") }) + test_that("fill in category id when missing", { + expect_equal( + Categories(list(name = "A"), list(name = "B")), + Categories(list(id = 1L, name = "A"), list(id = 2L, name = "B")) + ) + + expect_equal( + Categories(list(name = "A", id = 2L), list(name = "B")), + Categories(list(id = 2L, name = "A"), list(id = 1L, name = "B")) + ) + }) + test_that("category slicers", { expect_true(is.categories(cats[1])) expect_equal(cats[c("Female", "Male")], cats[c(2, 1)]) From 94b905d3cb1a5fbc16a8cc8c268841c3973488df Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 24 Jul 2020 16:01:07 -0500 Subject: [PATCH 02/18] weird R behavior - sometimes empty names get attached which breaks the JSON --- R/case-variables.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/case-variables.R b/R/case-variables.R index 411137fa0..8fa969e5d 100644 --- a/R/case-variables.R +++ b/R/case-variables.R @@ -124,12 +124,12 @@ caseExpr <- function(..., cases) { new_cat_type <- list( value = list( class = "categorical", - categories = lapply(cases, function(case) { + categories = unname(lapply(cases, function(case) { case[c("id", "name", "numeric_value", "missing")] - }) + })) ) ) - new_cat_ids <- vapply(cases, vget("id"), integer(1)) + new_cat_ids <- unname(vapply(cases, vget("id"), integer(1))) new_cat <- list(column = I(new_cat_ids), type = new_cat_type) # remove nulls from case expressions (should only be from the else case) From 2bc33c2a3f4c8e4b561891c2f4e9ac90cce11c78 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 24 Jul 2020 16:01:46 -0500 Subject: [PATCH 03/18] create caseWhenExpr and tests --- DESCRIPTION | 1 + NAMESPACE | 2 + R/case-when-variable.R | 145 +++++++++++++++++++++ man/makeCaseWhenVariable.Rd | 74 +++++++++++ tests/testthat/test-case-when-variable.R | 153 +++++++++++++++++++++++ 5 files changed, 375 insertions(+) create mode 100644 R/case-when-variable.R create mode 100644 man/makeCaseWhenVariable.Rd create mode 100644 tests/testthat/test-case-when-variable.R diff --git a/DESCRIPTION b/DESCRIPTION index 38709e0e0..55ca59c04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,6 +66,7 @@ Collate: 'auth.R' 'batches.R' 'case-variables.R' + 'case-when-variable.R' 'categories.R' 'category.R' 'change-category-id.R' diff --git a/NAMESPACE b/NAMESPACE index 8b8aa9486..de0e5782b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,7 @@ export(availableGeodataFeatures) export(batches) export(bin) export(caseExpr) +export(caseWhenExpr) export(categoriesFromLevels) export(cd) export(changeCategoryID) @@ -230,6 +231,7 @@ export(logout) export(makeArray) export(makeArrayGadget) export(makeCaseVariable) +export(makeCaseWhenVariable) export(makeFrame) export(makeMR) export(makeMRFromText) diff --git a/R/case-when-variable.R b/R/case-when-variable.R new file mode 100644 index 000000000..219c9ec9f --- /dev/null +++ b/R/case-when-variable.R @@ -0,0 +1,145 @@ +#' Create a variable from categorical variables or categories based on conditions +#' +#' Conditions are specified using a series of formulas: the left-hand side is +#' the condition that must be true (a `CrunchLogicalExpr`) and the right-hand +#' side is where to get the value if the condition on the left-hand side is +#' true. This must be either a Crunch Categorical variable or a Category. +#' +#' @param ... formulas where the left hand side is a CrunchExpression (or `TRUE` +#' to indicate the "else" case that will be met if all the other expression are +#' not met) and the right hand side is a CrunchVariable that should be filled in, +#' a `Category` object describing the Category it should be used, a string +#' which will be the name of the `Category` or `NA` to indicate that it should +#' be replaced with the system missing value. For `makeCaseWhenVariable()` +#' non-formula arguments will be passed to `[VarDef()]` +#' @param data A CrunchDataset to use if variable aliases are left bare in the +#' formulas. +#' @param formulas A list of formulas that match the description in `...` +#' @param name For `maekCaseWhenVariable()` the name of the variable to create. +#' +#' @return `makeCaseWhenVariable()` returns a `VariableDefinition` and +#' `caseWhenExpr()` returns an expression +#' @export +#' @examples +#' \dontrun{ +#' ds$new_var <- makeCaseWhenVariable( +#' ds$x %in% c("a", "b") ~ ds$y, # can fill with a variable +#' ds$x %in% c("c", "d") ~ Category(name = "c or d", numeric_value = 10), # or a Category +#' # If none of the categories match, will be set to missing unless you +#' # specify an "else" case with `TRUE` in the left hand side +#' TRUE ~ Category(name = "catch all"), +#' name = "combined x and y" +#' ) +#' +#' ds$brand_x_pref <- makeCaseWhenVariable( +#' ds$brand[[1]] == "Brand X" ~ ds$pref[[1]], +#' ds$brand[[2]] == "Brand X" ~ ds$pref[[2]], +#' ds$brand[[3]] == "Brand X" ~ ds$pref[[3]] +#' name = "brand x preference" +#' ) +#' +#' ds$rebased_x <- makeCaseWhenVariable( +#' ds$skipped_x != "Yes" ~ ds$x, +#' name = "rebased x" +#' ) +#' +#' # caseWhenExpr can be used inside other expressions +#' ds$brand_x_prefer_high <- VarDef( +#' selectCategories( +#' caseWhenExpr( +#' ds$brand[[1]] == "Brand X" ~ ds$pref[[1]], +#' ds$brand[[2]] == "Brand X" ~ ds$pref[[2]], +#' ds$brand[[3]] == "Brand X" ~ ds$pref[[3]] +#' ), +#' c("Best", "Very Good") +#' ), +#' name = "brand x preference selected" +#' ) +#' } +makeCaseWhenVariable <- function(..., data = NULL, formulas = NULL, name) { + dots <- list(...) + formula_dots <- vapply(dots, function(x) inherits(x, "formula"), logical(1)) + + args <- list( + data = caseWhenExpr(data = data, formulas = c(formulas, dots[formula_dots])), + name = name + ) + args <- c(args, dots[!formula_dots]) + + do.call(VarDef, args) +} + +#' @export +#' @rdname makeCaseWhenVariable +caseWhenExpr <- function(..., data = NULL, formulas = NULL) { + formulas <- c(formulas, list(...)) + case_fills <- lapply(formulas, parse_case_when_formula, data = data) + + # Get set of unique IDs that fill in for when IDs are missing + used_ids <- vapply(case_fills, function(x) x$id %||% NA, numeric(1)) + case_ids <- used_ids + case_ids[is.na(used_ids)] <- setdiff( + seq_along(case_fills), + used_ids + )[seq_len(sum(is.na(used_ids)))] + + cases <- mapply(function(case_fill, case_id) { + # Make a temporary cases for expressions that will be filled in + if ("fill" %in% names(case_fill)) { + list( + expression = case_fill$expression, + id = as.integer(case_id), + name = paste0("casefill__internal", case_id) + ) + } else { + case_fill + } + }, case_fills, case_ids, SIMPLIFY = FALSE) + + need_fills <- vapply(case_fills, function(x) "fill" %in% names(x), logical(1)) + + if (!any(need_fills)) return(caseExpr(cases = cases)) + + fills <- lapply(which(need_fills), function(cf_num) { + case_fill <- case_fills[[cf_num]] + list(fill = case_fill$fill, id = case_ids[cf_num]) + }) + + fillExpr(caseExpr(cases = cases), fills = fills) +} + +parse_case_when_formula <- function(formula, data) { + if (length(formula) != 3) { + halt( + "The condition provided must be a proper formula: ", + deparseAndFlatten(formula) + ) + } + + expr <- evalLHS(formula, data) + if (!inherits(expr, c("logical", "CrunchLogicalExpr"))) { + halt( + "The left-hand side provided must be a logical or a ", + "CrunchLogicalExpr: ", dQuote(LHS_string(formula)) + ) + } + if (identical(expr, TRUE)) expr <- "else" + + rhs <- evalRHS(formula, data) + if (is.variable(rhs)) { + rhs <- list(fill = rhs) + } else if (inherits(rhs, "Category")) { + rhs <- lapply(rhs, identity) + } else if (is.character(rhs)) { + rhs <- list(name = rhs) + } else if (is.na(rhs)) { + list(name = "No Data", missing = TRUE) + } else { + halt( + "The right-hand side provided must be a Category, CrunchVariable ", + "string, or `NA`: ", dQuote(RHS_string(formula)) + ) + } + + c(list(expression = expr), rhs) +} diff --git a/man/makeCaseWhenVariable.Rd b/man/makeCaseWhenVariable.Rd new file mode 100644 index 000000000..8c4e07e47 --- /dev/null +++ b/man/makeCaseWhenVariable.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/case-when-variable.R +\name{makeCaseWhenVariable} +\alias{makeCaseWhenVariable} +\alias{caseWhenExpr} +\title{Create a variable from categorical variables or categories based on conditions} +\usage{ +makeCaseWhenVariable(..., data = NULL, formulas = NULL, name) + +caseWhenExpr(..., data = NULL, formulas = NULL) +} +\arguments{ +\item{...}{formulas where the left hand side is a CrunchExpression (or \code{TRUE} +to indicate the "else" case that will be met if all the other expression are +not met) and the right hand side is a CrunchVariable that should be filled in, +a \code{Category} object describing the Category it should be used, a string +which will be the name of the \code{Category} or \code{NA} to indicate that it should +be replaced with the system missing value. For \code{makeCaseWhenVariable()} +non-formula arguments will be passed to \verb{[VarDef()]}} + +\item{data}{A CrunchDataset to use if variable aliases are left bare in the +formulas.} + +\item{formulas}{A list of formulas that match the description in \code{...}} + +\item{name}{For \code{maekCaseWhenVariable()} the name of the variable to create.} +} +\value{ +\code{makeCaseWhenVariable()} returns a \code{VariableDefinition} and +\code{caseWhenExpr()} returns an expression +} +\description{ +Conditions are specified using a series of formulas: the left-hand side is +the condition that must be true (a \code{CrunchLogicalExpr}) and the right-hand +side is where to get the value if the condition on the left-hand side is +true. This must be either a Crunch Categorical variable or a Category. +} +\examples{ +\dontrun{ +ds$new_var <- makeCaseWhenVariable( + ds$x \%in\% c("a", "b") ~ ds$y, # can fill with a variable + ds$x \%in\% c("c", "d") ~ Category(name = "c or d", numeric_value = 10), # or a Category + # If none of the categories match, will be set to missing unless you + # specify an "else" case with `TRUE` in the left hand side + TRUE ~ Category(name = "catch all"), + name = "combined x and y" +) + +ds$brand_x_pref <- makeCaseWhenVariable( + ds$brand[[1]] == "Brand X" ~ ds$pref[[1]], + ds$brand[[2]] == "Brand X" ~ ds$pref[[2]], + ds$brand[[3]] == "Brand X" ~ ds$pref[[3]] + name = "brand x preference" +) + +ds$rebased_x <- makeCaseWhenVariable( + ds$skipped_x != "Yes" ~ ds$x, + name = "rebased x" +) + +# caseWhenExpr can be used inside other expressions +ds$brand_x_prefer_high <- VarDef( + selectCategories( + caseWhenExpr( + ds$brand[[1]] == "Brand X" ~ ds$pref[[1]], + ds$brand[[2]] == "Brand X" ~ ds$pref[[2]], + ds$brand[[3]] == "Brand X" ~ ds$pref[[3]] + ), + c("Best", "Very Good") + ), + name = "brand x preference selected" +) +} +} diff --git a/tests/testthat/test-case-when-variable.R b/tests/testthat/test-case-when-variable.R new file mode 100644 index 000000000..6e4503fed --- /dev/null +++ b/tests/testthat/test-case-when-variable.R @@ -0,0 +1,153 @@ +context("caseWhen variable") + +with_mock_crunch({ + ds <- loadDataset("test ds") + + test_that("caseWhenExpr works when single rhs variable", { + expect_equal( + unclass(toJSON( + caseWhenExpr(ds$birthyr > 1970 ~ ds$gender)@expression + )), + paste0( + '{"function":"fill","args":[{"function":"case","args":[{"column":[1],"type":{', + '"value":{"class":"categorical","categories":[', + '{"id":1,"name":"casefill__internal1","numeric_value":null,"missing":false}', + ']}}},{"function":">","args":[{"variable"', + ':"https://app.crunch.io/api/datasets/1/variables/birthyr/"},{"value":1970}]}]}', + ',{"map":{"1":{"variable":"https://app.crunch.io/api/datasets/1/variables/gender/"', + '}}}]}' + ) + ) + }) + + test_that("caseWhenExpr works when single rhs Category", { + expect_equal( + unclass(toJSON( + caseWhenExpr(ds$birthyr > 1970 ~ Category(name = "Hello"))@expression + )), + paste0( + '{"function":"case","args":[{"column":[1],"type":{"value":{"class":"categorical",', + '"categories":[{"id":1,"name":"Hello","numeric_value":null,"missing":false}]}}},', + '{"function":">","args":[{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/birthyr/"},{"value":1970}]}]}' + ) + ) + }) + + test_that("caseWhenExpr works when variable + rhs string + else statement", { + expect_equal( + unclass(toJSON( + caseWhenExpr( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE) + )@expression + )), + paste0( + '{"function":"fill","args":[{"function":"case","args":[{"column":[1,2,3],', + '"type":{"value":{"class":"categorical","categories":[{"id":1,"name":"Hello",', + '"numeric_value":null,"missing":false},{"id":2,"name":"casefill__internal2",', + '"numeric_value":null,"missing":false},{"id":3,"name":"Missed Q",', + '"numeric_value":null,"missing":true}]}}},{"function":"between","args"', + ':[{"variable":"https://app.crunch.io/api/datasets/1/variables/birthyr/"},', + '{"value":1970},{"value":1980},{"value":[true,false]}]},{"function":"between",', + '"args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/birthyr/"},', + '{"value":1980},{"value":1990},{"value":[true,false]}]}]},{"map":{"2":{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/gender/"}}}]}' + ) + ) + }) + + test_that("caseWhenExpr handles formulas argument", { + expect_equal( + caseWhenExpr( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE) + ), + caseWhenExpr( + formulas = list( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE) + ) + ) + ) + }) + + test_that("caseWhenExpr handles data argument", { + expect_equal( + caseWhenExpr( + between(birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(birthyr, 1980, 1990) ~ gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE), + data = ds + ), + caseWhenExpr( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE) + ) + ) + }) + + test_that("makeCaseWhenVariable handles data argument", { + expect_equal( + makeCaseWhenVariable( + between(birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(birthyr, 1980, 1990) ~ gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE), + data = ds, + name = "test" + ), + makeCaseWhenVariable( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE), + name = "test" + ) + ) + }) + + test_that("makeCaseWhenVariable correctly separates dots", { + expect_equal( + makeCaseWhenVariable( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE), + name = "test", + description = "desc" + ), + VarDef( + caseWhenExpr( + between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + between(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE) + ), + name = "test", + description = "desc" + ) + ) + }) + + + test_that("caseWhenExpr formula validations", { + expect_error( + makeCaseWhenVariable(~ds$gender), + "The condition provided must be a proper formula: .ds.gender" + ) + + expect_error( + makeCaseWhenVariable(ds$gender ~ ds$gender), + "The left-hand side provided must be a logical or a CrunchLogicalExpr: \"ds.gender\"" + ) + + expect_error( + makeCaseWhenVariable(ds$birthyr > 1980 ~ 1), + paste0( + "The right-hand side provided must be a Category, CrunchVariable ", + "string, or `NA`: \"1\"" + ) + ) + }) +}) \ No newline at end of file From 1dd4b18ad828720cd0005d34dd612c804e7f1a9a Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 24 Jul 2020 17:50:19 -0500 Subject: [PATCH 04/18] better strategy for keeping names off cases --- R/case-variables.R | 6 +++--- R/case-when-variable.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/case-variables.R b/R/case-variables.R index 8fa969e5d..411137fa0 100644 --- a/R/case-variables.R +++ b/R/case-variables.R @@ -124,12 +124,12 @@ caseExpr <- function(..., cases) { new_cat_type <- list( value = list( class = "categorical", - categories = unname(lapply(cases, function(case) { + categories = lapply(cases, function(case) { case[c("id", "name", "numeric_value", "missing")] - })) + }) ) ) - new_cat_ids <- unname(vapply(cases, vget("id"), integer(1))) + new_cat_ids <- vapply(cases, vget("id"), integer(1)) new_cat <- list(column = I(new_cat_ids), type = new_cat_type) # remove nulls from case expressions (should only be from the else case) diff --git a/R/case-when-variable.R b/R/case-when-variable.R index 219c9ec9f..4674ee102 100644 --- a/R/case-when-variable.R +++ b/R/case-when-variable.R @@ -61,7 +61,7 @@ makeCaseWhenVariable <- function(..., data = NULL, formulas = NULL, name) { formula_dots <- vapply(dots, function(x) inherits(x, "formula"), logical(1)) args <- list( - data = caseWhenExpr(data = data, formulas = c(formulas, dots[formula_dots])), + data = caseWhenExpr(data = data, formulas = c(formulas, unname(dots[formula_dots]))), name = name ) args <- c(args, dots[!formula_dots]) From c1833c158c0aa8af2aa8eedfd66d016f8d357f24 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Thu, 30 Jul 2020 16:14:58 -0500 Subject: [PATCH 05/18] typo and merge conflict fix --- R/case-when-variable.R | 2 +- man/makeCaseWhenVariable.Rd | 2 +- tests/testthat/test-case-when-variable.R | 36 ++++++++++++------------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/case-when-variable.R b/R/case-when-variable.R index 4674ee102..81f3eed07 100644 --- a/R/case-when-variable.R +++ b/R/case-when-variable.R @@ -5,7 +5,7 @@ #' side is where to get the value if the condition on the left-hand side is #' true. This must be either a Crunch Categorical variable or a Category. #' -#' @param ... formulas where the left hand side is a CrunchExpression (or `TRUE` +#' @param ... formulas where the left hand side is a `CrunchLogicalExpression` (or `TRUE` #' to indicate the "else" case that will be met if all the other expression are #' not met) and the right hand side is a CrunchVariable that should be filled in, #' a `Category` object describing the Category it should be used, a string diff --git a/man/makeCaseWhenVariable.Rd b/man/makeCaseWhenVariable.Rd index 8c4e07e47..2b96e0345 100644 --- a/man/makeCaseWhenVariable.Rd +++ b/man/makeCaseWhenVariable.Rd @@ -10,7 +10,7 @@ makeCaseWhenVariable(..., data = NULL, formulas = NULL, name) caseWhenExpr(..., data = NULL, formulas = NULL) } \arguments{ -\item{...}{formulas where the left hand side is a CrunchExpression (or \code{TRUE} +\item{...}{formulas where the left hand side is a \code{CrunchLogicalExpression} (or \code{TRUE} to indicate the "else" case that will be met if all the other expression are not met) and the right hand side is a CrunchVariable that should be filled in, a \code{Category} object describing the Category it should be used, a string diff --git a/tests/testthat/test-case-when-variable.R b/tests/testthat/test-case-when-variable.R index 6e4503fed..1b309bab9 100644 --- a/tests/testthat/test-case-when-variable.R +++ b/tests/testthat/test-case-when-variable.R @@ -38,8 +38,8 @@ with_mock_crunch({ expect_equal( unclass(toJSON( caseWhenExpr( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE) )@expression )), @@ -61,14 +61,14 @@ with_mock_crunch({ test_that("caseWhenExpr handles formulas argument", { expect_equal( caseWhenExpr( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE) ), caseWhenExpr( formulas = list( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE) ) ) @@ -78,14 +78,14 @@ with_mock_crunch({ test_that("caseWhenExpr handles data argument", { expect_equal( caseWhenExpr( - between(birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(birthyr, 1980, 1990) ~ gender, + crunchBetween(birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(birthyr, 1980, 1990) ~ gender, TRUE ~ Category(name = "Missed Q", missing = TRUE), data = ds ), caseWhenExpr( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE) ) ) @@ -94,15 +94,15 @@ with_mock_crunch({ test_that("makeCaseWhenVariable handles data argument", { expect_equal( makeCaseWhenVariable( - between(birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(birthyr, 1980, 1990) ~ gender, + crunchBetween(birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(birthyr, 1980, 1990) ~ gender, TRUE ~ Category(name = "Missed Q", missing = TRUE), data = ds, name = "test" ), makeCaseWhenVariable( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE), name = "test" ) @@ -112,16 +112,16 @@ with_mock_crunch({ test_that("makeCaseWhenVariable correctly separates dots", { expect_equal( makeCaseWhenVariable( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE), name = "test", description = "desc" ), VarDef( caseWhenExpr( - between(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), - between(ds$birthyr, 1980, 1990) ~ ds$gender, + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE) ), name = "test", From 2b2c3aff9c216988c8329f19de3e1a5d1b4f0448 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 10:19:58 -0500 Subject: [PATCH 06/18] lint fixes --- tests/testthat/test-case-when-variable.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-case-when-variable.R b/tests/testthat/test-case-when-variable.R index 1b309bab9..3ed450d11 100644 --- a/tests/testthat/test-case-when-variable.R +++ b/tests/testthat/test-case-when-variable.R @@ -14,8 +14,7 @@ with_mock_crunch({ '{"id":1,"name":"casefill__internal1","numeric_value":null,"missing":false}', ']}}},{"function":">","args":[{"variable"', ':"https://app.crunch.io/api/datasets/1/variables/birthyr/"},{"value":1970}]}]}', - ',{"map":{"1":{"variable":"https://app.crunch.io/api/datasets/1/variables/gender/"', - '}}}]}' + ',{"map":{"1":{"variable":"https://app.crunch.io/api/datasets/1/variables/gender/"}}}]}' # nolint ) ) }) @@ -150,4 +149,4 @@ with_mock_crunch({ ) ) }) -}) \ No newline at end of file +}) From 2ba12bc650df2f819e87de1efbe11156f45f590b Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 11:14:18 -0500 Subject: [PATCH 07/18] caseWhen takes list objects for programmatically working --- R/case-when-variable.R | 31 ++++++++++++++++++------ man/makeCaseWhenVariable.Rd | 20 ++++++++++++--- tests/testthat/test-case-when-variable.R | 31 ++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 13 deletions(-) diff --git a/R/case-when-variable.R b/R/case-when-variable.R index 81f3eed07..e9c96129a 100644 --- a/R/case-when-variable.R +++ b/R/case-when-variable.R @@ -14,8 +14,11 @@ #' non-formula arguments will be passed to `[VarDef()]` #' @param data A CrunchDataset to use if variable aliases are left bare in the #' formulas. -#' @param formulas A list of formulas that match the description in `...` -#' @param name For `maekCaseWhenVariable()` the name of the variable to create. +#' @param cases A list of formulas that match the description in `...` or a list of +#' lists with named items, "expression" (like the left-hand side of the formulas above), +#' "fill" for a variable to fill in, or "name", "id", and other items that describe a +#' category. +#' @param name For `makeCaseWhenVariable()` the name of the variable to create. #' #' @return `makeCaseWhenVariable()` returns a `VariableDefinition` and #' `caseWhenExpr()` returns an expression @@ -55,13 +58,22 @@ #' ), #' name = "brand x preference selected" #' ) +#' +#' # Using lists in `cases` argument can be helpful when working programmatically +#' fill_var <- ds$x +#' fill_condition <- ds$skipped_x != "Yes" +#' +#' ds$rebased_x2 <- makeCaseWhenVariable( +#' cases = list(list(fill = fill_var, expression = fill_condition)), +#' name = "rebased x 2" +#' ) #' } -makeCaseWhenVariable <- function(..., data = NULL, formulas = NULL, name) { +makeCaseWhenVariable <- function(..., data = NULL, cases = NULL, name) { dots <- list(...) formula_dots <- vapply(dots, function(x) inherits(x, "formula"), logical(1)) args <- list( - data = caseWhenExpr(data = data, formulas = c(formulas, unname(dots[formula_dots]))), + data = caseWhenExpr(data = data, cases = c(cases, unname(dots[formula_dots]))), name = name ) args <- c(args, dots[!formula_dots]) @@ -71,9 +83,9 @@ makeCaseWhenVariable <- function(..., data = NULL, formulas = NULL, name) { #' @export #' @rdname makeCaseWhenVariable -caseWhenExpr <- function(..., data = NULL, formulas = NULL) { - formulas <- c(formulas, list(...)) - case_fills <- lapply(formulas, parse_case_when_formula, data = data) +caseWhenExpr <- function(..., data = NULL, cases = NULL) { + cases <- unname(c(cases, list(...))) + case_fills <- lapply(cases, parse_case_when_formula, data = data) # Get set of unique IDs that fill in for when IDs are missing used_ids <- vapply(case_fills, function(x) x$id %||% NA, numeric(1)) @@ -109,6 +121,11 @@ caseWhenExpr <- function(..., data = NULL, formulas = NULL) { } parse_case_when_formula <- function(formula, data) { + if (is.list(formula)) { + if (identical(formula$expression, TRUE)) formula$expression <- "else" + return(formula) + } + if (length(formula) != 3) { halt( "The condition provided must be a proper formula: ", diff --git a/man/makeCaseWhenVariable.Rd b/man/makeCaseWhenVariable.Rd index 2b96e0345..5def15e8a 100644 --- a/man/makeCaseWhenVariable.Rd +++ b/man/makeCaseWhenVariable.Rd @@ -5,9 +5,9 @@ \alias{caseWhenExpr} \title{Create a variable from categorical variables or categories based on conditions} \usage{ -makeCaseWhenVariable(..., data = NULL, formulas = NULL, name) +makeCaseWhenVariable(..., data = NULL, cases = NULL, name) -caseWhenExpr(..., data = NULL, formulas = NULL) +caseWhenExpr(..., data = NULL, cases = NULL) } \arguments{ \item{...}{formulas where the left hand side is a \code{CrunchLogicalExpression} (or \code{TRUE} @@ -21,9 +21,12 @@ non-formula arguments will be passed to \verb{[VarDef()]}} \item{data}{A CrunchDataset to use if variable aliases are left bare in the formulas.} -\item{formulas}{A list of formulas that match the description in \code{...}} +\item{cases}{A list of formulas that match the description in \code{...} or a list of +lists with named items, "expression" (like the left-hand side of the formulas above), +"fill" for a variable to fill in, or "name", "id", and other items that describe a +category.} -\item{name}{For \code{maekCaseWhenVariable()} the name of the variable to create.} +\item{name}{For \code{makeCaseWhenVariable()} the name of the variable to create.} } \value{ \code{makeCaseWhenVariable()} returns a \code{VariableDefinition} and @@ -70,5 +73,14 @@ ds$brand_x_prefer_high <- VarDef( ), name = "brand x preference selected" ) + +# Using lists in `cases` argument can be helpful when working programmatically +fill_var <- ds$x +fill_condition <- ds$skipped_x != "Yes" + +ds$rebased_x2 <- makeCaseWhenVariable( + cases = list(list(fill = fill_var, expression = fill_condition)), + name = "rebased x 2" +) } } diff --git a/tests/testthat/test-case-when-variable.R b/tests/testthat/test-case-when-variable.R index 3ed450d11..d6de5869b 100644 --- a/tests/testthat/test-case-when-variable.R +++ b/tests/testthat/test-case-when-variable.R @@ -57,7 +57,7 @@ with_mock_crunch({ ) }) - test_that("caseWhenExpr handles formulas argument", { + test_that("caseWhenExpr handles formulas in cases argument", { expect_equal( caseWhenExpr( crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), @@ -65,7 +65,7 @@ with_mock_crunch({ TRUE ~ Category(name = "Missed Q", missing = TRUE) ), caseWhenExpr( - formulas = list( + cases = list( crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, TRUE ~ Category(name = "Missed Q", missing = TRUE) @@ -74,6 +74,33 @@ with_mock_crunch({ ) }) + test_that("caseWhenExpr handles lists in cases argument", { + expect_equal( + caseWhenExpr( + crunchBetween(ds$birthyr, 1970, 1980) ~ Category(name = "Hello"), + crunchBetween(ds$birthyr, 1980, 1990) ~ ds$gender, + TRUE ~ Category(name = "Missed Q", missing = TRUE) + ), + caseWhenExpr( + cases = list( + list( + expression = crunchBetween(ds$birthyr, 1970, 1980), + name = "Hello" + ), + list( + expression = crunchBetween(ds$birthyr, 1980, 1990), + fill = ds$gender + ), + list( + expression = TRUE, + name = "Missed Q", + missing = TRUE + ) + ) + ) + ) + }) + test_that("caseWhenExpr handles data argument", { expect_equal( caseWhenExpr( From b503dcf6cd804e20f84bc2ad75fd3d82ec7eff8b Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 31 Jul 2020 15:15:17 -0500 Subject: [PATCH 08/18] allow crunch variables to be zcl'ed (helpful in eg deriveArray) --- R/R-to-variable.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/R-to-variable.R b/R/R-to-variable.R index 22c63826c..9d1f7b243 100644 --- a/R/R-to-variable.R +++ b/R/R-to-variable.R @@ -42,7 +42,7 @@ setGeneric("toVariable", function(x, ...) standardGeneric("toVariable")) #' @rdname toVariable #' @export -setMethod("toVariable", "CrunchExpr", function(x, ...) { +setMethod("toVariable", "CrunchVarOrExpr", function(x, ...) { structure(list(derivation = zcl(x), ...), class = "VariableDefinition") }) From 21a46feb06cdd246dc8b4a316d3f325d6319cf40 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 31 Jul 2020 15:17:16 -0500 Subject: [PATCH 09/18] alterArray expression --- NAMESPACE | 1 + R/expressions.R | 92 +++++++++++++++++++++++++++++++------ R/zcl.R | 11 +++++ man/expressions-internal.Rd | 10 ++++ 4 files changed, 99 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index de0e5782b..5669ba936 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -101,6 +101,7 @@ export(addSubvariable) export(addSubvariables) export(addSummaryStat) export(addVariables) +export(alterArrayExpr) export(alterCategoriesExpr) export(analyses) export(analysis) diff --git a/R/expressions.R b/R/expressions.R index 9eecc8ee4..a1437a6c6 100644 --- a/R/expressions.R +++ b/R/expressions.R @@ -704,30 +704,92 @@ alter_cats_get_subvar_ids <- function(x, subvariables) { }) } +#' @rdname expressions-internal +#' @export +alterArrayExpr <- function( + x, + add = NULL, + order = NULL, + remove = NULL, + remove_id = c("alias", "name", "id"), + subreferences = NULL +) { + isVarButNotType(x, "Array", "alterArrayExpr") + remove_id <- match.arg(remove_id) + + add_ids <- if (is.null(names(add))) new_array_ids(x, length(add)) else names(add) + add <- setNames(lapply(add, zcl), add_ids) + + if (is.null(order) && !is.null(add)) { + if (!is.variable(x)) { + halt("Must set order when adding subvariables to an expression") + } + order <- c(ids(subvariables(x)), add_ids) + } + + remove <- match_subvar_to_id(x, remove, remove_id) + + + args <- list( + fun = "alter_array", + x = x + ) + + if (!is.null(add)) args$add <- list(map = I(add)) + if (!is.null(order)) args$order <- list(value = I(order)) + if (!is.null(remove)) args$remove <- list(value = I(remove)) + if (!is.null(subreferences)) args$subreferences <- list(value = I(subreferences)) + + do.call(zfuncExpr, args) +} + +new_array_ids <- function(array, num) { + if (is.variable(array)) { + existing_ids <- ids(subvariables(array)) + } else { + existing_ids <- try({ + arrayflat <- unlist(array) + unlist(arrayflat)[grepl("\\.map\\.", names(arrayflat))] + }, silent = TRUE) + if (inherits(existing_ids, "try-error")) existing_ids <- c() + } + + max_numeric <- suppressWarnings(max(c(as.numeric(existing_ids), 0), na.rm = TRUE)) + as.character(max_numeric + seq_len(num)) +} + #' @rdname expressions-internal #' @export arraySubsetExpr <- function(x, subvars, subvar_id = c("alias", "name", "id")) { isVarButNotType(x, "Array", "arraySubsetExpr") subvar_id <- match.arg(subvar_id) - if (subvar_id != "id") { - if (!is.variable(x)) halt("Must subset by id when subsetting an expression") + subvars <- match_subvar_to_id(x, subvars, subvar_id) - if (subvar_id == "alias") { - matches <- match(subvars, aliases(subvariables(x))) - } else if (subvar_id == "name") { - matches <- match(subvars, names(subvariables(x))) - } + zfuncExpr("array_subset", x, list(value = I(subvars))) +} - if (any(is.na(matches))) { - halt( - "Could not find subvariables with ", subvar_id, " ", - paste0("'", subvars[is.na(matches)], "'", collapse = ",") - ) - } - subvars <- ids(subvariables(x))[matches] +match_subvar_to_id <- function(x, subvars, id_type = c("alias", "name", "id")) { + if (is.null(subvars)) return() + id_type <- match.arg(id_type) + + if (id_type == "id") { + if (!is.variable(x)) return(subvars) # no validation possible + matches <- match(subvars, ids(subvariables(x))) + } else if (!is.variable(x)) { + halt("Must provide subvariable ids when x is an expression") + } else if (id_type == "alias") { + matches <- match(subvars, aliases(subvariables(x))) + } else if (id_type == "name") { + matches <- match(subvars, names(subvariables(x))) } - zfuncExpr("array_subset", x, list(value = I(subvars))) + if (any(is.na(matches))) { + halt( + "Could not find subvariables with ", id_type, " ", + paste0("'", subvars[is.na(matches)], "'", collapse = ",") + ) + } + ids(subvariables(x))[matches] } #' @rdname crunch-is diff --git a/R/zcl.R b/R/zcl.R index 277523e40..536fe3dbb 100644 --- a/R/zcl.R +++ b/R/zcl.R @@ -45,6 +45,17 @@ setOldClass("zcl") setMethod("zcl", "zcl", function(x) x) setMethod("zcl", "list", function(x) x) ## is this a good idea? setMethod("zcl", "CrunchFilter", function(x) x@body$expression) +setMethod("zcl", "VariableDefinition", function(x) { + non_ref_names <- c("derivation", "values", "type", "categories", "resolution") + if ("derivation" %in% names(x)) { + out <- x$derivation + } else { + halt("zcl functions only work on existing crunch variables") + } + references <- x[!names(x) %in% non_ref_names] + if (length(references) > 0) out$references <- references + out +}) zfunc <- function(func, ...) { ## Wrapper that creates ZCL function syntax diff --git a/man/expressions-internal.Rd b/man/expressions-internal.Rd index ca3b4d0d4..2b034a96e 100644 --- a/man/expressions-internal.Rd +++ b/man/expressions-internal.Rd @@ -41,6 +41,7 @@ \alias{nchar,CrunchVarOrExpr-method} \alias{trim} \alias{alterCategoriesExpr} +\alias{alterArrayExpr} \alias{arraySubsetExpr} \alias{makeFrame} \title{Crunch expressions internal} @@ -122,6 +123,15 @@ alterCategoriesExpr( subvariables = NULL ) +alterArrayExpr( + x, + add = NULL, + order = NULL, + remove = NULL, + remove_id = c("alias", "name", "id"), + subreferences = NULL +) + arraySubsetExpr(x, subvars, subvar_id = c("alias", "name", "id")) makeFrame(x) From 085f204c9edf65d8cd9eaa867483ce7df46239d6 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 31 Jul 2020 15:17:32 -0500 Subject: [PATCH 10/18] makeFrame can use VarDef zcl method --- R/make-array.R | 10 +--------- tests/testthat/test-make-array.R | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/R/make-array.R b/R/make-array.R index cdca67e38..f1fea5704 100644 --- a/R/make-array.R +++ b/R/make-array.R @@ -268,15 +268,7 @@ makeFrame <- function(x) { # if it's a list, it could contain variable definitions: if (is.list(x)) { x <- x[lengths(x) > 0] # remove NULLs (from eg slider) - x <- lapply(x, function(sv) { - if (is.VarDef(sv)) { - out <- sv$derivation - out$references <- sv[names(sv) != "derivation"] - out - } else { - list(variable = urls(sv)) - } - }) + x <- lapply(x, zcl) } else { # but ShojiCatalogs don't give their urls when lapplying, so treat differently x <- lapply(urls(x), function(sv) list(variable = sv)) } diff --git a/tests/testthat/test-make-array.R b/tests/testthat/test-make-array.R index a7333d2d6..abe12bb41 100644 --- a/tests/testthat/test-make-array.R +++ b/tests/testthat/test-make-array.R @@ -224,7 +224,7 @@ with_mock_crunch({ `function` = "make_frame", args = list(list( map = list( - c(zcl(ds$gender == "Male"), references = list(name = "male")) + c(zcl(ds$gender == "Male"), list(references = list(name = "male"))) ) ), list(value = I("1"))) )) From 66d45c34d94879e8239c29d2537201f8aa5c762a Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Fri, 31 Jul 2020 15:21:08 -0500 Subject: [PATCH 11/18] broken test --- tests/testthat/test-expressions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-expressions.R b/tests/testthat/test-expressions.R index 437b28e01..96707addb 100644 --- a/tests/testthat/test-expressions.R +++ b/tests/testthat/test-expressions.R @@ -676,7 +676,7 @@ with_mock_crunch({ # fail expect_error( arraySubsetExpr(asSelected(ds$catarray), c("subvar1", "subvar2"), "alias"), - "Must subset by id when subsetting an expression" + "Must provide subvariable ids when x is an expression" ) expect_error( From 4f292bfccd668e806d94f7b1a2d2298a710831f8 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 13:00:45 -0500 Subject: [PATCH 12/18] add way to specify id type for ordering and subreferences during alterArray --- R/expressions.R | 57 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/R/expressions.R b/R/expressions.R index a1437a6c6..ef4251f1d 100644 --- a/R/expressions.R +++ b/R/expressions.R @@ -710,30 +710,41 @@ alterArrayExpr <- function( x, add = NULL, order = NULL, + order_id = c("alias", "name", "id"), remove = NULL, remove_id = c("alias", "name", "id"), - subreferences = NULL + subreferences = NULL, + subreferences_id = c("alias", "name", "id") ) { isVarButNotType(x, "Array", "alterArrayExpr") remove_id <- match.arg(remove_id) + order_id <- match.arg(order_id) + subreferences_id <- match.arg(subreferences_id) - add_ids <- if (is.null(names(add))) new_array_ids(x, length(add)) else names(add) - add <- setNames(lapply(add, zcl), add_ids) + if (!is.null(add)) { + if (is.variable(add) || is.VarDef(add)) add <- list(add) + add_ids <- if (is.null(names(add))) new_array_ids(x, length(add)) else names(add) + add <- setNames(lapply(add, zcl), add_ids) - if (is.null(order) && !is.null(add)) { - if (!is.variable(x)) { - halt("Must set order when adding subvariables to an expression") + if (is.null(order)) { + if (!is.variable(x)) { + halt("Must set order when adding subvariables to an expression") + } + order <- c(ids(subvariables(x)), add_ids) + order_id <- "id" } - order <- c(ids(subvariables(x)), add_ids) } - remove <- match_subvar_to_id(x, remove, remove_id) + if (!is.null(order)) { + order <- match_subvar_to_id(x, order, order_id, add) + } + remove <- match_subvar_to_id(x, remove, remove_id, add) + if (!is.null(subreferences)) { + names(subreferences) <- match_subvar_to_id(x, names(subreferences), subreferences_id, add) + } - args <- list( - fun = "alter_array", - x = x - ) + args <- list(fun = "alter_array", x = x) if (!is.null(add)) args$add <- list(map = I(add)) if (!is.null(order)) args$order <- list(value = I(order)) @@ -768,19 +779,29 @@ arraySubsetExpr <- function(x, subvars, subvar_id = c("alias", "name", "id")) { zfuncExpr("array_subset", x, list(value = I(subvars))) } -match_subvar_to_id <- function(x, subvars, id_type = c("alias", "name", "id")) { - if (is.null(subvars)) return() +match_subvar_to_id <- function(x, subvars, id_type = c("alias", "name", "id"), add = NULL) { + if (is.null(subvars)) return(subvars) id_type <- match.arg(id_type) + if (!is.null(add)) { + add_refs <- list( + ids = names(add), + aliases = vapply(add, function(x) x$references$alias %||% "", ""), + names = vapply(add, function(x) x$references$name %||% "", "") + ) + } else { + add_refs <- list(ids = NULL, aliases = NULL, names = NULL) + } + if (id_type == "id") { if (!is.variable(x)) return(subvars) # no validation possible - matches <- match(subvars, ids(subvariables(x))) + matches <- match(subvars, c(ids(subvariables(x)), add_refs$ids)) } else if (!is.variable(x)) { halt("Must provide subvariable ids when x is an expression") } else if (id_type == "alias") { - matches <- match(subvars, aliases(subvariables(x))) + matches <- match(subvars, c(aliases(subvariables(x)), add_refs$aliases)) } else if (id_type == "name") { - matches <- match(subvars, names(subvariables(x))) + matches <- match(subvars, c(names(subvariables(x)), add_refs$names)) } if (any(is.na(matches))) { @@ -789,7 +810,7 @@ match_subvar_to_id <- function(x, subvars, id_type = c("alias", "name", "id")) { paste0("'", subvars[is.na(matches)], "'", collapse = ",") ) } - ids(subvariables(x))[matches] + c(ids(subvariables(x)), add_refs$ids)[matches] } #' @rdname crunch-is From bc1cd482df19f51b79b9ed9bb3396376b5db1dba Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 13:00:57 -0500 Subject: [PATCH 13/18] add tests for alterArray --- tests/testthat/test-expressions.R | 102 ++++++++++++++++++++++++++++++ tests/testthat/test-zcl.R | 10 +++ 2 files changed, 112 insertions(+) diff --git a/tests/testthat/test-expressions.R b/tests/testthat/test-expressions.R index 96707addb..506ff22d3 100644 --- a/tests/testthat/test-expressions.R +++ b/tests/testthat/test-expressions.R @@ -633,6 +633,108 @@ with_mock_crunch({ ) }) + + test_that("alterArrayExpr - add var and order", { + expr <- alterArrayExpr( + ds$mymrset, + add = list("4" = ds$gender), + order = c("gender", "4", "subvar1", "subvar3"), + order_id = "id" + ) + expect_is(expr, "CrunchExpr") + expect_equal( + unclass(toJSON(expr@expression)), + paste0( + '{"function":"alter_array","args":[{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/mymrset/"}],', + '"kwargs":{"add":{"map":{"4":{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/gender/"}}},', + '"order":{"value":["gender","4","subvar1","subvar3"]}}}' + ) + ) + }) + + test_that("alterArrayExpr - add var and order by new alias", { + expr <- alterArrayExpr( + ds$mymrset, + add = list("4" = VarDef(alias = "new_gender", ds$gender)), + order = c("subvar2", "new_gender", "subvar1", "subvar3") + ) + expect_is(expr, "CrunchExpr") + expect_equal( + unclass(toJSON(expr@expression)), + paste0( + '{"function":"alter_array","args":[{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/mymrset/"}],', + '"kwargs":{"add":{"map":{"4":{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/gender/",', + '"references":{"alias":"new_gender"}}}},', + '"order":{"value":["gender","4","subvar1","subvar3"]}}}' + ) + ) + }) + + test_that("alterArrayExpr - add var no order", { + expr <- alterArrayExpr( + ds$mymrset, + add = list(ds$gender), + ) + expect_is(expr, "CrunchExpr") + expect_equal( + unclass(toJSON(expr@expression)), + paste0( + '{"function":"alter_array","args":[{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/mymrset/"}],', + '"kwargs":{"add":{"map":{"1":{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/gender/"}}},', + '"order":{"value":["gender","subvar1","subvar3","1"]}}}' + ) + ) + }) + + test_that("alterArrayExpr - remove var", { + expr <- alterArrayExpr( + ds$mymrset, + remove = "gender", + remove_id = "id" + ) + expect_is(expr, "CrunchExpr") + expect_equal( + unclass(toJSON(expr@expression)), + paste0( + '{"function":"alter_array","args":[{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/mymrset/"}],', + '"kwargs":{"remove":{"value":["gender"]}}}' + ) + ) + + expect_equal( + unclass(toJSON(alterArrayExpr(ds$mymrset, remove = "subvar2", remove_id = "alias")@expression)), + unclass(toJSON(expr@expression)) + ) + + expect_equal( + unclass(toJSON(alterArrayExpr(ds$mymrset, remove = "First", remove_id = "name")@expression)), + unclass(toJSON(expr@expression)) + ) + }) + + test_that("alterArrayExpr - subreferences", { + expr <- alterArrayExpr( + ds$mymrset, + subreferences = list("subvar2" = list(name = "new name")) + ) + expect_is(expr, "CrunchExpr") + expect_equal( + unclass(toJSON(expr@expression)), + paste0( + '{"function":"alter_array","args":[{"variable":', + '"https://app.crunch.io/api/datasets/1/variables/mymrset/"}],', + '"kwargs":{"subreferences":{"value":{"gender":{"name":"new name"}}}}}' + ) + ) + }) + test_that("arraySubsetExpr", { # aliases expr <- arraySubsetExpr(ds$catarray, c("subvar1", "subvar3"), "alias") diff --git a/tests/testthat/test-zcl.R b/tests/testthat/test-zcl.R index 8d6e1dfd7..c22aa4f38 100644 --- a/tests/testthat/test-zcl.R +++ b/tests/testthat/test-zcl.R @@ -57,4 +57,14 @@ with_mock_crunch({ ) ) }) + + test_that("zcl (VarDef)", { + expect_equal( + zcl(VarDef(ds$gender, name = "x")), + list( + variable = "https://app.crunch.io/api/datasets/1/variables/gender", + references = list(name = "x") + ) + ) + }) }) From 00dbb86c45954fd4f9177e092ec625f68e6b8bad Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 13:02:36 -0500 Subject: [PATCH 14/18] lint --- tests/testthat/test-expressions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-expressions.R b/tests/testthat/test-expressions.R index 506ff22d3..29558f428 100644 --- a/tests/testthat/test-expressions.R +++ b/tests/testthat/test-expressions.R @@ -709,12 +709,12 @@ with_mock_crunch({ ) expect_equal( - unclass(toJSON(alterArrayExpr(ds$mymrset, remove = "subvar2", remove_id = "alias")@expression)), + unclass(toJSON(alterArrayExpr(ds$mymrset, remove = "subvar2", remove_id = "alias")@expression)), #nolint unclass(toJSON(expr@expression)) ) expect_equal( - unclass(toJSON(alterArrayExpr(ds$mymrset, remove = "First", remove_id = "name")@expression)), + unclass(toJSON(alterArrayExpr(ds$mymrset, remove = "First", remove_id = "name")@expression)), #nolint unclass(toJSON(expr@expression)) ) }) From 10d3dfd5e4995530f94fc0d2961a2fd134752c50 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 13:34:25 -0500 Subject: [PATCH 15/18] oops --- tests/testthat/test-zcl.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-zcl.R b/tests/testthat/test-zcl.R index c22aa4f38..a53e7b471 100644 --- a/tests/testthat/test-zcl.R +++ b/tests/testthat/test-zcl.R @@ -62,7 +62,7 @@ with_mock_crunch({ expect_equal( zcl(VarDef(ds$gender, name = "x")), list( - variable = "https://app.crunch.io/api/datasets/1/variables/gender", + variable = "https://app.crunch.io/api/datasets/1/variables/gender/", references = list(name = "x") ) ) From 312084d54727be8c2ca572d71d0ccbe3d256d59c Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 3 Aug 2020 13:59:14 -0500 Subject: [PATCH 16/18] oops doc fixes --- R/expressions.R | 2 +- inst/WORDLIST | 1 + man/expressions-internal.Rd | 6 ++++-- man/toVariable.Rd | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/expressions.R b/R/expressions.R index ef4251f1d..a238fa852 100644 --- a/R/expressions.R +++ b/R/expressions.R @@ -5,7 +5,7 @@ #' @param x,e1,e2 inputs #' @param table,na.rm,selections,upper,lower,inclusive,regex,ignore_case,selections,collapse,tiers, #' cases,data,resolution,year,month,day,hours,minutes,seconds,min,max,categories,category_order, -#' subvariables +#' subvariables,remove,remove_id,subreferences,subreferences_id #' Other parameters used in some functions, see details of [`expressions`] for more details. #' @return Most functions return a CrunchExpr or CrunchLogicalExpr. #' `as.vector` returns an R vector. diff --git a/inst/WORDLIST b/inst/WORDLIST index 04f84beae..4d5df7559 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -172,6 +172,7 @@ Snowden's spss subclasses subfolder +subreferences subtyped subtypes subvariable diff --git a/man/expressions-internal.Rd b/man/expressions-internal.Rd index 2b034a96e..299b7bd7e 100644 --- a/man/expressions-internal.Rd +++ b/man/expressions-internal.Rd @@ -127,9 +127,11 @@ alterArrayExpr( x, add = NULL, order = NULL, + order_id = c("alias", "name", "id"), remove = NULL, remove_id = c("alias", "name", "id"), - subreferences = NULL + subreferences = NULL, + subreferences_id = c("alias", "name", "id") ) arraySubsetExpr(x, subvars, subvar_id = c("alias", "name", "id")) @@ -140,7 +142,7 @@ makeFrame(x) \item{x, e1, e2}{inputs} \item{table, na.rm, selections, upper, lower, inclusive, regex, ignore_case, selections, collapse, tiers, }{cases,data,resolution,year,month,day,hours,minutes,seconds,min,max,categories,category_order, -subvariables +subvariables,remove,remove_id,subreferences,subreferences_id Other parameters used in some functions, see details of \code{\link{expressions}} for more details.} } \value{ diff --git a/man/toVariable.Rd b/man/toVariable.Rd index 7b998f3b4..844839b8f 100644 --- a/man/toVariable.Rd +++ b/man/toVariable.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/R-to-variable.R \name{toVariable} \alias{toVariable} -\alias{toVariable,CrunchExpr-method} +\alias{toVariable,CrunchVarOrExpr-method} \alias{toVariable,character-method} \alias{toVariable,numeric-method} \alias{toVariable,factor-method} @@ -19,7 +19,7 @@ \usage{ toVariable(x, ...) -\S4method{toVariable}{CrunchExpr}(x, ...) +\S4method{toVariable}{CrunchVarOrExpr}(x, ...) \S4method{toVariable}{character}(x, ...) From f0b4ea7383bf5e6d03aef5451d7ffd92c21b8d76 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Tue, 4 Aug 2020 11:38:15 -0500 Subject: [PATCH 17/18] alter categories used kwargs incorrectly --- R/expressions.R | 11 +++++------ tests/testthat/test-expressions.R | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/R/expressions.R b/R/expressions.R index a238fa852..88eedf142 100644 --- a/R/expressions.R +++ b/R/expressions.R @@ -612,14 +612,13 @@ alterCategoriesExpr <- function( ) { isVarButNotType(x, c("Array", "Categorical"), "alterCategoriesExpr") - args <- list() - if (!is.null(categories)) args$categories <- alter_cats_get_cat_ids(x, categories) - if (!is.null(order)) args$order <- alter_cats_get_order_ids(x, category_order) + args <- list(fun = "alter_categories", x) + if (!is.null(categories)) args$categories <- list(value = alter_cats_get_cat_ids(x, categories)) # nolint + if (!is.null(category_order)) args$order <- list(value = alter_cats_get_order_ids(x, category_order)) if (!is.null(subvariables)) { - args$subvariables <- alter_cats_get_subvar_ids(x, subvariables) + args$subvariables <- list(value = alter_cats_get_subvar_ids(x, subvariables)) } - - zfuncExpr("alter_categories", x, list(value = I(args))) + do.call(zfuncExpr, args) } alter_cats_get_cat_ids <- function(x, categories) { diff --git a/tests/testthat/test-expressions.R b/tests/testthat/test-expressions.R index 29558f428..43338585d 100644 --- a/tests/testthat/test-expressions.R +++ b/tests/testthat/test-expressions.R @@ -502,9 +502,9 @@ with_mock_crunch({ expect_equal( unclass(toJSON(expr@expression)), paste0( - '{"function":"alter_categories","args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"},', # nolint - '{"value":{"categories":[{"id":1,"name":"AAA"}],"order":[2,1,-1],', - '"subvariables":[{"id":"subvar1","name":"ZZZ"}]}}]}' + '{"function":"alter_categories","args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"}],', # nolint + '"kwargs":{"categories":{"value":[{"id":1,"name":"AAA"}]},"order":{"value":[2,1,-1]},', #nolint + '"subvariables":{"value":[{"id":"subvar1","name":"ZZZ"}]}}}' ) ) }) @@ -521,9 +521,9 @@ with_mock_crunch({ expect_equal( unclass(toJSON(expr@expression)), paste0( - '{"function":"alter_categories","args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"},', # nolint - '{"value":{"categories":[{"id":1,"name":"AAA"}],"order":[2,1,-1],', - '"subvariables":[{"id":"subvar1","name":"ZZZ"}]}}]}' + '{"function":"alter_categories","args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"}],', # nolint + '"kwargs":{"categories":{"value":[{"id":1,"name":"AAA"}]},"order":{"value":[2,1,-1]},', #nolint + '"subvariables":{"value":[{"id":"subvar1","name":"ZZZ"}]}}}' ) ) }) @@ -538,8 +538,8 @@ with_mock_crunch({ expect_equal( unclass(toJSON(expr@expression)), paste0( - '{"function":"alter_categories","args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"},', # nolint - '{"value":{"subvariables":[{"id":"subvar1","name":"ZZZ"}]}}]}' + '{"function":"alter_categories","args":[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"}],', # nolint + '"kwargs":{"subvariables":{"value":[{"id":"subvar1","name":"ZZZ"}]}}}' ) ) }) @@ -558,9 +558,9 @@ with_mock_crunch({ '{"function":"alter_categories","args":[{"function":"as_selected","args":', '[{"function":"select_categories","args":', '[{"variable":"https://app.crunch.io/api/datasets/1/variables/catarray/"},', - '{"value":["A"]}]}]},', - '{"value":{"categories":[{"id":1,"name":"AAA"}],"order":[2,1,-1],', - '"subvariables":[{"id":"subvar1","name":"ZZZ"}]}}]}' + '{"value":["A"]}]}]}],"kwargs":{', + '"categories":{"value":[{"id":1,"name":"AAA"}]},"order":{"value":[2,1,-1]},', + '"subvariables":{"value":[{"id":"subvar1","name":"ZZZ"}]}}}' ) ) }) From bcfed7c9fd368bd1c391bfd9ae5b33fd395e21ea Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Tue, 4 Aug 2020 11:39:23 -0500 Subject: [PATCH 18/18] look into whether `categories()<-` should use expressions on derived variables --- R/categories.R | 77 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/R/categories.R b/R/categories.R index 9b712cfca..6b3f8273a 100644 --- a/R/categories.R +++ b/R/categories.R @@ -251,6 +251,70 @@ is.3vl <- function(cats) { ) } +updateCategoriesDerivation <- function(x, value) { + old_cats <- categories(x) + + if (!setequal(ids(old_cats), ids(value))) { + halt("Updating categories must preserve the same ids for derived variables") + } + + cat_changes <- lapply(ids(value), function(cat_id) { + old <- old_cats[[which(ids(old_cats) == cat_id)]] + new <- value[[which(ids(value) == cat_id)]] + defaults <- list( + numeric_value = NA_real_, missing = FALSE, selected = FALSE, date = NA_character_ + ) + + old <- modifyList(defaults, unclass(old)) + new <- modifyList(defaults, unclass(new)) + + changed <- vapply(names(new), function(attr) { + !isTRUE(all.equal(old[[attr]], new[[attr]])) && !(is.na(old[[attr]]) && is.null(new[[attr]])) && !(is.null(old[[attr]]) && is.na(new[[attr]])) + }, logical(1)) + + new[names(new) == "id" | changed] + }) + + # We'll use `selectCategories()` for selection changes, but `alterCategories()` for everything else + sel_changes <- vapply(cat_changes, function(x) x$selected %||% NA, logical(1)) + names(sel_changes) <- names(value) + sel_changes <- sel_changes[!is.na(sel_changes)] + + nonsel_changes <- lapply(cat_changes, function(x) { + names <- setdiff(names(x), "selected") + if (identical(names, "id")) return(NULL) + + x[names] + }) + nonsel_changes <- nonsel_changes[lengths(nonsel_changes) > 0] + if (length(nonsel_changes) == 0) nonsel_changes <- NULL + + old_order <- ids(old_cats) + new_order <- ids(value) + if (identical(old_order, new_order)) new_order <- NULL + + + # can't use derivation because it mangles urls for array subvariables + derivation <- CrunchExpr(expression = entity(x)@body$derivation) + if (!is.null(nonsel_changes) || !is.null(new_order)) { + derivation <- alterCategoriesExpr( + derivation, + categories = unname(nonsel_changes), + category_order = new_order + ) + } + if (length(sel_changes) > 0) { + current_sels <- ids(old_cats[is.selected(old_cats)]) + drop_sels <- names(sel_changes[!sel_changes]) + add_sels <- names(sel_changes[sel_changes]) + new_sels <- c(setdiff(current_sels, drop_sels), add_sels) + + derivation <- selectCategories(derivation, new_sels, collapse = FALSE) + } + derivation(x) <- derivation + x +} + #' Get and set Categories on Variables #' #' @param x a Variable @@ -303,7 +367,12 @@ setMethod( setMethod( "categories<-", c("CategoricalVariable", "Categories"), function(x, value) { - ent <- setEntitySlot(entity(x), "categories", value) + if (!is.derived(x)) { + ent <- setEntitySlot(entity(x), "categories", value) + } else { + x <- updateCategoriesDerivation(x, value) + } + dropCache(cubeURL(x)) return(x) } @@ -313,7 +382,11 @@ setMethod( setMethod( "categories<-", c("CategoricalArrayVariable", "Categories"), function(x, value) { - ent <- setEntitySlot(entity(x), "categories", value) + if (!is.derived(x)) { + ent <- setEntitySlot(entity(x), "categories", value) + } else { + x <- updateCategoriesDerivation(x, value) + } lapply(subvariableURLs(tuple(x)), dropCache) ## Subvariables will update too dropCache(cubeURL(x)) return(x)