From 3527e4c9c69c0e9c6d6d3488f78b994e85711ec1 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Tue, 13 Apr 2021 13:01:13 -0500 Subject: [PATCH 1/3] WIP generateOrganizeCommand to get the Automation script for current folder order --- NAMESPACE | 1 + R/folders.R | 103 +++++++++++++++++++++++++++++++++ _pkgdown.yml | 1 + man/generateOrganizeCommand.Rd | 20 +++++++ 4 files changed, 125 insertions(+) create mode 100644 man/generateOrganizeCommand.Rd diff --git a/NAMESPACE b/NAMESPACE index 7e291eea0..372e072b8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -196,6 +196,7 @@ export(forkDataset) export(formulaToSlideQuery) export(fortify.CrunchDataFrame) export(fortify.CrunchDataset) +export(generateOrganizeCommand) export(getAccountUserCatalog) export(getDimTypes) export(getTeams) diff --git a/R/folders.R b/R/folders.R index 5c93f7334..a80c085c6 100644 --- a/R/folders.R +++ b/R/folders.R @@ -385,6 +385,109 @@ copyFolders <- function(source, target) { } +# Recursively go through folders and create crunch automation commands +organize_recurse <- function(base, base_type, path) { + items <- cd(base, path) + + # Can't make empty folders in crunch automation + if (length(items) == 0) { + return() + } + + if (length(items) == 1) { + split_items <- list(items) + } else { + # Make sequences where each folder is on its own, and the variables + # at the folder level are in sequence. This allows the organize folder + # to go in the right place so that the folder is organized among the vas + folder_pos <- which(types(items) == "folder") + # starts are 1, the folder positions, and 1 after the folder positions so that + # each folder gets split out on its own. If position is to high, needs to be + # replaced with length, which can happen if a folder is in last position + starts <- pmin(c(1, folder_pos + c(0, 1)), length(items)) + # and ends are one less than the folders, the folder positions and the length of + # the list. 
Need to replace item less than 1, which can occur if folder is in + # first position + ends <- pmax(c(folder_pos - c(1, 0), length(items)), 1) + + split_items <- mapply( + FUN = function(starts, ends) items[seq(starts, ends)], + starts = starts, + ends = ends + ) + } + + lapply( + split_items, + organize_recurse_cmd, + path = path, + base = base, + base_type = base_type + ) + +} + +# Generate commands for organizing, and recurse into folder if necessary +organize_recurse_cmd <- function(items, path, base, base_type) { + if (length(items) == 1 && types(items) == "folder") { + below_path <- paste0(path, name(items[[1]]), "/") + organize_recurse(base, base_type, below_path) + } else { + aliases <- paste0(validate_automation_aliases(aliases(items)), collapse = ", ") + path <- path_to_automation_path(path, base_type) + paste0("ORGANIZE ", aliases, " INTO ", path, ";") + } +} + +# Convert from a rcrunch path to a Crunch Automation path +path_to_automation_path <- function(path, base_type) { + path <- gsub("^/?(.*?)/?$", "\\1", path) + path <- gsub("/", "|", path) + if (path == "" && base_type != "ROOT") { + return(base_type) + } + + if (base_type == "ROOT") { + base_type <- "" + } else { + base_type <- paste0(base_type, " ") + } + paste0(base_type, '"', path, '"') +} + +# decide whether to put backticks on variable aliases +validate_automation_aliases <- function(x) { + # Can't start with a number or have non-alphanumeric characters + # (may be overly cautious, but doesn't do backticks on clearly good aliases) + invalid <- grepl("^[0-9]|[^[:alnum:]_]", x) + + x[invalid] <- paste0("`", x[invalid], "`") + x +} + +#' Generate Crunch Automation commands to create a dataset's current folder structure +#' +#' Take a dataset and generate the Crunch Automation commands needed to create the +#' current folder structure of the dataset. 
Useful for saving a snapshot, or copying +#' one dataset's folder structure to another (though see [`copyFolders()`] for another +#' way of copying a dataset's folders). +#' +#' @param dataset A crunch Dataset +#' +#' @return A string of the commands that can be passed to [`runCrunchAutomation()`] +#' @export +generateOrganizeCommand <- function(dataset) { + if (!is.dataset(dataset) ) { + halt("dataset must be a Crunch dataset.") + } + root_cmds <- organize_recurse(dataset, "ROOT", "/") + hidden_cmds <- organize_recurse(hiddenFolder(dataset), "HIDDEN", "/") + secure_cmds <- organize_recurse(privateFolder(dataset), "PRIVATE", "/") + + paste0(c(unlist(root_cmds), unlist(hidden_cmds), unlist(secure_cmds)), collapse = "\n") +} + + # Recursively get all variables below a folder # TODO: Use trampoline? https://community.rstudio.com/t/tidiest-way-to-do-recursion-safely-in-r/1408 # My initial tests say it's slower, but is safer if we ever expect a large number of folders diff --git a/_pkgdown.yml b/_pkgdown.yml index ca948f83e..859dc7726 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -148,6 +148,7 @@ reference: - runCrunchAutomation - automation-undo - script-catalog + - generateOrganizeCommand - title: Analysis contents: - crtabs diff --git a/man/generateOrganizeCommand.Rd b/man/generateOrganizeCommand.Rd new file mode 100644 index 000000000..99dcee94a --- /dev/null +++ b/man/generateOrganizeCommand.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/folders.R +\name{generateOrganizeCommand} +\alias{generateOrganizeCommand} +\title{Generate Crunch Automation commands to create a dataset's current folder structure} +\usage{ +generateOrganizeCommand(dataset) +} +\arguments{ +\item{dataset}{A crunch Dataset} +} +\value{ +A string of the commands that can be passed to \code{\link[=runCrunchAutomation]{runCrunchAutomation()}} +} +\description{ +Take a dataset and generate the Crunch Automation commands needed to create the +current 
folder structure of the dataset. Useful for saving a snapshot, or copying +one dataset's folder structure to another (though see \code{\link[=copyFolders]{copyFolders()}} for another +way of copying a dataset's folders). +} From ff7ab66ce935e8da0af2ed067a862f75e1181fe6 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Tue, 13 Apr 2021 15:24:58 -0500 Subject: [PATCH 2/3] oops automation calls private secure --- R/folders.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/folders.R b/R/folders.R index a80c085c6..bb809beb0 100644 --- a/R/folders.R +++ b/R/folders.R @@ -482,7 +482,7 @@ generateOrganizeCommand <- function(dataset) { } root_cmds <- organize_recurse(dataset, "ROOT", "/") hidden_cmds <- organize_recurse(hiddenFolder(dataset), "HIDDEN", "/") - secure_cmds <- organize_recurse(privateFolder(dataset), "PRIVATE", "/") + secure_cmds <- organize_recurse(privateFolder(dataset), "SECURE", "/") paste0(c(unlist(root_cmds), unlist(hidden_cmds), unlist(secure_cmds)), collapse = "\n") } From 4e114b9895bf591b6eeae2dcc612c462775d62b0 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Wed, 14 Apr 2021 18:10:23 -0500 Subject: [PATCH 3/3] oops --- R/folders.R | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/R/folders.R b/R/folders.R index bb809beb0..d4f282cf6 100644 --- a/R/folders.R +++ b/R/folders.R @@ -399,22 +399,14 @@ organize_recurse <- function(base, base_type, path) { } else { # Make sequences where each folder is on its own, and the variables # at the folder level are in sequence. This allows the organize folder - # to go in the right place so that the folder is organized among the vas - folder_pos <- which(types(items) == "folder") - # starts are 1, the folder positions, and 1 after the folder positions so that - # each folder gets split out on its own. 
If position is to high, needs to be - # replaced with length, which can happen if a folder is in last position - starts <- pmin(c(1, folder_pos + c(0, 1)), length(items)) - # and ends are one less than the folders, the folder positions and the length of - # the list. Need to replace item less than 1, which can occur if folder is in - # first position - ends <- pmax(c(folder_pos - c(1, 0), length(items)), 1) - - split_items <- mapply( - FUN = function(starts, ends) items[seq(starts, ends)], - starts = starts, - ends = ends - ) + # to go in the right place so that the folder is organized among the vars + is_folder <- types(items) == "folder" + prev_was_folder <- c(FALSE, is_folder[-length(is_folder)]) + groups <- cumsum(is_folder | prev_was_folder) + + split_items <- lapply(unique(groups), function(grp) { + items[groups == grp] + }) } lapply(