153 changes: 153 additions & 0 deletions src/main/scala/com/snowflake/snowpark/functions.scala
@@ -2,6 +2,7 @@ package com.snowflake.snowpark

import com.snowflake.snowpark.internal.analyzer._
import com.snowflake.snowpark.internal.ScalaFunctions._
import com.snowflake.snowpark.types._
import com.snowflake.snowpark.internal.{
ErrorMessage,
OpenTelemetry,
@@ -3140,6 +3141,158 @@ object functions {
*/
def listagg(col: Column): Column = listagg(col, "", isDistinct = false)

/**
* Extracts the specified fields from a JSON string. This leverages JSON_EXTRACT_PATH_TEXT
* and improves on it by allowing multiple fields to be extracted in a single call,
* whereas JSON_EXTRACT_PATH_TEXT must be called once for every field.
*
* NOTE:
* <ul>
* <li> Timestamp type: date values are not interpreted as UTC.</li>
* <li> Identifiers with spaces: Snowflake returns an error when an invalid expression is sent.</li>
* </ul>
*
* Usage:
* <pre>
* val df = session.createDataFrame(Seq(("CR", "{\"id\": 5, \"name\": \"Jose\", \"age\": 29}")))
*   .toDF(Seq("nationality", "json_string"))
* </pre>
* When the result of this function is the only part of the select statement,
* no changes are needed:
* <pre>
* df.select(json_tuple(col("json_string"), "id", "name", "age")).show()
* </pre>
*
* <pre>
* ----------------------
* |"C0" |"C1" |"C2" |
* ----------------------
* |5 |Jose |29 |
* ----------------------
* </pre>
* However, when specifying multiple columns, an expression like this is required:
* <pre>
* df.select(
* col("nationality")
* , json_tuple(col("json_string"), "id", "name", "age"):_* // Notice the :_* syntax.
* ).show()
* </pre>
*
* <pre>
* ------------------------------------
* |"NATIONALITY" |"C0" |"C1" |"C2" |
* ------------------------------------
* |CR            |5    |Jose |29   |
* ------------------------------------
* </pre>
* @since 1.12.1
* @param json Column containing the JSON string text.
* @param fields Fields to extract from the JSON string.
* @return Sequence of columns containing the extracted values as strings.
*/
def json_tuple(json: Column, fields: String*): Seq[Column] = {
fields.zipWithIndex.map {
  case (field, i) => builtin("JSON_EXTRACT_PATH_TEXT")(json, field).as(s"c$i")
}
}

/**
* Calculates the cube root of a number.
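*
* A minimal usage sketch (the column name "n" here is illustrative, not part of the library):
* <pre>
* val df = session.createDataFrame(Seq(1.0, 8.0, 27.0)).toDF("n")
* df.select(cbrt(col("n"))).show() // 1.0, 2.0, 3.0
* </pre>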
* @since 1.12.1
* @param e Column for which to calculate the cube root.
* @return Column object.
*/
def cbrt(e: Column): Column = {
builtin("CBRT")(e)
}

/**
* Calculates the cube root of a number. This overload accepts the name of the input column.
* @since 1.12.1
* @param columnName Name of the column for which to calculate the cube root.
* @return Column object.
*/
def cbrt(columnName: String): Column = {
cbrt(col(columnName))
}

/**
* This function converts a JSON string to a variant in Snowflake.
*
* In Snowflake the values are parsed automatically, but they are stored as variants,
* so printSchema reports VARIANT rather than the underlying data types.
* To read a value with its expected data type, cast it inside selectExpr,
* for example "json['relative']['age']::integer":
* <pre>
* val data_for_json = Seq(
*   (1, "{\"id\": 172319, \"age\": 41, \"relative\": {\"id\": 885471, \"age\": 29}}"),
*   (2, "{\"id\": 532161, \"age\": 17, \"relative\": {\"id\": 873513, \"age\": 47}}")
* )
* )
* val data_for_json_column = Seq("col1", "col2")
* val df_for_json = session.createDataFrame(data_for_json).toDF(data_for_json_column)
*
* val json_df = df_for_json.select(
* from_json(col("col2")).as("json")
* )
*
* json_df.selectExpr(
* "json['id']::integer as id"
* , "json['age']::integer as age"
* , "json['relative']['id']::integer as rel_id"
* , "json['relative']['age']::integer as rel_age"
* ).show(10, 10000)
* </pre>
*
* <pre>
* -----------------------------------------
* |"ID" |"AGE" |"REL_ID" |"REL_AGE" |
* -----------------------------------------
* |172319 |41 |885471 |29 |
* |532161 |17 |873513 |47 |
* -----------------------------------------
* </pre>
* @since 1.12.1
* @param e String column to convert to variant.
* @return Column object.
*/
def from_json(e: Column): Column = {
builtin("TRY_PARSE_JSON")(e)
}

/**
* Returns the value of the source expression cast to data type targetType if possible,
* or NULL if it is not possible. This is a special version of CAST for a subset of
* data type conversions: it performs the same operation (converting a value of one
* data type into another data type), but returns NULL instead of raising an error
* when the conversion cannot be performed. The column argument must be a string
* column in Snowflake.
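*
* A minimal usage sketch (the DataFrame and column name "d" here are illustrative,
* not part of the library):
* <pre>
* val df = session.createDataFrame(Seq("2024-01-15", "not a date")).toDF("d")
* df.select(try_cast(col("d"), DateType)).show() // 2024-01-15 and NULL
* </pre>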
* @since 1.12.1
* @param e Any castable string expression.
* @param targetType The target data type of the result.
* @return Column of type targetType, or NULL where the conversion fails.
*/
def try_cast(e: Column, targetType: DataType): Column = {
  // Emit Snowflake's TRY_CAST rather than CAST; this assumes the internal analyzer's
  // Cast expression exposes a tryCast flag for SQL generation.
  Column(Cast(e.expr, targetType, tryCast = true))
}

/**
* This function receives a date or timestamp, as well as a properly formatted string,
* and subtracts the specified number of days from it. If it receives a string, the
* string is cast to a date using try_cast; if the cast is not possible, NULL is
* returned. If it receives a timestamp, the timestamp is cast to a date
* (dropping its time component).
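*
* A minimal usage sketch (the column name "d" here is illustrative, not part of the library):
* <pre>
* val df = session.createDataFrame(Seq("2020-05-01", "not a date")).toDF("d")
* df.select(date_sub(col("d"), 1)).show() // 2020-04-30 and NULL
* </pre>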
* @since 1.12.1
* @param start Date, Timestamp or String column to subtract days from.
* @param days Days to subtract.
* @return Column object.
*/
def date_sub(start: Column, days: Int): Column = {
dateadd("DAY", lit(days * -1), try_cast(col("start"), DateType))
}

/**
* Invokes a built-in snowflake function with the specified name and arguments.
* Arguments can be of two types
40 changes: 40 additions & 0 deletions src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -2177,6 +2177,46 @@ trait FunctionSuite extends TestData {
expected,
sort = false)
}
test("cbrt") {
checkAnswer(
testData1.select(cbrt(col("NUM"))),
Seq(Row(1.0), Row(1.25992104989)),
sort = false)
}
test("from_json") {
val expected = Seq(("21", "Joe", "21021"), ("26", "Jay", "94021")).toDF("age", "name", "zip")
checkAnswer(
object2
.select(from_json(col("obj"))),
expected,
sort = false)
}
test("json_tuple") {
val expected = Seq(("21", "Joe"), ("26", "Jay")).toDF("age", "name")
checkAnswer(
object2
.select(json_tuple(col("obj"), "age", "name")),
expected,
sort = false)
}

test("date_sub") {
val expected = Seq("2020-04-30 13:11:20.000", "2020-08-20 01:30:05.000").toDF("b")
checkAnswer(
timestamp1
.select(date_sub(col("a"), 1)),
expected,
sort = false)
}

test("try_cast") {
val expected = Seq("2020-08-01", "2010-12-01").toDF("b")
checkAnswer(
date1
.select(try_cast(col("a"), StringType)),
expected,
sort = false)
}

}
