From c54bef654721ccc1413cbfbe8e5cf62ffdb28024 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 27 Jul 2025 11:52:12 +0200 Subject: [PATCH 1/4] Support for colon preceeded placeholders --- src/dialect/mod.rs | 6 +++ src/tokenizer.rs | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c78b00033..4a6d5b148 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -841,6 +841,12 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect allow colon placeholders + /// e.g. `SELECT :var` (JPA named parameters) + fn supports_colon_placeholder(&self) -> bool { + false + } + /// Does the dialect support with clause in create index statement? /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)` fn supports_create_index_with_clause(&self) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8382a5344..cfbb97f71 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1521,6 +1521,11 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(':') => self.consume_and_return(chars, Token::DoubleColon), Some('=') => self.consume_and_return(chars, Token::Assignment), + Some(c) + if self.dialect.supports_colon_placeholder() && c.is_alphabetic() => + { + self.tokenize_colon_preceeded_placeholder(chars).map(Some) + } _ => Ok(Some(Token::Colon)), } } @@ -1756,6 +1761,30 @@ impl<'a> Tokenizer<'a> { } } + /// Tokenizes an identifier followed immediately after a colon, + /// aka named query parameter, e.g. `:name`. The next char of the + /// processed char stream is to be an alphabetic - panics otherwise. 
+ fn tokenize_colon_preceeded_placeholder( + &self, + chars: &mut State, + ) -> Result { + let mut s = String::with_capacity(16); + s.push(':'); + s.push(chars.next().expect("initial character missing")); + while let Some(&ch) = chars.peek() { + if ch.is_alphanumeric() + || ch == '_' + || matches!(ch, '$' if self.dialect.supports_dollar_placeholder()) + { + s.push(ch); + chars.next(); + } else { + break; + } + } + Ok(Token::Placeholder(s)) + } + /// Tokenize dollar preceded value (i.e: a string/placeholder) fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { let mut s = String::new(); @@ -2952,6 +2981,68 @@ mod tests { ); } + #[test] + fn tokenize_colon_placeholder() { + #[derive(Debug)] + struct TestDialect(bool); + impl Dialect for TestDialect { + fn supports_colon_placeholder(&self) -> bool { + true + } + fn supports_dollar_placeholder(&self) -> bool { + self.0 + } + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_alphabetic() || ch == '_' + } + fn is_identifier_part(&self, ch: char) -> bool { + ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' + } + } + + let sql = "SELECT :foo FROM bar"; + let tokens = Tokenizer::new(&TestDialect(false), sql) + .tokenize_with_location() + .unwrap(); + assert_eq!( + tokens.iter().map(|t| t.token.clone()).collect::>(), + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder(":foo".into()), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("bar", None) + ] + ); + assert_eq!( + tokens[2].span, + Span::new(Location::of(1, 8), Location::of(1, 12)) + ); + + let sql = "SELECT :foo$bar FROM bar"; + let tokens = Tokenizer::new(&TestDialect(true), sql) + .tokenize_with_location() + .unwrap(); + assert_eq!( + tokens.iter().map(|t| t.token.clone()).collect::>(), + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder(":foo$bar".into()), 
+ Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("bar", None) + ] + ); + assert_eq!( + tokens[2].span, + Span::new(Location::of(1, 8), Location::of(1, 16)) + ); + } + #[test] fn tokenize_dollar_placeholder() { let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); From 7932fc52fda73fa28e17d65e83085aee7ff639bd Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Thu, 31 Jul 2025 07:47:03 +0200 Subject: [PATCH 2/4] Revert "Support for colon preceeded placeholders" This reverts commit c54bef654721ccc1413cbfbe8e5cf62ffdb28024. --- src/dialect/mod.rs | 6 --- src/tokenizer.rs | 91 ---------------------------------------------- 2 files changed, 97 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 4a6d5b148..c78b00033 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -841,12 +841,6 @@ pub trait Dialect: Debug + Any { false } - /// Returns true if this dialect allow colon placeholders - /// e.g. `SELECT :var` (JPA named parameters) - fn supports_colon_placeholder(&self) -> bool { - false - } - /// Does the dialect support with clause in create index statement? /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)` fn supports_create_index_with_clause(&self) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index cfbb97f71..8382a5344 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1521,11 +1521,6 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(':') => self.consume_and_return(chars, Token::DoubleColon), Some('=') => self.consume_and_return(chars, Token::Assignment), - Some(c) - if self.dialect.supports_colon_placeholder() && c.is_alphabetic() => - { - self.tokenize_colon_preceeded_placeholder(chars).map(Some) - } _ => Ok(Some(Token::Colon)), } } @@ -1761,30 +1756,6 @@ impl<'a> Tokenizer<'a> { } } - /// Tokenizes an identifier followed immediately after a colon, - /// aka named query parameter, e.g. `:name`. 
The next char of the - /// processed char stream is to be an alphabetic - panics otherwise. - fn tokenize_colon_preceeded_placeholder( - &self, - chars: &mut State, - ) -> Result { - let mut s = String::with_capacity(16); - s.push(':'); - s.push(chars.next().expect("initial character missing")); - while let Some(&ch) = chars.peek() { - if ch.is_alphanumeric() - || ch == '_' - || matches!(ch, '$' if self.dialect.supports_dollar_placeholder()) - { - s.push(ch); - chars.next(); - } else { - break; - } - } - Ok(Token::Placeholder(s)) - } - /// Tokenize dollar preceded value (i.e: a string/placeholder) fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { let mut s = String::new(); @@ -2981,68 +2952,6 @@ mod tests { ); } - #[test] - fn tokenize_colon_placeholder() { - #[derive(Debug)] - struct TestDialect(bool); - impl Dialect for TestDialect { - fn supports_colon_placeholder(&self) -> bool { - true - } - fn supports_dollar_placeholder(&self) -> bool { - self.0 - } - fn is_identifier_start(&self, ch: char) -> bool { - ch.is_alphabetic() || ch == '_' - } - fn is_identifier_part(&self, ch: char) -> bool { - ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' - } - } - - let sql = "SELECT :foo FROM bar"; - let tokens = Tokenizer::new(&TestDialect(false), sql) - .tokenize_with_location() - .unwrap(); - assert_eq!( - tokens.iter().map(|t| t.token.clone()).collect::>(), - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Placeholder(":foo".into()), - Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), - Token::make_word("bar", None) - ] - ); - assert_eq!( - tokens[2].span, - Span::new(Location::of(1, 8), Location::of(1, 12)) - ); - - let sql = "SELECT :foo$bar FROM bar"; - let tokens = Tokenizer::new(&TestDialect(true), sql) - .tokenize_with_location() - .unwrap(); - assert_eq!( - tokens.iter().map(|t| t.token.clone()).collect::>(), - vec![ - 
Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Placeholder(":foo$bar".into()), - Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), - Token::make_word("bar", None) - ] - ); - assert_eq!( - tokens[2].span, - Span::new(Location::of(1, 8), Location::of(1, 16)) - ); - } - #[test] fn tokenize_dollar_placeholder() { let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); From 10cd9e2bb008f71b6fcac5d439b78588b593d210 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Thu, 31 Jul 2025 07:51:13 +0200 Subject: [PATCH 3/4] Placeholder spans, strict parsing 1. Require placeholders to be whitespace/comment-less 2. Correctly report span covering the whole placeholder --- src/parser/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d35d7880f..8a55570ea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9586,14 +9586,14 @@ impl<'a> Parser<'a> { tok @ Token::Colon | tok @ Token::AtSign => { // Not calling self.parse_identifier(false)? 
because only in placeholder we want to check numbers as idfentifies // This because snowflake allows numbers as placeholders - let next_token = self.next_token(); + let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone(); let ident = match next_token.token { Token::Word(w) => Ok(w.into_ident(next_token.span)), - Token::Number(w, false) => Ok(Ident::new(w)), + Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)), _ => self.expected("placeholder", next_token), }?; - let placeholder = tok.to_string() + &ident.value; - ok_value(Value::Placeholder(placeholder)) + Ok(Value::Placeholder(tok.to_string() + &ident.value) + .with_span(Span::new(span.start, ident.span.end))) } unexpected => self.expected( "a value", From e63108b69e12a243507f1b39af7bdc42bfd76b1d Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Thu, 31 Jul 2025 14:49:01 +0200 Subject: [PATCH 4/4] Cover placeholder detection with tests --- src/ast/spans.rs | 23 +++++++++++++++++++++++ src/parser/mod.rs | 17 +++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 91523925e..429315d2f 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2512,4 +2512,27 @@ pub mod tests { "CASE 1 WHEN 2 THEN 3 ELSE 4 END" ); } + + #[test] + fn test_placeholder_span() { + let sql = "\nSELECT\n :fooBar"; + let r = Parser::parse_sql(&GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + match &r[0] { + Statement::Query(q) => { + let col = &q.body.as_select().unwrap().projection[0]; + match col { + SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan { + value: Value::Placeholder(s), + span, + })) => { + assert_eq!(":fooBar", s); + assert_eq!(&Span::new((3, 3).into(), (3, 10).into()), span); + } + _ => panic!("expected unnamed expression; got {col:?}"), + } + } + stmt => panic!("expected query; got {stmt:?}"), + } + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8a55570ea..b33ea7fb7 100644 --- a/src/parser/mod.rs +++ 
b/src/parser/mod.rs @@ -9584,8 +9584,13 @@ impl<'a> Parser<'a> { Token::HexStringLiteral(ref s) => ok_value(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => ok_value(Value::Placeholder(s.to_string())), tok @ Token::Colon | tok @ Token::AtSign => { - // Not calling self.parse_identifier(false)? because only in placeholder we want to check numbers as idfentifies - // This because snowflake allows numbers as placeholders + // 1. Not calling self.parse_identifier(false)? + // because only in placeholder we want to check + // numbers as identifiers. This is because snowflake + // allows numbers as placeholders + // 2. Not calling self.next_token() to enforce that `tok` + // is followed immediately by a word/number, i.e. + // without any whitespace in between let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone(); let ident = match next_token.token { Token::Word(w) => Ok(w.into_ident(next_token.span)), @@ -17494,4 +17499,12 @@ mod tests { canonical, ); } + + #[test] + fn test_placeholder_invalid_whitespace() { + for w in [" ", "/*invalid*/"] { + let sql = format!("\nSELECT\n :{w}fooBar"); + assert!(Parser::parse_sql(&GenericDialect, &sql).is_err()); + } + } }