From c54bef654721ccc1413cbfbe8e5cf62ffdb28024 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 27 Jul 2025 11:52:12 +0200 Subject: [PATCH 1/4] Support for colon preceeded placeholders --- src/dialect/mod.rs | 6 +++ src/tokenizer.rs | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c78b00033..4a6d5b148 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -841,6 +841,12 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect allow colon placeholders + /// e.g. `SELECT :var` (JPA named parameters) + fn supports_colon_placeholder(&self) -> bool { + false + } + /// Does the dialect support with clause in create index statement? /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)` fn supports_create_index_with_clause(&self) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8382a5344..cfbb97f71 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1521,6 +1521,11 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(':') => self.consume_and_return(chars, Token::DoubleColon), Some('=') => self.consume_and_return(chars, Token::Assignment), + Some(c) + if self.dialect.supports_colon_placeholder() && c.is_alphabetic() => + { + self.tokenize_colon_preceeded_placeholder(chars).map(Some) + } _ => Ok(Some(Token::Colon)), } } @@ -1756,6 +1761,30 @@ impl<'a> Tokenizer<'a> { } } + /// Tokenizes an identifier followed immediately after a colon, + /// aka named query parameter, e.g. `:name`. The next char of the + /// processed char stream is to be an alphabetic - panics otherwise. 
+ fn tokenize_colon_preceeded_placeholder( + &self, + chars: &mut State, + ) -> Result { + let mut s = String::with_capacity(16); + s.push(':'); + s.push(chars.next().expect("initial character missing")); + while let Some(&ch) = chars.peek() { + if ch.is_alphanumeric() + || ch == '_' + || matches!(ch, '$' if self.dialect.supports_dollar_placeholder()) + { + s.push(ch); + chars.next(); + } else { + break; + } + } + Ok(Token::Placeholder(s)) + } + /// Tokenize dollar preceded value (i.e: a string/placeholder) fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { let mut s = String::new(); @@ -2952,6 +2981,68 @@ mod tests { ); } + #[test] + fn tokenize_colon_placeholder() { + #[derive(Debug)] + struct TestDialect(bool); + impl Dialect for TestDialect { + fn supports_colon_placeholder(&self) -> bool { + true + } + fn supports_dollar_placeholder(&self) -> bool { + self.0 + } + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_alphabetic() || ch == '_' + } + fn is_identifier_part(&self, ch: char) -> bool { + ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' + } + } + + let sql = "SELECT :foo FROM bar"; + let tokens = Tokenizer::new(&TestDialect(false), sql) + .tokenize_with_location() + .unwrap(); + assert_eq!( + tokens.iter().map(|t| t.token.clone()).collect::>(), + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder(":foo".into()), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("bar", None) + ] + ); + assert_eq!( + tokens[2].span, + Span::new(Location::of(1, 8), Location::of(1, 12)) + ); + + let sql = "SELECT :foo$bar FROM bar"; + let tokens = Tokenizer::new(&TestDialect(true), sql) + .tokenize_with_location() + .unwrap(); + assert_eq!( + tokens.iter().map(|t| t.token.clone()).collect::>(), + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder(":foo$bar".into()), 
+ Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("bar", None) + ] + ); + assert_eq!( + tokens[2].span, + Span::new(Location::of(1, 8), Location::of(1, 16)) + ); + } + #[test] fn tokenize_dollar_placeholder() { let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); From 7932fc52fda73fa28e17d65e83085aee7ff639bd Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Thu, 31 Jul 2025 07:47:03 +0200 Subject: [PATCH 2/4] Revert "Support for colon preceeded placeholders" This reverts commit c54bef654721ccc1413cbfbe8e5cf62ffdb28024. --- src/dialect/mod.rs | 6 --- src/tokenizer.rs | 91 ---------------------------------------------- 2 files changed, 97 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 4a6d5b148..c78b00033 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -841,12 +841,6 @@ pub trait Dialect: Debug + Any { false } - /// Returns true if this dialect allow colon placeholders - /// e.g. `SELECT :var` (JPA named parameters) - fn supports_colon_placeholder(&self) -> bool { - false - } - /// Does the dialect support with clause in create index statement? /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)` fn supports_create_index_with_clause(&self) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index cfbb97f71..8382a5344 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1521,11 +1521,6 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(':') => self.consume_and_return(chars, Token::DoubleColon), Some('=') => self.consume_and_return(chars, Token::Assignment), - Some(c) - if self.dialect.supports_colon_placeholder() && c.is_alphabetic() => - { - self.tokenize_colon_preceeded_placeholder(chars).map(Some) - } _ => Ok(Some(Token::Colon)), } } @@ -1761,30 +1756,6 @@ impl<'a> Tokenizer<'a> { } } - /// Tokenizes an identifier followed immediately after a colon, - /// aka named query parameter, e.g. `:name`. 
The next char of the - /// processed char stream is to be an alphabetic - panics otherwise. - fn tokenize_colon_preceeded_placeholder( - &self, - chars: &mut State, - ) -> Result { - let mut s = String::with_capacity(16); - s.push(':'); - s.push(chars.next().expect("initial character missing")); - while let Some(&ch) = chars.peek() { - if ch.is_alphanumeric() - || ch == '_' - || matches!(ch, '$' if self.dialect.supports_dollar_placeholder()) - { - s.push(ch); - chars.next(); - } else { - break; - } - } - Ok(Token::Placeholder(s)) - } - /// Tokenize dollar preceded value (i.e: a string/placeholder) fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { let mut s = String::new(); @@ -2981,68 +2952,6 @@ mod tests { ); } - #[test] - fn tokenize_colon_placeholder() { - #[derive(Debug)] - struct TestDialect(bool); - impl Dialect for TestDialect { - fn supports_colon_placeholder(&self) -> bool { - true - } - fn supports_dollar_placeholder(&self) -> bool { - self.0 - } - fn is_identifier_start(&self, ch: char) -> bool { - ch.is_alphabetic() || ch == '_' - } - fn is_identifier_part(&self, ch: char) -> bool { - ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' - } - } - - let sql = "SELECT :foo FROM bar"; - let tokens = Tokenizer::new(&TestDialect(false), sql) - .tokenize_with_location() - .unwrap(); - assert_eq!( - tokens.iter().map(|t| t.token.clone()).collect::>(), - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Placeholder(":foo".into()), - Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), - Token::make_word("bar", None) - ] - ); - assert_eq!( - tokens[2].span, - Span::new(Location::of(1, 8), Location::of(1, 12)) - ); - - let sql = "SELECT :foo$bar FROM bar"; - let tokens = Tokenizer::new(&TestDialect(true), sql) - .tokenize_with_location() - .unwrap(); - assert_eq!( - tokens.iter().map(|t| t.token.clone()).collect::>(), - vec![ - 
Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Placeholder(":foo$bar".into()), - Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), - Token::make_word("bar", None) - ] - ); - assert_eq!( - tokens[2].span, - Span::new(Location::of(1, 8), Location::of(1, 16)) - ); - } - #[test] fn tokenize_dollar_placeholder() { let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); From 10cd9e2bb008f71b6fcac5d439b78588b593d210 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Thu, 31 Jul 2025 07:51:13 +0200 Subject: [PATCH 3/4] Placeholder spans, strict parsing 1. Require placeholders to be whitespace/comment-less 2. Correctly report span covering the whole placeholder --- src/parser/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d35d7880f..8a55570ea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9586,14 +9586,14 @@ impl<'a> Parser<'a> { tok @ Token::Colon | tok @ Token::AtSign => { // Not calling self.parse_identifier(false)? 
because only in placeholder we want to check numbers as idfentifies // This because snowflake allows numbers as placeholders - let next_token = self.next_token(); + let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone(); let ident = match next_token.token { Token::Word(w) => Ok(w.into_ident(next_token.span)), - Token::Number(w, false) => Ok(Ident::new(w)), + Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)), _ => self.expected("placeholder", next_token), }?; - let placeholder = tok.to_string() + &ident.value; - ok_value(Value::Placeholder(placeholder)) + Ok(Value::Placeholder(tok.to_string() + &ident.value) + .with_span(Span::new(span.start, ident.span.end))) } unexpected => self.expected( "a value", From e63108b69e12a243507f1b39af7bdc42bfd76b1d Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Thu, 31 Jul 2025 14:49:01 +0200 Subject: [PATCH 4/4] Cover placeholder detection with tests --- src/ast/spans.rs | 23 +++++++++++++++++++++++ src/parser/mod.rs | 17 +++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 91523925e..429315d2f 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2512,4 +2512,27 @@ pub mod tests { "CASE 1 WHEN 2 THEN 3 ELSE 4 END" ); } + + #[test] + fn test_placeholder_span() { + let sql = "\nSELECT\n :fooBar"; + let r = Parser::parse_sql(&GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + match &r[0] { + Statement::Query(q) => { + let col = &q.body.as_select().unwrap().projection[0]; + match col { + SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan { + value: Value::Placeholder(s), + span, + })) => { + assert_eq!(":fooBar", s); + assert_eq!(&Span::new((3, 3).into(), (3, 10).into()), span); + } + _ => panic!("expected unnamed expression; got {col:?}"), + } + } + stmt => panic!("expected query; got {stmt:?}"), + } + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8a55570ea..b33ea7fb7 100644 --- a/src/parser/mod.rs +++ 
b/src/parser/mod.rs @@ -9584,8 +9584,13 @@ impl<'a> Parser<'a> { Token::HexStringLiteral(ref s) => ok_value(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => ok_value(Value::Placeholder(s.to_string())), tok @ Token::Colon | tok @ Token::AtSign => { - // Not calling self.parse_identifier(false)? because only in placeholder we want to check numbers as idfentifies - // This because snowflake allows numbers as placeholders + // 1. Not calling self.parse_identifier(false)? + // because only in placeholder we want to check + // numbers as identifiers. This is because snowflake + // allows numbers as placeholders + // 2. Not calling self.next_token() to enforce that `tok` + // is followed immediately by a word/number, i.e. + // without any whitespace in between let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone(); let ident = match next_token.token { Token::Word(w) => Ok(w.into_ident(next_token.span)), @@ -17494,4 +17499,12 @@ mod tests { canonical, ); } + + #[test] + fn test_placeholder_invalid_whitespace() { + for w in [" ", "/*invalid*/"] { + let sql = format!("\nSELECT\n :{w}fooBar"); + assert!(Parser::parse_sql(&GenericDialect, &sql).is_err()); + } + } }