Skip to content

Commit 27ab141

Browse files
committed
feat: Add PEP 750 template string support
1 parent 710796b commit 27ab141

File tree

2 files changed

+168
-9
lines changed

2 files changed

+168
-9
lines changed

src/scanner.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ typedef enum {
2929
Format = 1 << 4,
3030
Triple = 1 << 5,
3131
Bytes = 1 << 6,
32+
Template = 1 << 7,
3233
} Flags;
3334

3435
typedef struct {
@@ -45,6 +46,8 @@ static inline bool is_triple(Delimiter *delimiter) { return delimiter->flags & T
4546

4647
// Reports whether the Bytes bit is set in the delimiter's flags.
static inline bool is_bytes(Delimiter *delimiter) {
    return (delimiter->flags & Bytes) != 0;
}
4748

49+
// Reports whether the Template bit is set in the delimiter's flags.
static inline bool is_template(Delimiter *delimiter) {
    return (delimiter->flags & Template) != 0;
}
50+
4851
static inline int32_t end_character(Delimiter *delimiter) {
4952
if (delimiter->flags & SingleQuote) {
5053
return '\'';
@@ -66,6 +69,8 @@ static inline void set_triple(Delimiter *delimiter) { delimiter->flags |= Triple
6669

6770
// Turns on the Bytes bit in the delimiter's flags; other bits are untouched.
static inline void set_bytes(Delimiter *delimiter) {
    delimiter->flags |= Bytes;
}
6871

72+
// Marks the delimiter as a template string.
// The Format bit is turned on together with Template, so any check of the
// Format bit also succeeds for a delimiter marked through this function.
static inline void set_template(Delimiter *delimiter) {
    const int template_bits = Template | Format;
    delimiter->flags |= template_bits;
}
73+
6974
static inline void set_end_character(Delimiter *delimiter, int32_t character) {
7075
switch (character) {
7176
case '\'':
@@ -85,7 +90,7 @@ static inline void set_end_character(Delimiter *delimiter, int32_t character) {
8590
typedef struct {
8691
Array(uint16_t) indents;
8792
Array(Delimiter) delimiters;
88-
bool inside_f_string;
93+
bool inside_interpolated_string;
8994
} Scanner;
9095

9196
// Forwards to the lexer's own advance callback, passing false as the second
// argument.
// NOTE(review): in the tree-sitter lexer API that argument is the "skip"
// flag, so false should keep the consumed character in the token — confirm
// against the TSLexer declaration.
static inline void advance(TSLexer *lexer) {
    const bool skip = false;
    lexer->advance(lexer, skip);
}
@@ -102,7 +107,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
102107
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.size > 0 &&
103108
(lexer->lookahead == '{' || lexer->lookahead == '}') && !error_recovery_mode) {
104109
Delimiter *delimiter = array_back(&scanner->delimiters);
105-
if (is_format(delimiter)) {
110+
if (is_format(delimiter) || is_template(delimiter)) {
106111
lexer->mark_end(lexer);
107112
bool is_left_brace = lexer->lookahead == '{';
108113
advance(lexer);
@@ -122,7 +127,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
122127
int32_t end_char = end_character(delimiter);
123128
bool has_content = advanced_once;
124129
while (lexer->lookahead) {
125-
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && is_format(delimiter)) {
130+
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && (is_format(delimiter) || is_template(delimiter))) {
126131
lexer->mark_end(lexer);
127132
lexer->result_symbol = STRING_CONTENT;
128133
return has_content;
@@ -177,7 +182,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
177182
lexer->mark_end(lexer);
178183
array_pop(&scanner->delimiters);
179184
lexer->result_symbol = STRING_END;
180-
scanner->inside_f_string = false;
185+
scanner->inside_interpolated_string = false;
181186
}
182187
return true;
183188
}
@@ -195,7 +200,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
195200
advance(lexer);
196201
array_pop(&scanner->delimiters);
197202
lexer->result_symbol = STRING_END;
198-
scanner->inside_f_string = false;
203+
scanner->inside_interpolated_string = false;
199204
}
200205
lexer->mark_end(lexer);
201206
return true;
@@ -280,7 +285,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
280285
if ((valid_symbols[DEDENT] ||
281286
(!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) &&
282287
!within_brackets)) &&
283-
indent_length < current_indent_length && !scanner->inside_f_string &&
288+
indent_length < current_indent_length && !scanner->inside_interpolated_string &&
284289

285290
// Wait to create a dedent token until we've consumed any
286291
// comments
@@ -309,6 +314,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
309314
set_raw(&delimiter);
310315
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
311316
set_bytes(&delimiter);
317+
} else if (lexer->lookahead == 't' || lexer->lookahead == 'T') {
318+
set_template(&delimiter);
312319
} else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
313320
break;
314321
}
@@ -349,7 +356,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
349356
if (end_character(&delimiter)) {
350357
array_push(&scanner->delimiters, delimiter);
351358
lexer->result_symbol = STRING_START;
352-
scanner->inside_f_string = is_format(&delimiter);
359+
scanner->inside_interpolated_string = is_format(&delimiter) || is_template(&delimiter);
353360
return true;
354361
}
355362
if (has_flags) {
@@ -365,7 +372,7 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff
365372

366373
size_t size = 0;
367374

368-
buffer[size++] = (char)scanner->inside_f_string;
375+
buffer[size++] = (char)scanner->inside_interpolated_string;
369376

370377
size_t delimiter_count = scanner->delimiters.size;
371378
if (delimiter_count > UINT8_MAX) {
@@ -398,7 +405,7 @@ void tree_sitter_python_external_scanner_deserialize(void *payload, const char *
398405
if (length > 0) {
399406
size_t size = 0;
400407

401-
scanner->inside_f_string = (bool)buffer[size++];
408+
scanner->inside_interpolated_string = (bool)buffer[size++];
402409

403410
size_t delimiter_count = (uint8_t)buffer[size++];
404411
if (delimiter_count > 0) {

test/corpus/template_strings.txt

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
================================================================================
2+
Simple template string
3+
================================================================================
4+
5+
t"Hello, {name}!"
6+
7+
--------------------------------------------------------------------------------
8+
9+
(module
10+
(expression_statement
11+
(string
12+
(string_start)
13+
(string_content)
14+
(interpolation
15+
(identifier))
16+
(string_content)
17+
(string_end))))
18+
19+
================================================================================
20+
Template string with format spec
21+
================================================================================
22+
23+
t"Price: {price:.2f}"
24+
25+
--------------------------------------------------------------------------------
26+
27+
(module
28+
(expression_statement
29+
(string
30+
(string_start)
31+
(string_content)
32+
(interpolation
33+
(identifier)
34+
(format_specifier))
35+
(string_end))))
36+
37+
================================================================================
38+
Raw template string
39+
================================================================================
40+
41+
tr"Path: {path}\n"
42+
43+
--------------------------------------------------------------------------------
44+
45+
(module
46+
(expression_statement
47+
(string
48+
(string_start)
49+
(string_content)
50+
(interpolation
51+
(identifier))
52+
(string_content)
53+
(string_end))))
54+
55+
================================================================================
56+
Triple quoted template string
57+
================================================================================
58+
59+
t"""
60+
Multi-line template
61+
with {variable}
62+
"""
63+
64+
--------------------------------------------------------------------------------
65+
66+
(module
67+
(expression_statement
68+
(string
69+
(string_start)
70+
(string_content)
71+
(interpolation
72+
(identifier))
73+
(string_content)
74+
(string_end))))
75+
76+
================================================================================
77+
Template string with multiple interpolations
78+
================================================================================
79+
80+
t"Hello {first_name} {last_name}!"
81+
82+
--------------------------------------------------------------------------------
83+
84+
(module
85+
(expression_statement
86+
(string
87+
(string_start)
88+
(string_content)
89+
(interpolation
90+
(identifier))
91+
(string_content)
92+
(interpolation
93+
(identifier))
94+
(string_content)
95+
(string_end))))
96+
97+
================================================================================
98+
Template string with expression
99+
================================================================================
100+
101+
t"Result: {a + b}"
102+
103+
--------------------------------------------------------------------------------
104+
105+
(module
106+
(expression_statement
107+
(string
108+
(string_start)
109+
(string_content)
110+
(interpolation
111+
(binary_operator
112+
(identifier)
113+
(identifier)))
114+
(string_end))))
115+
116+
================================================================================
117+
Template string with escaped braces
118+
================================================================================
119+
120+
t"Use {{braces}} to escape"
121+
122+
--------------------------------------------------------------------------------
123+
124+
(module
125+
(expression_statement
126+
(string
127+
(string_start)
128+
(string_content
129+
(escape_interpolation)
130+
(escape_interpolation))
131+
(string_end))))
132+
================================================================================
133+
Template string with format spec (copy from f-string)
134+
================================================================================
135+
136+
t"a {b:2} {c:34.5}"
137+
138+
--------------------------------------------------------------------------------
139+
140+
(module
141+
(expression_statement
142+
(string
143+
(string_start)
144+
(string_content)
145+
(interpolation
146+
(identifier)
147+
(format_specifier))
148+
(string_content)
149+
(interpolation
150+
(identifier)
151+
(format_specifier))
152+
(string_end))))

0 commit comments

Comments
 (0)