Skip to content

Commit 3a22828

Browse files
authored
Merge pull request #37 from useblocks/36-yaml-support
Implement YAML support
2 parents 84b35f0 + 8677b1b commit 3a22828

File tree

8 files changed

+693
-5
lines changed

8 files changed

+693
-5
lines changed

docs/source/components/analyse.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Limitations
4747

4848
**Current Limitations:**
4949

50-
- **Language Support**: Only C/C++ (``//``, ``/* */``) and Python (``#``) comment styles are supported
50+
- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``) and YAML (``#``) comment styles are supported
5151
- **Single Comment Style**: Each analysis run processes only one comment style at a time
5252

5353
Extraction Examples

docs/source/components/configuration.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ Specifies the comment syntax style used in the source code files. This determine
267267

268268
**Type:** ``str``
269269
**Default:** ``"cpp"``
270-
**Supported values:** ``"cpp"``, ``"python"``
270+
**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``
271271

272272
.. code-block:: toml
273273
@@ -288,12 +288,22 @@ Specifies the comment syntax style used in the source code files. This determine
288288
- ``"cpp"``
289289
- ``//`` (single-line),
290290
``/* */`` (multi-line)
291-
- ``c``, ``h``, ``.cpp``, and ``.hpp``
291+
- ``.c``, ``.ci``, ``.h``, ``.cpp``, ``.cc``, ``.cxx``, ``.hpp``, ``.hh``, ``.hxx`` and ``.ihl``
292292
* - Python
293293
- ``"python"``
294294
- ``#`` (single-line),
295295
``""" """`` (docstrings)
296296
- ``.py``
297+
* - C#
298+
- ``"cs"``
299+
- ``//`` (single-line),
300+
``/* */`` (multi-line),
301+
``///`` (XML doc comments)
302+
- ``.cs``
303+
* - YAML
304+
- ``"yaml"``
305+
- ``#`` (single-line)
306+
- ``.yaml``, ``.yml``
297307

298308
.. note:: Future versions may support additional programming languages. Currently, C/C++, C#, Python and YAML comment styles are supported.
299309

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ dependencies = [
2828
# https://github.com/tree-sitter/py-tree-sitter/issues/386#issuecomment-3101430799
2929
"tree-sitter~=0.25.1",
3030
"tree-sitter-c-sharp>=0.23.1",
31+
"tree-sitter-yaml>=0.7.1",
3132
]
3233

3334
[build-system]

src/sphinx_codelinks/analyse/utils.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
CommentType.python: {"function_definition", "class_definition"},
1818
CommentType.cpp: {"function_definition", "class_definition"},
1919
CommentType.cs: {"method_declaration", "class_declaration", "property_declaration"},
20+
CommentType.yaml: {"block_mapping_pair", "block_sequence_item", "document"},
2021
}
2122

2223
# initialize logger
@@ -43,6 +44,7 @@
4344
"""
4445
CPP_QUERY = """(comment) @comment"""
4546
C_SHARP_QUERY = """(comment) @comment"""
47+
YAML_QUERY = """(comment) @comment"""
4648

4749

4850
def is_text_file(filepath: Path, sample_size: int = 2048) -> bool:
@@ -76,6 +78,11 @@ def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]:
7678

7779
parsed_language = Language(tree_sitter_c_sharp.language())
7880
query = Query(parsed_language, C_SHARP_QUERY)
81+
elif comment_type == CommentType.yaml:
82+
import tree_sitter_yaml # noqa: PLC0415
83+
84+
parsed_language = Language(tree_sitter_yaml.language())
85+
query = Query(parsed_language, YAML_QUERY)
7986
else:
8087
raise ValueError(f"Unsupported comment style: {comment_type}")
8188
parser = Parser(parsed_language)
@@ -133,10 +140,96 @@ def find_next_scope(
133140
return None
134141

135142

143+
def _find_yaml_structure_in_block_node(
144+
block_node: TreeSitterNode,
145+
) -> TreeSitterNode | None:
146+
"""Find YAML structure elements within a block_node."""
147+
for grandchild in block_node.named_children:
148+
if grandchild.type == "block_mapping":
149+
for ggchild in grandchild.named_children:
150+
if ggchild.type == "block_mapping_pair":
151+
return ggchild
152+
elif grandchild.type == "block_sequence":
153+
for ggchild in grandchild.named_children:
154+
if ggchild.type == "block_sequence_item":
155+
return ggchild
156+
return None
157+
158+
159+
def find_yaml_next_structure(node: TreeSitterNode) -> TreeSitterNode | None:
160+
"""Find the next YAML structure element after the comment node."""
161+
current = node.next_named_sibling
162+
while current:
163+
if current.type in {
164+
"block_mapping_pair",
165+
"block_sequence_item",
166+
"flow_mapping",
167+
"flow_sequence",
168+
}:
169+
return current
170+
if current.type == "document":
171+
for child in current.named_children:
172+
if child.type == "block_node":
173+
result = _find_yaml_structure_in_block_node(child)
174+
if result:
175+
return result
176+
if current.type == "block_node":
177+
result = _find_yaml_structure_in_block_node(current)
178+
if result:
179+
return result
180+
current = current.next_named_sibling
181+
return None
182+
183+
184+
def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None:
185+
"""Find a previous named sibling that is on the same row as the comment."""
186+
comment_row = node.start_point.row
187+
current = node.prev_named_sibling
188+
189+
while current:
190+
# Check if this sibling ends on the same row as the comment starts
191+
# This indicates it's an inline comment
192+
if current.end_point.row == comment_row:
193+
return current
194+
# If we find a sibling that ends before the comment row, we can stop
195+
# as we won't find any siblings on the same row going backwards
196+
if current.end_point.row < comment_row:
197+
break
198+
current = current.prev_named_sibling
199+
200+
return None
201+
202+
203+
def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None:
204+
"""Find the YAML structure (key-value pair, list item, etc.) associated with a comment."""
205+
# First, check if this is an inline comment by looking for a previous sibling on the same row
206+
prev_sibling_same_row = find_yaml_prev_sibling_on_same_row(node)
207+
if prev_sibling_same_row:
208+
return prev_sibling_same_row
209+
210+
# If no previous sibling on same row, try to find the next named sibling (structure after the comment)
211+
structure = find_yaml_next_structure(node)
212+
if structure:
213+
return structure
214+
215+
# If no next sibling found, traverse up to find parent structure
216+
parent = node.parent
217+
while parent:
218+
if parent.type in {"block_mapping_pair", "block_sequence_item"}:
219+
return parent
220+
parent = parent.parent
221+
222+
return None
223+
224+
136225
def find_associated_scope(
137226
node: TreeSitterNode, comment_type: CommentType = CommentType.cpp
138227
) -> TreeSitterNode | None:
139228
"""Find the associated scope of a comment."""
229+
if comment_type == CommentType.yaml:
230+
# YAML uses different structure association logic
231+
return find_yaml_associated_structure(node)
232+
140233
if node.type == CommentCategory.docstring:
141234
# Only for python's docstring
142235
return find_enclosing_scope(node, comment_type)

src/sphinx_codelinks/source_discover/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@
99
"cpp": ["c", "ci", "cpp", "cc", "cxx", "h", "hpp", "hxx", "hh", "ihl"],
1010
"python": ["py"],
1111
"cs": ["cs"],
12+
"yaml": ["yml", "yaml"],
1213
}
1314

1415

1516
class CommentType(str, Enum):
1617
python = "python"
1718
cpp = "cpp"
1819
cs = "cs"
20+
yaml = "yaml"
1921

2022

2123
class SourceDiscoverSectionConfigType(TypedDict, total=False):

0 commit comments

Comments
 (0)