Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
419dba5
feat: add java tests
vitali87 Aug 14, 2025
64b52e1
fix: add generic interfaces for java
vitali87 Aug 14, 2025
7517fb2
fix: rm dead code java
vitali87 Aug 14, 2025
95cae46
refactor: extract magic strings to named constants in java_utils.py
claude[bot] Aug 14, 2025
671bd1b
fix: add more java tests
vitali87 Aug 14, 2025
196025f
fix: resolve linting errors in java_type_inference.py
vitali87 Aug 15, 2025
ad737d7
fix: add java tree-sitter inheritance
vitali87 Aug 18, 2025
1886552
fix: add java grammar to is_java_main_method
vitali87 Aug 18, 2025
81cc524
fix: modifier extraction java_utils.py functions
vitali87 Aug 18, 2025
5b91178
feat: tree-sitterise java
vitali87 Aug 18, 2025
ff82972
fix: address PR revierws
vitali87 Aug 18, 2025
4bcf5ac
feat: add java typed dicts
vitali87 Aug 18, 2025
bc5426f
fix: correct impl Java's scoping semantics
vitali87 Aug 18, 2025
6fcb803
fix: directory resolution java
vitali87 Aug 18, 2025
56f44c9
fix: java javae type inference
vitali87 Aug 18, 2025
14ee548
fix: add java test to reach 100
vitali87 Aug 18, 2025
871792e
fix: add IMPLEMENTS for java
vitali87 Aug 20, 2025
1b6b4d2
fix: add module_qn_to_file_path to provide efficient lookups instead …
vitali87 Aug 20, 2025
c8db6af
fix: impelemnt _resolve_java_method_return_type() method
vitali87 Aug 20, 2025
40f01d0
fix: optimize java
vitali87 Aug 20, 2025
aa9245e
fix: replace heuristic inhertance method resolution with proper one
vitali87 Aug 20, 2025
eb79396
fix: correctly handle overloaded Java methods
vitali87 Aug 20, 2025
753d9ad
refactor: DRY
vitali87 Aug 20, 2025
d7013d5
fix: pdated the static method resolution logic
vitali87 Aug 20, 2025
a84be40
fix: the class qualified name construction in _lookup_java_field_type…
vitali87 Aug 20, 2025
606d3f2
refactor: optimize
vitali87 Aug 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion codebase_rag/graph_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,9 @@ def should_skip_path(path: Path) -> bool:

def _process_function_calls(self) -> None:
"""Third pass: Process function calls using the cached ASTs."""
for file_path, (root_node, language) in self.ast_cache.items():
# Create a copy of items to prevent "OrderedDict mutated during iteration" errors
ast_cache_items = list(self.ast_cache.items())
for file_path, (root_node, language) in ast_cache_items:
self.factory.call_processor.process_calls_in_file(
file_path, root_node, language, self.queries
)
29 changes: 29 additions & 0 deletions codebase_rag/language_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,41 @@ class LanguageConfig:
"interface_declaration",
"enum_declaration",
"annotation_type_declaration",
"record_declaration",
],
module_node_types=["program"],
package_indicators=[], # Java uses package declarations
call_node_types=["method_invocation"],
import_node_types=COMMON_DECLARATION_IMPORT,
import_from_node_types=COMMON_DECLARATION_IMPORT, # Java uses same node for imports
# Pre-formatted Tree-sitter queries for comprehensive Java parsing
function_query="""
(method_declaration
name: (identifier) @name) @function
(constructor_declaration
name: (identifier) @name) @function
""",
class_query="""
(class_declaration
name: (identifier) @name) @class
(interface_declaration
name: (identifier) @name) @class
(enum_declaration
name: (identifier) @name) @class
(annotation_type_declaration
name: (identifier) @name) @class
(record_declaration
name: (identifier) @name) @class
""",
call_query="""
(method_invocation
name: (identifier) @name) @call
(method_invocation
object: (_)
name: (identifier) @name) @call
(object_creation_expression
type: (type_identifier) @name) @call
""",
),
"cpp": create_lang_config(
file_extensions=[
Expand Down
55 changes: 52 additions & 3 deletions codebase_rag/parsers/call_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,22 @@ def _get_call_target_name(self, call_node: Node) -> str | None:
return convert_operator_symbol_to_name(operator_text)

# For 'method_invocation' in Java
if call_node.type == "method_invocation":
# Get the object (receiver) part
object_node = call_node.child_by_field_name("object")
name_node = call_node.child_by_field_name("name")

if name_node and name_node.text:
method_name = str(name_node.text.decode("utf8"))

if object_node and object_node.text:
object_text = str(object_node.text.decode("utf8"))
return f"{object_text}.{method_name}"
else:
# No object, likely this.method() or static method
return method_name

# General case for other languages
if name_node := call_node.child_by_field_name("name"):
text = name_node.text
if text is not None:
Expand Down Expand Up @@ -327,6 +343,10 @@ def _ingest_function_calls(
captures = cursor.captures(caller_node)
call_nodes = captures.get("call", [])

logger.debug(
f"Found {len(call_nodes)} call nodes in {language} for {caller_qn}"
)

for call_node in call_nodes:
if not isinstance(call_node, Node):
continue
Expand All @@ -345,9 +365,15 @@ def _ingest_function_calls(
if not call_name:
continue

callee_info = self._resolve_function_call(
call_name, module_qn, local_var_types, class_context
)
# Use Java-specific resolution for Java method calls
if language == "java" and call_node.type == "method_invocation":
callee_info = self._resolve_java_method_call(
call_node, module_qn, local_var_types, language
)
else:
callee_info = self._resolve_function_call(
call_name, module_qn, local_var_types, class_context
)
if not callee_info:
# Check if it's a built-in JavaScript method
builtin_info = self._resolve_builtin_call(call_name)
Expand Down Expand Up @@ -1027,3 +1053,26 @@ def _is_method(self, func_node: Node, lang_config: LanguageConfig) -> bool:
return True
current = current.parent
return False

def _resolve_java_method_call(
    self,
    call_node: Node,
    module_qn: str,
    local_var_types: dict[str, str],
    language: str,
) -> tuple[str, str] | None:
    """Resolve a Java method invocation through the Java type inference engine.

    The work is delegated to ``resolve_java_method_call`` on the
    ``java_type_inference`` attribute of ``self.type_inference``.

    Args:
        call_node: The call node to resolve.
        module_qn: Qualified name of the module containing the call.
        local_var_types: Known types of local variables, keyed by name.
        language: Accepted for signature parity with the caller; not used
            by this method.

    Returns:
        Whatever the Java engine returned: a two-item tuple on success, or
        a falsy value when the call could not be resolved.
    """
    engine = self.type_inference.java_type_inference
    result = engine.resolve_java_method_call(call_node, local_var_types, module_qn)

    # Nothing to log for an unresolved call; hand the engine's answer back as-is.
    if not result:
        return result

    logger.debug(
        f"Java method call resolved: {call_node.text.decode('utf8') if call_node.text else 'unknown'} -> {result[1]}"
    )
    return result
108 changes: 104 additions & 4 deletions codebase_rag/parsers/definition_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,15 @@ def __init__(
function_registry: Any,
simple_name_lookup: dict[str, set[str]],
import_processor: ImportProcessor,
module_qn_to_file_path: dict[str, Path],
):
self.ingestor = ingestor
self.repo_path = repo_path
self.project_name = project_name
self.function_registry = function_registry
self.simple_name_lookup = simple_name_lookup
self.import_processor = import_processor
self.module_qn_to_file_path = module_qn_to_file_path
self.class_inheritance: dict[str, list[str]] = {}

def _get_node_type_for_inheritance(self, qualified_name: str) -> str:
Expand Down Expand Up @@ -121,6 +123,9 @@ def process_file(
[self.project_name] + list(relative_path.parent.parts)
)

# Populate the module QN to file path mapping for efficient lookups
self.module_qn_to_file_path[module_qn] = file_path

self.ingestor.ensure_node_batch(
"Module",
{
Expand Down Expand Up @@ -1151,6 +1156,16 @@ def _ingest_classes_and_methods(
node_type, class_qn, parent_class_qn
)

# Handle Java interface implementations
if class_node.type == "class_declaration":
implemented_interfaces = self._extract_implemented_interfaces(
class_node, module_qn
)
for interface_qn in implemented_interfaces:
self._create_implements_relationship(
node_type, class_qn, interface_qn
)

body_node = class_node.child_by_field_name("body")
if not body_node:
continue
Expand Down Expand Up @@ -1327,6 +1342,35 @@ def _extract_parent_classes(self, class_node: Node, module_qn: str) -> list[str]
)
return parent_classes

# Look for superclass in Java class definition (extends clause)
if class_node.type == "class_declaration":
superclass_node = class_node.child_by_field_name("superclass")
if superclass_node:
# Java superclass is a single type identifier
if superclass_node.type == "type_identifier":
parent_text = superclass_node.text
if parent_text:
parent_name = parent_text.decode("utf8")
# Resolve to full qualified name if possible
resolved_superclass = (
self._resolve_class_name(parent_name, module_qn)
or f"{module_qn}.{parent_name}"
)
parent_classes.append(resolved_superclass)
else:
# Look for type_identifier children in superclass node
for child in superclass_node.children:
if child.type == "type_identifier":
parent_text = child.text
if parent_text:
parent_name = parent_text.decode("utf8")
resolved_superclass_child = (
self._resolve_class_name(parent_name, module_qn)
or f"{module_qn}.{parent_name}"
)
parent_classes.append(resolved_superclass_child)
break

# Look for superclasses in Python class definition
superclasses_node = class_node.child_by_field_name("superclasses")
if superclasses_node:
Expand All @@ -1343,11 +1387,11 @@ def _extract_parent_classes(self, class_node: Node, module_qn: str) -> list[str]
parent_classes.append(import_map[parent_name])
else:
# Try to resolve within same module
resolved_parent = self._resolve_class_name(
parent_name, module_qn
resolved_python_parent: str | None = (
self._resolve_class_name(parent_name, module_qn)
)
if resolved_parent is not None:
parent_classes.append(resolved_parent)
if resolved_python_parent is not None:
parent_classes.append(resolved_python_parent)
else:
# Fallback: assume same module
parent_classes.append(f"{module_qn}.{parent_name}")
Expand Down Expand Up @@ -2491,3 +2535,59 @@ def _build_assignment_arrow_function_qualified_name(
return f"{module_qn}.{'.'.join(path_parts)}.{function_name}"
else:
return f"{module_qn}.{function_name}"

def _extract_implemented_interfaces(
self, class_node: Node, module_qn: str
) -> list[str]:
"""Extract implemented interface names from a Java class definition."""
implemented_interfaces: list[str] = []

# Look for interfaces field in Java class declaration
interfaces_node = class_node.child_by_field_name("interfaces")
if interfaces_node:
# The interfaces node contains a super_interfaces structure
# which has a type_list with comma-separated interface types
self._extract_java_interface_names(
interfaces_node, implemented_interfaces, module_qn
)

return implemented_interfaces

def _extract_java_interface_names(
self, interfaces_node: Node, interface_list: list[str], module_qn: str
) -> None:
"""Extract interface names from Java interfaces clause using tree-sitter."""
for child in interfaces_node.children:
if child.type == "type_list":
# Type list contains the actual interface types
for type_child in child.children:
if type_child.type == "type_identifier":
interface_name = type_child.text
if interface_name:
interface_name_str = interface_name.decode("utf8")
# Resolve to fully qualified name
resolved_interface = (
self._resolve_class_name(interface_name_str, module_qn)
or f"{module_qn}.{interface_name_str}"
)
interface_list.append(resolved_interface)
elif child.type == "type_identifier":
# Direct type identifier (fallback case)
interface_name = child.text
if interface_name:
interface_name_str = interface_name.decode("utf8")
resolved_interface = (
self._resolve_class_name(interface_name_str, module_qn)
or f"{module_qn}.{interface_name_str}"
)
interface_list.append(resolved_interface)

def _create_implements_relationship(
self, class_type: str, class_qn: str, interface_qn: str
) -> None:
"""Create an IMPLEMENTS relationship between a class and an interface."""
self.ingestor.ensure_relationship_batch(
(class_type, "qualified_name", class_qn),
"IMPLEMENTS",
("Interface", "qualified_name", interface_qn),
)
5 changes: 5 additions & 0 deletions codebase_rag/parsers/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def __init__(
self.simple_name_lookup = simple_name_lookup
self.ast_cache = ast_cache

# Mapping from module qualified names to file paths for efficient lookups
self.module_qn_to_file_path: dict[str, Path] = {}

# Create processors with proper dependencies
self._import_processor: ImportProcessor | None = None
self._structure_processor: StructureProcessor | None = None
Expand Down Expand Up @@ -104,6 +107,7 @@ def definition_processor(self) -> DefinitionProcessor:
function_registry=self.function_registry,
simple_name_lookup=self.simple_name_lookup,
import_processor=self.import_processor,
module_qn_to_file_path=self.module_qn_to_file_path,
)
return self._definition_processor

Expand All @@ -118,6 +122,7 @@ def type_inference(self) -> TypeInferenceEngine:
project_name=self.project_name,
ast_cache=self.ast_cache,
queries=self.queries,
module_qn_to_file_path=self.module_qn_to_file_path,
)
return self._type_inference

Expand Down
Loading
Loading