diff --git a/server/lsp_interface.jai b/server/lsp_interface.jai
index dc59fe4..ad15d8e 100644
--- a/server/lsp_interface.jai
+++ b/server/lsp_interface.jai
@@ -110,6 +110,20 @@ LSP_Request_Message_Document_Symbol :: struct {
     }
 }
 
+LSP_Request_Message_Workspace_Symbol :: struct {
+    using request: LSP_Request_Message;
+    params: struct{
+        query: string;
+    }
+}
+
+LSP_Workspace_Symbol :: struct{
+    name: string;
+    kind: LSP_Symbol_Kind;
+    //tags: Tag
+    location: LSP_Location;
+}
+
 LSP_Did_Open_Text_Document :: struct {
     using request: LSP_Request_Message;
     params: struct {
@@ -402,36 +416,6 @@ LSP_Partial_Result_Params :: struct {
 
 // 📌
 LSP_Document_Symbol :: struct {
-
-    Kind :: enum s64 {
-        FILE :: 1;
-        MODULE :: 2;
-        NAMESPACE :: 3;
-        PACKAGE :: 4;
-        CLASS :: 5;
-        METHOD :: 6;
-        PROPERTY :: 7;
-        FIELD :: 8;
-        CONSTRUCTOR :: 9;
-        ENUM :: 10;
-        INTERFACE :: 11;
-        FUNCTION :: 12;
-        VARIABLE :: 13;
-        CONSTANT :: 14;
-        STRING :: 15;
-        NUMBER :: 16;
-        BOOLEAN :: 17;
-        ARRAY :: 18;
-        OBJECT :: 19;
-        KEY :: 20;
-        NULL :: 21;
-        ENUMMEMBER :: 22;
-        STRUCT :: 23;
-        EVENT :: 24;
-        OPERATOR :: 25;
-        TYPEPARAMETER :: 26;
-    }
-
     name: string;
     kind: s64;
     range: LSP_Range;
@@ -490,8 +474,9 @@ LSP_Request_Message_Initialize :: struct {
 LSP_Result_Initialize :: struct {
     capabilities: struct {
         // semanticTokensProvider := true;
-        // documentSymbolProvider := true;
-
+        documentSymbolProvider := true;
+        workspaceSymbolProvider := true;
+
         definitionProvider := true;
         // hoverProvider := true;
 
@@ -765,6 +750,23 @@ lsp_respond :: (id: u32, data: $T) {
     });
 }
 
+lsp_respond_with_already_json_serialized_result_data :: (id: u32, data: string){
+    Reply_With_Placeholder :: struct {
+        using response: LSP_Response_Message;
+        result := "T";
+    }
+
+    body_with_placeholder := json_write_string(Reply_With_Placeholder.{id = id});
+    defer free(body_with_placeholder);
+
+    found_separator, prefix, postfix := split_from_right(body_with_placeholder, "\"T\"");
+
+    assert(found_separator);
+    final_body := tprint("%0%0%0", prefix, data, postfix);
+
+    print("Content-Length: %\r\n\r\n%", final_body.count, final_body);
+}
+
 lsp_respond_with_error :: (id: u32, code: LSP_Error_Codes, message: string, data: $T) {
     reply: struct {
         using response: LSP_Response_Message;
diff --git a/server/main.jai b/server/main.jai
index 527ef8a..720ac9f 100644
--- a/server/main.jai
+++ b/server/main.jai
@@ -351,6 +351,29 @@ handle_request :: (request: LSP_Request_Message, raw_request: string) {
 
             handle_hover(body);
             json_free(xx body);
+
+        case "textDocument/documentSymbol";
+            //ZoneScoped("textDocument/documentSymbol");
+            success, body := json_parse_string(raw_request, LSP_Request_Message_Document_Symbol);
+            if !success {
+                log_error("Unable to parse textDocument/documentSymbol message");
+                return;
+            }
+
+            handle_document_symbol(body);
+            json_free(xx body); // release the parsed request, like the hover case above
+
+        case "workspace/symbol";
+            //ZoneScoped("workspace/symbol");
+            success, body := json_parse_string(raw_request, LSP_Request_Message_Workspace_Symbol);
+            if !success {
+                log_error("Unable to parse workspace/symbol message");
+                return;
+            }
+
+            handle_workspace_symbol(body);
+            json_free(xx body); // release the parsed request, like the hover case above
+
     }
 }
 
@@ -427,6 +450,7 @@ main :: () {
 
 #load "completition.jai";
 #load "goto.jai";
+#load "symbols.jai";
 #load "signature_help.jai";
 #load "hover.jai";
 
diff --git a/server/program.jai b/server/program.jai
index 704720b..7a35f9c 100644
--- a/server/program.jai
+++ b/server/program.jai
@@ -10,7 +10,9 @@ Program_File :: struct {
     nodes: [..]*Node;
     declarations: Table(string, *Declaration);
     resolved_identifiers: Table(*Node, []*Declaration);
-    linked_files: []Linked_File; // @TODO: this is probably stupid name for this!
+    declaration_hierarchy: Table(*Declaration, [..]*Declaration);
+    serialized_workspace_symbols: Table(string, string);
+    linked_files : [] Linked_File; // @TODO: this is probably stupid name for this!
 
     ast_pool: Pool;
     analysis_pool: Pool;
@@ -24,10 +26,16 @@ Linked_File :: struct {
 init_file :: (using file: Program_File) {
     init(*declarations);
     init(*resolved_identifiers);
+    init(*declaration_hierarchy);
+    init(*serialized_workspace_symbols);
 }
 
 deinit_file :: (using file: Program_File) {
     deinit(*declarations);
+    for declaration_hierarchy array_free(it); // the table owns the child arrays
+    deinit(*declaration_hierarchy);
+    for serialized_workspace_symbols free(it); // the table owns the serialized strings
+    deinit(*serialized_workspace_symbols);
     array_free(nodes);
     array_free(loads);
     array_free(imports);
@@ -37,6 +45,10 @@ deinit_file :: (using file: Program_File) {
 
 reset_file :: (using file: Program_File) {
     table_reset(*declarations);
+    for declaration_hierarchy array_free(it); // the table owns the child arrays
+    table_reset(*declaration_hierarchy);
+    for serialized_workspace_symbols free(it); // the table owns the serialized strings
+    table_reset(*serialized_workspace_symbols);
     array_reset(*nodes);
     array_reset(*loads);
     array_reset(*imports);
@@ -145,6 +157,7 @@ parse_file :: (path: string, force := false) {
 }
 
 analyze_files :: () {
+    //ZoneScoped();
     for file: server.files_to_be_analyzed {
         release(*file.analysis_pool);
     }
@@ -180,6 +193,8 @@ analyze_files :: () {
         table_reset(*file.resolved_identifiers);
         resolve_identifiers(file,, pool_alloc(*file.analysis_pool));
         // log("Analyzed: %", file.path);
+        build_declaration_hierarchy(file);
+        generate_and_serialize_workspace_symbols_for_file(file);
     }
 
     array_reset(*server.files_to_be_analyzed);
@@ -321,6 +336,33 @@ resolve_identifiers :: (file: *Program_File) {
     }
 }
 
+// Rebuilds file.declaration_hierarchy, which maps a parent declaration (null when none
+// was found) to the declarations nested directly below it, sorted by source position.
+build_declaration_hierarchy :: (file: *Program_File) {
+    //ZoneScoped();
+
+    // Start from an empty hierarchy, otherwise analyzing the same file again would
+    // append every child a second time.
+    for file.declaration_hierarchy
+        array_free(it);
+
+    table_reset(*file.declaration_hierarchy);
+
+    for file.declarations{
+        parent_declaration: *Declaration;
+
+        if it.parent
+            parent_declaration = cast(*Declaration) get_node_nearest_parent(it.parent, kind => kind == .DECLARATION);
+
+        children := find_or_add(*file.declaration_hierarchy, parent_declaration);
+
+        array_add(children, it);
+    }
+
+    for file.declaration_hierarchy
+        quick_sort(it, (a, b) => ifx a.location.l0 != b.location.l0 then a.location.l0 - b.location.l0 else a.location.c0 - b.location.c0);
+}
+
 get_node_by_location :: (file: *Program_File, location: Node.Location, filter: Node.Kind = .UNINITIALIZATED) -> *Node {
     nearest_node: *Node;
 
@@ -688,7 +730,18 @@ get_identifier_decl :: (__file: *Program_File, ident: *Identifier) -> []*Declara
         return decls;
     }
 
-    return .[];
+    //the loop above fails to find identifier declarations for many common symbols. As a hacky fix, we fall back to searching for declarations that match the name of the identifier. This may lead to false positives obviously, but I do not know if this is any worse than what the loop above does. And since we only do this when the loop above has not been successful in finding something, I would argue that getting a 90% correct answer is better than no answer.
+
+    declarations: [..]*Declaration;
+
+    for file: server.files {
+        file_decl, found := table_find(*file.declarations, ident.name);
+
+        if found
+            array_add(*declarations, file_decl);
+    }
+
+    return declarations;
 }
 
 // @TODO: Maybe refactor this to be more useful?
@@ -989,4 +1042,6 @@ run_diagnostics :: () {
     }
 
     server.previously_errored_file = file;
-}
\ No newline at end of file
+}
+
+#import "Sort";
\ No newline at end of file
diff --git a/server/symbols.jai b/server/symbols.jai
new file mode 100644
index 0000000..f90604b
--- /dev/null
+++ b/server/symbols.jai
@@ -0,0 +1,168 @@
+// Maps the expression on the right-hand side of a declaration to the LSP symbol kind reported to the client.
+// A null expression is reported as VARIABLE.
+expression_to_symbol_kind :: (expression: *Node) -> LSP_Symbol_Kind{
+    if expression{
+        if expression.kind == {
+            case .STRUCT; return .STRUCT;
+            case .ENUM; return .ENUM;
+            case .PROCEDURE; return .FUNCTION;
+            case .OPERATOR_OVERLOAD; return .OPERATOR;
+            case .QUICK_LAMBDA; return .FUNCTION;
+            case .POLYMORPHIC_CONSTANT; return .TYPEPARAMETER;
+            case .DIRECTIVE_IMPORT; return .MODULE;
+            case .DIRECTIVE_BAKE_ARGUMENTS; return .FUNCTION;
+            case .DIRECTIVE_LIBRARY; return .PACKAGE;
+            case .UNION; return .STRUCT;
+
+            // with all of the following, I don't really know how to get enough information about them to confidently determine what they are. A type directive could declare a function type, or a struct type for example. A cast could literally yield anything. I'm sure you could look at the AST here in more detail, but I don't know my way well enough around this codebase to do that. So, as a kind-of-hacky-thing, I just return VARIABLE for all of them. The most important thing is that they show imo, not that the kind matches exactly (I think the kind is mostly for display anyway, not really that functional.)
+
+            case .DIRECTIVE_TYPE; #through;
+            case .CAST; #through;
+            case .PROCEDURE_CALL; #through;
+            case .IDENTIFIER; #through;
+            case .BINARY_OPERATION; #through;
+            case .LITERAL;
+                return .VARIABLE;
+            case;
+                //log("Unhandled expression kind % at %:%:%.", expression.kind, expression.location.file, expression.location.l0 + 1, expression.location.c0 + 1);
+                // NOTE(review): the symbol kinds defined by the LSP specification start at 1, so 0 is not one of them - confirm that clients tolerate it.
+                return 0;
+
+                //There are other shortcomings here: I would like to return things like FIELD for struct-members, CONSTANT for constants, also use FILE vs just MODULE or PACKAGE, but for all of these I am not confident what the AST would look like. So I opted to not pretend I can return the maximum detail here and kept it simple instead.
+        }
+    }
+    else{
+        return .VARIABLE;
+    }
+}
+
+// Answers textDocument/documentSymbol with the tree of declarations of the requested file.
+// Responds with null when the file is unknown and with an empty list when it has no top-level declarations.
+handle_document_symbol :: (request: LSP_Request_Message_Document_Symbol) {
+    //ZoneScoped();
+    file_path := normalize_path(request.params.textDocument.uri);
+
+    file := get_file(file_path);
+    if !file {
+        log_error("File does not exists or has not been parser yet! (%)", file_path);
+        lsp_respond(request.id, null);
+        return;
+    }
+
+    // Converts one declaration and, recursively, everything nested below it.
+    create_lsp_document_from_declaration :: (file: *Program_File, using declaration: *Declaration) -> LSP_Document_Symbol{
+        //ZoneScoped();
+        child_declarations, has_children := table_find(*file.declaration_hierarchy, declaration);
+        child_lsp_document_symbols: [..]LSP_Document_Symbol;
+
+        if has_children{
+            for child_declarations{
+                symbol := create_lsp_document_from_declaration(file, it);
+                array_add(*child_lsp_document_symbols, symbol);
+            }
+        }
+
+        result: LSP_Document_Symbol;
+
+        result.name = name;
+        result.kind = xx expression_to_symbol_kind(expression);
+        result.range = .{.{xx location.l0, xx location.c0}, .{xx location.l1, xx location.c1}};
+        result.selectionRange = result.range;
+        result.children = child_lsp_document_symbols;
+
+        return result;
+    }
+
+    // Top-level declarations are stored under the null parent. The request is always
+    // answered, with an empty list if need be, so that the client never waits for a reply.
+    lsp_symbols: [..]LSP_Document_Symbol;
+    top_level_declarations, has_top_level_declarations := table_find(*file.declaration_hierarchy, null);
+
+    if has_top_level_declarations{
+        for top_level_declarations{
+            symbol := create_lsp_document_from_declaration(file, it);
+            array_add(*lsp_symbols, symbol);
+        }
+    }
+
+    lsp_respond(request.id, lsp_symbols);
+}
+
+// Rebuilds the cache of JSON-serialized workspace symbols of a file, keyed by symbol name.
+generate_and_serialize_workspace_symbols_for_file :: (file: *Program_File){
+    // The table owns the serialized strings, so release the old ones before dropping the entries.
+    for file.serialized_workspace_symbols
+        free(it);
+
+    table_reset(*file.serialized_workspace_symbols);
+
+    for declaration: file.declarations{
+        symbol: LSP_Workspace_Symbol;
+        symbol.name = declaration.name;
+        symbol.location = node_location_to_lsp_location(declaration.location);
+        symbol.kind = expression_to_symbol_kind(declaration.expression);
+
+        //turns out, if we include all variable declarations (including all parameters and local variables of all functions), then I get like 80,000 symbols in the workspace of my game, and then vscode takes half a second just to process the result. We send it very quickly, because we cache the serialized json, but vscode can't keep up. So I skip it here.
+
+        if symbol.kind == .VARIABLE
+            continue;
+
+        serialized_symbol := json_write_string(symbol);
+
+        table_add(*file.serialized_workspace_symbols, declaration.name, serialized_symbol);
+    }
+}
+
+MAX_NUMBER_OF_WORKSPACE_SYMBOLS_TO_RETURN_TO_PREVENT_VSCODE_FROM_BEING_SUPER_SLOW :: 1000;
+
+// Answers workspace/symbol with the cached symbols of all files whose name matches every space-separated term of the query.
+// A term matches when its characters appear in the symbol name in order, ignoring case.
+handle_workspace_symbol :: (request: LSP_Request_Message_Workspace_Symbol) {
+    //ZoneScoped();
+    query_parts := split(request.params.query, cast(u8) #char " ");
+
+    for query_parts
+        if it.count == 0
+            remove it;
+
+    builder: String_Builder;
+    append(*builder, "[");
+
+    symbol_count := 0;
+
+    for file: server.files{
+        for serialized_symbol, symbol_name: file.serialized_workspace_symbols{
+            if symbol_count == MAX_NUMBER_OF_WORKSPACE_SYMBOLS_TO_RETURN_TO_PREVENT_VSCODE_FROM_BEING_SUPER_SLOW
+                break file;
+
+            //the recommendation from visual studio code is that when searching for tokens, the LSP should be very lenient as to what it filters, so that vscode can then be flexible about ordering and interpreting your search terms. They explicitly say you shouldn't check if the query is a substring, because then you can't have stuff like having the query "abc" match "arrayBoundsCheck", which is apparently something that people want to do. I personally hate that for the query "read vessel", vscode doesn't treat "read" and "vessel" as two different search terms, and so won't show you "readVessel", and also that for "readvessel", "readVessel" would match, but not "vesselRead". Basically, I ideally want to type multiple search terms, and if they are in the name, I want that to match (like in sublime text). Vscode doesn't do that. Whatever.
+            // What I decided to do here is the following: I'm interpreting each space-separated element of the query as its own search term. (vscode doesn't even seem to transmit stuff after the space to the LSP, so if that is a rule and not a bug (which I couldn't find out) this part isn't even relevant). For each search term, we follow the recommendation and consider it matched, if the characters of the search term appear in the symbol name in order, case insensitive.
+
+            for part: query_parts {
+                // Every term is matched against the whole name on its own, so the order of the terms does not matter.
+                remaining_name_to_search := symbol_name;
+
+                for 0..part.count - 1 {
+                    search_character_as_string := slice(part, it, 1);
+                    index := index_of_string_nocase(remaining_name_to_search, search_character_as_string);
+
+                    if index == -1
+                        continue serialized_symbol;
+
+                    remaining_name_to_search = slice(remaining_name_to_search, index + 1, remaining_name_to_search.count - index - 1);
+                }
+            }
+
+            if symbol_count > 0
+                append(*builder, ",");
+
+            append(*builder, serialized_symbol);
+            symbol_count += 1;
+        }
+    }
+
+    append(*builder, "]");
+    json_serialized_symbol_list := builder_to_string(*builder);
+    lsp_respond_with_already_json_serialized_result_data(request.id, json_serialized_symbol_list);
+    free(json_serialized_symbol_list);
+}