diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index b842b83e24804..587571c21d2d6 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -506,6 +506,25 @@ static int probe_kern_arg_ctx_tag(int token_fd) return probe_fd(prog_fd); } +#ifdef __x86_64__ +#ifndef __NR_uprobe +#define __NR_uprobe 336 +#endif +static int probe_uprobe_syscall(int token_fd) +{ + /* + * When not executed from executed kernel provided trampoline, + * the uprobe syscall returns ENXIO error. + */ + return syscall(__NR_uprobe) == -1 && errno == ENXIO; +} +#else +static int probe_uprobe_syscall(int token_fd) +{ + return 0; +} +#endif + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -581,6 +600,9 @@ static struct kern_feature_desc { [FEAT_BTF_QMARK_DATASEC] = { "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, }, + [FEAT_UPROBE_SYSCALL] = { + "Kernel supports uprobe syscall", probe_uprobe_syscall, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index fc59b21b51b5e..69aa61c038a9e 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -392,6 +392,8 @@ enum kern_feature_id { FEAT_ARG_CTX_TAG, /* Kernel supports '?' at the front of datasec names */ FEAT_BTF_QMARK_DATASEC, + /* Kernel supports uprobe syscall */ + FEAT_UPROBE_SYSCALL, __FEAT_CNT, }; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index c174b40866738..5730295e69d31 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -241,6 +241,7 @@ struct usdt_note { long loc_addr; long base_addr; long sema_addr; + bool nop_combo; }; struct usdt_target { @@ -262,6 +263,7 @@ struct usdt_manager { bool has_bpf_cookie; bool has_sema_refcnt; bool has_uprobe_multi; + bool has_uprobe_syscall; }; struct usdt_manager *usdt_manager_new(struct bpf_object *obj) @@ -301,6 +303,11 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj) * usdt probes. */ man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); + + /* + * Detect kernel support for uprobe syscall to be used to pick usdt attach point. + */ + man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL); return man; } @@ -784,6 +791,15 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target = &targets[target_cnt]; memset(target, 0, sizeof(*target)); + /* + * We have usdt with nop,nop5 instruction and we detected uprobe syscall, + * so we can place the uprobe directly on nop5 (+1) to get it optimized. + */ + if (note.nop_combo && man->has_uprobe_syscall) { + usdt_abs_ip++; + usdt_rel_ip++; + } + target->abs_ip = usdt_abs_ip; target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; @@ -1144,7 +1160,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, struct usdt_note *note) { - const char *provider, *name, *args; + const char *provider, *name, *args, *end, *extra; long addrs[3]; size_t len; @@ -1182,6 +1198,15 @@ static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, s if (args >= data + len) /* missing arguments spec */ return -EINVAL; + extra = memchr(args, '\0', data + len - args); + if (!extra) /* non-zero-terminated args */ + return -EINVAL; + ++extra; + end = data + len; + + /* check if we have one extra byte and if it's zero */ + note->nop_combo = (extra + 1) == end && *extra == 0; + note->provider = provider; note->name = name; if (*args == '\0' || *args == ':') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index be1ee7ba7ce03..89f480729a6b5 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -45,3 +45,5 @@ xdp_synproxy xdp_hw_metadata xdp_features verification_cert.h +usdt_1 +usdt_2 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 34ea23c63bd5d..4a21657e45f7d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -733,6 +733,10 @@ $(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP) $(VERIFY_SIG_HDR): $(VERIFICATION_CERT) $(Q)xxd -i -n test_progs_verification_cert $< > $@ +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) +USDTX := usdt_1.c usdt_2.c +endif + # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs @@ -754,7 +758,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ json_writer.c \ $(VERIFY_SIG_HDR) \ flow_dissector_load.h \ - ip_check_defrag_frags.h + ip_check_defrag_frags.h \ + $(USDTX) TRUNNER_LIB_SOURCES := find_bit.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/liburandom_read.so \ diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index f4be5269fa901..a8ca2920c0092 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -247,6 +247,86 @@ static void subtest_basic_usdt(bool optimized) #undef TRIGGER } +#ifdef __x86_64 +extern void usdt_1(void); +extern void usdt_2(void); + +/* nop, nop5 */ +static unsigned char nop15[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 }; + +static void *find_nop15(void *fn) +{ + int i; + + for (i = 0; i < 10; i++) { + if (!memcmp(nop15, fn + i, 5)) + return fn + i; + } + return NULL; +} + +static void subtest_optimized_attach(void) +{ + struct test_usdt *skel; + __u8 *addr_1, *addr_2; + + addr_1 = find_nop15(usdt_1); + if (!ASSERT_OK_PTR(addr_1, "find_nop15")) + return; + + addr_2 = find_nop15(usdt_2); + if (!ASSERT_OK_PTR(addr_2, "find_nop15")) + return; + + skel = test_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_usdt__open_and_load")) + return; + + /* + * Attach program on top of usdt_1 which is standard nop probe + * incidentally followed by nop5. The usdt probe does not have + * extra data in elf note record, so we expect the probe to land + * on the first nop without being optimized. + */ + skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed, + 0 /*self*/, "/proc/self/exe", + "optimized_attach", "usdt_1", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt")) + goto cleanup; + + usdt_1(); + usdt_1(); + + /* nop is on addr_1 address */ + ASSERT_EQ(*addr_1, 0xcc, "int3"); + ASSERT_EQ(skel->bss->executed, 2, "executed"); + + bpf_link__destroy(skel->links.usdt_executed); + + /* + * Attach program on top of usdt_2 which is probe defined on top + * of nop,nop5 combo. The extra data in the elf note record and + * presence of uprobe syscall ensures that the probe is placed + * on top of nop5 and optimized. + */ + skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed, + 0 /*self*/, "/proc/self/exe", + "optimized_attach", "usdt_2", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt")) + goto cleanup; + + usdt_2(); + usdt_2(); + + /* nop5 is on addr_2 + 1 address */ + ASSERT_EQ(*(addr_2 + 1), 0xe8, "call"); + ASSERT_EQ(skel->bss->executed, 4, "executed"); + +cleanup: + test_usdt__destroy(skel); +} +#endif + unsigned short test_usdt_100_semaphore SEC(".probes"); unsigned short test_usdt_300_semaphore SEC(".probes"); unsigned short test_usdt_400_semaphore SEC(".probes"); @@ -516,6 +596,8 @@ void test_usdt(void) #ifdef __x86_64__ if (test__start_subtest("basic_optimized")) subtest_basic_usdt(true); + if (test__start_subtest("optimized_attach")) + subtest_optimized_attach(); #endif if (test__start_subtest("multispec")) subtest_multispec_usdt(); diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c index a78c87537b078..6911868cdf67b 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt.c +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -138,4 +138,13 @@ int usdt_sib(struct pt_regs *ctx) return 0; } +int executed; + +SEC("usdt") +int usdt_executed(struct pt_regs *ctx) +{ + executed++; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h index 549d1f7748101..57fa2902136cd 100644 --- a/tools/testing/selftests/bpf/usdt.h +++ b/tools/testing/selftests/bpf/usdt.h @@ -312,9 +312,16 @@ struct usdt_sema { volatile unsigned short active; }; #ifndef USDT_NOP #if defined(__ia64__) || defined(__s390__) || defined(__s390x__) #define USDT_NOP nop 0 +#elif defined(__x86_64__) +#define USDT_NOP .byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x0 /* nop, nop5 */ #else #define USDT_NOP nop #endif +#else +/* + * User define its own nop instruction, do not emit extra note data. + */ +#define __usdt_asm_extra #endif /* USDT_NOP */ /* @@ -403,6 +410,15 @@ struct usdt_sema { volatile unsigned short active; }; __asm__ __volatile__ ("" :: "m" (sema)); #endif +#ifndef __usdt_asm_extra +#ifdef __x86_64__ +#define __usdt_asm_extra \ + __usdt_asm1( .ascii "\0") +#else +#define __usdt_asm_extra +#endif +#endif + /* main USDT definition (nop and .note.stapsdt metadata) */ #define __usdt_probe(group, name, sema_def, sema, ...) do { \ sema_def(sema) \ @@ -420,6 +436,7 @@ struct usdt_sema { volatile unsigned short active; }; __usdt_asm_strz(name) \ __usdt_asm_args(__VA_ARGS__) \ __usdt_asm1( .ascii "\0") \ + __usdt_asm_extra \ __usdt_asm1(994: .balign 4) \ __usdt_asm1( .popsection) \ __usdt_asm1(.ifndef _.stapsdt.base) \ diff --git a/tools/testing/selftests/bpf/usdt_1.c b/tools/testing/selftests/bpf/usdt_1.c new file mode 100644 index 0000000000000..0e00702b17010 --- /dev/null +++ b/tools/testing/selftests/bpf/usdt_1.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Include usdt.h with defined USDT_NOP macro will switch + * off the extra info in usdt probe. + */ +#define USDT_NOP .byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 +#include "usdt.h" + +__attribute__((aligned(16))) +void usdt_1(void) +{ + USDT(optimized_attach, usdt_1); +} diff --git a/tools/testing/selftests/bpf/usdt_2.c b/tools/testing/selftests/bpf/usdt_2.c new file mode 100644 index 0000000000000..fcb7417a19535 --- /dev/null +++ b/tools/testing/selftests/bpf/usdt_2.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Include usdt.h with the extra info in usdt probe and + * nop/nop5 combo for usdt attach point. + */ +#include "usdt.h" + +__attribute__((aligned(16))) +void usdt_2(void) +{ + USDT(optimized_attach, usdt_2); +}