From 9df1c28bb75217b244257152ab7d788bb2a386d0 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:47 -0700 Subject: [PATCH 1/5] bpf: add writable context for raw tracepoints This is an opt-in interface that allows a tracepoint to provide a safe buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program. The size of the buffer must be a compile-time constant, and is checked before allowing a BPF program to attach to a tracepoint that uses this feature. The pointer to this buffer will be the first argument of tracepoints that opt in; the pointer is valid and can be bpf_probe_read() by both BPF_PROG_TYPE_RAW_TRACEPOINT and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a tracepoint, but the buffer to which it points may only be written by the latter. Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/bpf_types.h | 1 + include/linux/tracepoint-defs.h | 1 + include/trace/bpf_probe.h | 27 +++++++++++++++++++++++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 8 ++++++-- kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 24 ++++++++++++++++++++++++ 8 files changed, 91 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f15432d90728..cd6341eabd74 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -272,6 +272,7 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ }; /* The information passed from prog-specific *_is_valid_access @@ -361,6 +362,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d26991a16894..a10d37bce364 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4b..b29950a19205 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -45,6 +45,7 @@ struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; u32 num_args; + u32 writable_size; } __aligned(32); #endif diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80..d6e556c0a085 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ * to make sure that if the tracepoint handling changes, the * bpf probe will fail to compile unless it too is updated. */ -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ +#define __DEFINE_EVENT(template, call, proto, args, size) \ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ .bpf_func = (void *)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ }; +#define FIRST(x, ...) x + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +static inline void bpf_test_buffer_##call(void) \ +{ \ + /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ + * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ + * dead-code-eliminated. \ + */ \ + FIRST(proto); \ + (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DEFINE_EVENT_WRITABLE +#undef __DEFINE_EVENT +#undef FIRST + #endif /* CONFIG_BPF_EVENTS */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eaf2d3284248..f7fa7a34a62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b0de49598341..ae141e745f92 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) } raw_tp->btp = btp; - prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, - BPF_PROG_TYPE_RAW_TRACEPOINT); + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out_free_tp; } + if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { + err = -EINVAL; + goto out_put_prog; + } err = bpf_probe_register(raw_tp->btp, prog); if (err) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 423f242a5efb..2ef442c62c0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -405,6 +405,7 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", [PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", + [PTR_TO_TP_BUFFER] = "tp_buffer", }; static char slot_type_char[] = { @@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env, return 0; } +static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + if (off < 0) { + verbose(env, + "R%d invalid tracepoint buffer access: off=%d, size=%d", + regno, off, size); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, + "R%d invalid variable buffer offset: off=%d, var_off=%s", + regno, off, tn_buf); + return -EACCES; + } + if (off + size > env->prog->aux->max_tp_access) + env->prog->aux->max_tp_access = off + size; + + return 0; +} + + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_TP_BUFFER) { + err = check_tp_buffer_access(env, reg, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 91800be0c8eb..8607aba1d882 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { const struct bpf_prog_ops raw_tracepoint_prog_ops = { }; +static bool raw_tp_writable_prog_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off == 0) { + if (size != sizeof(u64) || type != BPF_READ) + return false; + info->reg_type = PTR_TO_TP_BUFFER; + } + return raw_tp_prog_is_valid_access(off, size, type, prog, info); +} + +const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { + .get_func_proto = raw_tp_prog_func_proto, + .is_valid_access = raw_tp_writable_prog_is_valid_access, +}; + +const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) return -EINVAL; + if (prog->aux->max_tp_access > btp->writable_size) + return -EINVAL; + return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); } From ea106722c76f08002b69a6983ed84dc18958ba48 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:48 -0700 Subject: [PATCH 2/5] nbd: trace sending nbd requests This adds a tracepoint that can both observe the nbd request being sent to the server, as well as modify that request , e.g., setting a flag in the request that will cause the server to collect detailed tracing data. The struct request * being handled is included to permit correlation with the block tracepoints. Signed-off-by: Matt Mullins Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- MAINTAINERS | 1 + drivers/block/nbd.c | 5 ++++ include/trace/events/nbd.h | 56 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 include/trace/events/nbd.h diff --git a/MAINTAINERS b/MAINTAINERS index 72dfb80e8721..025c6d27789e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10741,6 +10741,7 @@ L: linux-block@vger.kernel.org L: nbd@other.debian.org F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c +F: include/trace/events/nbd.h F: include/uapi/linux/nbd.h NETWORK DROP MONITOR diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 92b8aafb8bb4..24cc10d1f0b4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -44,6 +44,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static int nbd_total_devices = 0; @@ -526,6 +529,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) handle = nbd_cmd_handle(cmd); memcpy(request.handle, &handle, sizeof(handle)); + trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd)); + dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", req, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h new file mode 100644 index 000000000000..5928255ed02e --- /dev/null +++ b/include/trace/events/nbd.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nbd + +#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NBD_H + +#include + +DECLARE_EVENT_CLASS(nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + TP_STRUCT__entry( + __field(struct nbd_request *, nbd_request) + __field(u64, dev_index) + __field(struct request *, request) + ), + + TP_fast_assign( + __entry->nbd_request = 0; + __entry->dev_index = index; + __entry->request = rq; + ), + + TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + sizeof(struct nbd_request) +); + +#endif + +/* This part must be outside protection */ +#include From 2abd2de712cd891321a06b0890a85aef1e506cb5 Mon Sep 17 00:00:00 2001 From: Andrew Hall Date: Fri, 26 Apr 2019 11:49:49 -0700 Subject: [PATCH 3/5] nbd: add tracepoints for send/receive timing This adds four tracepoints to nbd, enabling separate tracing of payload and header sending/receipt. In the send path for headers that have already been sent, we also explicitly initialize the handle so it can be referenced by the later tracepoint. Signed-off-by: Andrew Hall Signed-off-by: Matt Mullins Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- drivers/block/nbd.c | 8 ++++++ include/trace/events/nbd.h | 51 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 24cc10d1f0b4..3e6c3d5dadc8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -513,6 +513,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) if (sent) { if (sent >= sizeof(request)) { skip = sent - sizeof(request); + + /* initialize handle for tracing purposes */ + handle = nbd_cmd_handle(cmd); + goto send_pages; } iov_iter_advance(&from, sent); @@ -536,6 +540,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); + trace_nbd_header_sent(req, handle); if (result <= 0) { if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, @@ -608,6 +613,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) bio = next; } out: + trace_nbd_payload_sent(req, handle); nsock->pending = NULL; nsock->sent = 0; return 0; @@ -655,6 +661,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) tag, req); return ERR_PTR(-ENOENT); } + trace_nbd_header_received(req, handle); cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); @@ -708,6 +715,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) } } out: + trace_nbd_payload_received(req, handle); mutex_unlock(&cmd->lock); return ret ? ERR_PTR(ret) : cmd; } diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h index 5928255ed02e..9849956f34d8 100644 --- a/include/trace/events/nbd.h +++ b/include/trace/events/nbd.h @@ -7,6 +7,57 @@ #include +DECLARE_EVENT_CLASS(nbd_transport_event, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u64, handle) + ), + + TP_fast_assign( + __entry->req = req; + __entry->handle = handle; + ), + + TP_printk( + "nbd transport event: request %p, handle 0x%016llx", + __entry->req, + __entry->handle + ) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + DECLARE_EVENT_CLASS(nbd_send_request, TP_PROTO(struct nbd_request *nbd_request, int index, From 4635b0ae4d26f87cf68dbab6740955dd1ad67cf4 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:50 -0700 Subject: [PATCH 4/5] tools: sync bpf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, and fixes up the error: enumeration value ‘BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE’ not handled in switch [-Werror=switch-enum] build errors it would otherwise cause in libbpf. Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 10 +++++++++- tools/lib/bpf/libbpf.c | 1 + tools/lib/bpf/libbpf_probes.c | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 704bb69514a2..f7fa7a34a62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { @@ -1737,12 +1738,19 @@ union bpf_attr { * error if an eBPF program tries to set a callback that is not * supported in the current kernel. * - * The supported callback values that *argval* can combine are: + * *argval* is a flag array which can combine these flags: * * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * * Here are some examples of where one could call such eBPF * program: * diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9052061ba7fc..11a65db4b93f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2136,6 +2136,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_CGROUP_SYSCTL: return false; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 0f25541632e3..80ee922f290c 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -93,6 +93,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: From e950e843367d7990b9d7ea964e3c33876d477c4b Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:51 -0700 Subject: [PATCH 5/5] selftests: bpf: test writable buffers in raw tps This tests that: * a BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE cannot be attached if it uses either: * a variable offset to the tracepoint buffer, or * an offset beyond the size of the tracepoint buffer * a tracer can modify the buffer provided when attached to a writable tracepoint in bpf_prog_test_run Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/trace/events/bpf_test_run.h | 50 ++++++++++++ net/bpf/test_run.c | 4 + .../raw_tp_writable_reject_nbd_invalid.c | 42 ++++++++++ .../bpf/prog_tests/raw_tp_writable_test_run.c | 80 +++++++++++++++++++ .../selftests/bpf/verifier/raw_tp_writable.c | 34 ++++++++ 5 files changed, 210 insertions(+) create mode 100644 include/trace/events/bpf_test_run.h create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c create mode 100644 tools/testing/selftests/bpf/verifier/raw_tp_writable.c diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h new file mode 100644 index 000000000000..265447e3f71a --- /dev/null +++ b/include/trace/events/bpf_test_run.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_test_run + +#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_TEST_RUN_H + +#include + +DECLARE_EVENT_CLASS(bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + TP_STRUCT__entry( + __field(int, err) + ), + + TP_fast_assign( + __entry->err = *err; + ), + + TP_printk("bpf_test_finish with err=%d", __entry->err) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + sizeof(int) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8606e5aef0b6..6c4694ae4241 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -13,6 +13,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time) { @@ -100,6 +103,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, if (err != -ENOSPC) err = 0; out: + trace_bpf_test_finish(&err); return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c new file mode 100644 index 000000000000..9807336a3016 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +void test_raw_tp_writable_reject_nbd_invalid(void) +{ + __u32 duration = 0; + char error[4096]; + int bpf_fd = -1, tp_fd = -1; + + const struct bpf_insn program[] = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* one byte beyond the end of the nbd_request struct */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, + sizeof(struct nbd_request)), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = program, + .insns_cnt = sizeof(program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd); + if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open", + "erroneously succeeded\n")) + goto out_bpffd; + + close(tp_fd); +out_bpffd: + close(bpf_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c new file mode 100644 index 000000000000..5c45424cac5f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +void test_raw_tp_writable_test_run(void) +{ + __u32 duration = 0; + char error[4096]; + + const struct bpf_insn trace_program[] = { + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL v2", + .insns = skb_program, + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; + + int filter_fd = + bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out_bpffd; + + int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd); + if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened", + "failed: %d errno %d\n", tp_fd, errno)) + goto out_filterfd; + + char test_skb[128] = { + 0, + }; + + __u32 prog_ret; + int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, + 0, &prog_ret, 0); + CHECK(err != 42, "test_run", + "tracepoint did not modify return value\n"); + CHECK(prog_ret != 0, "test_run_ret", + "socket_filter did not return 0\n"); + + close(tp_fd); + + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0, + &prog_ret, 0); + CHECK(err != 0, "test_run_notrace", + "test_run failed with %d errno %d\n", err, errno); + CHECK(prog_ret != 0, "test_run_ret_notrace", + "socket_filter did not return 0\n"); + +out_filterfd: + close(filter_fd); +out_bpffd: + close(bpf_fd); +} diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c new file mode 100644 index 000000000000..95b5d70a1dc1 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c @@ -0,0 +1,34 @@ +{ + "raw_tracepoint_writable: reject variable offset", + .insns = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + + BPF_LD_MAP_FD(BPF_REG_1, 0), + /* move the key (== 0) to r10-8 */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + /* lookup in the map */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + + /* exit clean if null */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + /* shift the buffer pointer to a variable location */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0), + /* clobber whatever's there */ + BPF_MOV64_IMM(BPF_REG_7, 4242), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, }, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)", +},