diff --git a/common/test/Makefile b/common/test/Makefile
index 46b579a41..69a87e0e1 100644
--- a/common/test/Makefile
+++ b/common/test/Makefile
@@ -105,4 +105,10 @@ common/test/run-version:				\
 	wire/towire.o
 
 
+common/test/run-trace:					\
+	common/amount.o                                 \
+	common/memleak.o				\
+	common/trace.o					\
+	wire/fromwire.o					\
+	wire/towire.o
 check-units: $(COMMON_TEST_PROGRAMS:%=unittest/%)
diff --git a/common/test/run-trace.c b/common/test/run-trace.c
new file mode 100644
index 000000000..99fd94b69
--- /dev/null
+++ b/common/test/run-trace.c
@@ -0,0 +1,32 @@
+#include "config.h"
+#include <common/setup.h>
+#include <common/trace.h>
+
+/* This is mostly a benchmark to see how much overhead the tracing
+ * introduces. */
+
+int main(int argx, char *argv[])
+{
+	/* Just some context objects to hang spans off of. */
+	int a, b, c, d;
+
+	common_setup(argv[0]);
+
+	/* Create a bunch of nested spans to emit. */
+	for(int i=0; i<25000; i++) {
+		trace_span_start("a", &a);
+		trace_span_start("b", &b);
+
+		trace_span_start("c", &c);
+		trace_span_end(&c);
+
+		trace_span_end(&b);
+
+		trace_span_start("d", &d);
+		trace_span_end(&d);
+
+		trace_span_end(&a);
+	}
+	trace_cleanup();
+	common_shutdown();
+}
diff --git a/doc/developers-guide/tracing-cln-performance.md b/doc/developers-guide/tracing-cln-performance.md
index 54d75a026..8ec1ad563 100644
--- a/doc/developers-guide/tracing-cln-performance.md
+++ b/doc/developers-guide/tracing-cln-performance.md
@@ -81,3 +81,36 @@ Attaching 1 probe...
 
 
 Notice that due to a [limitation](https://github.com/iovisor/bpftrace/issues/305) in `bpftrace` you'll at most get the first 200 bytes of the payload. If you write your own exporter you'll be able to specify the size of the buffer that is being used, and can extract the entire span.
+
+## Tracing Overhead
+
+While we try to make tracing as lightweight as possible it isn't
+free. To quantify how much time is spent actually maintaining the
+tracing context we built a small test in `common/test/run-trace` which
+creates a sequence of 25'000 traces, with a total of 1'000'000 spans,
+without any additional operations.
+
+The following run with [hyperfine][hf] shows the runtime for that test
+without an exporter attached:
+
+
+```bash
+
+$ hyperfine common/test/run-trace
+Benchmark 1: common/test/run-trace
+  Time (mean ± σ):     368.4 ms ±   4.8 ms    [User: 142.7 ms, System: 225.1 ms]
+  Range (min … max):   358.3 ms … 377.2 ms    10 runs
+```
+
+While the following is the same run, but with an exporter attached:
+
+```bash
+$ ❯ hyperfine common/test/run-trace
+Benchmark 1: common/test/run-trace
+  Time (mean ± σ):     561.5 ms ±  14.1 ms    [User: 164.5 ms, System: 396.5 ms]
+  Range (min … max):   546.5 ms … 598.9 ms    10 runs
+```
+
+So depending on whether an exporter is attached, creating and emitting
+span without and with an exporter takes around 370ns and 560ns
+respectively.