From c69a636feff9bf3eaf30077a8a79f90e8269b847 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 23 Apr 2025 09:55:15 +0930 Subject: [PATCH] trace: track suspensions, check they match. I added this debugging because the next test revealed a mismatch, so I wanted to see where it was happening. The comment in lightningd suggests it's possible, but I can't see any code which suspends in the lightningd io_loop, so I cannot see how this is triggered. Signed-off-by: Rusty Russell --- common/trace.c | 6 ++++++ lightningd/lightningd.c | 8 -------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/common/trace.c b/common/trace.c index 5bd5fa564..d85c61afb 100644 --- a/common/trace.c +++ b/common/trace.c @@ -67,6 +67,7 @@ struct span { struct span_tag *tags; char *name; + bool suspended; /* Indicate whether this is a remote span, i.e., it was inherited by some other process, which is in charge of emitting the span. This just means that we don't emit this @@ -286,6 +287,7 @@ void trace_span_start(const char *name, const void *key) s->parent = current; s->tags = notleak(tal_arr(NULL, struct span_tag, 0)); s->name = notleak(tal_strdup(NULL, name)); + s->suspended = false; /* If this is a new root span we also need to associate a new * trace_id with it. */ @@ -369,6 +371,8 @@ void trace_span_suspend_(const void *key, const char *lbl) struct span *span = trace_span_find(numkey); TRACE_DBG("Suspending span %s (%zu)\n", current->name, current->key); assert(current == span); + assert(!span->suspended); + span->suspended = true; current = NULL; DTRACE_PROBE1(lightningd, span_suspend, span->id); } @@ -402,6 +406,8 @@ void trace_span_resume_(const void *key, const char *lbl) size_t numkey = trace_key(key); current = trace_span_find(numkey); + assert(current->suspended); + current->suspended = false; TRACE_DBG("Resuming span %s (%zu)\n", current->name, current->key); DTRACE_PROBE1(lightningd, span_resume, current->id); } diff --git a/lightningd/lightningd.c b/lightningd/lightningd.c index a5d46e7f3..756cae072 100644 --- a/lightningd/lightningd.c +++ b/lightningd/lightningd.c @@ -1409,14 +1409,6 @@ int main(int argc, char *argv[]) /*~ Now handle sigchld, so we can clean up appropriately. */ sigchld_conn = notleak(io_new_conn(ld, sigchld_rfd, sigchld_rfd_in, ld)); - /* This span was started before handing control to `io_loop` - * which suspends active spans in-between processing - * events. Depending on how the `io_loop` was interrupted, the - * current context span may have been suspended. We need to - * manually resume it for this case. Notice that resuming is - * idempotent, and doing so repeatedly is safe. - */ - trace_span_resume(argv); trace_span_end(argv); /*~ Mark ourselves live.