Files
palladum-lightning/lightningd/memdump.c
Rusty Russell fd64bb114b lightningd: fix bogus memleak report.
We do our own leak detection on a reply from a subd, but the reply
code set subd->conn to NULL (saving it temporarily, in case the subd
is freed), resulting in it being seen as a leak:

```
lightningd-2 2026-01-12T14:11:12.677Z DEBUG   0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-channeld-chan#1: billboard perm: Reconnected, and reestablished.
lightningd-2 2026-01-12T14:11:12.677Z DEBUG   0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-channeld-chan#1: billboard: Channel ready for use. Shutdown messages exchanged.
lightningd-1 2026-01-12T14:11:12.705Z DEBUG   022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-closingd-chan#1: pid 100718, msgfd 87
lightningd-1 2026-01-12T14:11:12.705Z DEBUG   022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-channeld-chan#1: Status closed, but not exited. Killing
...
lightningd-1 2026-01-12T14:11:32.546Z **BROKEN** lightningd: MEMLEAK: 0x55f61eb4d848
lightningd-1 2026-01-12T14:11:32.546Z **BROKEN** lightningd:   label=ccan/ccan/io/io.c:92:struct io_conn
lightningd-1 2026-01-12T14:11:32.546Z **BROKEN** lightningd:   alloc:
lightningd-1 2026-01-12T14:11:32.840Z **BROKEN** lightningd:     ccan/ccan/tal/tal.c:488 (tal_alloc_)
lightningd-1 2026-01-12T14:11:32.845Z **BROKEN** lightningd:     ccan/ccan/io/io.c:92 (io_new_conn_)
lightningd-1 2026-01-12T14:11:32.845Z **BROKEN** lightningd:     lightningd/subd.c:785 (new_subd)
lightningd-1 2026-01-12T14:11:32.845Z **BROKEN** lightningd:     lightningd/subd.c:839 (new_channel_subd_)
lightningd-1 2026-01-12T14:11:32.846Z **BROKEN** lightningd:     lightningd/channel_control.c:1714 (peer_start_channeld)
lightningd-1 2026-01-12T14:11:32.847Z **BROKEN** lightningd:     lightningd/peer_control.c:1390 (connect_activate_subd)
lightningd-1 2026-01-12T14:11:32.847Z **BROKEN** lightningd:     lightningd/peer_control.c:1516 (peer_connected_hook_final)
lightningd-1 2026-01-12T14:11:32.847Z **BROKEN** lightningd:     lightningd/plugin_hook.c:243 (hook_done)
lightningd-1 2026-01-12T14:11:32.847Z **BROKEN** lightningd:     lightningd/plugin_hook.c:343 (plugin_hook_call_next)
lightningd-1 2026-01-12T14:11:32.847Z **BROKEN** lightningd:     lightningd/plugin_hook.c:299 (plugin_hook_callback)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     lightningd/plugin.c:701 (plugin_response_handle)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     lightningd/plugin.c:790 (plugin_read_json)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     ccan/ccan/io/io.c:60 (next_plan)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     ccan/ccan/io/io.c:422 (do_plan)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     ccan/ccan/io/io.c:439 (io_ready)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     ccan/ccan/io/poll.c:470 (io_loop)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     lightningd/io_loop_with_timers.c:22 (io_loop_with_timers)
lightningd-1 2026-01-12T14:11:32.851Z **BROKEN** lightningd:     lightningd/lightningd.c:1492 (main)
lightningd-1 2026-01-12T14:11:32.852Z **BROKEN** lightningd:     ../sysdeps/nptl/libc_start_call_main.h:58 (__libc_start_call_main)
lightningd-1 2026-01-12T14:11:32.852Z **BROKEN** lightningd:     ../csu/libc-start.c:360 (__libc_start_main_impl)
lightningd-1 2026-01-12T14:11:32.852Z **BROKEN** lightningd:   parents:
lightningd-1 2026-01-12T14:11:32.852Z **BROKEN** lightningd:     lightningd/lightningd.c:108:struct lightningd
lightningd-1 2026-01-12T14:11:32.853Z DEBUG   lightningd: channel_gossip: no longer in startup mode
lightningd-1 2026-01-12T14:11:32.856Z DEBUG   hsmd: new_client: 1
```

The workaround is to do our own leak detection on a timer (making the
conn notleak() would leave us open to a real leak in future!).

We also move the `struct leak_detect` definition inside the C file
where it belongs.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2026-01-14 15:41:45 +10:30

277 lines
7.5 KiB
C

/* Only possible if we're in developer mode. */
#include "config.h"
#include <backtrace.h>
#include <ccan/tal/str/str.h>
#include <common/json_command.h>
#include <common/memleak.h>
#include <common/timeout.h>
#include <connectd/connectd_wiregen.h>
#include <hsmd/hsmd_wiregen.h>
#include <lightningd/chaintopology.h>
#include <lightningd/channel.h>
#include <lightningd/closed_channel.h>
#include <lightningd/hsm_control.h>
#include <lightningd/jsonrpc.h>
#include <lightningd/lightningd.h>
#include <lightningd/memdump.h>
#include <lightningd/subd.h>
/* State shared by every request started on behalf of one dev-memleak
 * command. */
struct leak_detect {
/* Command to answer.  leak_detect_timeout() sets this to NULL after it has
 * sent the report, so a later finish_report() only frees this struct. */
struct command *cmd;
/* Used to reach ld->timers when the final report is deferred to a timer. */
struct lightningd *ld;
/* Requests registered with start_leak_request() that are not yet freed. */
size_t num_outstanding_requests;
/* tal array of names of the daemons which reported a leak. */
const char **leakers;
};
/* Add @ptr to @response as a string member called @name, rendered with
 * printf's "%p" conversion. */
static void json_add_ptr(struct json_stream *response, const char *name,
			 const void *ptr)
{
	char formatted[STR_MAX_CHARS(void *)];

	/* snprintf bounds the write to the buffer, whatever "%p" produces. */
	snprintf(formatted, sizeof formatted, "%p", ptr);
	json_add_string(response, name, formatted);
}
/* Emit a JSON array called @fieldname with one object per tal child of
 * @root, recursing into grandchildren under "children".
 *
 * @cmd and its jcon are left out of the dump, as is anything named
 * "struct log_book".
 *
 * Returns the sum of the "cumulative_size" values of the emitted objects,
 * i.e. each object's own byte length plus that of its dumped descendants.
 */
static size_t add_memdump(struct json_stream *response,
			  const char *fieldname, const tal_t *root,
			  struct command *cmd)
{
	size_t total = 0;

	json_array_start(response, fieldname);
	for (const tal_t *obj = tal_first(root); obj; obj = tal_next(obj)) {
		const char *label;
		size_t bytes;

		/* Don't try to dump this command! */
		if (obj == cmd || obj == cmd->jcon)
			continue;

		/* Don't dump logs, we know they grow. */
		label = tal_name(obj);
		if (label && streq(label, "struct log_book"))
			continue;

		bytes = tal_bytelen(obj);

		json_object_start(response, NULL);
		json_add_ptr(response, "parent", tal_parent(obj));
		json_add_ptr(response, "value", obj);
		/* "size" is this object alone; children are added below. */
		json_add_u64(response, "size", bytes);
		if (label)
			json_add_string(response, "label", label);
		if (tal_first(obj))
			bytes += add_memdump(response, "children", obj, cmd);
		json_add_u64(response, "cumulative_size", bytes);
		json_object_end(response);

		total += bytes;
	}
	json_array_end(response);

	return total;
}
/* Handler for "dev-memdump": replies with a "memdump" array describing the
 * tal tree walked from a NULL root. */
static struct command_result *json_memdump(struct command *cmd,
					   const char *buffer,
					   const jsmntok_t *obj UNNEEDED,
					   const jsmntok_t *params)
{
	struct json_stream *out;

	/* No parameter specs are supplied, only the terminating NULL. */
	if (!param(cmd, buffer, params, NULL))
		return command_param_failed();

	out = json_stream_success(cmd);
	/* The byte total returned by add_memdump() is not reported here. */
	add_memdump(out, "memdump", NULL, cmd);

	return command_success(cmd, out);
}
/* Command descriptor for "dev-memdump": handled by json_memdump() and
 * flagged as developer-only. */
static const struct json_command dev_memdump_command = {
"dev-memdump",
json_memdump,
.dev_only = true,
};
/* NOTE(review): AUTODATA presumably makes this descriptor discoverable by
 * the JSON-RPC layer -- confirm against its definition. */
AUTODATA(json_command, &dev_memdump_command);
/* printf-style reporting callback handed to dump_memleak(): forwards the
 * formatted message to @log at LOG_BROKEN level. */
static void memleak_log(struct logger *log, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	logv(log, LOG_BROKEN, NULL, true, fmt, args);
	va_end(args);
}
/* Run leak detection over lightningd's own allocations on behalf of @cmd.
 *
 * Returns whatever dump_memleak() returns; the caller treats true as
 * "lightningd leaked".  Findings are logged through memleak_log().
 */
static bool lightningd_check_leaks(struct command *cmd)
{
	/* Enter everything, except this cmd and its jcon */
	struct htable *candidates = memleak_start(cmd);
	bool leaked;

	/* This command is not a leak! */
	memleak_ptr(candidates, cmd);
	memleak_ignore_children(candidates, cmd);

	/* Now delete ld and those which it has pointers to. */
	memleak_scan_obj(candidates, cmd->ld);

	/* Whatever is left gets reported as a leak. */
	leaked = dump_memleak(candidates, memleak_log, cmd->ld->log);
	return leaked;
}
static void finish_report(struct leak_detect *leaks)
{
bool found_leak;
struct json_stream *response;
const u8 *msg;
/* If it timed out, we free ourselved and exit! */
if (!leaks->cmd) {
tal_free(leaks);
return;
}
/* Check for our own leaks. */
if (lightningd_check_leaks(leaks->cmd))
tal_arr_expand(&leaks->leakers, "lightningd");
/* Check hsmd for leaks. */
msg = hsm_sync_req(tmpctx, leaks->cmd->ld, take(towire_hsmd_dev_memleak(NULL)));
if (!fromwire_hsmd_dev_memleak_reply(msg, &found_leak))
fatal("Bad HSMD_DEV_MEMLEAK_REPLY: %s", tal_hex(tmpctx, msg));
if (found_leak)
report_subd_memleak(leaks, leaks->cmd->ld->hsm);
response = json_stream_success(leaks->cmd);
json_array_start(response, "leaks");
for (size_t num_leakers = 0;
num_leakers < tal_count(leaks->leakers);
num_leakers++) {
json_object_start(response, NULL);
json_add_string(response, "subdaemon", leaks->leakers[num_leakers]);
json_object_end(response);
}
json_array_end(response);
/* Command is now done. */
was_pending(command_success(leaks->cmd, response));
}
/* Timer callback armed by json_memleak(): stop waiting for subdaemon
 * replies and report what we have so far. */
static void leak_detect_timeout(struct leak_detect *leak_detect)
{
	/* We actually *do* leak the leak_detect, but cmd is about
	 * to exit: reparent it to NULL so it is still valid below and
	 * when any late request completes. */
	tal_steal(NULL, leak_detect);
	notleak(leak_detect);

	finish_report(leak_detect);

	/* Tells the next finish_report() to just free leak_detect. */
	leak_detect->cmd = NULL;
}
/* Destructor attached to each request by start_leak_request(): counts the
 * request as finished and, once none remain, schedules finish_report().
 * @req is not used. */
static void leak_detect_req_done(const struct subd_req *req,
				 struct leak_detect *leak_detect)
{
	/* Still waiting on somebody else? */
	if (--leak_detect->num_outstanding_requests != 0)
		return;

	/* We do this off a timer: doing it off a subd reply makes us think
	 * that the subd->conn (temporarily set to NULL during the cb) is a
	 * leak! */
	new_reltimer(leak_detect->ld->timers, leak_detect,
		     time_from_sec(0),
		     finish_report, leak_detect);
}
/* Register @req as one more request @leak_detect is waiting on.  The count
 * is dropped again by leak_detect_req_done() when @req is freed. */
void start_leak_request(const struct subd_req *req,
			struct leak_detect *leak_detect)
{
	leak_detect->num_outstanding_requests += 1;

	/* When req is freed, request finished. */
	tal_add_destructor2(req, leak_detect_req_done, leak_detect);
}
/* Record that @leaker reported a leak: a copy of its name, allocated off
 * @leak_detect, is appended to the leakers array. */
void report_subd_memleak(struct leak_detect *leak_detect, struct subd *leaker)
{
	const char *name = tal_strdup(leak_detect, leaker->name);

	tal_arr_expand(&leak_detect->leakers, name);
}
/* Reply callback for the memleak request sent to gossipd: records gossipd
 * as a leaker if its reply says so.  An unparseable reply is fatal. */
static void gossip_dev_memleak_done(struct subd *gossipd,
				    const u8 *reply,
				    const int *fds UNUSED,
				    struct leak_detect *leaks)
{
	bool leaked;

	if (!fromwire_gossipd_dev_memleak_reply(reply, &leaked))
		fatal("Bad gossip_dev_memleak");

	if (!leaked)
		return;
	report_subd_memleak(leaks, gossipd);
}
/* Reply callback for the memleak request sent to connectd: records
 * connectd as a leaker if its reply says so.  An unparseable reply is
 * fatal. */
static void connect_dev_memleak_done(struct subd *connectd,
				     const u8 *reply,
				     const int *fds UNUSED,
				     struct leak_detect *leaks)
{
	bool leaked;

	if (!fromwire_connectd_dev_memleak_reply(reply, &leaked))
		fatal("Bad connect_dev_memleak");

	if (!leaked)
		return;
	report_subd_memleak(leaks, connectd);
}
/* Handler for "dev-memleak".
 *
 * Fails unless $LIGHTNINGD_DEV_MEMLEAK is set in the environment.
 * Otherwise it sends memleak requests to connectd, gossipd and the
 * per-peer daemons, arms a 20 second timeout, and leaves the command
 * pending; finish_report() sends the reply.
 */
static struct command_result *json_memleak(struct command *cmd,
					   const char *buffer,
					   const jsmntok_t *obj UNNEEDED,
					   const jsmntok_t *params)
{
	struct lightningd *ld = cmd->ld;
	struct leak_detect *leaks;
	const struct subd_req *req;

	if (!param_check(cmd, buffer, params, NULL))
		return command_param_failed();

	if (getenv("LIGHTNINGD_DEV_MEMLEAK") == NULL)
		return command_fail(cmd, LIGHTNINGD,
				    "Leak detection needs $LIGHTNINGD_DEV_MEMLEAK");

	if (command_check_only(cmd))
		return command_check_done(cmd);

	/* Allocated off cmd, so it is freed with it unless stolen away. */
	leaks = tal(cmd, struct leak_detect);
	leaks->cmd = cmd;
	leaks->ld = ld;
	leaks->num_outstanding_requests = 0;
	leaks->leakers = tal_arr(leaks, const char *, 0);

	/* Now do all the async ones.  By doing connectd first, it
	 * has the side-effect of suppressing the complaint it makes
	 * about us being unresponsive. */
	req = subd_req(ld->connectd, ld->connectd,
		       take(towire_connectd_dev_memleak(NULL)),
		       -1, 0, connect_dev_memleak_done, leaks);
	start_leak_request(req, leaks);

	req = subd_req(ld->gossip, ld->gossip,
		       take(towire_gossipd_dev_memleak(NULL)),
		       -1, 0, gossip_dev_memleak_done, leaks);
	start_leak_request(req, leaks);

	/* Ask all per-peer daemons */
	peer_dev_memleak(ld, leaks);

	/* Set timer: dualopend doesn't always listen! */
	notleak(new_reltimer(ld->timers, leaks, time_from_sec(20),
			     leak_detect_timeout, leaks));

	return command_still_pending(cmd);
}
/* Command descriptor for "dev-memleak": handled by json_memleak() and
 * flagged as developer-only. */
static const struct json_command dev_memleak_command = {
"dev-memleak",
json_memleak,
.dev_only = true,
};
/* NOTE(review): AUTODATA presumably makes this descriptor discoverable by
 * the JSON-RPC layer -- confirm against its definition. */
AUTODATA(json_command, &dev_memleak_command);