lightningd: fix race with crossover pings.

We cannot use subd_req() here: replies will come out of order, and the
we should not simply assign the reponses in FIFO order.

Changelog-Fixed: lightningd: don't get confused with parallel ping commands.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell
2025-08-14 14:34:25 +09:30
parent a0fd72eb5e
commit 5f5440383d
8 changed files with 80 additions and 27 deletions

View File

@@ -2368,7 +2368,7 @@ static struct io_plan *recv_req(struct io_conn *conn,
case WIRE_CONNECTD_PEER_SPOKE:
case WIRE_CONNECTD_CONNECT_FAILED:
case WIRE_CONNECTD_DEV_MEMLEAK_REPLY:
case WIRE_CONNECTD_PING_REPLY:
case WIRE_CONNECTD_PING_DONE:
case WIRE_CONNECTD_GOT_ONIONMSG_TO_US:
case WIRE_CONNECTD_CUSTOMMSG_IN:
case WIRE_CONNECTD_PEER_DISCONNECT_DONE:

View File

@@ -89,6 +89,7 @@ struct peer {
/* Are we expecting a pong? */
enum pong_expect_type expecting_pong;
u64 ping_reqid;
/* Random ping timer, to detect dead connections. */
struct oneshot *ping_timer;

View File

@@ -130,15 +130,17 @@ msgtype,connectd_dev_report_fds,2034
# Ping/pong test. Waits for a reply if it expects one.
msgtype,connectd_ping,2030
msgdata,connectd_ping,reqid,u64,
msgdata,connectd_ping,id,node_id,
msgdata,connectd_ping,num_pong_bytes,u16,
msgdata,connectd_ping,len,u16,
msgtype,connectd_ping_reply,2130
msgtype,connectd_ping_done,2037
msgdata,connectd_ping_done,reqid,u64,
# False if we there was already a ping in progress.
msgdata,connectd_ping_reply,sent,bool,
msgdata,connectd_ping_done,sent,bool,
# 0 == no pong expected, otherwise length of pong.
msgdata,connectd_ping_reply,totlen,u16,
msgdata,connectd_ping_done,totlen,u16,
# We tell lightningd we got an onionmsg
msgtype,connectd_got_onionmsg_to_us,2145
1 #include <bitcoin/block.h>
130 msgdata,connectd_send_onionmsg,id,node_id, # Lightningd tells us to send an onion message.
131 msgdata,connectd_send_onionmsg,onion_len,u16, msgtype,connectd_send_onionmsg,2041
132 msgdata,connectd_send_onionmsg,onion,u8,onion_len msgdata,connectd_send_onionmsg,id,node_id,
133 msgdata,connectd_send_onionmsg,onion_len,u16,
134 msgdata,connectd_send_onionmsg,path_key,pubkey, msgdata,connectd_send_onionmsg,onion,u8,onion_len
135 # Lightningd tells us to digest an onion message. msgdata,connectd_send_onionmsg,path_key,pubkey,
136 msgtype,connectd_inject_onionmsg,2042 # Lightningd tells us to digest an onion message.
137 msgdata,connectd_inject_onionmsg,path_key,pubkey, msgtype,connectd_inject_onionmsg,2042
138 msgdata,connectd_inject_onionmsg,onion_len,u16, msgdata,connectd_inject_onionmsg,path_key,pubkey,
139 msgdata,connectd_inject_onionmsg,onion_len,u16,
140 msgdata,connectd_inject_onionmsg,onion,u8,onion_len
141 # Reply. If error isn't empty, something went wrong.
142 msgtype,connectd_inject_onionmsg_reply,2142
143 msgdata,connectd_inject_onionmsg_reply,err,wirestring,
144 # A custom message that we got from a peer and don't know how to handle, so we
145 # forward it to the master for further handling.
146 msgtype,connectd_custommsg_in,2110

View File

@@ -719,8 +719,8 @@ static void handle_ping_reply(struct peer *peer, const u8 *msg)
status_debug("Got pong %zu bytes (%.*s...)",
tal_count(ignored), (int)i, (char *)ignored);
daemon_conn_send(peer->daemon->master,
take(towire_connectd_ping_reply(NULL, true,
tal_bytelen(msg))));
take(towire_connectd_ping_done(NULL, peer->ping_reqid, true,
tal_bytelen(msg))));
}
static void handle_pong_in(struct peer *peer, const u8 *msg)
@@ -1470,25 +1470,26 @@ void send_manual_ping(struct daemon *daemon, const u8 *msg)
{
u8 *ping;
struct node_id id;
u64 reqid;
u16 len, num_pong_bytes;
struct peer *peer;
if (!fromwire_connectd_ping(msg, &id, &num_pong_bytes, &len))
if (!fromwire_connectd_ping(msg, &reqid, &id, &num_pong_bytes, &len))
master_badmsg(WIRE_CONNECTD_PING, msg);
peer = peer_htable_get(daemon->peers, &id);
if (!peer) {
daemon_conn_send(daemon->master,
take(towire_connectd_ping_reply(NULL,
false, 0)));
take(towire_connectd_ping_done(NULL, reqid,
false, 0)));
return;
}
/* We're not supposed to send another ping until previous replied */
if (peer->expecting_pong != PONG_UNEXPECTED) {
daemon_conn_send(daemon->master,
take(towire_connectd_ping_reply(NULL,
false, 0)));
take(towire_connectd_ping_done(NULL, reqid,
false, 0)));
return;
}
@@ -1513,13 +1514,14 @@ void send_manual_ping(struct daemon *daemon, const u8 *msg)
*/
if (num_pong_bytes >= 65532) {
daemon_conn_send(daemon->master,
take(towire_connectd_ping_reply(NULL,
take(towire_connectd_ping_done(NULL, reqid,
true, 0)));
return;
}
/* We'll respond to lightningd once the pong comes in */
peer->expecting_pong = PONG_EXPECTED_COMMAND;
peer->ping_reqid = reqid;
/* Since we're doing this manually, kill and restart timer. */
tal_free(peer->ping_timer);

View File

@@ -36,6 +36,7 @@ LIGHTNINGD_SRC := \
lightningd/peer_control.c \
lightningd/peer_fd.c \
lightningd/peer_htlcs.c \
lightningd/ping.c \
lightningd/plugin.c \
lightningd/plugin_control.c \
lightningd/plugin_hook.c \
@@ -48,7 +49,6 @@ LIGHTNINGD_SRC := \
LIGHTNINGD_SRC_NOHDR := \
lightningd/configs.c \
lightningd/datastore.c \
lightningd/ping.c \
lightningd/offer.c \
lightningd/signmessage.c

View File

@@ -21,6 +21,7 @@
#include <lightningd/opening_common.h>
#include <lightningd/opening_control.h>
#include <lightningd/peer_control.h>
#include <lightningd/ping.h>
#include <lightningd/plugin_hook.h>
struct connect {
@@ -494,7 +495,6 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
case WIRE_CONNECTD_INIT_REPLY:
case WIRE_CONNECTD_ACTIVATE_REPLY:
case WIRE_CONNECTD_DEV_MEMLEAK_REPLY:
case WIRE_CONNECTD_PING_REPLY:
case WIRE_CONNECTD_START_SHUTDOWN_REPLY:
case WIRE_CONNECTD_INJECT_ONIONMSG_REPLY:
break;
@@ -526,6 +526,10 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
case WIRE_CONNECTD_ONIONMSG_FORWARD_FAIL:
handle_onionmsg_forward_fail(connectd->ld, msg);
break;
case WIRE_CONNECTD_PING_DONE:
handle_ping_done(connectd, msg);
break;
}
return 0;
}

View File

@@ -2,37 +2,65 @@
#include <common/json_command.h>
#include <common/json_param.h>
#include <connectd/connectd_wiregen.h>
#include <inttypes.h>
#include <lightningd/channel.h>
#include <lightningd/jsonrpc.h>
#include <lightningd/lightningd.h>
#include <lightningd/peer_control.h>
#include <lightningd/ping.h>
#include <lightningd/subd.h>
static void ping_reply(struct subd *connectd,
const u8 *msg, const int *fds,
struct command *cmd)
struct ping_command {
struct list_node list;
u64 reqid;
struct command *cmd;
};
static void destroy_ping_command(struct ping_command *ping_command,
struct lightningd *ld)
{
list_del_from(&ld->ping_commands, &ping_command->list);
}
static struct ping_command *find_ping_command(struct lightningd *ld,
u64 reqid)
{
struct ping_command *i;
list_for_each(&ld->ping_commands, i, list) {
if (i->reqid == reqid)
return i;
}
return NULL;
}
void handle_ping_done(struct subd *connectd, const u8 *msg)
{
u16 totlen;
bool sent;
u64 reqid;
struct ping_command *ping_command;
log_debug(connectd->log, "Got ping reply!");
if (!fromwire_connectd_ping_reply(msg, &sent, &totlen)) {
if (!fromwire_connectd_ping_done(msg, &reqid, &sent, &totlen)) {
log_broken(connectd->log, "Malformed ping reply %s",
tal_hex(tmpctx, msg));
was_pending(command_fail(cmd, LIGHTNINGD,
"Bad reply message"));
return;
}
ping_command = find_ping_command(connectd->ld, reqid);
if (!ping_command) {
log_broken(connectd->log, "ping reply for unknown reqid %"PRIu64, reqid);
return;
}
log_debug(connectd->log, "Got ping reply!");
if (!sent)
was_pending(command_fail(cmd, LIGHTNINGD,
was_pending(command_fail(ping_command->cmd, LIGHTNINGD,
"Ping already pending"));
else {
struct json_stream *response = json_stream_success(cmd);
struct json_stream *response = json_stream_success(ping_command->cmd);
json_add_num(response, "totlen", totlen);
was_pending(command_success(cmd, response));
was_pending(command_success(ping_command->cmd, response));
}
}
@@ -44,6 +72,8 @@ static struct command_result *json_ping(struct command *cmd,
unsigned int *len, *pongbytes;
struct node_id *id;
u8 *msg;
static u64 reqid;
struct ping_command *ping_command;
if (!param_check(cmd, buffer, params,
p_req("id", param_node_id, &id),
@@ -83,8 +113,14 @@ static struct command_result *json_ping(struct command *cmd,
if (command_check_only(cmd))
return command_check_done(cmd);
msg = towire_connectd_ping(NULL, id, *pongbytes, *len);
subd_req(cmd, cmd->ld->connectd, take(msg), -1, 0, ping_reply, cmd);
ping_command = tal(cmd, struct ping_command);
ping_command->cmd = cmd;
ping_command->reqid = ++reqid;
list_add_tail(&cmd->ld->ping_commands, &ping_command->list);
tal_add_destructor2(ping_command, destroy_ping_command, cmd->ld);
msg = towire_connectd_ping(NULL, ping_command->reqid, id, *pongbytes, *len);
subd_send_msg(cmd->ld->connectd, take(msg));
return command_still_pending(cmd);
}

8
lightningd/ping.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef LIGHTNING_LIGHTNINGD_PING_H
#define LIGHTNING_LIGHTNINGD_PING_H
#include "config.h"
struct subd;
void handle_ping_done(struct subd *connectd, const u8 *msg);
#endif /* LIGHTNING_LIGHTNINGD_PING_H */