connectd: warn if we ignore peer incoming for longer than 5 seconds.

One reason why ping processing could be slow is that, once we receive
a message from the peer to send to a subdaemon, we don't listen for
others until we've drained that subdaemon queue entirely.

This can happen for reestablish: slow machines can take a while to
set that subdaemon up.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell
2025-08-14 14:34:15 +09:30
parent 0938d544ed
commit a0fd72eb5e
3 changed files with 18 additions and 0 deletions

View File

@@ -147,6 +147,7 @@ static struct peer *new_peer(struct daemon *daemon,
peer->sent_to_peer = NULL;
peer->urgent = false;
peer->draining = false;
peer->peer_in_lastmsg = -1;
peer->peer_outq = msg_queue_new(peer, false);
peer->last_recv_time = time_now();
peer->is_websocket = is_websocket;

View File

@@ -96,6 +96,10 @@ struct peer {
/* Last time we received traffic */
struct timeabs last_recv_time;
/* How long have we been ignoring peer input? */
struct timemono peer_in_lasttime;
int peer_in_lastmsg;
/* Ratelimits for onion messages. One token per msec. */
size_t onionmsg_incoming_tokens;
struct timemono onionmsg_last_incoming;

View File

@@ -1147,6 +1147,16 @@ static struct io_plan *write_to_subd(struct io_conn *subd_conn,
/* Tell them to read again. */
io_wake(&subd->peer->peer_in);
if (subd->peer->peer_in_lastmsg != -1) {
u64 msec = time_to_msec(timemono_between(time_mono(),
subd->peer->peer_in_lasttime));
if (msec > 5000)
status_peer_broken(&subd->peer->id,
"wake delay for %s: %"PRIu64"msec",
peer_wire_name(subd->peer->peer_in_lastmsg),
msec);
subd->peer->peer_in_lastmsg = -1;
}
/* Wait for them to wake us */
return msg_queue_wait(subd_conn, subd->outq,
@@ -1317,6 +1327,9 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn,
}
/* Wait for them to wake us */
peer->peer_in_lastmsg = type;
peer->peer_in_lasttime = time_mono();
return io_wait(peer_conn, &peer->peer_in, next_read, peer);
}