pytest: make test_no_delay more robust.
Unfortunately the effect of leaving Nagle enabled is subtle. Here it
is in v25.12:
Normal:
tests/test_connection.py::test_no_delay PASSED
====================================================================== 1 passed in 13.87s
Nagle enabled:
tests/test_connection.py::test_no_delay PASSED
====================================================================== 1 passed in 21.70s
So it's hard to both catch this issue and not have false positives. Improve the
test by deliberately running with Nagle enabled, so we can do a direct comparison.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
@@ -515,9 +515,13 @@ static bool get_remote_address(struct io_conn *conn,
|
||||
* inserting a delay, creating a trap for every author of network code
|
||||
* everywhere.
|
||||
*/
|
||||
static void set_tcp_no_delay(int fd)
|
||||
static void set_tcp_no_delay(const struct daemon *daemon, int fd)
|
||||
{
|
||||
int val = 1;
|
||||
|
||||
if (daemon->dev_keep_nagle)
|
||||
return;
|
||||
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) != 0) {
|
||||
status_broken("setsockopt TCP_NODELAY=1 fd=%u: %s",
|
||||
fd, strerror(errno));
|
||||
@@ -658,7 +662,7 @@ static struct io_plan *connection_in(struct io_conn *conn,
|
||||
|
||||
/* Don't try to set TCP options on UNIX socket! */
|
||||
if (conn_in_arg.addr.itype == ADDR_INTERNAL_WIREADDR)
|
||||
set_tcp_no_delay(io_conn_fd(conn));
|
||||
set_tcp_no_delay(daemon, io_conn_fd(conn));
|
||||
|
||||
conn_in_arg.daemon = daemon;
|
||||
conn_in_arg.is_websocket = false;
|
||||
@@ -1198,7 +1202,7 @@ static void try_connect_one_addr(struct connecting *connect)
|
||||
|
||||
/* Don't try to set TCP options on UNIX socket! */
|
||||
if (addr->itype == ADDR_INTERNAL_WIREADDR)
|
||||
set_tcp_no_delay(fd);
|
||||
set_tcp_no_delay(connect->daemon, fd);
|
||||
connect->connect_attempted = true;
|
||||
|
||||
/* This creates the new connection using our fd, with the initialization
|
||||
@@ -1685,7 +1689,8 @@ static void connect_init(struct daemon *daemon, const u8 *msg)
|
||||
&dev_throttle_gossip,
|
||||
&daemon->dev_no_reconnect,
|
||||
&daemon->dev_fast_reconnect,
|
||||
&dev_limit_connections_inflight)) {
|
||||
&dev_limit_connections_inflight,
|
||||
&daemon->dev_keep_nagle)) {
|
||||
/* This is a helper which prints the type expected and the actual
|
||||
* message, then exits (it should never be called!). */
|
||||
master_badmsg(WIRE_CONNECTD_INIT, msg);
|
||||
@@ -2555,6 +2560,7 @@ int main(int argc, char *argv[])
|
||||
daemon->custom_msgs = NULL;
|
||||
daemon->dev_exhausted_fds = false;
|
||||
daemon->dev_lightningd_is_slow = false;
|
||||
daemon->dev_keep_nagle = false;
|
||||
/* We generally allow 1MB per second per peer, except for dev testing */
|
||||
daemon->gossip_stream_limit = 1000000;
|
||||
daemon->scid_htable = new_htable(daemon, scid_htable);
|
||||
|
||||
@@ -371,6 +371,8 @@ struct daemon {
|
||||
bool dev_fast_reconnect;
|
||||
/* Don't complain about lightningd being unresponsive. */
|
||||
bool dev_lightningd_is_slow;
|
||||
/* Don't set TCP_NODELAY */
|
||||
bool dev_keep_nagle;
|
||||
};
|
||||
|
||||
/* Called by io_tor_connect once it has a connection out. */
|
||||
|
||||
@@ -28,6 +28,7 @@ msgdata,connectd_init,dev_throttle_gossip,bool,
|
||||
msgdata,connectd_init,dev_no_reconnect,bool,
|
||||
msgdata,connectd_init,dev_fast_reconnect,bool,
|
||||
msgdata,connectd_init,dev_limit_connections_inflight,bool,
|
||||
msgdata,connectd_init,dev_keep_nagle,bool,
|
||||
|
||||
# Connectd->master, here are the addresses I bound, can announce.
|
||||
msgtype,connectd_init_reply,2100
|
||||
|
||||
|
@@ -726,7 +726,8 @@ int connectd_init(struct lightningd *ld)
|
||||
ld->dev_throttle_gossip,
|
||||
!ld->reconnect,
|
||||
ld->dev_fast_reconnect,
|
||||
ld->dev_limit_connections_inflight);
|
||||
ld->dev_limit_connections_inflight,
|
||||
ld->dev_keep_nagle);
|
||||
|
||||
subd_req(ld->connectd, ld->connectd, take(msg), -1, 0,
|
||||
connect_init_done, NULL);
|
||||
|
||||
@@ -150,6 +150,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
|
||||
ld->dev_handshake_no_reply = false;
|
||||
ld->dev_strict_forwarding = false;
|
||||
ld->dev_limit_connections_inflight = false;
|
||||
ld->dev_keep_nagle = false;
|
||||
|
||||
/*~ We try to ensure enough fds for twice the number of channels
|
||||
* we start with. We have a developer option to change that factor
|
||||
|
||||
@@ -369,6 +369,9 @@ struct lightningd {
|
||||
/* Tell connectd to block more than 1 simultaneous connection attempt */
|
||||
bool dev_limit_connections_inflight;
|
||||
|
||||
/* Tell connectd we don't want TCP_NODELAY */
|
||||
bool dev_keep_nagle;
|
||||
|
||||
/* tor support */
|
||||
struct wireaddr *proxyaddr;
|
||||
bool always_use_proxy;
|
||||
|
||||
@@ -948,6 +948,10 @@ static void dev_register_opts(struct lightningd *ld)
|
||||
opt_set_charp, opt_show_charp,
|
||||
&ld->plugins->dev_save_io,
|
||||
"Directory to place all plugin notifications/hooks JSON into.");
|
||||
clnopt_noarg("--dev-keep-nagle", OPT_DEV,
|
||||
opt_set_bool,
|
||||
&ld->dev_keep_nagle,
|
||||
"Tell connectd not to set TCP_NODELAY.");
|
||||
/* This is handled directly in daemon_developer_mode(), so we ignore it here */
|
||||
clnopt_noarg("--dev-debug-self", OPT_DEV,
|
||||
opt_ignore,
|
||||
|
||||
@@ -4581,10 +4581,11 @@ def test_private_channel_no_reconnect(node_factory):
|
||||
assert only_one(l1.rpc.listpeers()['peers'])['connected'] is False
|
||||
|
||||
|
||||
@unittest.skipIf(VALGRIND, "We assume machine is reasonably fast")
|
||||
@pytest.mark.slow_test
|
||||
def test_no_delay(node_factory):
|
||||
"""Is our Nagle disabling for critical messages working?"""
|
||||
l1, l2 = node_factory.line_graph(2)
|
||||
l1, l2 = node_factory.line_graph(2, opts={'dev-keep-nagle': None,
|
||||
'may_reconnect': True})
|
||||
|
||||
scid = only_one(l1.rpc.listpeerchannels()['channels'])['short_channel_id']
|
||||
routestep = {
|
||||
@@ -4593,16 +4594,36 @@ def test_no_delay(node_factory):
|
||||
'delay': 5,
|
||||
'channel': scid
|
||||
}
|
||||
|
||||
# Test with nagle
|
||||
start = time.time()
|
||||
# If we were stupid enough to leave Nagle enabled, this would add 200ms
|
||||
# delays each way!
|
||||
for _ in range(100):
|
||||
phash = random.randbytes(32).hex()
|
||||
l1.rpc.sendpay([routestep], phash)
|
||||
with pytest.raises(RpcError, match="WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS"):
|
||||
l1.rpc.waitsendpay(phash)
|
||||
end = time.time()
|
||||
assert end < start + 100 * 0.5
|
||||
nagle_time = end - start
|
||||
|
||||
del l1.daemon.opts['dev-keep-nagle']
|
||||
del l2.daemon.opts['dev-keep-nagle']
|
||||
l1.restart()
|
||||
l2.restart()
|
||||
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
|
||||
|
||||
# Test without nagle
|
||||
start = time.time()
|
||||
for _ in range(100):
|
||||
phash = random.randbytes(32).hex()
|
||||
l1.rpc.sendpay([routestep], phash)
|
||||
with pytest.raises(RpcError, match="WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS"):
|
||||
l1.rpc.waitsendpay(phash)
|
||||
end = time.time()
|
||||
normal_time = end - start
|
||||
|
||||
# 100 round trips, average delay 1/2 of 200ms -> 10 seconds extra.
|
||||
# Make it half that for variance.
|
||||
assert normal_time < nagle_time - 100 * (0.2 / 2) / 2
|
||||
|
||||
|
||||
def test_listpeerchannels_by_scid(node_factory):
|
||||
|
||||
Reference in New Issue
Block a user