From 3cd589d2ce0761ccd19ebc486a87967daff4841f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 8 May 2025 12:20:44 +0930 Subject: [PATCH] bench: why does sending payments take so long under CI? To run, use: VALGRIND=0 TEST_BENCH=1 eatmydata pytest tests/test_connection.py::test_bench Example of running on my laptop (without --enable-debugbuild, over of 10 runs): FAILED tests/test_connection.py::test_bench - assert 47.403406-49.191765(48.343+/-0.48) == 0 With eatmydata: FAILED tests/test_connection.py::test_bench - assert 25.867860-27.549878(26.5762+/-0.5) == 0 You can also run perf on l1 once it's running: perf record --call-graph dwarf -q -p $(cat /tmp/ltests-*/test_bench_1/lightning-1/lightningd-regtest.pid) Then ^C after 10 seconds and run "perf report". Things which stood out: 1. Tracing in db_exec_prepared_v2 (fixed in another PR) 31.12% 0.04% lightningd lightningd [.] db_exec_prepared_v2 - 31.08% db_exec_prepared_v2 + 22.96% db_sqlite3_exec + 4.46% trace_span_end + 1.77% trace_span_start + 1.11% trace_span_tag + 0.72% tal_free 2. Logging: - 16.03% logv - 8.15% maybe_print - log_to_files + 4.51% __GI__IO_fflush (inlined) + 1.97% tal_fmt_ + 0.51% __GI___strftime_l (inlined) 3. Notification (when nothing is listening) in notify_log: - 6.84% maybe_notify_log - notify_log + 3.37% notify_send + 1.75% notify_start + 1.71% log_notification_serialize 0.56% new_log_entry Signed-off-by: Rusty Russell --- tests/test_connection.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_connection.py b/tests/test_connection.py index 9b152bb44..1118bea51 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -4846,6 +4846,41 @@ def test_no_delay(node_factory): assert end < start + 100 * 0.5 +@unittest.skipIf(os.getenv('TEST_BENCH', '0') == '0', "For profiling") +def test_bench(node_factory): + """Is our Nagle disabling for critical messages working?""" + l1, l2 = node_factory.get_nodes(2, opts={'start': False, + 'commit-time': 0}) + + # memleak detection plays havoc with profiles. + del l1.daemon.env["LIGHTNINGD_DEV_MEMLEAK"] + del l2.daemon.env["LIGHTNINGD_DEV_MEMLEAK"] + + l1.start() + l2.start() + node_factory.join_nodes([l1, l2]) + + scid = only_one(l1.rpc.listpeerchannels()['channels'])['short_channel_id'] + routestep = { + 'amount_msat': 100, + 'id': l2.info['id'], + 'delay': 5, + 'channel': scid + } + + start = time.time() + # If we were stupid enough to leave Nagle enabled, this would add 200ms + # seconds delays each way! + for _ in range(1000): + phash = random.randbytes(32).hex() + l1.rpc.sendpay([routestep], phash) + with pytest.raises(RpcError, match="WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS"): + l1.rpc.waitsendpay(phash) + end = time.time() + duration = end - start + assert duration == 0 + + def test_listpeerchannels_by_scid(node_factory): l1, l2, l3 = node_factory.line_graph(3, announce_channels=False)