hsmd: fix HSM sent an unknown message type error

When we enter the wrong passphrase, hsmd crashes like this with an unknown message type:

lightning_hsmd: Failed to load hsm_secret: Wrong passphrase (version v25.12rc1-7-g7713a42-modded)
0x102ba44bf ???
        send_backtrace+0x4f:0
0x102b0900f status_failed
        common/status.c:207
0x102af1a37 hsmd_send_init_reply_failure
        hsmd/hsmd.c:301
0x102af1497 load_hsm
        hsmd/hsmd.c:446
0x102af1497 init_hsm
        hsmd/hsmd.c:548
0x102b29e63 next_plan
        ccan/ccan/io/io.c:60
0x102b29e63 do_plan
        ccan/ccan/io/io.c:422
0x102b29d8b io_ready
        ccan/ccan/io/io.c:439
0x102b2b4bf io_loop
        ccan/ccan/io/poll.c:470
0x102af0a83 main
        hsmd/hsmd.c:886
lightningd: HSM sent unknown message type

This change swaps write_all() for wire_sync_write(), because write_all() omits the wire protocol length prefix. We also no longer send a stack trace if the user has entered the wrong passphrase; hsmd exits cleanly instead.
This commit is contained in:
Sangbida Chaudhuri
2025-11-26 10:00:11 +10:30
committed by Rusty Russell
parent 192fc6ae60
commit 4c7e2d449d
2 changed files with 55 additions and 1 deletions

View File

@@ -33,6 +33,7 @@
#include <wally_bip39.h>
#include <wally_core.h>
#include <wire/wire_io.h>
#include <wire/wire_sync.h>
/*~ Each subdaemon is started with stdin connected to lightningd (for status
* messages), and stderr untouched (for emergency printing). File descriptors
@@ -291,7 +292,7 @@ static void hsmd_send_init_reply_failure(enum hsm_secret_error error_code, enum
msg = towire_hsmd_init_reply_failure(NULL, error_code, formatted_msg);
if (msg) {
/* Send directly to lightningd via REQ_FD */
write_all(REQ_FD, msg, tal_bytelen(msg));
wire_sync_write(REQ_FD, msg);
tal_free(msg);
}
@@ -441,6 +442,15 @@ static void load_hsm(const char *passphrase)
tal_bytelen(hsm_secret_contents),
passphrase, &err);
if (!hsms) {
/* Wrong passphrase is user error, not internal error - exit cleanly */
if (err == HSM_SECRET_ERR_WRONG_PASSPHRASE) {
u8 *msg = towire_hsmd_init_reply_failure(NULL, err,
tal_fmt(tmpctx,
"Failed to load hsm_secret: %s",
hsm_secret_error_str(err)));
wire_sync_write(REQ_FD, msg);
exit(0);
}
hsmd_send_init_reply_failure(err, STATUS_FAIL_INTERNAL_ERROR,
"Failed to load hsm_secret: %s", hsm_secret_error_str(err));
}

View File

@@ -2492,3 +2492,47 @@ def test_pending_payments_cleanup(node_factory, bitcoind):
l1 = node_factory.get_node(dbfile='l1-pending-sendpays-with-no-htlc.sqlite3.xz', options={'database-upgrade': True})
assert [p['status'] for p in l1.rpc.listsendpays()['payments']] == ['failed', 'pending']
assert [p['status'] for p in l1.rpc.listpays()['pays']] == ['pending']
@unittest.skipIf(VALGRIND, "It does not play well with prompt and key derivation.")
def test_hsm_wrong_passphrase_crash(node_factory):
    """Test that hsmd handles a wrong passphrase gracefully without crashing.

    This test reproduces a bug where hsmd would crash with
    "HSM sent unknown message type" when a wrong passphrase was provided.
    The issue was that hsmd_send_init_reply_failure was using write_all()
    instead of wire_sync_write(), missing the length prefix.

    The test first generates a passphrase-protected hsm_secret with hsmtool,
    then starts the node, answers the passphrase prompt with a different
    passphrase, and checks what the daemon printed on stderr.
    """
    l1 = node_factory.get_node(start=False, expect_fail=True)
    hsm_path = os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, "hsm_secret")
    os.remove(hsm_path)

    # Create hsm_secret with a passphrase
    passphrase = "correct_passphrase"
    mnemonic = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about"
    hsmtool = HsmTool(node_factory.directory, "generatehsm", hsm_path)
    master_fd, slave_fd = os.openpty()
    try:
        hsmtool.start(stdin=slave_fd)
        hsmtool.wait_for_log(r"Introduce your BIP39 word list")
        write_all(master_fd, f"{mnemonic}\n".encode("utf-8"))
        hsmtool.wait_for_log(r"Enter your passphrase:")
        write_all(master_fd, f"{passphrase}\n".encode("utf-8"))
        assert hsmtool.proc.wait(WAIT_TIMEOUT) == 0
    finally:
        # Close the pty even when a wait or assert above fails, so a failing
        # run does not leak descriptors into the rest of the test session.
        os.close(master_fd)
        os.close(slave_fd)

    # Try to start with wrong passphrase
    l1.daemon.opts["hsm-passphrase"] = None
    master_fd2, slave_fd2 = os.openpty()
    try:
        l1.daemon.start(stdin=slave_fd2, wait_for_initialized=False, stderr_redir=True)
        l1.daemon.wait_for_log("Enter hsm_secret passphrase:")
        write_all(master_fd2, "wrong_passphrase\n".encode("utf-8"))

        # Should fail gracefully with proper error message, not "unknown message type"
        l1.daemon.wait()
        assert l1.daemon.is_in_stderr("Failed to load hsm_secret: Wrong passphrase")
        assert not l1.daemon.is_in_stderr("HSM sent unknown message type")
        assert not l1.daemon.is_in_stderr("send_backtrace")  # No backtrace for user error
    finally:
        # Same reasoning as above: never leave the second pty open on failure.
        os.close(master_fd2)
        os.close(slave_fd2)