diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 92d8d732e..383665330 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -185,23 +185,40 @@ static u8 *new_uuid_record(const tal_t *ctx, int fd, u64 *off) for (size_t i = 0; i < tal_bytelen(uuid); i++) uuid[i] = pseudorand(256); - append_msg(fd, towire_gossip_store_uuid(tmpctx, uuid), 0, off, NULL); + if (!append_msg(fd, towire_gossip_store_uuid(tmpctx, uuid), 0, off, NULL)) + return tal_free(uuid); /* append_msg does not change file offset, so do that now. */ lseek(fd, 0, SEEK_END); return uuid; } -/* Read gossip store entries, copy non-deleted ones. Check basic - * validity, but this code is written as simply and robustly as - * possible! - * - * Returns fd of new store, or -1 if it was grossly invalid. - */ -static int gossip_store_compact(struct daemon *daemon, +static int make_new_gossip_store(u64 *total_len) +{ + u8 version = GOSSIP_STORE_VER; + int new_fd = open(GOSSIP_STORE_TEMP_FILENAME, O_RDWR|O_TRUNC|O_CREAT, 0600); + + *total_len = sizeof(version); + + if (new_fd < 0 + || !write_all(new_fd, &version, sizeof(version)) + || !new_uuid_record(tmpctx, new_fd, total_len)) { + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Creating new gossip_store file: %s", + strerror(errno)); + } + if (rename(GOSSIP_STORE_TEMP_FILENAME, GOSSIP_STORE_FILENAME) != 0) { + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Renaming gossip_store failed: %s", + strerror(errno)); + } + return new_fd; +} + +/* If this returns -1, we cannot upgrade. */ +static int gossip_store_upgrade(struct daemon *daemon, u64 *total_len, bool *populated) { - size_t cannounces = 0, cupdates = 0, nannounces = 0, deleted = 0; int old_fd, new_fd; u64 old_len, cur_off; struct gossip_hdr hdr; @@ -209,11 +226,49 @@ static int gossip_store_compact(struct daemon *daemon, struct stat st; struct timemono start = time_mono(); const char *bad; - u8 *uuid = NULL; + u8 *uuid; - *populated = false; - old_len = 1; + old_fd = open(GOSSIP_STORE_FILENAME, O_RDWR); + if (old_fd == -1) { + if (errno == ENOENT) { + *populated = false; + return make_new_gossip_store(total_len); + } + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Reading gossip_store file: %s", + strerror(errno)); + } + + if (!read_all(old_fd, &oldversion, sizeof(oldversion))) { + status_broken("Cannot read gossip_store version"); + goto upgrade_failed; + } + + if (fstat(old_fd, &st) != 0) { + status_broken("Could not stat gossip_store: %s", + strerror(errno)); + goto upgrade_failed; + } + + /* If we have any contents (beyond uuid), and the file is less + * than 1 hour old, say "seems good" */ + *populated = (st.st_mtime > time_now().ts.tv_sec - 3600 + && st.st_size > 1 + sizeof(hdr) + 2 + 32); + + /* No upgrade necessary? We're done. */ + if (oldversion == GOSSIP_STORE_VER) { + *total_len = st.st_size; + return old_fd; + } + + if (!can_upgrade(oldversion)) { + status_unusual("Cannot upgrade gossip_store version %u", + oldversion); + goto upgrade_failed; + } + + /* OK, create new gossip store to convert into */ new_fd = open(GOSSIP_STORE_TEMP_FILENAME, O_RDWR|O_TRUNC|O_CREAT, 0600); if (new_fd < 0) { status_failed(STATUS_FAIL_INTERNAL_ERROR, @@ -226,36 +281,20 @@ static int gossip_store_compact(struct daemon *daemon, "Writing new gossip_store file: %s", strerror(errno)); } + *total_len = sizeof(version); - - /* RDWR since we add closed marker at end! */ - old_fd = open(GOSSIP_STORE_FILENAME, O_RDWR); - if (old_fd == -1) { - if (errno == ENOENT) - goto rename_new; - - status_failed(STATUS_FAIL_INTERNAL_ERROR, - "Reading gossip_store file: %s", - strerror(errno)); - }; - - if (fstat(old_fd, &st) != 0) { - status_broken("Could not stat gossip_store: %s", - strerror(errno)); - goto rename_new; - } - - if (!read_all(old_fd, &oldversion, sizeof(oldversion)) - || (oldversion != version && !can_upgrade(oldversion))) { - status_broken("gossip_store_compact: bad version"); - goto rename_new; - } - cur_off = old_len = sizeof(oldversion); - /* Make up uuid for old version */ - if (oldversion < 16) - uuid = new_uuid_record(tmpctx, new_fd, total_len); + /* Create a fresh uuid, make sure we're after it. */ + uuid = new_uuid_record(tmpctx, new_fd, total_len); + if (!uuid) { + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Writing new gossip_store file: %s", + strerror(errno)); + } + assert(*total_len == lseek(new_fd, 0, SEEK_END)); + /* Move to the end (new_uuid_record uses pwrite, not write) */ + lseek(new_fd, *total_len, SEEK_SET); /* Read everything, write non-deleted ones to new_fd. If something goes wrong, * we end up with truncated store. */ @@ -270,14 +309,13 @@ static int gossip_store_compact(struct daemon *daemon, status_unusual("gossip_store_compact: store ends early at %"PRIu64, old_len); tal_free(msg); - goto rename_new; + goto upgrade_failed_close_new; } cur_off = old_len; old_len += sizeof(hdr) + msglen; if (be16_to_cpu(hdr.flags) & GOSSIP_STORE_DELETED_BIT) { - deleted++; tal_free(msg); continue; } @@ -300,10 +338,8 @@ static int gossip_store_compact(struct daemon *daemon, } /* It can tell us to delete record entirely. */ - if (msg == NULL) { - deleted++; + if (msg == NULL) continue; - } /* Recalc msglen and header */ msglen = tal_bytelen(msg); @@ -314,32 +350,19 @@ static int gossip_store_compact(struct daemon *daemon, /* Don't write out old tombstones */ if (fromwire_peektype(msg) == WIRE_GOSSIP_STORE_DELETE_CHAN) { - deleted++; tal_free(msg); continue; } - switch (fromwire_peektype(msg)) { - case WIRE_CHANNEL_ANNOUNCEMENT: - cannounces++; - break; - case WIRE_CHANNEL_UPDATE: - cupdates++; - break; - case WIRE_NODE_ANNOUNCEMENT: - nannounces++; - break; - case WIRE_GOSSIP_STORE_UUID: - uuid = tal_arr(tmpctx, u8, 32); - if (!fromwire_gossip_store_uuid(msg, uuid)) { - bad = "Corrupt uuid"; - goto badmsg; - } - break; + /* Ignore uuid: fresh file will have fresh uuid */ + if (fromwire_peektype(msg) == WIRE_GOSSIP_STORE_UUID) { + tal_free(msg); + continue; } if (!write_all(new_fd, &hdr, sizeof(hdr)) || !write_all(new_fd, msg, msglen)) { + /* We fail hard here, since we're probably out of space. */ status_failed(STATUS_FAIL_INTERNAL_ERROR, "gossip_store_compact: writing msg len %zu to new store: %s", msglen, strerror(errno)); @@ -350,23 +373,6 @@ static int gossip_store_compact(struct daemon *daemon, assert(*total_len == lseek(new_fd, 0, SEEK_END)); - /* If we have any contents, and the file is less than 1 hour - * old, say "seems good" */ - if (st.st_mtime > clock_time().ts.tv_sec - 3600 && *total_len > 1) { - *populated = true; - } - -rename_new: - /* If we didn't copy a uuid, do so now. */ - if (!uuid) { - if (lseek(new_fd, 0, SEEK_END) != 1) { - bad = tal_fmt(tmpctx, - "missing uuid in version %u", oldversion); - goto badmsg; - } - uuid = new_uuid_record(tmpctx, new_fd, total_len); - } - if (rename(GOSSIP_STORE_TEMP_FILENAME, GOSSIP_STORE_FILENAME) != 0) { status_failed(STATUS_FAIL_INTERNAL_ERROR, "gossip_store_compact: rename failed: %s", @@ -374,26 +380,20 @@ rename_new: } /* Create end marker now new file exists. */ - if (old_fd != -1) { - /* FIXME: real uuid! */ - append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len, uuid), - 0, &old_len, NULL); - close(old_fd); - } + append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len, uuid), 0, &old_len, NULL); + close(old_fd); - status_debug("Store compact time: %"PRIu64" msec", + status_debug("Time to convert version %u store: %"PRIu64" msec", + oldversion, time_to_msec(timemono_between(time_mono(), start))); - status_debug("gossip_store: Read %zu/%zu/%zu/%zu cannounce/cupdate/nannounce/delete from store in %"PRIu64" bytes, now %"PRIu64" bytes (populated=%s)", - cannounces, cupdates, nannounces, deleted, - old_len, *total_len, - *populated ? "true": "false"); return new_fd; badmsg: - /* Caller will presumably try gossip_store_reset. */ status_broken("gossip_store: %s (offset %"PRIu64").", bad, cur_off); - close(old_fd); +upgrade_failed_close_new: close(new_fd); +upgrade_failed: + close(old_fd); return -1; } @@ -411,7 +411,7 @@ struct gossip_store *gossip_store_new(const tal_t *ctx, struct gossip_store *gs = tal(ctx, struct gossip_store); gs->daemon = daemon; - gs->fd = gossip_store_compact(daemon, &gs->len, populated); + gs->fd = gossip_store_upgrade(daemon, &gs->len, populated); if (gs->fd < 0) return tal_free(gs); tal_add_destructor(gs, gossip_store_destroy); diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 5aa97c96a..c6cf48625 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -458,12 +458,11 @@ static void gossmap_add_dying_chan(struct short_channel_id scid, static bool setup_gossmap(struct gossmap_manage *gm, struct daemon *daemon) { - u64 expected_len; + u64 expected_len, num_live, num_dead; gm->dying_channels = tal_arr(gm, struct chan_dying, 0); - /* This does simple sanitry checks, compacts, and creates if - * necessary */ + /* This does creates or converts if necessary. */ gm->gs = gossip_store_new(gm, daemon, &gm->gossip_store_populated); @@ -483,6 +482,10 @@ static bool setup_gossmap(struct gossmap_manage *gm, return false; } gm->last_writes = tal_arr(gm, const u8 *, 0); + + gossmap_stats(gm->raw_gossmap, &num_live, &num_dead); + status_debug("gossip_store: %"PRIu64" live records, %"PRIu64" deleted", + num_live, num_dead); return true; } diff --git a/tests/test_gossip.py b/tests/test_gossip.py index e6a2bb075..440a9a7c1 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1207,8 +1207,9 @@ def test_gossip_store_load(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/0 cannounce/cupdate/nannounce/delete from store in 832 bytes, now 824 bytes')) - assert not l1.daemon.is_in_log('gossip_store.*truncating') + l1.daemon.logsearch_start = 0 + l1.daemon.wait_for_log("gossipd: Time to convert version 12 store") + l1.daemon.wait_for_log("gossipd: gossip_store: 3 live records, 0 deleted") def test_gossip_store_v10_upgrade(node_factory): @@ -1275,8 +1276,10 @@ def test_gossip_store_load_announce_before_update(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/1 cannounce/cupdate/nannounce/delete from store in 982 bytes, now 824 bytes')) - assert not l1.daemon.is_in_log('gossip_store.*truncating') + l1.daemon.logsearch_start = 0 + l1.daemon.wait_for_log("gossipd: Time to convert version 12 store: [0-9]* msec") + l1.daemon.wait_for_log("gossipd: gossip_store: 3 live records, 0 deleted") + assert not l1.daemon.is_in_log('gossip_store.*corrupt') def test_gossip_store_load_amount_truncated(node_factory): @@ -1294,7 +1297,7 @@ def test_gossip_store_load_amount_truncated(node_factory): # May preceed the Started msg waited for in 'start'. wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store only processed 47 bytes of 491 \(expected 491\)')) wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store: Moving to gossip_store.corrupt')) - wait_for(lambda: l1.daemon.is_in_log(r'gossip_store: Read 0/0/0/0 cannounce/cupdate/nannounce/delete from store in 1 bytes, now 47 bytes \(populated=false\)')) + wait_for(lambda: l1.daemon.is_in_log('gossip_store: 0 live records, 0 deleted')) assert os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) @@ -1609,7 +1612,7 @@ def setup_gossip_store_test(node_factory, bitcoind, opts=None): def test_gossip_store_corrupt(node_factory, bitcoind): - l2 = setup_gossip_store_test(node_factory, bitcoind, opts=[{}, {'broken_log': 'gossipd:.*bad version'}, {}]) + l2 = setup_gossip_store_test(node_factory, bitcoind, opts=[{}, {'broken_log': 'gossip_store: Moving to gossip_store.corrupt'}, {}]) l2.stop() # It should truncate this, not leave junk! @@ -1617,11 +1620,9 @@ def test_gossip_store_corrupt(node_factory, bitcoind): f.write(bytearray.fromhex("07deadbeef")) l2.start() - wait_for(lambda: l2.daemon.is_in_log('gossip_store: Read ')) - assert l2.daemon.is_in_log('gossip_store_compact: bad version') - - # Will simply overwrite, due to old version. - assert not os.path.exists(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) + # Complains, moves. + wait_for(lambda: l2.daemon.is_in_log('gossipd: Cannot upgrade gossip_store version 7')) + assert os.path.exists(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) def test_gossip_store_load_complex(node_factory, bitcoind): @@ -1629,7 +1630,7 @@ def test_gossip_store_load_complex(node_factory, bitcoind): l2.restart() - wait_for(lambda: l2.daemon.is_in_log('gossip_store: Read ')) + wait_for(lambda: l2.daemon.is_in_log('gossip_store: 9 live records, 2 deleted')) def test_gossip_store_load_no_channel_update(node_factory): @@ -1658,11 +1659,14 @@ def test_gossip_store_load_no_channel_update(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/0/1/0 cannounce/cupdate/nannounce/delete from store in 682 bytes, now 674 bytes')) + l1.daemon.logsearch_start = 0 + l1.daemon.wait_for_log("gossipd: Time to convert version 13 store: [0-9]* msec") + # One is the uuid we insert! + l1.daemon.wait_for_log("gossipd: gossip_store: 2 live records, 0 deleted") assert not os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) -def test_gossip_store_compact_on_load(node_factory, bitcoind): +def test_gossip_store_load_uncompacted(node_factory, bitcoind): l2 = setup_gossip_store_test(node_factory, bitcoind) gs_path = os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store') @@ -1673,7 +1677,7 @@ def test_gossip_store_compact_on_load(node_factory, bitcoind): l2.restart() # These appear before we're fully started, so will already in log: - assert l2.daemon.is_in_log('gossip_store: Read 2/4/3/2 cannounce/cupdate/nannounce/delete from store') + assert l2.daemon.is_in_log('gossip_store: 9 live records, 2 deleted') def test_gossip_announce_invalid_block(node_factory, bitcoind):