Improved block sync speed

-A number of functions have been rewritten to be more optimized and faster: calculate_total, is_unique, convert_to_satoshi, get_input_addresses, processVoutAddresses, prepare_vout, prepare_vin
-Txes are now written to database via bulk writes which helps improve the sync speed and also controls memory usage with batching to write data once a certain threshold is reached
-update_address function changed to update_addresses since it now bulk writes the addresses in batches to improve sync speed and also controls memory usage with batching to write data once a certain threshold is reached
-The syncLoop function has been completely removed from the project and replaced with async library loops or even normal "for" loops in some cases which greatly improves sync speeds over large batches of data
-Fixed an issue with the flattened count of txes that is saved to the coinstats collection which could save incorrectly when using more than 1 thread
-Fixed an issue with the block sync which caused an unwanted delay when syncing less blocks than the amount of threads used to sync the data
-Fixed an issue with vout data processing that could sometimes populate data out of order
-Added a new sync.batch_size setting used to determine how many records (txes, addresses, addresstxes) should be saved in a single database transaction
-Added a new wait_for_bulk_database_save setting used to increase the block sync speed at the cost of not returning any error msgs for data that failed to save
-get_input_addresses function no longer returns in the exports section of the explorer.js file since it is only referenced in that file
-Updated explorerspec tests to use the newest function changes for any tests that needed to be updated

Special thanks to Karzo from Pepecoin for help with the bulkwrite code changes!
This commit is contained in:
Joe Uhren
2025-02-02 19:10:17 -07:00
parent 0b0ef817f1
commit 3a2f679201
10 changed files with 966 additions and 867 deletions
+348 -138
View File
@@ -8,17 +8,17 @@ const async = require('async');
let stopSync = false;
let stackSizeErrorId = null;
function check_delete_tx(tx, block_height, tx_count, timeout, cb) {
function check_delete_tx(tx, block_height, timeout, cb) {
// check if the tx object exists and does not match the current block height
if (tx && tx.blockindex != block_height) {
// the transaction exists but does not match the correct block height, therefore it should be deleted
module.exports.delete_and_cleanup_tx(tx.txid, tx.blockindex, tx_count, timeout, function(updated_tx_count) {
module.exports.delete_and_cleanup_tx(tx.txid, tx.blockindex, timeout, function(updated_tx_count) {
// finished removing the transaction
return cb(updated_tx_count, true);
});
} else {
// tx dosn't exist or block heights match so nothing to do
return cb(tx_count, false);
return cb(0, false);
}
}
@@ -71,50 +71,112 @@ function hex_to_ascii(hex) {
return str;
}
function update_address(hash, blockheight, txid, amount, type, cb) {
let addr_inc = {}
function update_addresses(addresses, blockheight, txid, cb) {
const bulkAddresses = addresses.map((address) => {
return {
updateOne: {
filter: { a_id: address.hash },
update: {
$setOnInsert: { a_id: address.hash },
$inc: { sent: address.sent, balance: address.balance, received: address.received },
},
upsert: true
}
};
});
if (hash == 'coinbase')
addr_inc.sent = amount;
else {
if (type == 'vin') {
addr_inc.sent = amount;
addr_inc.balance = -amount;
} else {
addr_inc.received = amount;
addr_inc.balance = amount;
const bulkAddressTxes = addresses
.filter((address) => address.hash !== 'coinbase')
.map((address) => {
return {
updateOne: {
filter: { a_id: address.hash, txid: txid },
update: {
$setOnInsert: { a_id: address.hash, blockindex: blockheight, txid: txid },
$inc: { amount: (address.type == 'vin' ? -address.amount : address.amount) }
},
upsert: true
}
};
});
try {
let completed = 0;
let errorCalled = false;
// callback to run when one of the 2 table writes is complete
function onDone(err) {
// check if the previous process already returned an error
if (errorCalled) {
// stop if there was already an error
return;
} else {
// check if there was an error
if (err) {
// ensure the next data set knows there was already an error
errorCalled = true;
// return the error
return cb(err);
}
// increment the completed counter
completed += 1;
// check if both data sets completed
if (completed === 2) {
// finished updating address data
return cb();
}
}
}
// start processing both sets of data in parallel
processInBatches(Address, bulkAddresses, onDone);
processInBatches(AddressTx, bulkAddressTxes, onDone);
} catch (err) {
return cb(err);
}
}
function processInBatches(collection, data, cb) {
const batch_size = settings.sync.batch_size;
let index = 0;
function processNextBatch() {
// check if all records were saved
if (index >= data.length) {
// all records were saved
return cb();
}
// get the next batch of data
const batch = data.slice(index, index + batch_size);
// increment the index by the batch size
index += batch_size;
try {
// asynchronously write data to the collection database
collection.bulkWrite(batch, { ordered: false, writeConcern: { w: (settings.sync.wait_for_bulk_database_save ? 1 : 0) } }).then((result) => {
// process the next batch of records
processNextBatch();
}).catch((err) => {
console.log(err);
// process the next batch of records
processNextBatch();
});
} catch(err) {
console.log(err);
// process the next batch of records
processNextBatch();
}
}
Address.findOneAndUpdate({a_id: hash}, {
$inc: addr_inc
}, {
new: true,
upsert: true
}).then((address) => {
if (hash != 'coinbase') {
AddressTx.findOneAndUpdate({a_id: hash, txid: txid}, {
$inc: {
amount: addr_inc.balance
},
$set: {
a_id: hash,
blockindex: blockheight,
txid: txid
}
}, {
new: true,
upsert: true
}).then((addresstx) => {
return cb();
}).catch((err) => {
return cb(err);
});
} else
return cb();
}).catch((err) => {
return cb(err);
});
// start processing records
processNextBatch();
}
function finalize_update_tx_db(coin, check_only, end, txes, cb) {
@@ -143,49 +205,103 @@ function finalize_update_tx_db(coin, check_only, end, txes, cb) {
module.exports = {
save_tx: function(txid, blockheight, block, cb) {
lib.get_rawtransaction(txid, function(tx) {
if (tx && tx != `${settings.localization.ex_error}: ${settings.localization.check_console}`) {
lib.prepare_vin(tx, function(vin, tx_type_vin) {
lib.prepare_vout(tx.vout, txid, vin, ((!settings.blockchain_specific.zksnarks.enabled || typeof tx.vjoinsplit === 'undefined' || tx.vjoinsplit == null) ? [] : tx.vjoinsplit), function(vout, nvin, tx_type_vout) {
// check if vout is null which indicates an error
if (vout != null) {
lib.syncLoop(nvin.length, function (loop) {
const i = loop.iteration();
// check if address is inside an array
if (Array.isArray(nvin[i].addresses)) {
// extract the address
nvin[i].addresses = nvin[i].addresses[0];
}
update_address(nvin[i].addresses, blockheight, txid, nvin[i].amount, 'vin', function() {
loop.next();
});
}, function() {
lib.syncLoop(vout.length, function (subloop) {
const t = subloop.iteration();
try {
lib.get_rawtransaction(txid, function(tx) {
if (tx && tx != `${settings.localization.ex_error}: ${settings.localization.check_console}`) {
lib.prepare_vin(tx, function(vin, tx_type_vin) {
lib.prepare_vout(tx.vout, txid, vin, ((!settings.blockchain_specific.zksnarks.enabled || typeof tx.vjoinsplit === 'undefined' || tx.vjoinsplit == null) ? [] : tx.vjoinsplit), function(vout, nvin, tx_type_vout) {
// check if vout is null which indicates an error
if (vout != null) {
let addressBatch = [];
// add all vin addresses to the batch array
nvin.forEach(function(input) {
// check if address is inside an array
if (Array.isArray(vout[t].addresses)) {
if (Array.isArray(input.addresses)) {
// extract the address
vout[t].addresses = vout[t].addresses[0];
input.addresses = input.addresses[0];
}
if (vout[t].addresses) {
update_address(vout[t].addresses, blockheight, txid, vout[t].amount, 'vout', function() {
subloop.next();
});
} else
subloop.next();
}, function() {
lib.calculate_total(vout, function(total) {
var op_return = null;
var algo = null;
// check if a vin address exists
if (input.addresses) {
const index = lib.is_unique(addressBatch, input.addresses, 'hash');
let sent = 0;
let balance = 0;
if (input.addresses == 'coinbase')
sent = input.amount;
else {
sent = input.amount;
balance = -input.amount;
}
// check if the address already exists in the array
if (index == -1) {
// unique address
addressBatch.push({
hash: input.addresses,
amount: input.amount,
type: 'vin',
sent: sent,
balance: balance,
received: 0
});
} else {
// address already exists
addressBatch[index].amount += input.amount;
addressBatch[index].sent += sent;
addressBatch[index].balance += balance;
}
}
});
// add all vout addresses to the batch array
vout.forEach(function(output) {
// check if address is inside an array
if (Array.isArray(output.addresses)) {
// extract the address
output.addresses = output.addresses[0];
}
// check if a vout address exists
if (output.addresses) {
const index = lib.is_unique(addressBatch, output.addresses, 'hash');
const balance = output.amount;
const received = output.amount;
// check if the address already exists in the array
if (index == -1) {
// unique address
addressBatch.push({
hash: output.addresses,
amount: output.amount,
type: 'vout',
sent: 0,
balance: balance,
received: received
});
} else {
// address already exists
addressBatch[index].amount -= output.amount;
addressBatch[index].balance += balance;
addressBatch[index].received += received;
}
}
});
// save the addresses to the database
update_addresses(addressBatch, blockheight, txid, function(err) {
if (err)
return cb(err, false, null);
else {
const total = lib.calculate_total(vout);
let op_return = null;
let algo = null;
// check if the op_return value should be decoded and saved
if (settings.transaction_page.show_op_return) {
// loop through vout to find the op_return value
tx.vout.forEach(function (vout_data) {
tx.vout.forEach(function(vout_data) {
// check if the op_return value exists
if (vout_data.scriptPubKey != null && vout_data.scriptPubKey.asm != null && vout_data.scriptPubKey.asm.indexOf('OP_RETURN') > -1) {
// decode the op_return value
@@ -200,7 +316,8 @@ module.exports = {
algo = block[settings.block_page.multi_algorithm.key_name];
}
const newTx = new Tx({
// return the transaction data
return cb(null, vout.length > 0, new Tx({
txid: tx.txid,
vin: (vin == null || vin.length == 0 ? [] : nvin),
vout: vout,
@@ -211,30 +328,27 @@ module.exports = {
tx_type: (tx_type_vout == null ? tx_type_vin : tx_type_vout),
op_return: op_return,
algo: algo
});
newTx.save().then(() => {
return cb(null, vout.length > 0);
}).catch((err) => {
return cb(err, false);
});
});
}));
}
});
});
} else {
// create a custom error that will be specifically checked for later (NOTE: tx_type_vout contains the error code in this special case)
const customError = new Error(tx_type_vout);
} else {
// create a custom error that will be specifically checked for later
// NOTE: tx_type_vout contains the error code in this special case
const customError = new Error(tx_type_vout);
customError.code = tx_type_vout;
customError.code = tx_type_vout;
// return the custom error
return cb(customError, false);
}
// return the custom error
return cb(customError, false, null);
}
});
});
});
} else
return cb('tx not found: ' + txid, false);
});
} else
return cb('tx not found: ' + txid, false, null);
});
} catch(err) {
return cb(`Error querying database for txid ${txid}: ${err}`, false, null);
}
},
// updates tx & address balances
@@ -329,6 +443,84 @@ module.exports = {
lib.get_block(blockhash, function(block) {
if (block) {
let tx_counter = 0;
let txBatch = [];
// create a queue for batching txes for this block
const txBatchQueue = async.queue(function(tx, done) {
// add the tx to an array
txBatch.push(tx);
// check if the batch of txes should be saved
if (txBatch.length >= settings.sync.batch_size) {
// save the current batch of txes to the database now
flushTxBatch(done);
} else {
// continue without saving txes
done();
}
}, 1);
// add a function used to bulkWrite txes to the database
function flushTxBatch(txBatchCallback) {
// copy current batch of txes to a local variable
const localTxBatch = txBatch;
// clear the global array of batched txes
txBatch = [];
// check if there are actually any txes to save
if (!localTxBatch || localTxBatch.length === 0) {
// no txes to save
return txBatchCallback();
} else {
// get the transaction batch ready to bulk update
const bulkTxes = localTxBatch.map(tx => {
// convert tx to plain JS object
const plainTx = tx.toObject();
// remove the _id field to prevent issues with some blockchains that can reuse non-standard txids
delete plainTx._id;
return {
updateOne: {
filter: { txid: plainTx.txid },
update: [
{
$replaceWith: {
$cond: {
if: { $gt: [ plainTx.blockindex, { $ifNull: ["$blockindex", -1] } ] },
then: {
$mergeObjects: [
// if a doc exists, keep that _id
{ _id: "$_id" },
// overwrite everything else with plainTx
plainTx
]
},
else: "$$ROOT"
}
}
}
],
upsert: true
}
};
});
try {
// write the transactions to the database
Tx.bulkWrite(bulkTxes, { ordered: false, writeConcern: { w: (settings.sync.wait_for_bulk_database_save ? 1 : 0) } }).then(() => {
return txBatchCallback();
}).catch((err) => {
console.log(err);
return txBatchCallback();
});
} catch(err) {
console.log(err);
return txBatchCallback();
}
}
}
// loop through all txes in this block
async.eachLimit(block.tx, parallel_tasks, function(txid, next_tx) {
@@ -350,42 +542,54 @@ module.exports = {
}, timeout);
} else {
// check if the transaction exists but doesn't match the current block height
check_delete_tx(tx, block_height, txes, timeout, function(updated_txes, tx_deleted) {
check_delete_tx(tx, block_height, timeout, function(updated_txes, tx_deleted) {
// update the running tx count
txes = updated_txes;
txes += updated_txes;
// check if this tx should be added to the local database
if (tx_deleted || !tx) {
// save the transaction to local database
module.exports.save_tx(txid, block_height, block, function(err, tx_has_vout) {
module.exports.save_tx(txid, block_height, block, function(err, tx_has_vout, newTx) {
if (err) {
// check the error code
if (err.code == 'StackSizeError') {
// ensure the process halts after stopping all sync threads
stackSizeErrorId = txid;
} else if (err.code === 11000) {
// output a nicer error msg for the 11000 error code "duplicate key error collection" which can happen in some blockchains with non-standard txids being reused
console.log(`${settings.localization.ex_warning}: ${block_height}: ${txid} already exists`);
} else
console.log(err);
}
else
setTimeout(function() {
tx = null;
tx_counter--;
// check if the script is stopping
if ((stopSync && check_only != 2) || stackSizeErrorId) {
// stop the loop
next_tx({});
} else
next_tx();
}, timeout);
} else {
console.log('%s: %s', block_height, txid);
if (tx_has_vout)
txes++;
if (tx_has_vout)
txes++;
setTimeout(function() {
tx = null;
tx_counter--;
// add the tx to a queue
txBatchQueue.push(newTx, function(queue_err) {
setTimeout(function() {
tx = null;
tx_counter--;
// check if the script is stopping
if ((stopSync && check_only != 2) || stackSizeErrorId) {
// stop the loop
next_tx({});
} else
next_tx();
}, timeout);
// check if the script is stopping
if ((stopSync && check_only != 2) || stackSizeErrorId) {
// stop the loop
next_tx({});
} else
next_tx();
}, timeout);
});
}
});
} else {
// skip adding the current tx
@@ -437,22 +641,28 @@ module.exports = {
blockhash = null;
block = null;
// reset the slot in the block array back to 0
block_numbers[slotIndex] = 0;
// save the remaining txes for this block
flushTxBatch(function(batch_err) {
if (batch_err)
console.error(batch_err);
// check if the script is stopping
if ((stopSync && check_only != 2) || stackSizeErrorId) {
// stop the loop
finished_tasks++;
next_block({});
} else {
// check if the last block is finished or in process and increment the finished counter
if (processed_last_block)
// reset the slot in the block array back to 0
block_numbers[slotIndex] = 0;
// check if the script is stopping
if ((stopSync && check_only != 2) || stackSizeErrorId) {
// stop the loop
finished_tasks++;
next_block({});
} else {
// check if the last block is finished or in process and increment the finished counter
if (processed_last_block)
finished_tasks++;
// proceed to next block
next_block();
}
// proceed to next block
next_block();
}
});
}
}, timeout);
});
@@ -516,7 +726,7 @@ module.exports = {
// check if all threads have properly finished or else the retry limit has been reached
// NOTE: the retry limit should never need to be used but is put in place to prevent an
// infinite loop just in case something goes very wrong
if (finished_tasks === parallel_tasks || retryAttempts >= retryLimit) {
if (finished_tasks === ((end - start + 1) >= parallel_tasks ? parallel_tasks : (end - start + 1)) || retryAttempts >= retryLimit) {
// stop waiting for all threads to finish
clearInterval(handle);
@@ -539,7 +749,9 @@ module.exports = {
});
},
delete_and_cleanup_tx: function(txid, block_height, tx_count, timeout, cb) {
delete_and_cleanup_tx: function(txid, block_height, timeout, cb) {
let tx_count = 0;
// lookup all address tx records associated with the current tx
AddressTx.find({txid: txid}).exec().then((address_txes) => {
if (address_txes.length == 0) {
@@ -704,14 +916,12 @@ module.exports = {
}
// loop through the address txes
lib.syncLoop(addressTxArray.length, function(address_loop) {
var a = address_loop.iteration();
async.eachSeries(addressTxArray, function(addressTx, address_loop) {
// fix the balance, sent and received data for the current address
fix_address_data(addressTxArray[a], function() {
fix_address_data(addressTx, function() {
setTimeout(function() {
// move to the next address record
address_loop.next();
address_loop();
}, timeout);
});
}, function() {