perf(sha256): add ARMv8 2-way interleaved transform and scan_4way_direct
Process two independent SHA256 chains simultaneously to hide the 2-cycle latency of vsha256hq_u32 on Cortex-A76, approaching full throughput. Also reduce memcpy traffic from 512 to ~192 bytes per 4-nonce group by reusing block buffers, and add scan_4way_direct so that inner-loop calls no longer go through pthread_once (and its LDAR barrier).
This commit is contained in:
@@ -181,6 +181,41 @@ static int test_sha256d80_4way_100k_nonces(void) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int test_sha256d80_scan_4way_direct(void) {
|
||||
uint8_t header76[76];
|
||||
sha256d80_midstate_t mid;
|
||||
sha256_state_t st_ref[4];
|
||||
sha256_state_t st_direct[4];
|
||||
uint32_t all_max[8];
|
||||
uint32_t mask_ref;
|
||||
uint32_t mask_direct;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 76; i++) {
|
||||
header76[i] = (uint8_t)((i * 13 + 7) & 0xFF);
|
||||
}
|
||||
for (i = 0; i < 8; i++) {
|
||||
all_max[i] = 0xFFFFFFFFU;
|
||||
}
|
||||
|
||||
sha256d80_midstate_init(&mid, header76);
|
||||
sha256_backend_ensure_init();
|
||||
|
||||
mask_ref = sha256d80_scan_4way(&mid, 0x11223344U, all_max, st_ref);
|
||||
mask_direct = sha256d80_scan_4way_direct(&mid, 0x11223344U, all_max, st_direct);
|
||||
|
||||
if (mask_ref != mask_direct) {
|
||||
fprintf(stderr, "[test_sha256_backend] scan_4way_direct mask mismatch ref=%u direct=%u\n",
|
||||
mask_ref, mask_direct);
|
||||
return 0;
|
||||
}
|
||||
if (memcmp(st_ref, st_direct, sizeof(st_ref)) != 0) {
|
||||
fprintf(stderr, "[test_sha256_backend] scan_4way_direct state mismatch\n");
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int test_sha256d80_scan_hitmask_basic(void) {
|
||||
uint8_t header76[76];
|
||||
sha256d80_midstate_t mid;
|
||||
@@ -219,6 +254,9 @@ int main(void) {
|
||||
if (!test_sha256d80_4way_100k_nonces()) {
|
||||
return 1;
|
||||
}
|
||||
if (!test_sha256d80_scan_4way_direct()) {
|
||||
return 1;
|
||||
}
|
||||
if (!test_sha256d80_scan_hitmask_basic()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user