Skip to content

Commit 31173b5

Browse files
committed
Improved OpenCL performance
Introduced a uint256 datatype which replaces use of 8-word arrays.
1 parent a47023a commit 31173b5

File tree

8 files changed

+1118
-921
lines changed

CLKeySearchDevice/CLKeySearchDevice.cpp

Lines changed: 39 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
extern char _bitcrack_cl[];
88

99
typedef struct {
10-
int thread;
11-
int block;
1210
int idx;
1311
bool compressed;
1412
unsigned int x[8];
@@ -36,7 +34,7 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer
3634
{
3735
_threads = threads;
3836
_blocks = blocks;
39-
_pointsPerThread = pointsPerThread;
37+
_points = pointsPerThread * threads * blocks;
4038
_device = (cl_device_id)device;
4139

4240

@@ -145,7 +143,7 @@ void CLKeySearchDevice::initializeBloomFilter(const std::vector<struct hash160>
145143

146144
void CLKeySearchDevice::allocateBuffers()
147145
{
148-
size_t numKeys = (size_t)_threads * _blocks * _pointsPerThread;
146+
size_t numKeys = (size_t)_points;
149147
size_t size = numKeys * 8 * sizeof(unsigned int);
150148

151149
// X values
@@ -205,7 +203,7 @@ void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, c
205203

206204
// Set the incrementor
207205
secp256k1::ecpoint g = secp256k1::G();
208-
secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_threads * _blocks * _pointsPerThread) * _stride, g);
206+
secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_points ) * _stride, g);
209207

210208
setIncrementor(p);
211209
} catch(cl::CLException ex) {
@@ -216,14 +214,12 @@ void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, c
216214
void CLKeySearchDevice::doStep()
217215
{
218216
try {
219-
uint64_t numKeys = (uint64_t)_blocks * _threads * _pointsPerThread;
217+
uint64_t numKeys = (uint64_t)_points;
220218

221219
if(_iterations < 2 && _start.cmp(numKeys) <= 0) {
222220

223-
_stepKernelWithDouble->call(
224-
_blocks,
225-
_threads,
226-
_pointsPerThread,
221+
_stepKernelWithDouble->set_args(
222+
_points,
227223
_compression,
228224
_chain,
229225
_x,
@@ -235,12 +231,11 @@ void CLKeySearchDevice::doStep()
235231
_deviceTargetList.mask,
236232
_deviceResults,
237233
_deviceResultsCount);
238-
234+
_stepKernelWithDouble->call(_blocks, _threads);
239235
} else {
240-
_stepKernel->call(
241-
_blocks,
242-
_threads,
243-
_pointsPerThread,
236+
237+
_stepKernel->set_args(
238+
_points,
244239
_compression,
245240
_chain,
246241
_x,
@@ -252,6 +247,7 @@ void CLKeySearchDevice::doStep()
252247
_deviceTargetList.mask,
253248
_deviceResults,
254249
_deviceResultsCount);
250+
_stepKernel->call(_blocks, _threads);
255251
}
256252
fflush(stdout);
257253

@@ -332,7 +328,7 @@ size_t CLKeySearchDevice::getResults(std::vector<KeySearchResult> &results)
332328

333329
// Returns the per-step key count as a 64-bit value.
// Removed line: widens _threads to uint64_t, then multiplies by _blocks and _pointsPerThread.
// Added line: returns the cached _points member, which the constructor hunk of this commit
// sets to pointsPerThread * threads * blocks.
// NOTE(review): _points is declared int in the header hunk and the constructor appears to form
// the product before any widening, so the cast below cannot undo an overflow the way the
// removed expression avoided one - confirm the operand types and the supported grid sizes.
uint64_t CLKeySearchDevice::keysPerStep()
334330
{
335-
return (uint64_t)_threads * _blocks * _pointsPerThread;
331+
return (uint64_t)_points;
336332
}
337333

338334
std::string CLKeySearchDevice::getDeviceName()
@@ -346,13 +342,13 @@ void CLKeySearchDevice::getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem)
346342
totalMem = _globalMemSize;
347343
}
348344

349-
// Copies the uint256 value k into a word buffer.
// k is exported as 8 unsigned int words with the secp256k1::uint256::BigEndian flag and then
// copied with memcpy (sizeof(unsigned int) * 8 bytes).
// Removed signature (k, ptr): writes at the start of ptr.
// Added signature (ptr, idx, k): writes at ptr + idx * 8, i.e. entry idx when ptr is viewed as
// consecutive 8-word entries.
// Nothing here checks the destination size; the caller must supply at least (idx + 1) * 8 words.
void CLKeySearchDevice::splatBigInt(secp256k1::uint256 &k, unsigned int *ptr)
345+
void CLKeySearchDevice::splatBigInt(unsigned int *ptr, int idx, secp256k1::uint256 &k)
350346
{
351347
unsigned int buf[8];
352348

353349
k.exportWords(buf, 8, secp256k1::uint256::BigEndian);
354350

355-
memcpy(ptr, buf, sizeof(unsigned int) * 8);
351+
// Each entry occupies 8 words, so entry idx starts idx * 8 words into ptr.
memcpy(ptr + idx * 8, buf, sizeof(unsigned int) * 8);
356352

357353
}
358354

@@ -383,18 +379,6 @@ void CLKeySearchDevice::removeTargetFromList(const unsigned int hash[5])
383379
}
384380
}
385381

386-
// Removed by this commit (every line below carries an old-file number followed by '-').
// Mapped a (thread, block, idx) triple to a flat point index:
//   idx * (_blocks * _threads) + (block * _threads + thread)
// The added line in getResultsInternal uses ptr[i].idx directly instead of calling this helper.
uint32_t CLKeySearchDevice::getPrivateKeyOffset(int thread, int block, int idx)
387-
{
388-
// Total number of threads
389-
int totalThreads = _blocks * _threads;
390-
391-
int base = idx * totalThreads;
392-
393-
// Global ID of the current thread
394-
int threadId = block * _threads + thread;
395-
396-
return base + threadId;
397-
}
398382

399383
void CLKeySearchDevice::getResultsInternal()
400384
{
@@ -420,7 +404,7 @@ void CLKeySearchDevice::getResultsInternal()
420404
KeySearchResult minerResult;
421405

422406
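// Calculate the private key based on the number of iterations and the current thread
// NOTE(review): the removed line multiplied the whole sum (iteration base + point offset) by _stride.
// The added line has no outer parentheses, so '*' binds first and _stride scales only ptr[i].idx.
// The two forms differ once _stride is not 1 and _iterations is non-zero - confirm and restore the parentheses.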
423-
secp256k1::uint256 offset = (secp256k1::uint256((uint64_t)_blocks * _threads * _pointsPerThread * _iterations) + secp256k1::uint256(getPrivateKeyOffset(ptr[i].thread, ptr[i].block, ptr[i].idx))) * _stride;
407+
secp256k1::uint256 offset = secp256k1::uint256((uint64_t)_points * _iterations) + secp256k1::uint256(ptr[i].idx) * _stride;
424408
secp256k1::uint256 privateKey = secp256k1::addModN(_start, offset);
425409

426410
minerResult.privateKey = privateKey;
@@ -443,7 +427,7 @@ void CLKeySearchDevice::getResultsInternal()
443427

444428
void CLKeySearchDevice::selfTest()
445429
{
446-
uint64_t numPoints = (uint64_t)_threads * _blocks * _pointsPerThread;
430+
uint64_t numPoints = (uint64_t)_points;
447431
std::vector<secp256k1::uint256> privateKeys;
448432

449433
// Generate key pairs for k, k+1, k+2 ... k + <total points in parallel - 1>
@@ -462,63 +446,37 @@ void CLKeySearchDevice::selfTest()
462446
_clContext->copyDeviceToHost(_x, xBuf, sizeof(unsigned int) * 8 * numPoints);
463447
_clContext->copyDeviceToHost(_y, yBuf, sizeof(unsigned int) * 8 * numPoints);
464448

465-
for(int block = 0; block < _blocks; block++) {
466-
for(int thread = 0; thread < _threads; thread++) {
467-
for(int idx = 0; idx < _pointsPerThread; idx++) {
468-
469-
int index = getIndex(block, thread, idx);
449+
for(int index = 0; index < _points; index++) {
450+
secp256k1::uint256 privateKey = privateKeys[index];
470451

471-
secp256k1::uint256 privateKey = privateKeys[index];
452+
secp256k1::uint256 x = readBigInt(xBuf, index);
453+
secp256k1::uint256 y = readBigInt(yBuf, index);
472454

473-
secp256k1::uint256 x = readBigInt(xBuf, block, thread, idx);
474-
secp256k1::uint256 y = readBigInt(yBuf, block, thread, idx);
455+
secp256k1::ecpoint p1(x, y);
456+
secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G());
475457

476-
secp256k1::ecpoint p1(x, y);
477-
secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G());
478-
479-
if(!secp256k1::pointExists(p1)) {
480-
throw std::string("Validation failed: invalid point");
481-
}
458+
if(!secp256k1::pointExists(p1)) {
459+
throw std::string("Validation failed: invalid point");
460+
}
482461

483-
if(!secp256k1::pointExists(p2)) {
484-
throw std::string("Validation failed: invalid point");
485-
}
462+
if(!secp256k1::pointExists(p2)) {
463+
throw std::string("Validation failed: invalid point");
464+
}
486465

487-
if(!(p1 == p2)) {
488-
throw std::string("Validation failed: points do not match");
489-
}
490-
}
466+
if(!(p1 == p2)) {
467+
throw std::string("Validation failed: points do not match");
491468
}
492469
}
493470
}
494471

495-
// Old 5-argument form; its definition is removed by this commit.
// Exported i as 8 unsigned int words with the secp256k1::uint256::BigEndian flag and stored
// them in dest starting at word offset
//   idx * (_blocks * _threads) * 8 + (block * _threads + thread) * 8
// NOTE(review): the header hunk of this commit still shows the 5-argument declaration as an
// unchanged line - confirm whether that declaration was meant to be removed as well.
void CLKeySearchDevice::splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i)
496-
{
497-
unsigned int value[8] = {0};
498-
499-
i.exportWords(value, 8, secp256k1::uint256::BigEndian);
500472

501-
int totalThreads = _blocks * _threads;
502-
int threadId = block * _threads + thread;
503473

504-
int base = idx * totalThreads * 8;
505-
506-
for(int k = 0; k < 8; k++) {
507-
dest[base + threadId * 8 + k] = value[k];
508-
}
509-
}
510-
511-
secp256k1::uint256 CLKeySearchDevice::readBigInt(unsigned int *src, int block, int thread, int idx)
474+
secp256k1::uint256 CLKeySearchDevice::readBigInt(unsigned int *src, int idx)
512475
{
513476
unsigned int value[8] = {0};
514477

515-
int totalThreads = _blocks * _threads;
516-
int threadId = block * _threads + thread;
517-
518-
int base = idx * totalThreads * 8;
519-
520478
for(int k = 0; k < 8; k++) {
521-
value[k] = src[base + threadId * 8 + k];
479+
value[k] = src[idx * 8 + k];
522480
}
523481

524482
secp256k1::uint256 v(value, secp256k1::uint256::BigEndian);
@@ -563,29 +521,18 @@ void CLKeySearchDevice::initializeBasePoints()
563521
_clContext->copyHostToDevice(tmpY, _yTable, count * 8 * sizeof(unsigned int));
564522
}
565523

566-
// Definition removed by this commit.
// Mapped a (block, thread, idx) triple to a flat point index:
//   idx * (_blocks * _threads) + (block * _threads + thread)
// The added loops in selfTest and generateStartingPoints iterate a single index over _points
// instead of calling this helper.
// NOTE(review): the header hunk of this commit still shows the getIndex declaration as an
// unchanged line - confirm whether that declaration was meant to be removed as well.
int CLKeySearchDevice::getIndex(int block, int thread, int idx)
567-
{
568-
// Total number of threads
569-
int totalThreads = _blocks * _threads;
570524

571-
int base = idx * totalThreads;
572-
573-
// Global ID of the current thread
574-
int threadId = block * _threads + thread;
575-
576-
return base + threadId;
577-
}
578525

579526
void CLKeySearchDevice::generateStartingPoints()
580527
{
581-
uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;
528+
uint64_t totalPoints = (uint64_t)_points;
582529
uint64_t totalMemory = totalPoints * 40;
583530

584531
std::vector<secp256k1::uint256> exponents;
585532

586533
initializeBasePoints();
587534

588-
_pointsMemSize = totalPoints * sizeof(unsigned int) * 16 + _pointsPerThread * sizeof(unsigned int) * 8;
535+
_pointsMemSize = totalPoints * sizeof(unsigned int) * 16 + _points * sizeof(unsigned int) * 8;
589536

590537
Logger::log(LogLevel::Info, "Generating " + util::formatThousands(totalPoints) + " starting points (" + util::format("%.1f", (double)totalMemory / (double)(1024 * 1024)) + "MB)");
591538

@@ -601,15 +548,8 @@ void CLKeySearchDevice::generateStartingPoints()
601548

602549
unsigned int *privateKeys = new unsigned int[8 * totalPoints];
603550

604-
for(int block = 0; block < _blocks; block++) {
605-
for(int thread = 0; thread < _threads; thread++) {
606-
for(int idx = 0; idx < _pointsPerThread; idx++) {
607-
608-
int index = getIndex(block, thread, idx);
609-
610-
splatBigInt(privateKeys, block, thread, idx, exponents[index]);
611-
}
612-
}
551+
for(int index = 0; index < _points; index++) {
552+
splatBigInt(privateKeys, index, exponents[index]);
613553
}
614554

615555
// Copy to device
@@ -620,7 +560,8 @@ void CLKeySearchDevice::generateStartingPoints()
620560
// Show progress in 10% increments
621561
double pct = 10.0;
622562
for(int i = 0; i < 256; i++) {
623-
_initKeysKernel->call(_blocks, _threads, _pointsPerThread, i, _privateKeys, _chain, _xTable, _yTable, _x, _y);
563+
_initKeysKernel->set_args(_points, i, _privateKeys, _chain, _xTable, _yTable, _x, _y);
564+
_initKeysKernel->call(_blocks, _threads);
624565

625566
if(((double)(i+1) / 256.0) * 100.0 >= pct) {
626567
Logger::log(LogLevel::Info, util::format("%.1f%%", pct));
@@ -634,7 +575,7 @@ void CLKeySearchDevice::generateStartingPoints()
634575

635576
// Returns _start + totalPoints * _iterations * _stride.
secp256k1::uint256 CLKeySearchDevice::getNextKey()
636577
{
637-
uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;
// NOTE(review): the added line below multiplies _points by _threads and _blocks again.
// The constructor hunk of this commit already sets _points = pointsPerThread * threads * blocks,
// and the other added lines in this diff (keysPerStep, doStep, the incrementor in init) use
// _points on its own. This looks like a leftover from renaming _pointsPerThread and would
// inflate the result by a factor of threads * blocks - confirm, and drop the extra factors if so.
578+
uint64_t totalPoints = (uint64_t)_points * _threads * _blocks;
638579

639580
return _start + secp256k1::uint256(totalPoints) * _iterations * _stride;
640581
}

CLKeySearchDevice/CLKeySearchDevice.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class CLKeySearchDevice : public KeySearchDevice {
3636

3737
int _threads;
3838

39-
int _pointsPerThread;
39+
int _points;
4040

4141
cl_device_id _device;
4242

@@ -84,7 +84,8 @@ class CLKeySearchDevice : public KeySearchDevice {
8484
int getIndex(int block, int thread, int idx);
8585

8686
void splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i);
87-
secp256k1::uint256 readBigInt(unsigned int *src, int block, int thread, int idx);
87+
void splatBigInt(unsigned int *dest, int idx, secp256k1::uint256 &k);
88+
secp256k1::uint256 readBigInt(unsigned int *src, int idx);
8889

8990
void selfTest();
9091

0 commit comments

Comments
 (0)