Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions BitCrack.props
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros">
<CUDA_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include</CUDA_INCLUDE>
<CUDA_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64</CUDA_LIB>
<OPENCL_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include</OPENCL_INCLUDE>
<OPENCL_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64</OPENCL_LIB>
<CUDA_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include</CUDA_INCLUDE>
<CUDA_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\lib\x64</CUDA_LIB>
<OPENCL_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include</OPENCL_INCLUDE>
<OPENCL_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\lib\x64</OPENCL_LIB>
</PropertyGroup>
<PropertyGroup />
<ItemDefinitionGroup />
Expand Down
52 changes: 41 additions & 11 deletions CudaKeySearchDevice/CudaDeviceKeys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,22 @@ __device__ unsigned int *ec::getYPtr()
return _yPtr[0];
}

__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr);
__global__ void multiplyStepKernel(unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr);

unsigned int * CudaDeviceKeys::getXPoints()
{
return _devX;
}

unsigned int * CudaDeviceKeys::getYPoints()
{
return _devY;
}

unsigned int * CudaDeviceKeys::getChain()
{
return _devChain;
}

int CudaDeviceKeys::getIndex(int block, int thread, int idx)
{
Expand All @@ -45,15 +59,22 @@ void CudaDeviceKeys::splatBigInt(unsigned int *dest, int block, int thread, int
i.exportWords(value, 8, secp256k1::uint256::BigEndian);

int totalThreads = _blocks * _threads;
int threadId = block * _threads + thread;
int threadId = block * _threads * 4 + thread * 4;

int base = idx * _blocks * _threads * 8;

int index = base + threadId;

for(int k = 0; k < 8; k++) {
for(int k = 0; k < 4; k++) {
dest[index] = value[k];
index += totalThreads;
index++;
}

index = base + totalThreads * 4 + threadId;

for(int k = 4; k < 8; k++) {
dest[index] = value[k];
index++;
}
}

Expand All @@ -62,15 +83,22 @@ secp256k1::uint256 CudaDeviceKeys::readBigInt(unsigned int *src, int block, int
unsigned int value[8] = { 0 };

int totalThreads = _blocks * _threads;
int threadId = block * _threads + thread;
int threadId = block * _threads * 4 + thread * 4;

int base = idx * _blocks * _threads * 8;

int index = base + threadId;

for(int k = 0; k < 8; k++) {
for(int k = 0; k < 4; k++) {
value[k] = src[index];
index++;
}

index = base + totalThreads * 4 + threadId;

for(int k = 4; k < 8; k++) {
value[k] = src[index];
index += totalThreads;
index++;
}

secp256k1::uint256 v(value, secp256k1::uint256::BigEndian);
Expand Down Expand Up @@ -255,9 +283,11 @@ cudaError_t CudaDeviceKeys::init(int blocks, int threads, int pointsPerThread, c

void CudaDeviceKeys::clearPublicKeys()
{
cudaFree(_devChain);
cudaFree(_devX);
cudaFree(_devY);

_devChain = NULL;
_devX = NULL;
_devY = NULL;
}
Expand All @@ -267,9 +297,9 @@ void CudaDeviceKeys::clearPrivateKeys()
cudaFree(_devBasePointX);
cudaFree(_devBasePointY);
cudaFree(_devPrivate);
cudaFree(_devChain);
//cudaFree(_devChain);

_devChain = NULL;
//_devChain = NULL;
_devBasePointX = NULL;
_devBasePointY = NULL;
_devPrivate = NULL;
Expand All @@ -286,7 +316,7 @@ cudaError_t CudaDeviceKeys::doStep()
return err;
}

__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr)
__global__ void multiplyStepKernel(unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr)
{
unsigned int *xPtr = ec::getXPtr();

Expand Down Expand Up @@ -394,4 +424,4 @@ bool CudaDeviceKeys::selfTest(const std::vector<secp256k1::uint256> &privateKeys
}

return true;
}
}
5 changes: 4 additions & 1 deletion CudaKeySearchDevice/CudaDeviceKeys.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class CudaDeviceKeys {

void clearPublicKeys();

unsigned int * getXPoints();
unsigned int * getYPoints();
unsigned int * getChain();
};

#endif
#endif
167 changes: 167 additions & 0 deletions CudaKeySearchDevice/CudaKeyCheckDevice.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "KeySearchTypes.h"
#include "CudaKeySearchDevice.h"
#include "ptx.cuh"
#include "secp256k1.cuh"

#include "sha256.cuh"
#include "ripemd160.cuh"

#include "secp256k1.h"

#include "CudaHashLookup.cuh"
#include "CudaAtomicList.cuh"
#include "CudaDeviceKeys.cuh"


__device__ void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
{
const unsigned int iv[5] = {
0x67452301,
0xefcdab89,
0x98badcfe,
0x10325476,
0xc3d2e1f0
};

for(int i = 0; i < 5; i++) {
hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
}
}

__device__ void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digestOut)
{
unsigned int hash[8];

sha256PublicKey(x, y, hash);

// Swap to little-endian
for(int i = 0; i < 8; i++) {
hash[i] = endian(hash[i]);
}

ripemd160sha256NoFinal(hash, digestOut);
}

__device__ void hashPublicKeyCompressed(const unsigned int *x, unsigned int yParity, unsigned int *digestOut)
{
unsigned int hash[8];

sha256PublicKeyCompressed(x, yParity, hash);

// Swap to little-endian
for(int i = 0; i < 8; i++) {
hash[i] = endian(hash[i]);
}

ripemd160sha256NoFinal(hash, digestOut);
}

__device__ void setResultFound(const int numBlocks, bool compressed, unsigned int x[8], unsigned int y[8], unsigned int digest[5])
{
CudaDeviceResult r;

unsigned int virtualPointsPerThread = blockIdx.x / numBlocks; // ex: (uint) 8 / 6 = 1
unsigned int virtualBlock = blockIdx.x % numBlocks;

r.block = virtualBlock;
r.thread = threadIdx.x;
r.idx = virtualPointsPerThread;
r.compressed = compressed;

for(int i = 0; i < 8; i++) {
r.x[i] = x[i];
r.y[i] = y[i];
}

doRMD160FinalRound(digest, r.digest);

atomicListAdd(&r, sizeof(r));
}

/**
* Reads an 2-vector4 big integer from device memory with virtualized loop
*/
__device__ static void readIntVirtualized(unsigned int *ara, const int numBlocks, unsigned int x[8])
{
uint4 *araTmp = reinterpret_cast<uint4 *>(ara);

unsigned int virtualPointsPerThread = blockIdx.x / numBlocks; // ex: (uint) 8 / 6 = 1
unsigned int virtualBlock = blockIdx.x % numBlocks; // ex: 8 % 6 = 2
unsigned int totalThreads = numBlocks * blockDim.x; // 6 * 128 = 768
unsigned int base = virtualPointsPerThread * totalThreads * 2; // 1 * 768
unsigned int threadId = blockDim.x * virtualBlock + threadIdx.x; // 128 * 2 * ...
unsigned int index = base + threadId;

uint4 xTmp = araTmp[index];
x[0] = xTmp.x;
x[1] = xTmp.y;
x[2] = xTmp.z;
x[3] = xTmp.w;

index += totalThreads;

xTmp = araTmp[index];
x[4] = xTmp.x;
x[5] = xTmp.y;
x[6] = xTmp.z;
x[7] = xTmp.w;
}

__device__ static unsigned int readIntLSWVirtualized(unsigned int *ara, const int numBlocks)
{
uint4 *araTmp = reinterpret_cast<uint4 *>(ara);

unsigned int virtualPointsPerThread = blockIdx.x / numBlocks; // ex: (uint) 8 / 6 = 1
unsigned int virtualBlock = blockIdx.x % numBlocks; // ex: 8 % 6 = 2
unsigned int totalThreads = numBlocks * blockDim.x; // 6 * 128 = 768
unsigned int base = virtualPointsPerThread * totalThreads * 2; // 1 * 768
unsigned int threadId = blockDim.x * virtualBlock + threadIdx.x; // 128 * 2 * ...
unsigned int index = base + threadId;

index += totalThreads;

uint4 xTmp = araTmp[index];

return xTmp.w;
}

__device__ void hashAndCheck(unsigned int *xPtr, unsigned int *yPtr, const int numBlocks, const int compression)
{
unsigned int x[8];
unsigned int digest[5];

readIntVirtualized(xPtr, numBlocks, x);

if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) {
unsigned int y[8];
readIntVirtualized(yPtr, numBlocks, y);

hashPublicKey(x, y, digest);

if(checkHash(digest)) {
setResultFound(numBlocks, false, x, y, digest);
}
}

if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) {
hashPublicKeyCompressed(x, readIntLSWVirtualized(yPtr, numBlocks), digest);

if(checkHash(digest)) {
unsigned int y[8];
readIntVirtualized(yPtr, numBlocks, y);
setResultFound(numBlocks, true, x, y, digest);
}
}
}

/**
* Performs a single iteration
*/
__global__ void keyCheckKernel(unsigned int *xPtr, unsigned int *yPtr, const int blocks, const int compression)
{
hashAndCheck(xPtr, yPtr, blocks, compression);
}

8 changes: 4 additions & 4 deletions CudaKeySearchDevice/CudaKeySearchDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ void CudaKeySearchDevice::init(const secp256k1::uint256 &start, int compression,

generateStartingPoints();

cudaCall(allocateChainBuf(_threads * _blocks * _pointsPerThread));
//cudaCall(allocateChainBuf(_threads * _blocks * _pointsPerThread));

// Set the incrementor
secp256k1::ecpoint g = secp256k1::G();
Expand Down Expand Up @@ -148,9 +148,9 @@ void CudaKeySearchDevice::doStep()

try {
if(_iterations < 2 && _startExponent.cmp(numKeys) <= 0) {
callKeyFinderKernel(_blocks, _threads, _pointsPerThread, true, _compression);
callKeyFinderKernel(_blocks, _threads, _pointsPerThread, _deviceKeys.getXPoints(), _deviceKeys.getYPoints(), _deviceKeys.getChain(), true, _compression);
} else {
callKeyFinderKernel(_blocks, _threads, _pointsPerThread, false, _compression);
callKeyFinderKernel(_blocks, _threads, _pointsPerThread, _deviceKeys.getXPoints(), _deviceKeys.getYPoints(), _deviceKeys.getChain(), false, _compression);
}
} catch(cuda::CudaException ex) {
throw KeySearchException(ex.msg);
Expand Down Expand Up @@ -313,4 +313,4 @@ secp256k1::uint256 CudaKeySearchDevice::getNextKey()
uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;

return _startExponent + secp256k1::uint256(totalPoints) * _iterations * _stride;
}
}
Loading