From 4400ddf5087b44c576c7b51a2c128ba34d076204 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:29:57 +0200 Subject: [PATCH 01/62] Refactor AddressUtil --- .gitignore | 1 + AddressUtil/AddressUtil.h | 20 ++-- AddressUtil/AddressUtil.vcxproj | 75 +++++++++++-- AddressUtil/Base58.cpp | 182 +++++++++++++++++++++++++------- AddressUtil/hash.cpp | 4 +- 5 files changed, 226 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index 3d8e148a..c8adb8a9 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ CLUnitTests/test.cl [Dd]ebugPublic/ [Rr]elease/ [Rr]eleases/ +Performance Release/ x64/ x86/ bld/ diff --git a/AddressUtil/AddressUtil.h b/AddressUtil/AddressUtil.h index 14aab13a..0879501b 100644 --- a/AddressUtil/AddressUtil.h +++ b/AddressUtil/AddressUtil.h @@ -1,35 +1,31 @@ -#ifndef _ADDRESS_UTIL_H -#define _ADDRESS_UTIL_H +#ifndef ADDRESS_UTIL_H +#define ADDRESS_UTIL_H #include "secp256k1.h" namespace Address { std::string fromPublicKey(const secp256k1::ecpoint &p, bool compressed = false); bool verifyAddress(std::string address); -}; +} namespace Base58 { - std::string toBase58(const secp256k1::uint256 &x); + std::string toBase58(const secp256k1::uint256 value); secp256k1::uint256 toBigInt(const std::string &s); void getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 &minValueOut, secp256k1::uint256 &maxValueOut); void toHash160(const std::string &s, unsigned int hash[5]); - bool isBase58(std::string s); -}; - - + bool isBase58(const std::string &value); +} namespace Hash { - void hashPublicKey(const secp256k1::ecpoint &p, unsigned int *digest); void hashPublicKeyCompressed(const secp256k1::ecpoint &p, unsigned int *digest); void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digest); void hashPublicKeyCompressed(const unsigned int *x, const unsigned int *y, unsigned int *digest); -}; - +} -#endif \ No newline at end of file +#endif diff --git a/AddressUtil/AddressUtil.vcxproj 
b/AddressUtil/AddressUtil.vcxproj index c2e6a662..df6aee86 100644 --- a/AddressUtil/AddressUtil.vcxproj +++ b/AddressUtil/AddressUtil.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -37,32 +45,46 @@ {34042455-D274-432D-9134-C9EA41FD1B54} Win32Proj AddressUtil - 10.0 + 10.0.19041.0 StaticLibrary true - v141 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode StaticLibrary false - v141 + ClangCl true Unicode StaticLibrary true - v142 + ClangCl Unicode + + StaticLibrary + false + ClangCl + Unicode + true + x64 + StaticLibrary false - v142 + ClangCL true Unicode @@ -75,6 +97,10 @@ + + + + @@ -83,6 +109,10 @@ + + + + @@ -102,10 +132,23 @@ Windows + + + + + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories) + + + Windows + + NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) $(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories) @@ -114,6 +157,26 @@ Windows + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + Windows + + Level3 diff --git a/AddressUtil/Base58.cpp b/AddressUtil/Base58.cpp index 2d80ef35..5c7cddad 100644 --- a/AddressUtil/Base58.cpp +++ b/AddressUtil/Base58.cpp @@ -1,28 +1,146 @@ #include #include "CryptoUtil.h" - #include "AddressUtil.h" - -static const std::string BASE58_STRING = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; - struct Base58Map { - static std::map createBase58Map() + static std::map createBase58OrdinalMap() { - std::map m; - for(int i = 0; i < 58; i++) { - m[BASE58_STRING[i]] = i; - } + std::map m; + + m.insert(std::pair('1', 
0)); + m.insert(std::pair('2', 1)); + m.insert(std::pair('3', 2)); + m.insert(std::pair('4', 3)); + m.insert(std::pair('5', 4)); + m.insert(std::pair('6', 5)); + m.insert(std::pair('7', 6)); + m.insert(std::pair('8', 7)); + m.insert(std::pair('9', 8)); + m.insert(std::pair('A', 9)); + m.insert(std::pair('B', 10)); + m.insert(std::pair('C', 11)); + m.insert(std::pair('D', 12)); + m.insert(std::pair('E', 13)); + m.insert(std::pair('F', 14)); + m.insert(std::pair('G', 15)); + m.insert(std::pair('H', 16)); + m.insert(std::pair('J', 17)); + m.insert(std::pair('K', 18)); + m.insert(std::pair('L', 19)); + m.insert(std::pair('M', 20)); + m.insert(std::pair('N', 21)); + m.insert(std::pair('P', 22)); + m.insert(std::pair('Q', 23)); + m.insert(std::pair('R', 24)); + m.insert(std::pair('S', 25)); + m.insert(std::pair('T', 26)); + m.insert(std::pair('U', 27)); + m.insert(std::pair('V', 28)); + m.insert(std::pair('W', 29)); + m.insert(std::pair('X', 30)); + m.insert(std::pair('Y', 31)); + m.insert(std::pair('Z', 32)); + m.insert(std::pair('a', 33)); + m.insert(std::pair('b', 34)); + m.insert(std::pair('c', 35)); + m.insert(std::pair('d', 36)); + m.insert(std::pair('e', 37)); + m.insert(std::pair('f', 38)); + m.insert(std::pair('g', 39)); + m.insert(std::pair('h', 40)); + m.insert(std::pair('i', 41)); + m.insert(std::pair('j', 42)); + m.insert(std::pair('k', 43)); + m.insert(std::pair('m', 44)); + m.insert(std::pair('n', 45)); + m.insert(std::pair('o', 46)); + m.insert(std::pair('p', 47)); + m.insert(std::pair('q', 48)); + m.insert(std::pair('r', 49)); + m.insert(std::pair('s', 50)); + m.insert(std::pair('t', 51)); + m.insert(std::pair('u', 52)); + m.insert(std::pair('v', 53)); + m.insert(std::pair('w', 54)); + m.insert(std::pair('x', 55)); + m.insert(std::pair('y', 56)); + m.insert(std::pair('z', 57)); return m; } - static std::map myMap; -}; + static std::map createBase58ReverseMap() + { + std::map m; + + m.insert(std::pair( 0, '1')); + m.insert(std::pair( 1, '2')); + 
m.insert(std::pair( 2, '3')); + m.insert(std::pair( 3, '4')); + m.insert(std::pair( 4, '5')); + m.insert(std::pair( 5, '6')); + m.insert(std::pair( 6, '7')); + m.insert(std::pair( 7, '8')); + m.insert(std::pair( 8, '9')); + m.insert(std::pair( 9, 'A')); + m.insert(std::pair( 10, 'B')); + m.insert(std::pair( 11, 'C')); + m.insert(std::pair( 12, 'D')); + m.insert(std::pair( 13, 'E')); + m.insert(std::pair( 14, 'F')); + m.insert(std::pair( 15, 'G')); + m.insert(std::pair( 16, 'H')); + m.insert(std::pair( 17, 'J')); + m.insert(std::pair( 18, 'K')); + m.insert(std::pair( 19, 'L')); + m.insert(std::pair( 20, 'M')); + m.insert(std::pair( 21, 'N')); + m.insert(std::pair( 22, 'P')); + m.insert(std::pair( 23, 'Q')); + m.insert(std::pair( 24, 'R')); + m.insert(std::pair( 25, 'S')); + m.insert(std::pair( 26, 'T')); + m.insert(std::pair( 27, 'U')); + m.insert(std::pair( 28, 'V')); + m.insert(std::pair( 29, 'W')); + m.insert(std::pair( 30, 'X')); + m.insert(std::pair( 31, 'Y')); + m.insert(std::pair( 32, 'Z')); + m.insert(std::pair( 33, 'a')); + m.insert(std::pair( 34, 'b')); + m.insert(std::pair( 35, 'c')); + m.insert(std::pair( 36, 'd')); + m.insert(std::pair( 37, 'e')); + m.insert(std::pair( 38, 'f')); + m.insert(std::pair( 39, 'g')); + m.insert(std::pair( 40, 'h')); + m.insert(std::pair( 41, 'i')); + m.insert(std::pair( 42, 'j')); + m.insert(std::pair( 43, 'k')); + m.insert(std::pair( 44, 'm')); + m.insert(std::pair( 45, 'n')); + m.insert(std::pair( 46, 'o')); + m.insert(std::pair( 47, 'p')); + m.insert(std::pair( 48, 'q')); + m.insert(std::pair( 49, 'r')); + m.insert(std::pair( 50, 's')); + m.insert(std::pair( 51, 't')); + m.insert(std::pair( 52, 'u')); + m.insert(std::pair( 53, 'v')); + m.insert(std::pair( 54, 'w')); + m.insert(std::pair( 55, 'x')); + m.insert(std::pair( 56, 'y')); + m.insert(std::pair( 57, 'z')); -std::map Base58Map::myMap = Base58Map::createBase58Map(); + return m; + } + static std::map ordinal; + static std::map reverse; +}; +std::map Base58Map::ordinal 
= Base58Map::createBase58OrdinalMap(); +std::map Base58Map::reverse = Base58Map::createBase58ReverseMap(); /** * Converts a base58 string to uint256 @@ -31,11 +149,8 @@ secp256k1::uint256 Base58::toBigInt(const std::string &s) { secp256k1::uint256 value; - for(unsigned int i = 0; i < s.length(); i++) { - value = value.mul(58); - - int c = Base58Map::myMap[s[i]]; - value = value.add(c); + for(size_t i = 0, il = s.length(); i < il; i++) { + value = value.mul(58).add(Base58Map::ordinal.find(s[i])->second); } return value; @@ -48,16 +163,17 @@ void Base58::toHash160(const std::string &s, unsigned int hash[5]) value.exportWords(words, 6, secp256k1::uint256::BigEndian); - // Extract words, ignore checksum - for(int i = 0; i < 5; i++) { - hash[i] = words[i]; - } + hash[0] = words[0]; + hash[1] = words[1]; + hash[2] = words[2]; + hash[3] = words[3]; + hash[4] = words[4]; } -bool Base58::isBase58(std::string s) +bool Base58::isBase58(const std::string &value) { - for(unsigned int i = 0; i < s.length(); i++) { - if(BASE58_STRING.find(s[i]) < 0) { + for(size_t i = 0; i < value.length(); i++) { + if(Base58Map::ordinal.find(value[i]) == Base58Map::ordinal.end()) { return false; } } @@ -65,22 +181,16 @@ bool Base58::isBase58(std::string s) return true; } -std::string Base58::toBase58(const secp256k1::uint256 &x) +std::string Base58::toBase58(secp256k1::uint256 value) { - std::string s; - - secp256k1::uint256 value = x; - - while(!value.isZero()) { - secp256k1::uint256 digit = value.mod(58); - int digitInt = digit.toInt32(); - - s = BASE58_STRING[digitInt] + s; + std::string result; + for (unsigned int i = 0; i <= 32; i++) { + result.insert(0, 1, Base58Map::reverse.find(value.mod(58).toInt32())->second); value = value.div(58); } - return s; + return result; } void Base58::getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 &minValueOut, secp256k1::uint256 &maxValueOut) @@ -114,4 +224,4 @@ void Base58::getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 
& minValueOut = minValue; maxValueOut = maxValue; -} \ No newline at end of file +} diff --git a/AddressUtil/hash.cpp b/AddressUtil/hash.cpp index 8650c0de..d1db5e62 100644 --- a/AddressUtil/hash.cpp +++ b/AddressUtil/hash.cpp @@ -13,7 +13,7 @@ bool Address::verifyAddress(std::string address) { // Check length if(address.length() > 34) { - false; + return false; } // Check encoding @@ -185,4 +185,4 @@ void Hash::hashPublicKeyCompressed(const unsigned int *x, const unsigned int *y, msg[15] = 0; crypto::ripemd160(msg, digest); -} \ No newline at end of file +} From abda527edae3b6455a3066fbb7da5137d9653598 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:32:11 +0200 Subject: [PATCH 02/62] refactored clMath --- clMath/clMath.vcxproj | 62 ++++- clMath/ripemd160.cl | 94 +++---- clMath/secp256k1.cl | 565 ++++++++++++++++-------------------------- clMath/sha256.cl | 386 ++++++++++++++--------------- 4 files changed, 508 insertions(+), 599 deletions(-) diff --git a/clMath/clMath.vcxproj b/clMath/clMath.vcxproj index 5236bff0..c4349796 100644 --- a/clMath/clMath.vcxproj +++ b/clMath/clMath.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -22,32 +30,46 @@ 15.0 {83327841-C283-4D46-A873-97AC674C68AC} clMath - 10.0 + 10.0.19041.0 Application true - v141 + ClangCl + MultiByte + + + Application + false + ClangCl MultiByte Application false - v141 + ClangCl true MultiByte Application true - v142 + ClangCl MultiByte + + Application + false + ClangCl + MultiByte + true + x64 + Application false - v142 + ClangCL true MultiByte @@ -59,12 +81,18 @@ + + + + + + @@ -92,7 +120,7 @@ true - + Level3 Disabled @@ -100,6 +128,28 @@ true + + + EnableAllWarnings + Disabled + true + true + + + + + Level3 + true + true + None + MaxSpeed + AnySuitable + true + Speed + true + true + + Level3 diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 300ead14..6e36a63e 100644 --- a/clMath/ripemd160.cl +++ 
b/clMath/ripemd160.cl @@ -1,24 +1,11 @@ -#ifndef _RIPEMD160_CL -#define _RIPEMD160_CL +#ifndef RIPEMD160_CL +#define RIPEMD160_CL - -__constant unsigned int _RIPEMD160_IV[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 -}; - -__constant unsigned int _K0 = 0x5a827999; -__constant unsigned int _K1 = 0x6ed9eba1; -__constant unsigned int _K2 = 0x8f1bbcdc; -__constant unsigned int _K3 = 0xa953fd4e; - -__constant unsigned int _K4 = 0x7a6d76e9; -__constant unsigned int _K5 = 0x6d703ef3; -__constant unsigned int _K6 = 0x5c4dd124; -__constant unsigned int _K7 = 0x50a28be6; +#define RIPEMD160_IV_0 (0x67452301) +#define RIPEMD160_IV_1 (0xefcdab89) +#define RIPEMD160_IV_2 (0x98badcfe) +#define RIPEMD160_IV_3 (0x10325476) +#define RIPEMD160_IV_4 (0xc3d2e1f0) #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) @@ -38,22 +25,22 @@ __constant unsigned int _K7 = 0x50a28be6; c = rotl((c), 10) #define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + _K0;\ + a += G((b), (c), (d)) + (x) + 0x5a827999;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + _K1;\ + a += H((b), (c), (d)) + (x) + 0x6ed9eba1;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + _K2;\ + a += I((b), (c), (d)) + (x) + 0x8f1bbcdc;\ a = rotl((a), (s)) + e;\ c = rotl((c), 10) #define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + _K3;\ + a += J((b), (c), (d)) + (x) + 0xa953fd4e;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -63,33 +50,33 @@ __constant unsigned int _K7 = 0x50a28be6; c = rotl((c), 10) #define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + _K4;\ + a += G((b), (c), (d)) + x + 0x7a6d76e9;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + _K5;\ + a += H((b), (c), (d)) + (x) + 0x6d703ef3;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define III(a, b, c, d, e, x, s)\ - a 
+= I((b), (c), (d)) + (x) + _K6;\ + a += I((b), (c), (d)) + (x) + 0x5c4dd124;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + _K7;\ + a += J((b), (c), (d)) + (x) + 0x50a28be6;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; + unsigned int a1 = RIPEMD160_IV_0; + unsigned int b1 = RIPEMD160_IV_1; + unsigned int c1 = RIPEMD160_IV_2; + unsigned int d1 = RIPEMD160_IV_3; + unsigned int e1 = RIPEMD160_IV_4; const unsigned int x8 = 0x00000080; const unsigned int x14 = 256; @@ -184,11 +171,11 @@ void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) JJ(c1, d1, e1, a1, b1, 0, 5); JJ(b1, c1, d1, e1, a1, 0, 6); - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; + unsigned int a2 = RIPEMD160_IV_0; + unsigned int b2 = RIPEMD160_IV_1; + unsigned int c2 = RIPEMD160_IV_2; + unsigned int d2 = RIPEMD160_IV_3; + unsigned int e2 = RIPEMD160_IV_4; /* parallel round 1 */ JJJ(a2, b2, c2, d2, e2, x[5], 8); @@ -280,21 +267,21 @@ void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) FFF(c2, d2, e2, a2, b2, 0, 11); FFF(b2, c2, d2, e2, a2, 0, 11); - digest[0] = _RIPEMD160_IV[1] + c1 + d2; - digest[1] = _RIPEMD160_IV[2] + d1 + e2; - digest[2] = _RIPEMD160_IV[3] + e1 + a2; - digest[3] = _RIPEMD160_IV[4] + a1 + b2; - digest[4] = _RIPEMD160_IV[0] + b1 + c2; + digest[0] = RIPEMD160_IV_1 + c1 + d2; + digest[1] = RIPEMD160_IV_2 + d1 + e2; + digest[2] = RIPEMD160_IV_3 + e1 + a2; + digest[3] = RIPEMD160_IV_4 + a1 + b2; + digest[4] = RIPEMD160_IV_0 + b1 + c2; } void ripemd160sha256NoFinal(const unsigned int x[8], 
unsigned int digest[5]) { - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; + unsigned int a1 = RIPEMD160_IV_0; + unsigned int b1 = RIPEMD160_IV_1; + unsigned int c1 = RIPEMD160_IV_2; + unsigned int d1 = RIPEMD160_IV_3; + unsigned int e1 = RIPEMD160_IV_4; const unsigned int x8 = 0x00000080; const unsigned int x14 = 256; @@ -389,11 +376,11 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJ(c1, d1, e1, a1, b1, 0, 5); JJ(b1, c1, d1, e1, a1, 0, 6); - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; + unsigned int a2 = RIPEMD160_IV_0; + unsigned int b2 = RIPEMD160_IV_1; + unsigned int c2 = RIPEMD160_IV_2; + unsigned int d2 = RIPEMD160_IV_3; + unsigned int e2 = RIPEMD160_IV_4; /* parallel round 1 */ JJJ(a2, b2, c2, d2, e2, x[5], 8); @@ -491,4 +478,5 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) digest[3] = a1 + b2; digest[4] = b1 + c2; } + #endif diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 2a63f3fd..c9471fc5 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -1,85 +1,41 @@ -#ifndef _SECP256K1_CL -#define _SECP256K1_CL +#ifndef SECP256K1_CL +#define SECP256K1_CL -typedef ulong uint64_t; - -typedef struct { - uint v[8]; -}uint256_t; +typedef unsigned long uint64_t; +typedef struct uint256_t { + unsigned int v[8]; +} uint256_t; /** Prime modulus 2^256 - 2^32 - 977 */ -__constant unsigned int _P[8] = { +__constant unsigned int P[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F }; -__constant unsigned int _P_MINUS1[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F -}; - -/** - Base point X - */ 
-__constant unsigned int _GX[8] = { - 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798 -}; - -/** - Base point Y - */ -__constant unsigned int _GY[8] = { - 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8 -}; - - -/** - * Group order - */ -__constant unsigned int _N[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141 -}; - -__constant unsigned int _INFINITY[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF -}; - -void printBigInt(const unsigned int x[8]) -{ - printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", - x[0], x[1], x[2], x[3], - x[4], x[5], x[6], x[7]); -} - // Add with carry -unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry) +void addc(unsigned int *a, unsigned int *b, unsigned int *carry, unsigned int *sum) { - unsigned int sum = a + *carry; + *sum = *a + *carry; - unsigned int c1 = (sum < a) ? 1 : 0; + unsigned int c1 = (*sum < *a) * 1; - sum = sum + b; + *sum = *sum + *b; - unsigned int c2 = (sum < b) ? 1 : 0; - - *carry = c1 | c2; - - return sum; + *carry = c1 | ((*sum < *b) * 1); } // Subtract with borrow -unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow) +void subc(unsigned int *a, unsigned int *b, unsigned int *borrow, unsigned int *diff) { - unsigned int diff = a - *borrow; + unsigned int tmp = *a - *borrow; - *borrow = (diff > a) ? 1 : 0; + *borrow = (tmp > *a) * 1; - unsigned int diff2 = diff - b; + *diff = tmp - *b; - *borrow |= (diff2 > diff) ? 1 : 0; - - return diff2; + *borrow |= (*diff > tmp) ? 
1 : 0; } #ifdef DEVICE_VENDOR_INTEL @@ -94,54 +50,42 @@ unsigned int mul_hi977(unsigned int x) } // 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c) +void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { - *low = a * 977; - unsigned int tmp = *low + c; + *low = *a * 977; + unsigned int tmp = *low + *c; unsigned int carry = tmp < *low ? 1 : 0; *low = tmp; - *high = mul_hi977(a) + carry; + *high = mul_hi977(*a) + carry; } #else // 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c) +void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { - *low = a * 977; - unsigned int tmp = *low + c; + *low = *a * 977; + unsigned int tmp = *low + *c; unsigned int carry = tmp < *low ? 1 : 0; *low = tmp; - *high = mad_hi(a, (unsigned int)977, carry); + *high = mad_hi(*a, (unsigned int)977, carry); } #endif -// 32 x 32 multiply-add -void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c) -{ - *low = a * b; - unsigned int tmp = *low + c; - unsigned int carry = tmp < *low ? 
1 : 0; - *low = tmp; - *high = mad_hi(a, b, carry); -} - -void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b) -{ - *low = a * b; - *high = mul_hi(a, b); -} - - uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr) { unsigned int borrow = 0; uint256_t c; - for(int i = 7; i >= 0; i--) { - c.v[i] = subc(a.v[i], b.v[i], &borrow); - } + subc(&a.v[7], &b.v[7], &borrow, &c.v[7]); + subc(&a.v[6], &b.v[6], &borrow, &c.v[6]); + subc(&a.v[5], &b.v[5], &borrow, &c.v[5]); + subc(&a.v[4], &b.v[4], &borrow, &c.v[4]); + subc(&a.v[3], &b.v[3], &borrow, &c.v[3]); + subc(&a.v[2], &b.v[2], &borrow, &c.v[2]); + subc(&a.v[1], &b.v[1], &borrow, &c.v[1]); + subc(&a.v[0], &b.v[0], &borrow, &c.v[0]); *borrow_ptr = borrow; @@ -150,13 +94,29 @@ uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr) bool greaterThanEqualToP(const unsigned int a[8]) { - for(int i = 0; i < 8; i++) { - if(a[i] > _P_MINUS1[i]) { - return true; - } else if(a[i] < _P_MINUS1[i]) { - return false; - } - } + if(a[0] > P[0]) { return true; } + if(a[0] < P[0]) { return false; } + + if(a[1] > P[1]) { return true; } + if(a[1] < P[1]) { return false; } + + if(a[2] > P[2]) { return true; } + if(a[2] < P[2]) { return false; } + + if(a[3] > P[3]) { return true; } + if(a[3] < P[3]) { return false; } + + if(a[4] > P[4]) { return true; } + if(a[4] < P[4]) { return false; } + + if(a[5] > P[5]) { return true; } + if(a[5] < P[5]) { return false; } + + if(a[6] > P[6]) { return true; } + if(a[6] < P[6]) { return false; } + + if(a[7] > P[7]) { return true; } + if(a[7] < P[7]) { return false; } return true; } @@ -164,15 +124,13 @@ bool greaterThanEqualToP(const unsigned int a[8]) void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { unsigned int z[16]; - unsigned int high = 0; + uint64_t product = 0; // First round, overwrite z for(int j = 7; j >= 0; j--) { - uint64_t product = (uint64_t)x[7] * y[j]; - - product 
= product + high; + product = (uint64_t)x[7] * y[j] + high; z[7 + j + 1] = (unsigned int)product; high = (unsigned int)(product >> 32); @@ -185,9 +143,7 @@ void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int for(int j = 7; j >= 0; j--) { - uint64_t product = (uint64_t)x[i] * y[j]; - - product = product + z[i + j + 1] + high; + product = (uint64_t)x[i] * y[j] + z[i + j + 1] + high; z[i + j + 1] = (unsigned int)product; @@ -203,25 +159,13 @@ void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int } } - -unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - c[i] = addc(a[i], b[i], &carry); - } - - return carry; -} - uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) { uint256_t c; unsigned int carry = 0; for(int i = 7; i >= 0; i--) { - c.v[i] = addc(a.v[i], b.v[i], &carry); + addc(&a.v[i], &b.v[i], &carry, &c.v[i]); } *carry_ptr = carry; @@ -229,65 +173,32 @@ uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) return c; } -bool isInfinity(const unsigned int x[8]) +bool isInfinity256k(const uint256_t *x) { - bool isf = true; - - for(int i = 0; i < 8; i++) { - if(x[i] != 0xffffffff) { - isf = false; - } - } - - return isf; + return ( + (x->v[0] == 0xffffffff) && + (x->v[1] == 0xffffffff) && + (x->v[2] == 0xffffffff) && + (x->v[3] == 0xffffffff) && + (x->v[4] == 0xffffffff) && + (x->v[5] == 0xffffffff) && + (x->v[6] == 0xffffffff) && + (x->v[7] == 0xffffffff) + ); } -bool isInfinity256k(const uint256_t x) +bool equal256k(uint256_t *a, uint256_t *b) { - bool isf = true; - - for(int i = 0; i < 8; i++) { - if(x.v[i] != 0xffffffff) { - isf = false; - } - } - - return isf; -} - -bool equal(const unsigned int a[8], const unsigned int b[8]) -{ - for(int i = 0; i < 8; i++) { - if(a[i] != b[i]) { - return false; - } - } - - return true; -} - -bool equal256k(uint256_t a, uint256_t b) -{ - for(int 
i = 0; i < 8; i++) { - if(a.v[i] != b.v[i]) { - return false; - } - } - - return true; -} - -inline uint256_t readInt256(__global const uint256_t* ara, int idx) -{ - return ara[idx]; -} - -/* - * Read least-significant word - */ -unsigned int readLSW(__global const unsigned int *ara, int idx) -{ - return ara[idx * 8 + 7]; + return ( + (a->v[0] == b->v[0]) && + (a->v[1] == b->v[1]) && + (a->v[2] == b->v[2]) && + (a->v[3] == b->v[3]) && + (a->v[4] == b->v[4]) && + (a->v[5] == b->v[5]) && + (a->v[6] == b->v[6]) && + (a->v[7] == b->v[7]) + ); } unsigned int readLSW256k(__global const uint256_t* ara, int idx) @@ -300,25 +211,36 @@ unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) return ara[idx].v[word]; } -unsigned int addP(const unsigned int a[8], unsigned int c[8]) +void addP(unsigned int a[8], unsigned int c[8]) { unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - c[i] = addc(a[i], _P[i], &carry); - } - - return carry; + unsigned int P[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F + }; + addc(&a[7], &P[7], &carry, &c[7]); + addc(&a[6], &P[6], &carry, &c[6]); + addc(&a[5], &P[5], &carry, &c[5]); + addc(&a[4], &P[4], &carry, &c[4]); + addc(&a[3], &P[3], &carry, &c[3]); + addc(&a[2], &P[2], &carry, &c[2]); + addc(&a[1], &P[1], &carry, &c[1]); + addc(&a[0], &P[0], &carry, &c[0]); } -unsigned int subP(const unsigned int a[8], unsigned int c[8]) +void subP(unsigned int a[8], unsigned int c[8]) { unsigned int borrow = 0; - for(int i = 7; i >= 0; i--) { - c[i] = subc(a[i], _P[i], &borrow); - } - - return borrow; + unsigned int P[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F + }; + subc(&a[7], &P[7], &borrow, &c[7]); + subc(&a[6], &P[6], &borrow, &c[6]); + subc(&a[5], &P[5], &borrow, &c[5]); + subc(&a[4], &P[4], &borrow, &c[4]); + subc(&a[3], &P[3], &borrow, &c[3]); + subc(&a[2], &P[2], &borrow, &c[2]); + subc(&a[1], &P[1], 
&borrow, &c[1]); + subc(&a[0], &P[0], &borrow, &c[0]); } /** @@ -335,60 +257,79 @@ uint256_t subModP256k(uint256_t a, uint256_t b) return c; } - -uint256_t addModP256k(uint256_t a, uint256_t b) +void addModP256k(uint256_t *a, uint256_t *b, uint256_t *cP) { unsigned int carry = 0; - uint256_t c = add256k(a, b, &carry); + uint256_t c = add256k(*a, *b, &carry); - bool gt = false; - for(int i = 0; i < 8; i++) { - if(c.v[i] > _P[i]) { - gt = true; - break; - } else if(c.v[i] < _P[i]) { - break; - } - } + if(carry) { subP(c.v, c.v); *cP = c; } - if(carry || gt) { - subP(c.v, c.v); - } + else if(c.v[0] > P[0]) { subP(c.v, c.v); *cP = c; } + else if(c.v[0] < P[0]) { *cP = c; } - return c; + else if(c.v[1] > P[1]) { subP(c.v, c.v); *cP = c; } + else if(c.v[1] < P[1]) { *cP = c; } + + else if(c.v[2] > P[2]) { subP(c.v, c.v); *cP = c; } + else if(c.v[2] < P[2]) { *cP = c; } + + else if(c.v[3] > P[3]) { subP(c.v, c.v); *cP = c; } + else if(c.v[3] < P[3]) { *cP = c; } + + else if(c.v[4] > P[4]) { subP(c.v, c.v); *cP = c; } + else if(c.v[4] < P[4]) { *cP = c; } + + else if(c.v[5] > P[5]) { subP(c.v, c.v); *cP = c; } + else if(c.v[5] < P[5]) { *cP = c; } + + else if(c.v[6] > P[6]) { subP(c.v, c.v); *cP = c; } + else if(c.v[6] < P[6]) { *cP = c; } + + else if(c.v[7] > P[7]) { subP(c.v, c.v); *cP = c; } + else { *cP = c; } } -void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int product_low[8]) +void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) { + unsigned int ZERO = 0; unsigned int high[8]; unsigned int hWord = 0; unsigned int carry = 0; + unsigned int t = 0; + unsigned int product6 = 0; + unsigned int product7 = 0; + // 256 x 256 multiply multiply256(a, b, high, product_low); // Add 2^32 * high to the low 256 bits (shift left 1 word and add) // Affects product[14] to product[6] - for(int i = 6; i >= 0; i--) { - product_low[i] = addc(product_low[i], high[i + 1], &carry); - } - unsigned int product7 = addc(high[0], 0, &carry); - 
unsigned int product6 = carry; + addc(&product_low[6], &high[7], &carry, &product_low[6]); + addc(&product_low[5], &high[6], &carry, &product_low[5]); + addc(&product_low[4], &high[5], &carry, &product_low[4]); + addc(&product_low[3], &high[4], &carry, &product_low[3]); + addc(&product_low[2], &high[3], &carry, &product_low[2]); + addc(&product_low[1], &high[2], &carry, &product_low[1]); + addc(&product_low[0], &high[1], &carry, &product_low[0]); + + addc(&high[0], &ZERO, &carry, &product7); + product6 = carry; carry = 0; // Multiply high by 977 and add to low // Affects product[15] to product[5] for(int i = 7; i >= 0; i--) { - unsigned int t = 0; - madd977(&hWord, &t, high[i], hWord); - product_low[i] = addc(product_low[i], t, &carry); + madd977(&hWord, &t, &high[i], &hWord); + addc(&product_low[i], &t, &carry, &product_low[i]); + t = 0; } - product7 = addc(product7, hWord, &carry); - product6 = addc(product6, 0, &carry); + addc(&product7, &hWord, &carry, &product7); + addc(&product6, &ZERO, &carry, &product6); // Multiply high 2 words by 2^32 and add to low // Affects product[14] to product[7] @@ -399,30 +340,33 @@ void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int prod product7 = 0; product6 = 0; - product_low[6] = addc(product_low[6], high[7], &carry); - product_low[5] = addc(product_low[5], high[6], &carry); + addc(&product_low[6], &high[7], &carry, &product_low[6]); + addc(&product_low[5], &high[6], &carry, &product_low[5]); + + addc(&product_low[4], &ZERO, &carry, &product_low[4]); + addc(&product_low[3], &ZERO, &carry, &product_low[3]); + addc(&product_low[2], &ZERO, &carry, &product_low[2]); + addc(&product_low[1], &ZERO, &carry, &product_low[1]); + addc(&product_low[0], &ZERO, &carry, &product_low[0]); - // Propagate the carry - for(int i = 4; i >= 0; i--) { - product_low[i] = addc(product_low[i], 0, &carry); - } product7 = carry; // Multiply top 2 words by 977 and add to low // Affects product[15] to product[7] carry = 0; hWord = 
0; - unsigned int t = 0; - madd977(&hWord, &t, high[7], hWord); - product_low[7] = addc(product_low[7], t, &carry); - madd977(&hWord, &t, high[6], hWord); - product_low[6] = addc(product_low[6], t, &carry); - product_low[5] = addc(product_low[5], hWord, &carry); + madd977(&hWord, &t, &high[7], &hWord); + addc(&product_low[7], &t, &carry, &product_low[7]); + madd977(&hWord, &t, &high[6], &hWord); + addc(&product_low[6], &t, &carry, &product_low[6]); + addc(&product_low[5], &hWord, &carry, &product_low[5]); // Propagate carry - for(int i = 4; i >= 0; i--) { - product_low[i] = addc(product_low[i], 0, &carry); - } + addc(&product_low[4], &ZERO, &carry, &product_low[4]); + addc(&product_low[3], &ZERO, &carry, &product_low[3]); + addc(&product_low[2], &ZERO, &carry, &product_low[2]); + addc(&product_low[1], &ZERO, &carry, &product_low[1]); + addc(&product_low[0], &ZERO, &carry, &product_low[0]); product7 = carry; // Reduce if >= P @@ -440,118 +384,50 @@ uint256_t mulModP256k(uint256_t a, uint256_t b) return c; } - -uint256_t squareModP256k(uint256_t a) +void mulModP256kv(uint256_t *a, uint256_t *b, uint256_t *c) { - uint256_t b; - mulModP(a.v, a.v, b.v); - - return b; + mulModP(a->v, b->v, c->v); } +void squareModP256k(uint256_t *a) +{ + mulModP(a->v, a->v, a->v); +} /** * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains */ -uint256_t invModP256k(uint256_t value) +uint256_t invModP256k(uint256_t x) { - uint256_t x = value; + uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + squareModP256k(&x); + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + squareModP256k(&x); + mulModP256kv(&x, &y, &y); + + for(int i = 0; i < 5; i++) { + squareModP256k(&x); + } - //unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 }; - uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; + for(int i = 0; i < 22; i++) { + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + } + + 
squareModP256k(&x); - // 0xd - 1101 - y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - - // 0x2 - 0010 - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - - // 0xc = 0x1100 - //y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - - - // 0xfffff - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - - - // 0xe - 1110 - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff - for(int i 
= 0; i < 219; i++) { - y = mulModP256k(x, y); - x = squareModP256k(x); + for(int i = 0; i < 222; i++) { + mulModP256kv(&x, &y, &y); + squareModP256k(&x); } - y = mulModP256k(x, y); - return y; + return mulModP256k(x, y); } @@ -579,8 +455,8 @@ void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* uint256_t x = xPtr[i]; - if(equal256k(px, x)) { - x = addModP256k(py, py); + if(equal256k(&px, &x)) { + addModP256k(&py,&py, &x); } else { // x = Gx - x x = subModP256k(px, x); @@ -623,8 +499,8 @@ void completeBatchAddWithDouble256k( s = mulModP256k(*inverse, c); uint256_t diff; - if(equal256k(px, x)) { - diff = addModP256k(py, py); + if(equal256k(&px, &x)) { + addModP256k(&py, &py, &diff); } else { diff = subModP256k(px, x); } @@ -635,32 +511,30 @@ void completeBatchAddWithDouble256k( } - if(equal256k(px, x)) { + if(equal256k(&px, &x)) { // currently s = 1 / 2y uint256_t x2; uint256_t tx2; - uint256_t x3; // 3x^2 - x2 = mulModP256k(x, x); - tx2 = addModP256k(x2, x2); - tx2 = addModP256k(x2, tx2); + mulModP256kv(&x, &x, &x2); + addModP256k(&x2, &x2, &tx2); + addModP256k(&x2, &tx2, &tx2); // s = 3x^2 * 1/2y - s = mulModP256k(tx2, s); + mulModP256kv(&tx2, &s, &s); // s^2 uint256_t s2; - s2 = mulModP256k(s, s); + mulModP256kv(&s, &s, &s2); // Rx = s^2 - 2px *newX = subModP256k(s2, x); *newX = subModP256k(*newX, x); // Ry = s(px - rx) - py - uint256_t k; - k = subModP256k(px, *newX); + uint256_t k = subModP256k(px, *newX); *newY = mulModP256k(s, k); *newY = subModP256k(*newY, py); } else { @@ -668,11 +542,11 @@ void completeBatchAddWithDouble256k( uint256_t rise; rise = subModP256k(py, y); - s = mulModP256k(rise, s); + mulModP256kv(&rise, &s, &s); // Rx = s^2 - Gx - Qx uint256_t s2; - s2 = mulModP256k(s, s); + mulModP256kv(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); @@ -719,24 +593,21 @@ void completeBatchAdd256k( s = *inverse; } - uint256_t y; - y = yPtr[i]; + uint256_t y = yPtr[i]; - uint256_t rise; - rise = 
subModP256k(py, y); + uint256_t rise = subModP256k(py, y); s = mulModP256k(rise, s); // Rx = s^2 - Gx - Qx uint256_t s2; - s2 = mulModP256k(s, s); + mulModP256kv(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); // Ry = s(px - rx) - py - uint256_t k; - k = subModP256k(px, *newX); + uint256_t k = subModP256k(px, *newX); *newY = mulModP256k(s, k); *newY = subModP256k(*newY, py); } diff --git a/clMath/sha256.cl b/clMath/sha256.cl index 7cd26ff6..0168959a 100644 --- a/clMath/sha256.cl +++ b/clMath/sha256.cl @@ -35,7 +35,7 @@ __constant unsigned int _IV[8] = { #define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10)) -#define round(a, b, c, d, e, f, g, h, m, k)\ +#define roundSha(a, b, c, d, e, f, g, h, m, k)\ t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\ (d) += (t) + (h);\ (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22)) @@ -74,22 +74,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned g = _IV[6]; h = _IV[7]; - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, w[9], _K[9]); - round(g, h, a, b, c, d, e, f, w[10], _K[10]); - round(f, g, h, a, b, c, d, e, w[11], _K[11]); - round(e, f, g, h, a, b, c, d, w[12], _K[12]); - round(d, e, f, g, h, a, b, c, w[13], _K[13]); - round(c, d, e, f, g, h, a, b, w[14], _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]); + 
roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[9]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[10]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[11]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[12]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[13]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[14]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -108,22 +108,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]); + roundSha(b, 
c, d, e, f, g, h, a, w[7], _K[23]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -142,22 +142,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]); + roundSha(g, h, a, 
b, c, d, e, f, w[10], _K[42]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -176,22 +176,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]); + roundSha(d, e, f, g, 
h, a, b, c, w[13], _K[61]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); a += _IV[0]; b += _IV[1]; @@ -216,22 +216,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[0] = (y[7] << 24) | 0x00800000; w[15] = 65 * 8; - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, 0, _K[1]); - round(g, h, a, b, c, d, e, f, 0, _K[2]); - round(f, g, h, a, b, c, d, e, 0, _K[3]); - round(e, f, g, h, a, b, c, d, 0, _K[4]); - round(d, e, f, g, h, a, b, c, 0, _K[5]); - round(c, d, e, f, g, h, a, b, 0, _K[6]); - round(b, c, d, e, f, g, h, a, 0, _K[7]); - round(a, b, c, d, e, f, g, h, 0, _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, _K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); + roundSha(h, a, b, c, d, e, f, g, 0, _K[1]); + roundSha(g, h, a, b, c, d, e, f, 0, _K[2]); + roundSha(f, g, h, a, b, c, d, e, 0, _K[3]); + roundSha(e, f, g, h, a, b, c, d, 0, _K[4]); + roundSha(d, e, f, g, h, a, b, c, 0, _K[5]); + roundSha(c, d, e, f, g, h, a, b, 0, _K[6]); + roundSha(b, c, d, e, f, g, h, a, 0, _K[7]); + roundSha(a, b, c, d, e, f, g, h, 0, _K[8]); + roundSha(h, a, b, c, d, e, f, g, 0, _K[9]); + roundSha(g, h, a, b, c, d, e, f, 0, _K[10]); + roundSha(f, g, h, a, b, c, d, e, 0, _K[11]); + roundSha(e, f, g, h, a, b, c, d, 0, _K[12]); + roundSha(d, e, f, g, h, a, b, c, 0, _K[13]); + roundSha(c, d, e, f, g, h, a, b, 0, _K[14]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]); w[0] = w[0] + s0(0) + 0 + s1(0); w[1] = 0 + s0(0) + 0 + s1(w[15]); @@ -250,22 +250,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + 
w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -284,22 +284,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], 
_K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -318,22 +318,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - 
round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); digest[0] = tmp[0] + a; digest[1] = tmp[1] + b; @@ -373,22 +373,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un g = _IV[6]; h = _IV[7]; - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); 
- round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, _K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]); + roundSha(h, a, b, c, d, e, f, g, 0, _K[9]); + roundSha(g, h, a, b, c, d, e, f, 0, _K[10]); + roundSha(f, g, h, a, b, c, d, e, 0, _K[11]); + roundSha(e, f, g, h, a, b, c, d, 0, _K[12]); + roundSha(d, e, f, g, h, a, b, c, 0, _K[13]); + roundSha(c, d, e, f, g, h, a, b, 0, _K[14]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]); w[0] = w[0] + s0(w[1]) + 0 + s1(0); w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]); @@ -407,22 +407,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); + roundSha(a, b, c, d, 
e, f, g, h, w[0], _K[16]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -441,22 +441,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]); + roundSha(f, g, h, a, b, 
c, d, e, w[3], _K[35]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); @@ -476,22 +476,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]); + roundSha(b, c, d, e, 
f, g, h, a, w[7], _K[55]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); a += _IV[0]; b += _IV[1]; From cca5e41ed80ccf644ab3c86b7c9baff09267660d Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:32:43 +0200 Subject: [PATCH 03/62] remove cuda files --- CudaKeySearchDevice/CudaAtomicList.cu | 116 --- CudaKeySearchDevice/CudaAtomicList.cuh | 8 - CudaKeySearchDevice/CudaAtomicList.h | 54 -- CudaKeySearchDevice/CudaDeviceKeys.cu | 397 --------- CudaKeySearchDevice/CudaDeviceKeys.cuh | 12 - CudaKeySearchDevice/CudaDeviceKeys.h | 81 -- CudaKeySearchDevice/CudaHashLookup.cu | 306 ------- CudaKeySearchDevice/CudaHashLookup.cuh | 6 - CudaKeySearchDevice/CudaHashLookup.h | 38 - CudaKeySearchDevice/CudaKeySearchDevice.cpp | 316 ------- CudaKeySearchDevice/CudaKeySearchDevice.cu | 261 ------ CudaKeySearchDevice/CudaKeySearchDevice.h | 91 -- .../CudaKeySearchDevice.vcxproj | 119 --- CudaKeySearchDevice/Makefile | 22 - CudaKeySearchDevice/cudabridge.cu | 33 - CudaKeySearchDevice/cudabridge.h | 19 - cudaInfo/Makefile | 9 - cudaInfo/cudaInfo.vcxproj | 94 -- cudaInfo/main.cpp | 34 - cudaMath/cudaMath.vcxproj | 90 -- cudaMath/ptx.cuh | 32 - cudaMath/ripemd160.cuh | 539 ------------ cudaMath/secp256k1.cuh | 802 ------------------ cudaMath/sha256.cuh | 545 ------------ cudaUtil/Makefile | 13 - cudaUtil/cudaUtil.cpp | 92 -- cudaUtil/cudaUtil.h | 42 - cudaUtil/cudaUtil.vcxproj | 160 ---- 28 files changed, 4331 deletions(-) delete mode 100644 CudaKeySearchDevice/CudaAtomicList.cu delete mode 100644 CudaKeySearchDevice/CudaAtomicList.cuh delete mode 100644 CudaKeySearchDevice/CudaAtomicList.h delete mode 100644 
CudaKeySearchDevice/CudaDeviceKeys.cu delete mode 100644 CudaKeySearchDevice/CudaDeviceKeys.cuh delete mode 100644 CudaKeySearchDevice/CudaDeviceKeys.h delete mode 100644 CudaKeySearchDevice/CudaHashLookup.cu delete mode 100644 CudaKeySearchDevice/CudaHashLookup.cuh delete mode 100644 CudaKeySearchDevice/CudaHashLookup.h delete mode 100644 CudaKeySearchDevice/CudaKeySearchDevice.cpp delete mode 100644 CudaKeySearchDevice/CudaKeySearchDevice.cu delete mode 100644 CudaKeySearchDevice/CudaKeySearchDevice.h delete mode 100644 CudaKeySearchDevice/CudaKeySearchDevice.vcxproj delete mode 100644 CudaKeySearchDevice/Makefile delete mode 100644 CudaKeySearchDevice/cudabridge.cu delete mode 100644 CudaKeySearchDevice/cudabridge.h delete mode 100644 cudaInfo/Makefile delete mode 100644 cudaInfo/cudaInfo.vcxproj delete mode 100644 cudaInfo/main.cpp delete mode 100644 cudaMath/cudaMath.vcxproj delete mode 100644 cudaMath/ptx.cuh delete mode 100644 cudaMath/ripemd160.cuh delete mode 100644 cudaMath/secp256k1.cuh delete mode 100644 cudaMath/sha256.cuh delete mode 100644 cudaUtil/Makefile delete mode 100644 cudaUtil/cudaUtil.cpp delete mode 100644 cudaUtil/cudaUtil.h delete mode 100644 cudaUtil/cudaUtil.vcxproj diff --git a/CudaKeySearchDevice/CudaAtomicList.cu b/CudaKeySearchDevice/CudaAtomicList.cu deleted file mode 100644 index dcf1096c..00000000 --- a/CudaKeySearchDevice/CudaAtomicList.cu +++ /dev/null @@ -1,116 +0,0 @@ -#include "CudaAtomicList.h" -#include "CudaAtomicList.cuh" - -#include - -#include -#include - -static __constant__ void *_LIST_BUF[1]; -static __constant__ unsigned int *_LIST_SIZE[1]; - - -__device__ void atomicListAdd(void *info, unsigned int size) -{ - unsigned int count = atomicAdd(_LIST_SIZE[0], 1); - - unsigned char *ptr = (unsigned char *)(_LIST_BUF[0]) + count * size; - - memcpy(ptr, info, size); -} - -static cudaError_t setListPtr(void *ptr, unsigned int *numResults) -{ - cudaError_t err = cudaMemcpyToSymbol(_LIST_BUF, &ptr, sizeof(void *)); - - 
if(err) { - return err; - } - - err = cudaMemcpyToSymbol(_LIST_SIZE, &numResults, sizeof(unsigned int *)); - - return err; -} - - -cudaError_t CudaAtomicList::init(unsigned int itemSize, unsigned int maxItems) -{ - _itemSize = itemSize; - - // The number of results found in the most recent kernel run - _countHostPtr = NULL; - cudaError_t err = cudaHostAlloc(&_countHostPtr, sizeof(unsigned int), cudaHostAllocMapped); - if(err) { - goto end; - } - - // Number of items in the list - _countDevPtr = NULL; - err = cudaHostGetDevicePointer(&_countDevPtr, _countHostPtr, 0); - if(err) { - goto end; - } - *_countHostPtr = 0; - - // Storage for results data - _hostPtr = NULL; - err = cudaHostAlloc(&_hostPtr, itemSize * maxItems, cudaHostAllocMapped); - if(err) { - goto end; - } - - // Storage for results data (device to host pointer) - _devPtr = NULL; - err = cudaHostGetDevicePointer(&_devPtr, _hostPtr, 0); - - if(err) { - goto end; - } - - err = setListPtr(_devPtr, _countDevPtr); - -end: - if(err) { - cudaFreeHost(_countHostPtr); - - cudaFree(_countDevPtr); - - cudaFreeHost(_hostPtr); - - cudaFree(_devPtr); - } - - return err; -} - -unsigned int CudaAtomicList::size() -{ - return *_countHostPtr; -} - -void CudaAtomicList::clear() -{ - *_countHostPtr = 0; -} - -unsigned int CudaAtomicList::read(void *ptr, unsigned int count) -{ - if(count >= *_countHostPtr) { - count = *_countHostPtr; - } - - memcpy(ptr, _hostPtr, count * _itemSize); - - return count; -} - -void CudaAtomicList::cleanup() -{ - cudaFreeHost(_countHostPtr); - - cudaFree(_countDevPtr); - - cudaFreeHost(_hostPtr); - - cudaFree(_devPtr); -} \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaAtomicList.cuh b/CudaKeySearchDevice/CudaAtomicList.cuh deleted file mode 100644 index 70dacb25..00000000 --- a/CudaKeySearchDevice/CudaAtomicList.cuh +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ATOMIC_LIST_CUH -#define _ATOMIC_LIST_CUH - -#include - -__device__ void atomicListAdd(void *info, unsigned int size); - 
-#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaAtomicList.h b/CudaKeySearchDevice/CudaAtomicList.h deleted file mode 100644 index 8bd9eeb2..00000000 --- a/CudaKeySearchDevice/CudaAtomicList.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _ATOMIC_LIST_HOST_H -#define _ATOMIC_LIST_HOST_H - -#include - -/** - A list that multiple device threads can append items to. Items can be - read and removed by the host - */ -class CudaAtomicList { - -private: - void *_devPtr; - - void *_hostPtr; - - unsigned int *_countHostPtr; - - unsigned int *_countDevPtr; - - unsigned int _maxSize; - - unsigned int _itemSize; - -public: - - CudaAtomicList() - { - _devPtr = NULL; - _hostPtr = NULL; - _countHostPtr = NULL; - _countDevPtr = NULL; - _maxSize = 0; - _itemSize = 0; - } - - ~CudaAtomicList() - { - cleanup(); - } - - cudaError_t init(unsigned int itemSize, unsigned int maxItems); - - unsigned int read(void *dest, unsigned int count); - - unsigned int size(); - - void clear(); - - void cleanup(); - -}; - -#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaDeviceKeys.cu b/CudaKeySearchDevice/CudaDeviceKeys.cu deleted file mode 100644 index d98dbaa7..00000000 --- a/CudaKeySearchDevice/CudaDeviceKeys.cu +++ /dev/null @@ -1,397 +0,0 @@ -#include -#include -#include - -#include "CudaDeviceKeys.h" -#include "CudaDeviceKeys.cuh" -#include "secp256k1.cuh" - - -__constant__ unsigned int *_xPtr[1]; - -__constant__ unsigned int *_yPtr[1]; - - -__device__ unsigned int *ec::getXPtr() -{ - return _xPtr[0]; -} - -__device__ unsigned int *ec::getYPtr() -{ - return _yPtr[0]; -} - -__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr); - - -int CudaDeviceKeys::getIndex(int block, int thread, int idx) -{ - // Total number of threads - int totalThreads = _blocks * _threads; - - int base = idx * totalThreads; - - // Global ID of the current 
thread - int threadId = block * _threads + thread; - - return base + threadId; -} - -void CudaDeviceKeys::splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i) -{ - unsigned int value[8] = { 0 }; - - i.exportWords(value, 8, secp256k1::uint256::BigEndian); - - int totalThreads = _blocks * _threads; - int threadId = block * _threads + thread; - - int base = idx * _blocks * _threads * 8; - - int index = base + threadId; - - for(int k = 0; k < 8; k++) { - dest[index] = value[k]; - index += totalThreads; - } -} - -secp256k1::uint256 CudaDeviceKeys::readBigInt(unsigned int *src, int block, int thread, int idx) -{ - unsigned int value[8] = { 0 }; - - int totalThreads = _blocks * _threads; - int threadId = block * _threads + thread; - - int base = idx * _blocks * _threads * 8; - - int index = base + threadId; - - for(int k = 0; k < 8; k++) { - value[k] = src[index]; - index += totalThreads; - } - - secp256k1::uint256 v(value, secp256k1::uint256::BigEndian); - - return v; -} - -/** -* Allocates device memory for storing the multiplication chain used in -the batch inversion operation -*/ -cudaError_t CudaDeviceKeys::allocateChainBuf(unsigned int count) -{ - cudaError_t err = cudaMalloc(&_devChain, count * sizeof(unsigned int) * 8); - - if(err) { - return err; - } - - return err; -} - -cudaError_t CudaDeviceKeys::initializeBasePoints() -{ - // generate a table of points G, 2G, 4G, 8G...(2^255)G - std::vector table; - - table.push_back(secp256k1::G()); - for(int i = 1; i < 256; i++) { - - secp256k1::ecpoint p = doublePoint(table[i - 1]); - if(!pointExists(p)) { - throw std::string("Point does not exist!"); - } - table.push_back(p); - } - - unsigned int count = 256; - - cudaError_t err = cudaMalloc(&_devBasePointX, sizeof(unsigned int) * count * 8); - - if(err) { - return err; - } - - err = cudaMalloc(&_devBasePointY, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - unsigned int *tmpX = new unsigned int[count * 8]; - 
unsigned int *tmpY = new unsigned int[count * 8]; - - for(int i = 0; i < 256; i++) { - unsigned int bufX[8]; - unsigned int bufY[8]; - table[i].x.exportWords(bufX, 8, secp256k1::uint256::BigEndian); - table[i].y.exportWords(bufY, 8, secp256k1::uint256::BigEndian); - - for(int j = 0; j < 8; j++) { - tmpX[i * 8 + j] = bufX[j]; - tmpY[i * 8 + j] = bufY[j]; - } - } - - err = cudaMemcpy(_devBasePointX, tmpX, count * 8 * sizeof(unsigned int), cudaMemcpyHostToDevice); - - delete[] tmpX; - - if(err) { - delete[] tmpY; - return err; - } - - err = cudaMemcpy(_devBasePointY, tmpY, count * 8 * sizeof(unsigned int), cudaMemcpyHostToDevice); - - delete[] tmpY; - - return err; -} - -cudaError_t CudaDeviceKeys::initializePublicKeys(size_t count) -{ - - // Allocate X array - cudaError_t err = cudaMalloc(&_devX, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - // Clear X array - err = cudaMemset(_devX, -1, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - // Allocate Y array - err = cudaMalloc(&_devY, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - // Clear Y array - err = cudaMemset(_devY, -1, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - err = cudaMemcpyToSymbol(_xPtr, &_devX, sizeof(unsigned int *)); - if(err) { - return err; - } - - err = cudaMemcpyToSymbol(_yPtr, &_devY, sizeof(unsigned int *)); - - return err; -} - -cudaError_t CudaDeviceKeys::init(int blocks, int threads, int pointsPerThread, const std::vector &privateKeys) -{ - _blocks = blocks; - _threads = threads; - _pointsPerThread = pointsPerThread; - - size_t count = privateKeys.size(); - - // Allocate space for public keys on device - cudaError_t err = initializePublicKeys(count); - - if(err) { - return err; - } - - err = initializeBasePoints(); - if(err) { - return err; - } - - // Allocate private keys on device - err = cudaMalloc(&_devPrivate, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - - // Clear private keys 
- err = cudaMemset(_devPrivate, 0, sizeof(unsigned int) * count * 8); - if(err) { - return err; - } - - err = allocateChainBuf(_threads * _blocks * _pointsPerThread); - if(err) { - return err; - } - - // Copy private keys to system memory buffer - unsigned int *tmp = new unsigned int[count * 8]; - - for(int block = 0; block < _blocks; block++) { - for(int thread = 0; thread < _threads; thread++) { - for(int idx = 0; idx < _pointsPerThread; idx++) { - - int index = getIndex(block, thread, idx); - - splatBigInt(tmp, block, thread, idx, privateKeys[index]); - } - } - } - - // Copy private keys to device memory - err = cudaMemcpy(_devPrivate, tmp, count * sizeof(unsigned int) * 8, cudaMemcpyHostToDevice); - - delete[] tmp; - - if(err) { - return err; - } - - return cudaSuccess; -} - -void CudaDeviceKeys::clearPublicKeys() -{ - cudaFree(_devX); - cudaFree(_devY); - - _devX = NULL; - _devY = NULL; -} - -void CudaDeviceKeys::clearPrivateKeys() -{ - cudaFree(_devBasePointX); - cudaFree(_devBasePointY); - cudaFree(_devPrivate); - cudaFree(_devChain); - - _devChain = NULL; - _devBasePointX = NULL; - _devBasePointY = NULL; - _devPrivate = NULL; -} - -cudaError_t CudaDeviceKeys::doStep() -{ - multiplyStepKernel <<<_blocks, _threads>>>(_devPrivate, _pointsPerThread, _step, _devChain, _devBasePointX, _devBasePointY); - - // Wait for kernel to complete - cudaError_t err = cudaDeviceSynchronize(); - fflush(stdout); - _step++; - return err; -} - -__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr) -{ - unsigned int *xPtr = ec::getXPtr(); - - unsigned int *yPtr = ec::getYPtr(); - - unsigned int gx[8]; - unsigned int gy[8]; - - for(int i = 0; i < 8; i++) { - gx[i] = gxPtr[step * 8 + i]; - gy[i] = gyPtr[step * 8 + i]; - } - - // Multiply together all (_Gx - x) and then invert - unsigned int inverse[8] = { 0,0,0,0,0,0,0,1 }; - - int batchIdx = 0; - for(int i = 0; i 
< pointsPerThread; i++) { - - unsigned int p[8]; - readInt(privateKeys, i, p); - unsigned int bit = p[7 - step / 32] & 1 << ((step % 32)); - - unsigned int x[8]; - readInt(xPtr, i, x); - - if(bit != 0) { - if(!isInfinity(x)) { - beginBatchAddWithDouble(gx, gy, xPtr, chain, i, batchIdx, inverse); - batchIdx++; - } - } - } - - doBatchInverse(inverse); - - for(int i = pointsPerThread - 1; i >= 0; i--) { - - unsigned int newX[8]; - unsigned int newY[8]; - - unsigned int p[8]; - readInt(privateKeys, i, p); - unsigned int bit = p[7 - step / 32] & 1 << ((step % 32)); - - unsigned int x[8]; - readInt(xPtr, i, x); - - bool infinity = isInfinity(x); - - if(bit != 0) { - if(!infinity) { - batchIdx--; - completeBatchAddWithDouble(gx, gy, xPtr, yPtr, i, batchIdx, chain, inverse, newX, newY); - } else { - copyBigInt(gx, newX); - copyBigInt(gy, newY); - } - - writeInt(xPtr, i, newX); - writeInt(yPtr, i, newY); - } - } -} - -bool CudaDeviceKeys::selfTest(const std::vector &privateKeys) -{ - unsigned int numPoints = _threads * _blocks * _pointsPerThread; - - unsigned int *xBuf = new unsigned int[numPoints * 8]; - unsigned int *yBuf = new unsigned int[numPoints * 8]; - - cudaError_t err = cudaMemcpy(xBuf, _devX, sizeof(unsigned int) * 8 * numPoints, cudaMemcpyDeviceToHost); - - err = cudaMemcpy(yBuf, _devY, sizeof(unsigned int) * 8 * numPoints, cudaMemcpyDeviceToHost); - - - for(int block = 0; block < _blocks; block++) { - for(int thread = 0; thread < _threads; thread++) { - for(int idx = 0; idx < _pointsPerThread; idx++) { - - int index = getIndex(block, thread, idx); - - secp256k1::uint256 privateKey = privateKeys[index]; - - secp256k1::uint256 x = readBigInt(xBuf, block, thread, idx); - secp256k1::uint256 y = readBigInt(yBuf, block, thread, idx); - - secp256k1::ecpoint p1(x, y); - secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G()); - - if(!secp256k1::pointExists(p1)) { - throw std::string("Validation failed: invalid point"); - } - - 
if(!secp256k1::pointExists(p2)) { - throw std::string("Validation failed: invalid point"); - } - - if(!(p1 == p2)) { - throw std::string("Validation failed: points do not match"); - } - } - } - } - - return true; -} \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaDeviceKeys.cuh b/CudaKeySearchDevice/CudaDeviceKeys.cuh deleted file mode 100644 index 3758b1ca..00000000 --- a/CudaKeySearchDevice/CudaDeviceKeys.cuh +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _EC_CUH -#define _EC_CUH - -#include - -namespace ec { - __device__ unsigned int *getXPtr(); - - __device__ unsigned int *getYPtr(); -} - -#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaDeviceKeys.h b/CudaKeySearchDevice/CudaDeviceKeys.h deleted file mode 100644 index f2407f10..00000000 --- a/CudaKeySearchDevice/CudaDeviceKeys.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef _EC_H -#define _EC_H - -#include -#include - -#include -#include "secp256k1.h" - - -class CudaDeviceKeys { - -private: - int _blocks; - - int _threads; - - int _pointsPerThread; - - unsigned int _numKeys; - - unsigned int *_devX; - - unsigned int *_devY; - - unsigned int *_devPrivate; - - unsigned int *_devChain; - - unsigned int *_devBasePointX; - - unsigned int *_devBasePointY; - - int _step; - - int getIndex(int block, int thread, int idx); - - void splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i); - - secp256k1::uint256 readBigInt(unsigned int *src, int block, int thread, int idx); - - cudaError_t allocateChainBuf(unsigned int count); - - cudaError_t initializePublicKeys(size_t count); - - cudaError_t initializeBasePoints(); - - -public: - - CudaDeviceKeys() - { - _numKeys = 0; - _devX = NULL; - _devY = NULL; - _devPrivate = NULL; - _devChain = NULL; - _devBasePointX = NULL; - _devBasePointY = NULL; - _step = 0; - } - - ~CudaDeviceKeys() - { - clearPublicKeys(); - clearPrivateKeys(); - } - - cudaError_t init(int blocks, int threads, int pointsPerThread, const 
std::vector &privateKeys); - - bool selfTest(const std::vector &privateKeys); - - cudaError_t doStep(); - - void clearPrivateKeys(); - - void clearPublicKeys(); - -}; - -#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaHashLookup.cu b/CudaKeySearchDevice/CudaHashLookup.cu deleted file mode 100644 index ce99ef2a..00000000 --- a/CudaKeySearchDevice/CudaHashLookup.cu +++ /dev/null @@ -1,306 +0,0 @@ -#include -#include -#include -#include - -#include "KeySearchDevice.h" - -#include "CudaHashLookup.h" - -#include "CudaHashLookup.cuh" - -#include "Logger.h" - -#include "util.h" - -#define MAX_TARGETS_CONSTANT_MEM 16 - -__constant__ unsigned int _TARGET_HASH[MAX_TARGETS_CONSTANT_MEM][5]; -__constant__ unsigned int _NUM_TARGET_HASHES[1]; -__constant__ unsigned int *_BLOOM_FILTER[1]; -__constant__ unsigned int _BLOOM_FILTER_MASK[1]; -__constant__ unsigned long long _BLOOM_FILTER_MASK64[1]; - -__constant__ unsigned int _USE_BLOOM_FILTER[1]; - - -static unsigned int swp(unsigned int x) -{ - return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24); -} - -static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) -{ - unsigned int iv[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 - }; - - for(int i = 0; i < 5; i++) { - hOut[i] = swp(hIn[i]) - iv[(i + 1) % 5]; - } -} - -/** -Copies the target hashes to constant memory -*/ -cudaError_t CudaHashLookup::setTargetConstantMemory(const std::vector &targets) -{ - size_t count = targets.size(); - - for(size_t i = 0; i < count; i++) { - unsigned int h[5]; - - undoRMD160FinalRound(targets[i].h, h); - - cudaError_t err = cudaMemcpyToSymbol(_TARGET_HASH, h, sizeof(unsigned int) * 5, i * sizeof(unsigned int) * 5); - - if(err) { - return err; - } - } - - cudaError_t err = cudaMemcpyToSymbol(_NUM_TARGET_HASHES, &count, sizeof(unsigned int)); - if(err) { - return err; - } - - unsigned int useBloomFilter = 0; - - err = 
cudaMemcpyToSymbol(_USE_BLOOM_FILTER, &useBloomFilter, sizeof(bool)); - if(err) { - return err; - } - - return cudaSuccess; -} - -/** -Returns the optimal bloom filter size in bits given the probability of false-positives and the -number of hash functions -*/ -unsigned int CudaHashLookup::getOptimalBloomFilterBits(double p, size_t n) -{ - double m = 3.6 * ceil((n * log(p)) / log(1 / pow(2, log(2)))); - - return (unsigned int)ceil(log(m) / log(2)); -} - -void CudaHashLookup::initializeBloomFilter(const std::vector &targets, unsigned int *filter, unsigned int mask) -{ - // Use the low 16 bits of each word in the hash as the index into the bloom filter - for(unsigned int i = 0; i < targets.size(); i++) { - - unsigned int h[5]; - - undoRMD160FinalRound(targets[i].h, h); - - for(int j = 0; j < 5; j++) { - unsigned int idx = h[j] & mask; - - filter[idx / 32] |= (0x01 << (idx % 32)); - } - - } -} - -void CudaHashLookup::initializeBloomFilter64(const std::vector &targets, unsigned int *filter, unsigned long long mask) -{ - for(unsigned int k = 0; k < targets.size(); k++) { - - unsigned int hash[5]; - - unsigned long long idx[5]; - - undoRMD160FinalRound(targets[k].h, hash); - - idx[0] = ((unsigned long long)hash[0] << 32 | hash[1]) & mask; - idx[1] = ((unsigned long long)hash[2] << 32 | hash[3]) & mask; - idx[2] = ((unsigned long long)(hash[0]^hash[1]) << 32 | (hash[1]^hash[2])) & mask; - idx[3] = ((unsigned long long)(hash[2]^hash[3]) << 32 | (hash[3] ^ hash[4])) & mask; - idx[4] = ((unsigned long long)(hash[0]^hash[3]) << 32 | (hash[1]^hash[3])) & mask; - - for(int i = 0; i < 5; i++) { - - filter[idx[i] / 32] |= (0x01 << (idx[i] % 32)); - } - } -} - -/** -Populates the bloom filter with the target hashes -*/ -cudaError_t CudaHashLookup::setTargetBloomFilter(const std::vector &targets) -{ - unsigned int bloomFilterBits = getOptimalBloomFilterBits(1.0e-9, targets.size()); - - unsigned long long bloomFilterSizeWords = (unsigned long long)1 << (bloomFilterBits - 5); - 
unsigned long long bloomFilterBytes = (unsigned long long)1 << (bloomFilterBits - 3); - unsigned long long bloomFilterMask = (((unsigned long long)1 << bloomFilterBits) - 1); - - Logger::log(LogLevel::Info, "Allocating bloom filter (" + util::format("%.1f", (double)bloomFilterBytes/(double)(1024*1024)) + "MB)"); - - unsigned int *filter = NULL; - - try { - filter = new unsigned int[bloomFilterSizeWords]; - } catch(std::bad_alloc) { - Logger::log(LogLevel::Error, "Out of system memory"); - - return cudaErrorMemoryAllocation; - } - - cudaError_t err = cudaMalloc(&_bloomFilterPtr, bloomFilterBytes); - - if(err) { - Logger::log(LogLevel::Error, "Device error: " + std::string(cudaGetErrorString(err))); - delete[] filter; - return err; - } - - memset(filter, 0, sizeof(unsigned int) * bloomFilterSizeWords); - if(bloomFilterBits > 32) { - initializeBloomFilter64(targets, filter, bloomFilterMask); - } else { - initializeBloomFilter(targets, filter, (unsigned int)bloomFilterMask); - } - - // Copy to device - err = cudaMemcpy(_bloomFilterPtr, filter, sizeof(unsigned int) * bloomFilterSizeWords, cudaMemcpyHostToDevice); - if(err) { - cudaFree(_bloomFilterPtr); - _bloomFilterPtr = NULL; - delete[] filter; - return err; - } - - // Copy device memory pointer to constant memory - err = cudaMemcpyToSymbol(_BLOOM_FILTER, &_bloomFilterPtr, sizeof(unsigned int *)); - if(err) { - cudaFree(_bloomFilterPtr); - _bloomFilterPtr = NULL; - delete[] filter; - return err; - } - - // Copy device memory pointer to constant memory - if(bloomFilterBits <= 32) { - err = cudaMemcpyToSymbol(_BLOOM_FILTER_MASK, &bloomFilterMask, sizeof(unsigned int *)); - if(err) { - cudaFree(_bloomFilterPtr); - _bloomFilterPtr = NULL; - delete[] filter; - return err; - } - } else { - err = cudaMemcpyToSymbol(_BLOOM_FILTER_MASK64, &bloomFilterMask, sizeof(unsigned long long *)); - if(err) { - cudaFree(_bloomFilterPtr); - _bloomFilterPtr = NULL; - delete[] filter; - return err; - } - } - - unsigned int useBloomFilter = 
bloomFilterBits <= 32 ? 1 : 2; - - err = cudaMemcpyToSymbol(_USE_BLOOM_FILTER, &useBloomFilter, sizeof(unsigned int)); - - delete[] filter; - - return err; -} - -/** -*Copies the target hashes to either constant memory, or the bloom filter depending -on how many targets there are -*/ -cudaError_t CudaHashLookup::setTargets(const std::vector &targets) -{ - cleanup(); - - if(targets.size() <= MAX_TARGETS_CONSTANT_MEM) { - return setTargetConstantMemory(targets); - } else { - return setTargetBloomFilter(targets); - } -} - -void CudaHashLookup::cleanup() -{ - if(_bloomFilterPtr != NULL) { - cudaFree(_bloomFilterPtr); - _bloomFilterPtr = NULL; - } -} - -__device__ bool checkBloomFilter(const unsigned int hash[5]) -{ - bool foundMatch = true; - - unsigned int mask = _BLOOM_FILTER_MASK[0]; - unsigned int *bloomFilter = _BLOOM_FILTER[0]; - - for(int i = 0; i < 5; i++) { - unsigned int idx = hash[i] & mask; - - unsigned int f = bloomFilter[idx / 32]; - - if((f & (0x01 << (idx % 32))) == 0) { - foundMatch = false; - } - } - - return foundMatch; -} - -__device__ bool checkBloomFilter64(const unsigned int hash[5]) -{ - bool foundMatch = true; - - unsigned long long mask = _BLOOM_FILTER_MASK64[0]; - unsigned int *bloomFilter = _BLOOM_FILTER[0]; - unsigned long long idx[5]; - - idx[0] = ((unsigned long long)hash[0] << 32 | hash[1]) & mask; - idx[1] = ((unsigned long long)hash[2] << 32 | hash[3]) & mask; - idx[2] = ((unsigned long long)(hash[0] ^ hash[1]) << 32 | (hash[1] ^ hash[2])) & mask; - idx[3] = ((unsigned long long)(hash[2] ^ hash[3]) << 32 | (hash[3] ^ hash[4])) & mask; - idx[4] = ((unsigned long long)(hash[0] ^ hash[3]) << 32 | (hash[1] ^ hash[3])) & mask; - - for(int i = 0; i < 5; i++) { - unsigned int f = bloomFilter[idx[i] / 32]; - - if((f & (0x01 << (idx[i] % 32))) == 0) { - foundMatch = false; - } - } - - return foundMatch; -} - - -__device__ bool checkHash(const unsigned int hash[5]) -{ - bool foundMatch = false; - - if(*_USE_BLOOM_FILTER == 1) { - return 
checkBloomFilter(hash); - } else if(*_USE_BLOOM_FILTER == 2) { - return checkBloomFilter64(hash); - } else { - for(int j = 0; j < *_NUM_TARGET_HASHES; j++) { - bool equal = true; - for(int i = 0; i < 5; i++) { - equal &= (hash[i] == _TARGET_HASH[j][i]); - } - - foundMatch |= equal; - } - } - - return foundMatch; -} \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaHashLookup.cuh b/CudaKeySearchDevice/CudaHashLookup.cuh deleted file mode 100644 index 83b39820..00000000 --- a/CudaKeySearchDevice/CudaHashLookup.cuh +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ADDRESS_LOOKUP_CUH -#define _ADDRESS_LOOKUP_CUH - -__device__ bool checkHash(const unsigned int hash[5]); - -#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaHashLookup.h b/CudaKeySearchDevice/CudaHashLookup.h deleted file mode 100644 index 8e8d87ec..00000000 --- a/CudaKeySearchDevice/CudaHashLookup.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _HASH_LOOKUP_HOST_H -#define _HASH_LOOKUP_HOST_H - -#include - -class CudaHashLookup { - -private: - unsigned int *_bloomFilterPtr; - - cudaError_t setTargetBloomFilter(const std::vector &targets); - - cudaError_t setTargetConstantMemory(const std::vector &targets); - - unsigned int getOptimalBloomFilterBits(double p, size_t n); - - void cleanup(); - - void initializeBloomFilter(const std::vector &targets, unsigned int *filter, unsigned int mask); - - void initializeBloomFilter64(const std::vector &targets, unsigned int *filter, unsigned long long mask); - -public: - - CudaHashLookup() - { - _bloomFilterPtr = NULL; - } - - ~CudaHashLookup() - { - cleanup(); - } - - cudaError_t setTargets(const std::vector &targets); -}; - -#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.cpp b/CudaKeySearchDevice/CudaKeySearchDevice.cpp deleted file mode 100644 index aad1fd31..00000000 --- a/CudaKeySearchDevice/CudaKeySearchDevice.cpp +++ /dev/null @@ -1,316 +0,0 @@ -#include "CudaKeySearchDevice.h" -#include "Logger.h" 
-#include "util.h" -#include "cudabridge.h" -#include "AddressUtil.h" - -void CudaKeySearchDevice::cudaCall(cudaError_t err) -{ - if(err) { - std::string errStr = cudaGetErrorString(err); - - throw KeySearchException(errStr); - } -} - -CudaKeySearchDevice::CudaKeySearchDevice(int device, int threads, int pointsPerThread, int blocks) -{ - cuda::CudaDeviceInfo info; - try { - info = cuda::getDeviceInfo(device); - _deviceName = info.name; - } catch(cuda::CudaException ex) { - throw KeySearchException(ex.msg); - } - - if(threads <= 0 || threads % 32 != 0) { - throw KeySearchException("The number of threads must be a multiple of 32"); - } - - if(pointsPerThread <= 0) { - throw KeySearchException("At least 1 point per thread required"); - } - - // Specifying blocks on the commandline is depcreated but still supported. If there is no value for - // blocks, devide the threads evenly among the multi-processors - if(blocks == 0) { - if(threads % info.mpCount != 0) { - throw KeySearchException("The number of threads must be a multiple of " + util::format("%d", info.mpCount)); - } - - _threads = threads / info.mpCount; - - _blocks = info.mpCount; - - while(_threads > 512) { - _threads /= 2; - _blocks *= 2; - } - } else { - _threads = threads; - _blocks = blocks; - } - - _iterations = 0; - - _device = device; - - _pointsPerThread = pointsPerThread; -} - -void CudaKeySearchDevice::init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride) -{ - if(start.cmp(secp256k1::N) >= 0) { - throw KeySearchException("Starting key is out of range"); - } - - _startExponent = start; - - _compression = compression; - - _stride = stride; - - cudaCall(cudaSetDevice(_device)); - - // Block on kernel calls - cudaCall(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync)); - - // Use a larger portion of shared memory for L1 cache - cudaCall(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); - - generateStartingPoints(); - - cudaCall(allocateChainBuf(_threads * _blocks * 
_pointsPerThread)); - - // Set the incrementor - secp256k1::ecpoint g = secp256k1::G(); - secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_threads * _blocks * _pointsPerThread) * _stride, g); - - cudaCall(_resultList.init(sizeof(CudaDeviceResult), 16)); - - cudaCall(setIncrementorPoint(p.x, p.y)); -} - - -void CudaKeySearchDevice::generateStartingPoints() -{ - uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks; - uint64_t totalMemory = totalPoints * 40; - - std::vector exponents; - - Logger::log(LogLevel::Info, "Generating " + util::formatThousands(totalPoints) + " starting points (" + util::format("%.1f", (double)totalMemory / (double)(1024 * 1024)) + "MB)"); - - // Generate key pairs for k, k+1, k+2 ... k + - secp256k1::uint256 privKey = _startExponent; - - exponents.push_back(privKey); - - for(uint64_t i = 1; i < totalPoints; i++) { - privKey = privKey.add(_stride); - exponents.push_back(privKey); - } - - cudaCall(_deviceKeys.init(_blocks, _threads, _pointsPerThread, exponents)); - - // Show progress in 10% increments - double pct = 10.0; - for(int i = 1; i <= 256; i++) { - cudaCall(_deviceKeys.doStep()); - - if(((double)i / 256.0) * 100.0 >= pct) { - Logger::log(LogLevel::Info, util::format("%.1f%%", pct)); - pct += 10.0; - } - } - - Logger::log(LogLevel::Info, "Done"); - - _deviceKeys.clearPrivateKeys(); -} - - -void CudaKeySearchDevice::setTargets(const std::set &targets) -{ - _targets.clear(); - - for(std::set::iterator i = targets.begin(); i != targets.end(); ++i) { - hash160 h(i->value); - _targets.push_back(h); - } - - cudaCall(_targetLookup.setTargets(_targets)); -} - -void CudaKeySearchDevice::doStep() -{ - uint64_t numKeys = (uint64_t)_blocks * _threads * _pointsPerThread; - - try { - if(_iterations < 2 && _startExponent.cmp(numKeys) <= 0) { - callKeyFinderKernel(_blocks, _threads, _pointsPerThread, true, _compression); - } else { - callKeyFinderKernel(_blocks, _threads, _pointsPerThread, false, 
_compression); - } - } catch(cuda::CudaException ex) { - throw KeySearchException(ex.msg); - } - - getResultsInternal(); - - _iterations++; -} - -uint64_t CudaKeySearchDevice::keysPerStep() -{ - return (uint64_t)_blocks * _threads * _pointsPerThread; -} - -std::string CudaKeySearchDevice::getDeviceName() -{ - return _deviceName; -} - -void CudaKeySearchDevice::getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem) -{ - cudaCall(cudaMemGetInfo(&freeMem, &totalMem)); -} - -void CudaKeySearchDevice::removeTargetFromList(const unsigned int hash[5]) -{ - size_t count = _targets.size(); - - while(count) { - if(memcmp(hash, _targets[count - 1].h, 20) == 0) { - _targets.erase(_targets.begin() + count - 1); - return; - } - count--; - } -} - -bool CudaKeySearchDevice::isTargetInList(const unsigned int hash[5]) -{ - size_t count = _targets.size(); - - while(count) { - if(memcmp(hash, _targets[count - 1].h, 20) == 0) { - return true; - } - count--; - } - - return false; -} - -uint32_t CudaKeySearchDevice::getPrivateKeyOffset(int thread, int block, int idx) -{ - // Total number of threads - int totalThreads = _blocks * _threads; - - int base = idx * totalThreads; - - // Global ID of the current thread - int threadId = block * _threads + thread; - - return base + threadId; -} - -void CudaKeySearchDevice::getResultsInternal() -{ - int count = _resultList.size(); - int actualCount = 0; - if(count == 0) { - return; - } - - unsigned char *ptr = new unsigned char[count * sizeof(CudaDeviceResult)]; - - _resultList.read(ptr, count); - - for(int i = 0; i < count; i++) { - struct CudaDeviceResult *rPtr = &((struct CudaDeviceResult *)ptr)[i]; - - // might be false-positive - if(!isTargetInList(rPtr->digest)) { - continue; - } - actualCount++; - - KeySearchResult minerResult; - - // Calculate the private key based on the number of iterations and the current thread - secp256k1::uint256 offset = (secp256k1::uint256((uint64_t)_blocks * _threads * _pointsPerThread * _iterations) + 
secp256k1::uint256(getPrivateKeyOffset(rPtr->thread, rPtr->block, rPtr->idx))) * _stride; - secp256k1::uint256 privateKey = secp256k1::addModN(_startExponent, offset); - - minerResult.privateKey = privateKey; - minerResult.compressed = rPtr->compressed; - - memcpy(minerResult.hash, rPtr->digest, 20); - - minerResult.publicKey = secp256k1::ecpoint(secp256k1::uint256(rPtr->x, secp256k1::uint256::BigEndian), secp256k1::uint256(rPtr->y, secp256k1::uint256::BigEndian)); - - removeTargetFromList(rPtr->digest); - - _results.push_back(minerResult); - } - - delete[] ptr; - - _resultList.clear(); - - // Reload the bloom filters - if(actualCount) { - cudaCall(_targetLookup.setTargets(_targets)); - } -} - -// Verify a private key produces the public key and hash -bool CudaKeySearchDevice::verifyKey(const secp256k1::uint256 &privateKey, const secp256k1::ecpoint &publicKey, const unsigned int hash[5], bool compressed) -{ - secp256k1::ecpoint g = secp256k1::G(); - - secp256k1::ecpoint p = secp256k1::multiplyPoint(privateKey, g); - - if(!(p == publicKey)) { - return false; - } - - unsigned int xWords[8]; - unsigned int yWords[8]; - - p.x.exportWords(xWords, 8, secp256k1::uint256::BigEndian); - p.y.exportWords(yWords, 8, secp256k1::uint256::BigEndian); - - unsigned int digest[5]; - if(compressed) { - Hash::hashPublicKeyCompressed(xWords, yWords, digest); - } else { - Hash::hashPublicKey(xWords, yWords, digest); - } - - for(int i = 0; i < 5; i++) { - if(digest[i] != hash[i]) { - return false; - } - } - - return true; -} - -size_t CudaKeySearchDevice::getResults(std::vector &resultsOut) -{ - for(int i = 0; i < _results.size(); i++) { - resultsOut.push_back(_results[i]); - } - _results.clear(); - - return resultsOut.size(); -} - -secp256k1::uint256 CudaKeySearchDevice::getNextKey() -{ - uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks; - - return _startExponent + secp256k1::uint256(totalPoints) * _iterations * _stride; -} \ No newline at end of file diff --git 
a/CudaKeySearchDevice/CudaKeySearchDevice.cu b/CudaKeySearchDevice/CudaKeySearchDevice.cu deleted file mode 100644 index cbb79ad6..00000000 --- a/CudaKeySearchDevice/CudaKeySearchDevice.cu +++ /dev/null @@ -1,261 +0,0 @@ -#include -#include -#include -#include "KeySearchTypes.h" -#include "CudaKeySearchDevice.h" -#include "ptx.cuh" -#include "secp256k1.cuh" - -#include "sha256.cuh" -#include "ripemd160.cuh" - -#include "secp256k1.h" - -#include "CudaHashLookup.cuh" -#include "CudaAtomicList.cuh" -#include "CudaDeviceKeys.cuh" - -__constant__ unsigned int _INC_X[8]; - -__constant__ unsigned int _INC_Y[8]; - -__constant__ unsigned int *_CHAIN[1]; - -static unsigned int *_chainBufferPtr = NULL; - - -__device__ void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) -{ - const unsigned int iv[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 - }; - - for(int i = 0; i < 5; i++) { - hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]); - } -} - - -/** - * Allocates device memory for storing the multiplication chain used in - the batch inversion operation - */ -cudaError_t allocateChainBuf(unsigned int count) -{ - cudaError_t err = cudaMalloc(&_chainBufferPtr, count * sizeof(unsigned int) * 8); - - if(err) { - return err; - } - - err = cudaMemcpyToSymbol(_CHAIN, &_chainBufferPtr, sizeof(unsigned int *)); - if(err) { - cudaFree(_chainBufferPtr); - } - - return err; -} - -void cleanupChainBuf() -{ - if(_chainBufferPtr != NULL) { - cudaFree(_chainBufferPtr); - _chainBufferPtr = NULL; - } -} - -/** - *Sets the EC point which all points will be incremented by - */ -cudaError_t setIncrementorPoint(const secp256k1::uint256 &x, const secp256k1::uint256 &y) -{ - unsigned int xWords[8]; - unsigned int yWords[8]; - - x.exportWords(xWords, 8, secp256k1::uint256::BigEndian); - y.exportWords(yWords, 8, secp256k1::uint256::BigEndian); - - cudaError_t err = cudaMemcpyToSymbol(_INC_X, xWords, sizeof(unsigned int) * 8); - if(err) { - return err; - } - - 
return cudaMemcpyToSymbol(_INC_Y, yWords, sizeof(unsigned int) * 8); -} - - - -__device__ void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digestOut) -{ - unsigned int hash[8]; - - sha256PublicKey(x, y, hash); - - // Swap to little-endian - for(int i = 0; i < 8; i++) { - hash[i] = endian(hash[i]); - } - - ripemd160sha256NoFinal(hash, digestOut); -} - -__device__ void hashPublicKeyCompressed(const unsigned int *x, unsigned int yParity, unsigned int *digestOut) -{ - unsigned int hash[8]; - - sha256PublicKeyCompressed(x, yParity, hash); - - // Swap to little-endian - for(int i = 0; i < 8; i++) { - hash[i] = endian(hash[i]); - } - - ripemd160sha256NoFinal(hash, digestOut); -} - - -__device__ void setResultFound(int idx, bool compressed, unsigned int x[8], unsigned int y[8], unsigned int digest[5]) -{ - CudaDeviceResult r; - - r.block = blockIdx.x; - r.thread = threadIdx.x; - r.idx = idx; - r.compressed = compressed; - - for(int i = 0; i < 8; i++) { - r.x[i] = x[i]; - r.y[i] = y[i]; - } - - doRMD160FinalRound(digest, r.digest); - - atomicListAdd(&r, sizeof(r)); -} - -__device__ void doIteration(int pointsPerThread, int compression) -{ - unsigned int *chain = _CHAIN[0]; - unsigned int *xPtr = ec::getXPtr(); - unsigned int *yPtr = ec::getYPtr(); - - // Multiply together all (_Gx - x) and then invert - unsigned int inverse[8] = {0,0,0,0,0,0,0,1}; - for(int i = 0; i < pointsPerThread; i++) { - unsigned int x[8]; - - unsigned int digest[5]; - - readInt(xPtr, i, x); - - if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) { - unsigned int y[8]; - readInt(yPtr, i, y); - - hashPublicKey(x, y, digest); - - if(checkHash(digest)) { - setResultFound(i, false, x, y, digest); - } - } - - if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) { - hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest); - - if(checkHash(digest)) { - unsigned int y[8]; - readInt(yPtr, i, 
y); - setResultFound(i, true, x, y, digest); - } - } - - beginBatchAdd(_INC_X, x, chain, i, i, inverse); - } - - doBatchInverse(inverse); - - for(int i = pointsPerThread - 1; i >= 0; i--) { - - unsigned int newX[8]; - unsigned int newY[8]; - - completeBatchAdd(_INC_X, _INC_Y, xPtr, yPtr, i, i, chain, inverse, newX, newY); - - writeInt(xPtr, i, newX); - writeInt(yPtr, i, newY); - } -} - -__device__ void doIterationWithDouble(int pointsPerThread, int compression) -{ - unsigned int *chain = _CHAIN[0]; - unsigned int *xPtr = ec::getXPtr(); - unsigned int *yPtr = ec::getYPtr(); - - // Multiply together all (_Gx - x) and then invert - unsigned int inverse[8] = {0,0,0,0,0,0,0,1}; - for(int i = 0; i < pointsPerThread; i++) { - unsigned int x[8]; - - unsigned int digest[5]; - - readInt(xPtr, i, x); - - // uncompressed - if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) { - unsigned int y[8]; - readInt(yPtr, i, y); - hashPublicKey(x, y, digest); - - if(checkHash(digest)) { - setResultFound(i, false, x, y, digest); - } - } - - // compressed - if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) { - - hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest); - - if(checkHash(digest)) { - - unsigned int y[8]; - readInt(yPtr, i, y); - - setResultFound(i, true, x, y, digest); - } - } - - beginBatchAddWithDouble(_INC_X, _INC_Y, xPtr, chain, i, i, inverse); - } - - doBatchInverse(inverse); - - for(int i = pointsPerThread - 1; i >= 0; i--) { - - unsigned int newX[8]; - unsigned int newY[8]; - - completeBatchAddWithDouble(_INC_X, _INC_Y, xPtr, yPtr, i, i, chain, inverse, newX, newY); - - writeInt(xPtr, i, newX); - writeInt(yPtr, i, newY); - } -} - -/** -* Performs a single iteration -*/ -__global__ void keyFinderKernel(int points, int compression) -{ - doIteration(points, compression); -} - -__global__ void keyFinderKernelWithDouble(int points, int compression) -{ - 
doIterationWithDouble(points, compression); -} \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.h b/CudaKeySearchDevice/CudaKeySearchDevice.h deleted file mode 100644 index fb8d1940..00000000 --- a/CudaKeySearchDevice/CudaKeySearchDevice.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef _CUDA_KEY_SEARCH_DEVICE -#define _CUDA_KEY_SEARCH_DEVICE - -#include "KeySearchDevice.h" -#include -#include -#include "secp256k1.h" -#include "CudaDeviceKeys.h" -#include "CudaHashLookup.h" -#include "CudaAtomicList.h" -#include "cudaUtil.h" - -// Structures that exist on both host and device side -struct CudaDeviceResult { - int thread; - int block; - int idx; - bool compressed; - unsigned int x[8]; - unsigned int y[8]; - unsigned int digest[5]; -}; - -class CudaKeySearchDevice : public KeySearchDevice { - -private: - - int _device; - - int _blocks; - - int _threads; - - int _pointsPerThread; - - int _compression; - - std::vector _results; - - std::string _deviceName; - - secp256k1::uint256 _startExponent; - - uint64_t _iterations; - - void cudaCall(cudaError_t err); - - void generateStartingPoints(); - - CudaDeviceKeys _deviceKeys; - - CudaAtomicList _resultList; - - CudaHashLookup _targetLookup; - - void getResultsInternal(); - - std::vector _targets; - - bool isTargetInList(const unsigned int hash[5]); - - void removeTargetFromList(const unsigned int hash[5]); - - uint32_t getPrivateKeyOffset(int thread, int block, int point); - - secp256k1::uint256 _stride; - - bool verifyKey(const secp256k1::uint256 &privateKey, const secp256k1::ecpoint &publicKey, const unsigned int hash[5], bool compressed); - -public: - - CudaKeySearchDevice(int device, int threads, int pointsPerThread, int blocks = 0); - - virtual void init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride); - - virtual void doStep(); - - virtual void setTargets(const std::set &targets); - - virtual size_t getResults(std::vector &results); - - virtual uint64_t 
keysPerStep(); - - virtual std::string getDeviceName(); - - virtual void getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem); - - virtual secp256k1::uint256 getNextKey(); -}; - -#endif \ No newline at end of file diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj b/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj deleted file mode 100644 index 490c53e3..00000000 --- a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj +++ /dev/null @@ -1,119 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - - - - - - - - - - - - - - - - - - - - - - {150af404-1f80-4a13-855b-4383c4a3326f} - - - - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946} - CudaKeySearchDevice - 10.0 - - - - StaticLibrary - true - MultiByte - v142 - - - StaticLibrary - false - true - MultiByte - v142 - - - - - - - - - - - - - - - - true - - - - Level4 - Disabled - WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - %(AdditionalUsingDirectories) - $(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE) - - - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - true - %(CodeGeneration) - - - - - Level4 - MaxSpeed - true - true - WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - %(AdditionalUsingDirectories) - $(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE) - - - true - true - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - true - %(CodeGeneration) - - - - - - - \ No newline at end of file diff --git 
a/CudaKeySearchDevice/Makefile b/CudaKeySearchDevice/Makefile deleted file mode 100644 index 5782c63e..00000000 --- a/CudaKeySearchDevice/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -NAME=CudaKeySearchDevice -CPPSRC:=$(wildcard *.cpp) -CUSRC:=$(wildcard *.cu) - -all: cuda - -cuda: - for file in ${CPPSRC} ; do\ - ${CXX} -c $$file ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS};\ - done - - for file in ${CUSRC} ; do\ - ${NVCC} -c $$file -o $$file".o" ${NVCCFLAGS} -rdc=true ${INCLUDE} -I${CUDA_INCLUDE} -I${CUDA_MATH};\ - done - - ${NVCC} -dlink -o cuda_libs.o *.cu.o -lcudadevrt -lcudart - - ar rvs ${LIBDIR}/lib$(NAME).a *.o - -clean: - rm -f *.o *.cu.o - rm -f *.a \ No newline at end of file diff --git a/CudaKeySearchDevice/cudabridge.cu b/CudaKeySearchDevice/cudabridge.cu deleted file mode 100644 index 33325d97..00000000 --- a/CudaKeySearchDevice/cudabridge.cu +++ /dev/null @@ -1,33 +0,0 @@ -#include "cudabridge.h" - - -__global__ void keyFinderKernel(int points, int compression); -__global__ void keyFinderKernelWithDouble(int points, int compression); - -void callKeyFinderKernel(int blocks, int threads, int points, bool useDouble, int compression) -{ - if(useDouble) { - keyFinderKernelWithDouble <<>>(points, compression); - } else { - keyFinderKernel <<>> (points, compression); - } - waitForKernel(); -} - - -void waitForKernel() -{ - // Check for kernel launch error - cudaError_t err = cudaGetLastError(); - - if(err != cudaSuccess) { - throw cuda::CudaException(err); - } - - // Wait for kernel to complete - err = cudaDeviceSynchronize(); - fflush(stdout); - if(err != cudaSuccess) { - throw cuda::CudaException(err); - } -} \ No newline at end of file diff --git a/CudaKeySearchDevice/cudabridge.h b/CudaKeySearchDevice/cudabridge.h deleted file mode 100644 index eaafe3a2..00000000 --- a/CudaKeySearchDevice/cudabridge.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _BRIDGE_H -#define _BRIDGE_H - -#include -#include -#include -#include "cudaUtil.h" -#include "secp256k1.h" - - -void 
callKeyFinderKernel(int blocks, int threads, int points, bool useDouble, int compression); - -void waitForKernel(); - -cudaError_t setIncrementorPoint(const secp256k1::uint256 &x, const secp256k1::uint256 &y); -cudaError_t allocateChainBuf(unsigned int count); -void cleanupChainBuf(); - -#endif \ No newline at end of file diff --git a/cudaInfo/Makefile b/cudaInfo/Makefile deleted file mode 100644 index 8c810550..00000000 --- a/cudaInfo/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -CPPSRC:=$(wildcard *.cpp) - -all: - ${CXX} -o cudainfo.bin ${CPPSRC} ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS} ${LIBS} -L${CUDA_LIB} -lcudautil -lcudart - mkdir -p $(BINDIR) - cp cudainfo.bin $(BINDIR)/cudainfo - -clean: - rm -rf cudainfo.bin \ No newline at end of file diff --git a/cudaInfo/cudaInfo.vcxproj b/cudaInfo/cudaInfo.vcxproj deleted file mode 100644 index 66365062..00000000 --- a/cudaInfo/cudaInfo.vcxproj +++ /dev/null @@ -1,94 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - - - - - {eadaaa54-e304-4656-8263-e5e688ff323d} - - - - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E} - cudaInfo - 10.0 - - - - Application - true - MultiByte - v142 - - - Application - false - true - MultiByte - v142 - - - - - - - - - - - - - - true - - - - Level3 - Disabled - WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(SolutionDir)cudaUtil;%(AdditionalIncludeDirectories) - - - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - - - - - Level3 - MaxSpeed - true - true - WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(SolutionDir)cudaUtil;%(AdditionalIncludeDirectories) - - - true - true - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - - - - - - - \ No newline at 
end of file diff --git a/cudaInfo/main.cpp b/cudaInfo/main.cpp deleted file mode 100644 index 77d1fec5..00000000 --- a/cudaInfo/main.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include -#include - -#include"cudaUtil.h" - -void printDeviceInfo(const cuda::CudaDeviceInfo &info) -{ - printf("ID: %d\n", info.id); - printf("Name: %s\n", info.name.c_str()); - printf("Capability: %d.%d\n", info.major, info.minor); - printf("MP: %d\n", info.mpCount); - printf("Cores: %d (%d per MP)\n", info.mpCount * info.cores, info.cores); - printf("Memory: %dMB\n", (int)(info.mem / (1024 * 1024))); -} - -int main(int argc, char **argv) -{ - try { - std::vector devices = cuda::getDevices(); - - printf("Found %d devices\n\n", (int)devices.size()); - - for(int i = 0; i < (int)devices.size(); i++) { - printDeviceInfo(devices[i]); - printf("\n"); - } - } catch(cuda::CudaException &ex) { - printf("Error querying devices: %s\n", ex.msg.c_str()); - - return 1; - } - - return 0; -} \ No newline at end of file diff --git a/cudaMath/cudaMath.vcxproj b/cudaMath/cudaMath.vcxproj deleted file mode 100644 index fcf40975..00000000 --- a/cudaMath/cudaMath.vcxproj +++ /dev/null @@ -1,90 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - {E1BDB205-8994-4E49-8B35-172A84E7118C} - cudaMath - 10.0 - - - - Application - true - MultiByte - v142 - - - Application - false - true - MultiByte - v142 - - - - - - - - - - - - - - true - - - - Level3 - Disabled - WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - - - - - Level3 - MaxSpeed - true - true - WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - true - true - true - Console - 
cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - - - - - - - - - - - - - \ No newline at end of file diff --git a/cudaMath/ptx.cuh b/cudaMath/ptx.cuh deleted file mode 100644 index 0bbcffc6..00000000 --- a/cudaMath/ptx.cuh +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _PTX_H -#define _PTX_H - -#include - -#define madc_hi(dest, a, x, b) asm volatile("madc.hi.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b)) -#define madc_hi_cc(dest, a, x, b) asm volatile("madc.hi.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b)) -#define mad_hi_cc(dest, a, x, b) asm volatile("mad.hi.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b)) - -#define mad_lo_cc(dest, a, x, b) asm volatile("mad.lo.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b)) -#define madc_lo(dest, a, x, b) asm volatile("madc.lo.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b)) -#define madc_lo_cc(dest, a, x, b) asm volatile("madc.lo.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x),"r"(b)) - -#define addc(dest, a, b) asm volatile("addc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) -#define add_cc(dest, a, b) asm volatile("add.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) -#define addc_cc(dest, a, b) asm volatile("addc.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) - -#define sub_cc(dest, a, b) asm volatile("sub.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) -#define subc_cc(dest, a, b) asm volatile("subc.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) -#define subc(dest, a, b) asm volatile("subc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) - -#define set_eq(dest,a,b) asm volatile("set.eq.u32.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b)) - -#define lsbpos(x) (__ffs((x))) - - -__device__ __forceinline__ unsigned int endian(unsigned int x) -{ - 
return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24); -} - -#endif \ No newline at end of file diff --git a/cudaMath/ripemd160.cuh b/cudaMath/ripemd160.cuh deleted file mode 100644 index 47590bb8..00000000 --- a/cudaMath/ripemd160.cuh +++ /dev/null @@ -1,539 +0,0 @@ -#ifndef _RIPEMD160_CUH -#define _RIPEMD160_CUH - -#include -#include -#include -#include "ptx.cuh" - -__constant__ unsigned int _RIPEMD160_IV[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 -}; - -__constant__ unsigned int _K0 = 0x5a827999; -__constant__ unsigned int _K1 = 0x6ed9eba1; -__constant__ unsigned int _K2 = 0x8f1bbcdc; -__constant__ unsigned int _K3 = 0xa953fd4e; - -__constant__ unsigned int _K4 = 0x7a6d76e9; -__constant__ unsigned int _K5 = 0x6d703ef3; -__constant__ unsigned int _K6 = 0x5c4dd124; -__constant__ unsigned int _K7 = 0x50a28be6; - - -__device__ __forceinline__ unsigned int rotl(unsigned int x, int n) -{ - return (x << n) | (x >> (32 - n)); -} - -__device__ __forceinline__ unsigned int F(unsigned int x, unsigned int y, unsigned int z) -{ - return x ^ y ^ z; -} - -__device__ __forceinline__ unsigned int G(unsigned int x, unsigned int y, unsigned int z) -{ - return (((x) & (y)) | (~(x) & (z))); -} - -__device__ __forceinline__ unsigned int H(unsigned int x, unsigned int y, unsigned int z) -{ - return (((x) | ~(y)) ^ (z)); -} - -__device__ __forceinline__ unsigned int I(unsigned int x, unsigned int y, unsigned int z) -{ - return (((x) & (z)) | ((y) & ~(z))); -} - -__device__ __forceinline__ unsigned int J(unsigned int x, unsigned int y, unsigned int z) -{ - return ((x) ^ ((y) | ~(z))); -} - -__device__ __forceinline__ void FF(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += F(b, c, d) + x; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void GG(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int 
&e, unsigned int x, unsigned int s) -{ - a += G(b, c, d) + x + _K0; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void HH(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += H(b, c, d) + x + _K1; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void II(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += I(b, c, d) + x + _K2; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void JJ(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += J(b, c, d) + x + _K3; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void FFF(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += F(b, c, d) + x; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void GGG(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += G(b, c, d) + x + _K4; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void HHH(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += H(b, c, d) + x + _K5; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void III(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += I(b, c, d) + x + _K6; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - -__device__ __forceinline__ void JJJ(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s) -{ - a += J(b, c, d) + x + _K7; - a = rotl(a, s) + e; - c = rotl(c, 10); -} - - - -__device__ void 
ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) -{ - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; - - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - - /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); - FF(a1, b1, c1, d1, e1, 0, 8); - - /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); - - /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - 
HH(d1, e1, a1, b1, c1, 0, 5); - - /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); - - /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; - - /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); - - /* parallel round 2 */ - III(e2, a2, b2, c2, d2, 
x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); - - /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); - - /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); - - /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); - FFF(e2, 
a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = _RIPEMD160_IV[1] + c1 + d2; - digest[1] = _RIPEMD160_IV[2] + d1 + e2; - digest[2] = _RIPEMD160_IV[3] + e1 + a2; - digest[3] = _RIPEMD160_IV[4] + a1 + b2; - digest[4] = _RIPEMD160_IV[0] + b1 + c2; -} - - - -__device__ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) -{ - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; - - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - - /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); - FF(a1, b1, c1, d1, e1, 0, 8); - - /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, 
c1, d1, x8, 12); - - /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); - - /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); - - /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; - - /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 
9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); - - /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); - - /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); - - /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, 
a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); - - /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); - FFF(e2, a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = c1 + d2; - digest[1] = d1 + e2; - digest[2] = e1 + a2; - digest[3] = a1 + b2; - digest[4] = b1 + c2; -} -#endif \ No newline at end of file diff --git a/cudaMath/secp256k1.cuh b/cudaMath/secp256k1.cuh deleted file mode 100644 index 88a3fed5..00000000 --- a/cudaMath/secp256k1.cuh +++ /dev/null @@ -1,802 +0,0 @@ -#ifndef _SECP256K1_CUH -#define _SECP256K1_CUH - -#include -#include - -#include "ptx.cuh" - - -/** - Prime modulus 2^256 - 2^32 - 977 - */ -__constant__ static unsigned int _P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F -}; - -/** - Base point X - */ -__constant__ static unsigned int _GX[8] = { - 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798 -}; - - -/** - Base point Y - */ -__constant__ static unsigned int _GY[8] = { - 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8 -}; - - -/** - * Group order - */ -__constant__ static unsigned int _N[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141 -}; - 
-__constant__ static unsigned int _BETA[8] = { - 0x7AE96A2B, 0x657C0710, 0x6E64479E, 0xAC3434E9, 0x9CF04975, 0x12F58995, 0xC1396C28, 0x719501EE -}; - - -__constant__ static unsigned int _LAMBDA[8] = { - 0x5363AD4C, 0xC05C30E0, 0xA5261C02, 0x8812645A, 0x122E22EA, 0x20816678, 0xDF02967C, 0x1B23BD72 -}; - - -__device__ __forceinline__ bool isInfinity(const unsigned int x[8]) -{ - bool isf = true; - - for(int i = 0; i < 8; i++) { - if(x[i] != 0xffffffff) { - isf = false; - } - } - - return isf; -} - -__device__ __forceinline__ static void copyBigInt(const unsigned int src[8], unsigned int dest[8]) -{ - for(int i = 0; i < 8; i++) { - dest[i] = src[i]; - } -} - -__device__ static bool equal(const unsigned int *a, const unsigned int *b) -{ - bool eq = true; - - for(int i = 0; i < 8; i++) { - eq &= (a[i] == b[i]); - } - - return eq; -} - -/** - * Reads an 8-word big integer from device memory - */ -__device__ static void readInt(const unsigned int *ara, int idx, unsigned int x[8]) -{ - int totalThreads = gridDim.x * blockDim.x; - - int base = idx * totalThreads * 8; - - int threadId = blockDim.x * blockIdx.x + threadIdx.x; - - int index = base + threadId; - - for (int i = 0; i < 8; i++) { - x[i] = ara[index]; - index += totalThreads; - } -} - -__device__ static unsigned int readIntLSW(const unsigned int *ara, int idx) -{ - int totalThreads = gridDim.x * blockDim.x; - - int base = idx * totalThreads * 8; - - int threadId = blockDim.x * blockIdx.x + threadIdx.x; - - int index = base + threadId; - - return ara[index + totalThreads * 7]; -} - -/** - * Writes an 8-word big integer to device memory - */ -__device__ static void writeInt(unsigned int *ara, int idx, const unsigned int x[8]) -{ - int totalThreads = gridDim.x * blockDim.x; - - int base = idx * totalThreads * 8; - - int threadId = blockDim.x * blockIdx.x + threadIdx.x; - - int index = base + threadId; - - for (int i = 0; i < 8; i++) { - ara[index] = x[i]; - index += totalThreads; - } -} - -/** - * Subtraction mod p - 
*/ -__device__ static void subModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - sub_cc(c[7], a[7], b[7]); - subc_cc(c[6], a[6], b[6]); - subc_cc(c[5], a[5], b[5]); - subc_cc(c[4], a[4], b[4]); - subc_cc(c[3], a[3], b[3]); - subc_cc(c[2], a[2], b[2]); - subc_cc(c[1], a[1], b[1]); - subc_cc(c[0], a[0], b[0]); - - unsigned int borrow = 0; - subc(borrow, 0, 0); - - if (borrow) { - add_cc(c[7], c[7], _P[7]); - addc_cc(c[6], c[6], _P[6]); - addc_cc(c[5], c[5], _P[5]); - addc_cc(c[4], c[4], _P[4]); - addc_cc(c[3], c[3], _P[3]); - addc_cc(c[2], c[2], _P[2]); - addc_cc(c[1], c[1], _P[1]); - addc(c[0], c[0], _P[0]); - } -} - -__device__ static unsigned int add(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - add_cc(c[7], a[7], b[7]); - addc_cc(c[6], a[6], b[6]); - addc_cc(c[5], a[5], b[5]); - addc_cc(c[4], a[4], b[4]); - addc_cc(c[3], a[3], b[3]); - addc_cc(c[2], a[2], b[2]); - addc_cc(c[1], a[1], b[1]); - addc_cc(c[0], a[0], b[0]); - - unsigned int carry = 0; - addc(carry, 0, 0); - - return carry; -} - -__device__ static unsigned int sub(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - sub_cc(c[7], a[7], b[7]); - subc_cc(c[6], a[6], b[6]); - subc_cc(c[5], a[5], b[5]); - subc_cc(c[4], a[4], b[4]); - subc_cc(c[3], a[3], b[3]); - subc_cc(c[2], a[2], b[2]); - subc_cc(c[1], a[1], b[1]); - subc_cc(c[0], a[0], b[0]); - - unsigned int borrow = 0; - subc(borrow, 0, 0); - - return (borrow & 0x01); -} - - -__device__ static void addModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - add_cc(c[7], a[7], b[7]); - addc_cc(c[6], a[6], b[6]); - addc_cc(c[5], a[5], b[5]); - addc_cc(c[4], a[4], b[4]); - addc_cc(c[3], a[3], b[3]); - addc_cc(c[2], a[2], b[2]); - addc_cc(c[1], a[1], b[1]); - addc_cc(c[0], a[0], b[0]); - - unsigned int carry = 0; - addc(carry, 0, 0); - - bool gt = false; - for(int i = 0; i < 8; i++) { - if(c[i] > _P[i]) { - gt = true; - break; - } else if(c[i] < _P[i]) { - 
break; - } - } - - if(carry || gt) { - sub_cc(c[7], c[7], _P[7]); - subc_cc(c[6], c[6], _P[6]); - subc_cc(c[5], c[5], _P[5]); - subc_cc(c[4], c[4], _P[4]); - subc_cc(c[3], c[3], _P[3]); - subc_cc(c[2], c[2], _P[2]); - subc_cc(c[1], c[1], _P[1]); - subc(c[0], c[0], _P[0]); - } -} - - - -__device__ static void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int high[8] = { 0 }; - - unsigned int t = a[7]; - - // a[7] * b (low) - for(int i = 7; i >= 0; i--) { - c[i] = t * b[i]; - } - - // a[7] * b (high) - mad_hi_cc(c[6], t, b[7], c[6]); - madc_hi_cc(c[5], t, b[6], c[5]); - madc_hi_cc(c[4], t, b[5], c[4]); - madc_hi_cc(c[3], t, b[4], c[3]); - madc_hi_cc(c[2], t, b[3], c[2]); - madc_hi_cc(c[1], t, b[2], c[1]); - madc_hi_cc(c[0], t, b[1], c[0]); - madc_hi(high[7], t, b[0], high[7]); - - - - // a[6] * b (low) - t = a[6]; - mad_lo_cc(c[6], t, b[7], c[6]); - madc_lo_cc(c[5], t, b[6], c[5]); - madc_lo_cc(c[4], t, b[5], c[4]); - madc_lo_cc(c[3], t, b[4], c[3]); - madc_lo_cc(c[2], t, b[3], c[2]); - madc_lo_cc(c[1], t, b[2], c[1]); - madc_lo_cc(c[0], t, b[1], c[0]); - madc_lo_cc(high[7], t, b[0], high[7]); - addc(high[6], high[6], 0); - - // a[6] * b (high) - mad_hi_cc(c[5], t, b[7], c[5]); - madc_hi_cc(c[4], t, b[6], c[4]); - madc_hi_cc(c[3], t, b[5], c[3]); - madc_hi_cc(c[2], t, b[4], c[2]); - madc_hi_cc(c[1], t, b[3], c[1]); - madc_hi_cc(c[0], t, b[2], c[0]); - madc_hi_cc(high[7], t, b[1], high[7]); - madc_hi(high[6], t, b[0], high[6]); - - // a[5] * b (low) - t = a[5]; - mad_lo_cc(c[5], t, b[7], c[5]); - madc_lo_cc(c[4], t, b[6], c[4]); - madc_lo_cc(c[3], t, b[5], c[3]); - madc_lo_cc(c[2], t, b[4], c[2]); - madc_lo_cc(c[1], t, b[3], c[1]); - madc_lo_cc(c[0], t, b[2], c[0]); - madc_lo_cc(high[7], t, b[1], high[7]); - madc_lo_cc(high[6], t, b[0], high[6]); - addc(high[5], high[5], 0); - - // a[5] * b (high) - mad_hi_cc(c[4], t, b[7], c[4]); - madc_hi_cc(c[3], t, b[6], c[3]); - madc_hi_cc(c[2], t, b[5], c[2]); - madc_hi_cc(c[1], t, 
b[4], c[1]); - madc_hi_cc(c[0], t, b[3], c[0]); - madc_hi_cc(high[7], t, b[2], high[7]); - madc_hi_cc(high[6], t, b[1], high[6]); - madc_hi(high[5], t, b[0], high[5]); - - - - // a[4] * b (low) - t = a[4]; - mad_lo_cc(c[4], t, b[7], c[4]); - madc_lo_cc(c[3], t, b[6], c[3]); - madc_lo_cc(c[2], t, b[5], c[2]); - madc_lo_cc(c[1], t, b[4], c[1]); - madc_lo_cc(c[0], t, b[3], c[0]); - madc_lo_cc(high[7], t, b[2], high[7]); - madc_lo_cc(high[6], t, b[1], high[6]); - madc_lo_cc(high[5], t, b[0], high[5]); - addc(high[4], high[4], 0); - - // a[4] * b (high) - mad_hi_cc(c[3], t, b[7], c[3]); - madc_hi_cc(c[2], t, b[6], c[2]); - madc_hi_cc(c[1], t, b[5], c[1]); - madc_hi_cc(c[0], t, b[4], c[0]); - madc_hi_cc(high[7], t, b[3], high[7]); - madc_hi_cc(high[6], t, b[2], high[6]); - madc_hi_cc(high[5], t, b[1], high[5]); - madc_hi(high[4], t, b[0], high[4]); - - - - // a[3] * b (low) - t = a[3]; - mad_lo_cc(c[3], t, b[7], c[3]); - madc_lo_cc(c[2], t, b[6], c[2]); - madc_lo_cc(c[1], t, b[5], c[1]); - madc_lo_cc(c[0], t, b[4], c[0]); - madc_lo_cc(high[7], t, b[3], high[7]); - madc_lo_cc(high[6], t, b[2], high[6]); - madc_lo_cc(high[5], t, b[1], high[5]); - madc_lo_cc(high[4], t, b[0], high[4]); - addc(high[3], high[3], 0); - - // a[3] * b (high) - mad_hi_cc(c[2], t, b[7], c[2]); - madc_hi_cc(c[1], t, b[6], c[1]); - madc_hi_cc(c[0], t, b[5], c[0]); - madc_hi_cc(high[7], t, b[4], high[7]); - madc_hi_cc(high[6], t, b[3], high[6]); - madc_hi_cc(high[5], t, b[2], high[5]); - madc_hi_cc(high[4], t, b[1], high[4]); - madc_hi(high[3], t, b[0], high[3]); - - - - // a[2] * b (low) - t = a[2]; - mad_lo_cc(c[2], t, b[7], c[2]); - madc_lo_cc(c[1], t, b[6], c[1]); - madc_lo_cc(c[0], t, b[5], c[0]); - madc_lo_cc(high[7], t, b[4], high[7]); - madc_lo_cc(high[6], t, b[3], high[6]); - madc_lo_cc(high[5], t, b[2], high[5]); - madc_lo_cc(high[4], t, b[1], high[4]); - madc_lo_cc(high[3], t, b[0], high[3]); - addc(high[2], high[2], 0); - - // a[2] * b (high) - mad_hi_cc(c[1], t, b[7], c[1]); - 
madc_hi_cc(c[0], t, b[6], c[0]); - madc_hi_cc(high[7], t, b[5], high[7]); - madc_hi_cc(high[6], t, b[4], high[6]); - madc_hi_cc(high[5], t, b[3], high[5]); - madc_hi_cc(high[4], t, b[2], high[4]); - madc_hi_cc(high[3], t, b[1], high[3]); - madc_hi(high[2], t, b[0], high[2]); - - - - // a[1] * b (low) - t = a[1]; - mad_lo_cc(c[1], t, b[7], c[1]); - madc_lo_cc(c[0], t, b[6], c[0]); - madc_lo_cc(high[7], t, b[5], high[7]); - madc_lo_cc(high[6], t, b[4], high[6]); - madc_lo_cc(high[5], t, b[3], high[5]); - madc_lo_cc(high[4], t, b[2], high[4]); - madc_lo_cc(high[3], t, b[1], high[3]); - madc_lo_cc(high[2], t, b[0], high[2]); - addc(high[1], high[1], 0); - - // a[1] * b (high) - mad_hi_cc(c[0], t, b[7], c[0]); - madc_hi_cc(high[7], t, b[6], high[7]); - madc_hi_cc(high[6], t, b[5], high[6]); - madc_hi_cc(high[5], t, b[4], high[5]); - madc_hi_cc(high[4], t, b[3], high[4]); - madc_hi_cc(high[3], t, b[2], high[3]); - madc_hi_cc(high[2], t, b[1], high[2]); - madc_hi(high[1], t, b[0], high[1]); - - - - // a[0] * b (low) - t = a[0]; - mad_lo_cc(c[0], t, b[7], c[0]); - madc_lo_cc(high[7], t, b[6], high[7]); - madc_lo_cc(high[6], t, b[5], high[6]); - madc_lo_cc(high[5], t, b[4], high[5]); - madc_lo_cc(high[4], t, b[3], high[4]); - madc_lo_cc(high[3], t, b[2], high[3]); - madc_lo_cc(high[2], t, b[1], high[2]); - madc_lo_cc(high[1], t, b[0], high[1]); - addc(high[0], high[0], 0); - - // a[0] * b (high) - mad_hi_cc(high[7], t, b[7], high[7]); - madc_hi_cc(high[6], t, b[6], high[6]); - madc_hi_cc(high[5], t, b[5], high[5]); - madc_hi_cc(high[4], t, b[4], high[4]); - madc_hi_cc(high[3], t, b[3], high[3]); - madc_hi_cc(high[2], t, b[2], high[2]); - madc_hi_cc(high[1], t, b[1], high[1]); - madc_hi(high[0], t, b[0], high[0]); - - - - // At this point we have 16 32-bit words representing a 512-bit value - // high[0 ... 7] and c[0 ... 
7] - const unsigned int s = 977; - - // Store high[6] and high[7] since they will be overwritten - unsigned int high7 = high[7]; - unsigned int high6 = high[6]; - - - // Take high 256 bits, multiply by 2^32, add to low 256 bits - // That is, take high[0 ... 7], shift it left 1 word and add it to c[0 ... 7] - add_cc(c[6], high[7], c[6]); - addc_cc(c[5], high[6], c[5]); - addc_cc(c[4], high[5], c[4]); - addc_cc(c[3], high[4], c[3]); - addc_cc(c[2], high[3], c[2]); - addc_cc(c[1], high[2], c[1]); - addc_cc(c[0], high[1], c[0]); - addc_cc(high[7], high[0], 0); - addc(high[6], 0, 0); - - - // Take high 256 bits, multiply by 977, add to low 256 bits - // That is, take high[0 ... 5], high6, high7, multiply by 977 and add to c[0 ... 7] - mad_lo_cc(c[7], high7, s, c[7]); - madc_lo_cc(c[6], high6, s, c[6]); - madc_lo_cc(c[5], high[5], s, c[5]); - madc_lo_cc(c[4], high[4], s, c[4]); - madc_lo_cc(c[3], high[3], s, c[3]); - madc_lo_cc(c[2], high[2], s, c[2]); - madc_lo_cc(c[1], high[1], s, c[1]); - madc_lo_cc(c[0], high[0], s, c[0]); - addc_cc(high[7], high[7], 0); - addc(high[6], high[6], 0); - - - mad_hi_cc(c[6], high7, s, c[6]); - madc_hi_cc(c[5], high6, s, c[5]); - madc_hi_cc(c[4], high[5], s, c[4]); - madc_hi_cc(c[3], high[4], s, c[3]); - madc_hi_cc(c[2], high[3], s, c[2]); - madc_hi_cc(c[1], high[2], s, c[1]); - madc_hi_cc(c[0], high[1], s, c[0]); - madc_hi_cc(high[7], high[0], s, high[7]); - addc(high[6], high[6], 0); - - - // Repeat the same steps, but this time we only need to handle high[6] and high[7] - high7 = high[7]; - high6 = high[6]; - - // Take the high 64 bits, multiply by 2^32 and add to the low 256 bits - add_cc(c[6], high[7], c[6]); - addc_cc(c[5], high[6], c[5]); - addc_cc(c[4], c[4], 0); - addc_cc(c[3], c[3], 0); - addc_cc(c[2], c[2], 0); - addc_cc(c[1], c[1], 0); - addc_cc(c[0], c[0], 0); - addc(high[7], 0, 0); - - - // Take the high 64 bits, multiply by 977 and add to the low 256 bits - mad_lo_cc(c[7], high7, s, c[7]); - madc_lo_cc(c[6], high6, s, 
c[6]); - addc_cc(c[5], c[5], 0); - addc_cc(c[4], c[4], 0); - addc_cc(c[3], c[3], 0); - addc_cc(c[2], c[2], 0); - addc_cc(c[1], c[1], 0); - addc_cc(c[0], c[0], 0); - addc(high[7], high[7], 0); - - mad_hi_cc(c[6], high7, s, c[6]); - madc_hi_cc(c[5], high6, s, c[5]); - addc_cc(c[4], c[4], 0); - addc_cc(c[3], c[3], 0); - addc_cc(c[2], c[2], 0); - addc_cc(c[1], c[1], 0); - addc_cc(c[0], c[0], 0); - addc(high[7], high[7], 0); - - - bool overflow = high[7] != 0; - - unsigned int borrow = sub(c, _P, c); - - if(overflow) { - if(!borrow) { - sub(c, _P, c); - } - } else { - if(borrow) { - add(c, _P, c); - } - } -} - - -/** - * Square mod P - * b = a * a - */ -__device__ static void squareModP(const unsigned int a[8], unsigned int b[8]) -{ - mulModP(a, a, b); -} - -/** - * Square mod P - * x = x * x - */ -__device__ static void squareModP(unsigned int x[8]) -{ - unsigned int tmp[8]; - squareModP(x, tmp); - copyBigInt(tmp, x); -} - -/** - * Multiply mod P - * c = a * c - */ -__device__ static void mulModP(const unsigned int a[8], unsigned int c[8]) -{ - unsigned int tmp[8]; - mulModP(a, c, tmp); - - copyBigInt(tmp, c); -} - -/** - * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains - */ -__device__ static void invModP(unsigned int value[8]) -{ - unsigned int x[8]; - - copyBigInt(value, x); - - unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 }; - - // 0xd - 1101 - mulModP(x, y); - squareModP(x); - //mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - - - // 0x2 - 0010 - //mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - //mulModP(x, y); - squareModP(x); - //mulModP(x, y); - squareModP(x); - - // 0xc = 0x1100 - //mulModP(x, y); - squareModP(x); - //mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - - // 0xfffff - for(int i = 0; i < 20; i++) { - mulModP(x, y); - squareModP(x); - } - - // 0xe - 1110 - //mulModP(x, y); - 
squareModP(x); - mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - mulModP(x, y); - squareModP(x); - - // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff - for(int i = 0; i < 219; i++) { - mulModP(x, y); - squareModP(x); - } - mulModP(x, y); - - copyBigInt(y, value); -} - -__device__ static void invModP(const unsigned int *value, unsigned int *inverse) -{ - copyBigInt(value, inverse); - - invModP(inverse); -} - -__device__ static void negModP(const unsigned int *value, unsigned int *negative) -{ - sub_cc(negative[0], _P[0], value[0]); - subc_cc(negative[1], _P[1], value[1]); - subc_cc(negative[2], _P[2], value[2]); - subc_cc(negative[3], _P[3], value[3]); - subc_cc(negative[4], _P[4], value[4]); - subc_cc(negative[5], _P[5], value[5]); - subc_cc(negative[6], _P[6], value[6]); - subc(negative[7], _P[7], value[7]); -} - - -__device__ __forceinline__ static void beginBatchAdd(const unsigned int *px, const unsigned int *x, unsigned int *chain, int i, int batchIdx, unsigned int inverse[8]) -{ - // x = Gx - x - unsigned int t[8]; - subModP(px, x, t); - - // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, - // c[2] = diff2 * diff1 * diff0, etc - mulModP(t, inverse); - - writeInt(chain, batchIdx, inverse); -} - - -__device__ __forceinline__ static void beginBatchAddWithDouble(const unsigned int *px, const unsigned int *py, unsigned int *xPtr, unsigned int *chain, int i, int batchIdx, unsigned int inverse[8]) -{ - unsigned int x[8]; - readInt(xPtr, i, x); - - if(equal(px, x)) { - addModP(py, py, x); - } else { - // x = Gx - x - subModP(px, x, x); - } - - // Keep a chain of multiples of the diff, i.e. 
c[0] = diff0, c[1] = diff0 * diff1, - // c[2] = diff2 * diff1 * diff0, etc - mulModP(x, inverse); - - writeInt(chain, batchIdx, inverse); -} - -__device__ static void completeBatchAddWithDouble(const unsigned int *px, const unsigned int *py, const unsigned int *xPtr, const unsigned int *yPtr, int i, int batchIdx, unsigned int *chain, unsigned int *inverse, unsigned int newX[8], unsigned int newY[8]) -{ - unsigned int s[8]; - unsigned int x[8]; - unsigned int y[8]; - - readInt(xPtr, i, x); - readInt(yPtr, i, y); - - if(batchIdx >= 1) { - unsigned int c[8]; - - readInt(chain, batchIdx - 1, c); - - mulModP(inverse, c, s); - - unsigned int diff[8]; - if(equal(px, x)) { - addModP(py, py, diff); - } else { - subModP(px, x, diff); - } - - mulModP(diff, inverse); - } else { - copyBigInt(inverse, s); - } - - - if(equal(px, x)) { - // currently s = 1 / 2y - - unsigned int x2[8]; - unsigned int tx2[8]; - - // 3x^2 - mulModP(x, x, x2); - addModP(x2, x2, tx2); - addModP(x2, tx2, tx2); - - - // s = 3x^2 * 1/2y - mulModP(tx2, s); - - // s^2 - unsigned int s2[8]; - mulModP(s, s, s2); - - // Rx = s^2 - 2px - subModP(s2, x, newX); - subModP(newX, x, newX); - - // Ry = s(px - rx) - py - unsigned int k[8]; - subModP(px, newX, k); - mulModP(s, k, newY); - subModP(newY, py, newY); - - } else { - - unsigned int rise[8]; - subModP(py, y, rise); - - mulModP(rise, s); - - // Rx = s^2 - Gx - Qx - unsigned int s2[8]; - mulModP(s, s, s2); - - subModP(s2, px, newX); - subModP(newX, x, newX); - - // Ry = s(px - rx) - py - unsigned int k[8]; - subModP(px, newX, k); - mulModP(s, k, newY); - subModP(newY, py, newY); - } -} - -__device__ static void completeBatchAdd(const unsigned int *px, const unsigned int *py, unsigned int *xPtr, unsigned int *yPtr, int i, int batchIdx, unsigned int *chain, unsigned int *inverse, unsigned int newX[8], unsigned int newY[8]) -{ - unsigned int s[8]; - unsigned int x[8]; - - readInt(xPtr, i, x); - - if(batchIdx >= 1) { - unsigned int c[8]; - - readInt(chain, batchIdx 
- 1, c); - mulModP(inverse, c, s); - - unsigned int diff[8]; - subModP(px, x, diff); - mulModP(diff, inverse); - } else { - copyBigInt(inverse, s); - } - - unsigned int y[8]; - readInt(yPtr, i, y); - - unsigned int rise[8]; - subModP(py, y, rise); - - mulModP(rise, s); - - // Rx = s^2 - Gx - Qx - unsigned int s2[8]; - mulModP(s, s, s2); - subModP(s2, px, newX); - subModP(newX, x, newX); - - // Ry = s(px - rx) - py - unsigned int k[8]; - subModP(px, newX, k); - mulModP(s, k, newY); - subModP(newY, py, newY); -} - - -__device__ __forceinline__ static void doBatchInverse(unsigned int inverse[8]) -{ - invModP(inverse); -} - -#endif \ No newline at end of file diff --git a/cudaMath/sha256.cuh b/cudaMath/sha256.cuh deleted file mode 100644 index b04aa9a0..00000000 --- a/cudaMath/sha256.cuh +++ /dev/null @@ -1,545 +0,0 @@ -#ifndef _SHA256_CUH -#define _SHA256_CUH - -#include -#include - -#include - - -__constant__ unsigned int _K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -__constant__ unsigned int _IV[8] = { - 0x6a09e667, - 0xbb67ae85, - 0x3c6ef372, - 0xa54ff53a, - 0x510e527f, - 0x9b05688c, - 0x1f83d9ab, - 0x5be0cd19 -}; - - -__device__ __forceinline__ unsigned int rotr(unsigned int x, int n) -{ - return (x >> n) ^ (x << (32 - n)); 
-} - -__device__ __forceinline__ unsigned int MAJ(unsigned int a, unsigned int b, unsigned int c) -{ - return (a & b) ^ (a & c) ^ (b & c); -} - -__device__ __forceinline__ unsigned int CH(unsigned int e, unsigned int f, unsigned int g) -{ - return (e & f) ^ (~e & g); -} - -__device__ __forceinline__ unsigned int s0(unsigned int x) -{ - return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3); -} - -__device__ __forceinline__ unsigned int s1(unsigned int x) -{ - return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10); -} - - -__device__ __forceinline__ void round(unsigned int a, unsigned int b, unsigned int c, unsigned int &d, unsigned e, unsigned int f, unsigned int g, unsigned int &h, unsigned int m, unsigned int k) -{ - unsigned int s = CH(e, f, g) + (rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)) + k + m; - - d += s + h; - - h += s + MAJ(a, b, c) + (rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22)); -} - -__device__ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8]) -{ - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - - // 0x04 || x || y - w[0] = (x[0] >> 8) | 0x04000000; - w[1] = (x[1] >> 8) | (x[0] << 24); - w[2] = (x[2] >> 8) | (x[1] << 24); - w[3] = (x[3] >> 8) | (x[2] << 24); - w[4] = (x[4] >> 8) | (x[3] << 24); - w[5] = (x[5] >> 8) | (x[4] << 24); - w[6] = (x[6] >> 8) | (x[5] << 24); - w[7] = (x[7] >> 8) | (x[6] << 24); - w[8] = (y[0] >> 8) | (x[7] << 24); - w[9] = (y[1] >> 8) | (y[0] << 24); - w[10] = (y[2] >> 8) | (y[1] << 24); - w[11] = (y[3] >> 8) | (y[2] << 24); - w[12] = (y[4] >> 8) | (y[3] << 24); - w[13] = (y[5] >> 8) | (y[4] << 24); - w[14] = (y[6] >> 8) | (y[5] << 24); - w[15] = (y[7] >> 8) | (y[6] << 24); - - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - - - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, 
d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, w[9], _K[9]); - round(g, h, a, b, c, d, e, f, w[10], _K[10]); - round(f, g, h, a, b, c, d, e, w[11], _K[11]); - round(e, f, g, h, a, b, c, d, w[12], _K[12]); - round(d, e, f, g, h, a, b, c, w[13], _K[13]); - round(c, d, e, f, g, h, a, b, w[14], _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); - - - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, 
c, d, e, f, g, h, a, w[15], _K[31]); - - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); - - - - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] 
= w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); - - a += _IV[0]; - b += _IV[1]; - c += _IV[2]; - d += _IV[3]; - e += _IV[4]; - f += _IV[5]; - g += _IV[6]; - h += _IV[7]; - - // store the intermediate hash value - unsigned int tmp[8]; - tmp[0] = a; - tmp[1] = b; - tmp[2] = c; - tmp[3] = d; - tmp[4] = e; - tmp[5] = f; - tmp[6] = g; - tmp[7] = h; - - w[0] = (y[7] << 24) | 0x00800000; - w[15] = 65 * 8; - - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, 0, _K[1]); - round(g, h, a, b, c, d, e, f, 0, _K[2]); - round(f, g, h, a, b, c, d, e, 0, _K[3]); - round(e, f, g, h, a, b, c, d, 0, _K[4]); - round(d, e, f, g, h, a, b, c, 0, _K[5]); - round(c, d, e, f, g, h, a, b, 0, _K[6]); - round(b, c, d, e, f, g, h, a, 0, _K[7]); - round(a, b, c, d, e, f, g, h, 0, _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, 
_K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); - - w[0] = w[0] + s0(0) + 0 + s1(0); - w[1] = 0 + s0(0) + 0 + s1(w[15]); - w[2] = 0 + s0(0) + 0 + s1(w[0]); - w[3] = 0 + s0(0) + 0 + s1(w[1]); - w[4] = 0 + s0(0) + 0 + s1(w[2]); - w[5] = 0 + s0(0) + 0 + s1(w[3]); - w[6] = 0 + s0(0) + w[15] + s1(w[4]); - w[7] = 0 + s0(0) + w[0] + s1(w[5]); - w[8] = 0 + s0(0) + w[1] + s1(w[6]); - w[9] = 0 + s0(0) + w[2] + s1(w[7]); - w[10] = 0 + s0(0) + w[3] + s1(w[8]); - w[11] = 0 + s0(0) + w[4] + s1(w[9]); - w[12] = 0 + s0(0) + w[5] + s1(w[10]); - w[13] = 0 + s0(0) + w[6] + s1(w[11]); - w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - - - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + 
s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); 
- round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); - - digest[0] = tmp[0] + a; - digest[1] = tmp[1] + b; - digest[2] = tmp[2] + c; - digest[3] = tmp[3] + d; - digest[4] = tmp[4] + e; - digest[5] = tmp[5] + f; - digest[6] = tmp[6] + g; - digest[7] = tmp[7] + h; -} - -__device__ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) -{ - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - - // 0x03 || x or 0x02 || x - w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8); - - w[1] = (x[1] >> 8) | (x[0] << 24); - w[2] = (x[2] >> 8) | (x[1] << 24); - w[3] = (x[3] >> 8) | (x[2] << 24); - w[4] = (x[4] >> 8) | (x[3] << 24); - w[5] = (x[5] >> 8) | (x[4] << 24); - w[6] = (x[6] >> 8) | (x[5] << 24); - w[7] = (x[7] >> 8) | (x[6] << 24); - w[8] = (x[7] << 24) | 0x00800000; - w[15] = 33 * 8; - - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, 
h, a, b, c, 0, _K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); - - w[0] = w[0] + s0(w[1]) + 0 + s1(0); - w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]); - w[2] = w[2] + s0(w[3]) + 0 + s1(w[0]); - w[3] = w[3] + s0(w[4]) + 0 + s1(w[1]); - w[4] = w[4] + s0(w[5]) + 0 + s1(w[2]); - w[5] = w[5] + s0(w[6]) + 0 + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(0) + w[1] + s1(w[6]); - w[9] = 0 + s0(0) + w[2] + s1(w[7]); - w[10] = 0 + s0(0) + w[3] + s1(w[8]); - w[11] = 0 + s0(0) + w[4] + s1(w[9]); - w[12] = 0 + s0(0) + w[5] + s1(w[10]); - w[13] = 0 + s0(0) + w[6] + s1(w[11]); - w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = 
w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); - - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, 
w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); - - a += _IV[0]; - b += _IV[1]; - c += _IV[2]; - d += _IV[3]; - e += _IV[4]; - f += _IV[5]; - g += _IV[6]; - h += _IV[7]; - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} -#endif \ No newline at end of file diff --git a/cudaUtil/Makefile b/cudaUtil/Makefile deleted file mode 100644 index f443b144..00000000 --- a/cudaUtil/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -NAME=cudautil -SRC=$(wildcard *.cpp) -OBJS=$(SRC:.cpp=.o) - -all: ${SRC} - for file in ${SRC} ; do\ - ${CXX} -c $$file ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS};\ - done - mkdir -p ${LIBDIR} - ar rvs ${LIBDIR}/lib$(NAME).a ${OBJS} - -clean: - rm -rf *.o diff --git a/cudaUtil/cudaUtil.cpp b/cudaUtil/cudaUtil.cpp deleted file mode 100644 index 349a6046..00000000 --- a/cudaUtil/cudaUtil.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "cudaUtil.h" - - -cuda::CudaDeviceInfo cuda::getDeviceInfo(int device) -{ - cuda::CudaDeviceInfo devInfo; - - cudaDeviceProp properties; - cudaError_t err = cudaSuccess; - - err = cudaSetDevice(device); - - if(err) { - throw cuda::CudaException(err); - } - - err = cudaGetDeviceProperties(&properties, device); - - if(err) { - throw cuda::CudaException(err); - } - - devInfo.id = device; - devInfo.major = properties.major; - devInfo.minor = properties.minor; - devInfo.mpCount = properties.multiProcessorCount; - devInfo.mem = properties.totalGlobalMem; - devInfo.name = 
std::string(properties.name); - - int cores = 0; - switch(devInfo.major) { - case 1: - cores = 8; - break; - case 2: - if(devInfo.minor == 0) { - cores = 32; - } else { - cores = 48; - } - break; - case 3: - cores = 192; - break; - case 5: - cores = 128; - break; - case 6: - if(devInfo.minor == 1 || devInfo.minor == 2) { - cores = 128; - } else { - cores = 64; - } - break; - case 7: - cores = 64; - break; - default: - cores = 8; - break; - } - devInfo.cores = cores; - - return devInfo; -} - - -std::vector cuda::getDevices() -{ - int count = getDeviceCount(); - - std::vector devList; - - for(int device = 0; device < count; device++) { - devList.push_back(getDeviceInfo(device)); - } - - return devList; -} - -int cuda::getDeviceCount() -{ - int count = 0; - - cudaError_t err = cudaGetDeviceCount(&count); - - if(err) { - throw cuda::CudaException(err); - } - - return count; -} \ No newline at end of file diff --git a/cudaUtil/cudaUtil.h b/cudaUtil/cudaUtil.h deleted file mode 100644 index eaf7eab2..00000000 --- a/cudaUtil/cudaUtil.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _CUDA_UTIL_H -#define _CUDA_UTIL_H - -#include -#include - -#include -#include - -namespace cuda { - typedef struct { - - int id; - int major; - int minor; - int mpCount; - int cores; - uint64_t mem; - std::string name; - - }CudaDeviceInfo; - - class CudaException - { - public: - cudaError_t error; - std::string msg; - - CudaException(cudaError_t err) - { - this->error = err; - this->msg = std::string(cudaGetErrorString(err)); - } - }; - - CudaDeviceInfo getDeviceInfo(int device); - - std::vector getDevices(); - - int getDeviceCount(); -} -#endif \ No newline at end of file diff --git a/cudaUtil/cudaUtil.vcxproj b/cudaUtil/cudaUtil.vcxproj deleted file mode 100644 index 24d5ccb6..00000000 --- a/cudaUtil/cudaUtil.vcxproj +++ /dev/null @@ -1,160 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - - - - - - - {EADAAA54-E304-4656-8263-E5E688FF323D} - 
Win32Proj - cudaUtil - 10.0 - - - - StaticLibrary - true - v141 - Unicode - - - StaticLibrary - false - v141 - true - Unicode - - - StaticLibrary - true - v142 - Unicode - - - StaticLibrary - false - v142 - true - Unicode - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Level3 - Disabled - _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories) - - - Windows - - - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\Win32;%(AdditionalLibraryDirectories) - cuda.lib;cudart.lib;%(AdditionalDependencies) - - - - - NotUsing - Level3 - Disabled - _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) - $(CUDA_INCLUDE);%(AdditionalIncludeDirectories) - - - Windows - - - %(AdditionalLibraryDirectories) - %(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories) - - - Windows - true - true - - - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\Win32;%(AdditionalLibraryDirectories) - cuda.lib;cudart.lib;%(AdditionalDependencies) - - - - - Level3 - NotUsing - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) - $(CUDA_INCLUDE);%(AdditionalIncludeDirectories) - - - Windows - true - true - - - %(AdditionalLibraryDirectories) - %(AdditionalDependencies) - - - - - - \ No newline at end of file From 1b7e86dc0ac38453015acbf74cae03cf8be4a662 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:33:56 +0200 Subject: [PATCH 04/62] improve Logger --- Logger/Logger.cpp | 28 +++++++++++----- Logger/Logger.h | 9 ++---- Logger/Logger.vcxproj | 74 +++++++++++++++++++++++++++++++++++++++---- 3 files changed, 91 insertions(+), 20 deletions(-) diff --git a/Logger/Logger.cpp b/Logger/Logger.cpp index 8e910baf..c0d57ab6 
100644 --- a/Logger/Logger.cpp +++ b/Logger/Logger.cpp @@ -4,12 +4,29 @@ #include "Logger.h" #include "util.h" +inline tm localtime_xp(time_t timer) +{ + tm bt{}; +#if defined(__unix__) + localtime_r(&timer, &bt); +#elif defined(_MSC_VER) + localtime_s(&bt, &timer); +#else + static std::mutex mtx; + std::lock_guard lock(mtx); + bt = *std::localtime(&timer); +#endif + return bt; +} + + bool LogLevel::isValid(int level) { switch(level) { case Info: case Error: case Debug: + case Warning: return true; default: return false; @@ -27,9 +44,9 @@ std::string LogLevel::toString(int level) return "Debug"; case Warning: return "Warning"; + default: + return ""; } - - return ""; } std::string Logger::getDateTimeString() @@ -37,7 +54,7 @@ std::string Logger::getDateTimeString() time_t now = time(0); struct tm tstruct; char buf[80]; - tstruct = *localtime(&now); + tstruct = localtime_xp(now); strftime(buf, sizeof(buf), "%Y-%m-%d.%X", &tstruct); @@ -78,8 +95,3 @@ void Logger::log(int logLevel, std::string msg) fprintf(stderr, "%s\n", str.c_str()); } - -void Logger::setLogFile(std::string path) -{ - -} diff --git a/Logger/Logger.h b/Logger/Logger.h index 40cf73b4..2bedc89a 100644 --- a/Logger/Logger.h +++ b/Logger/Logger.h @@ -1,9 +1,8 @@ -#ifndef _LOGGER_H -#define _LOGGER_H +#ifndef LOGGER_H +#define LOGGER_H #include - namespace LogLevel { enum Level { Info = 1, @@ -15,8 +14,7 @@ namespace LogLevel { bool isValid(int level); std::string toString(int level); -}; - +} class Logger { @@ -35,7 +33,6 @@ class Logger { static void log(int logLevel, std::string msg); - static void setLogFile(std::string path); }; #endif \ No newline at end of file diff --git a/Logger/Logger.vcxproj b/Logger/Logger.vcxproj index 8e1d7c33..1b11df70 100644 --- a/Logger/Logger.vcxproj +++ b/Logger/Logger.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -33,32 +41,46 @@ {150AF404-1F80-4A13-855B-4383C4A3326F} Win32Proj Logger - 10.0 + 
10.0.19041.0 StaticLibrary true - v141 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode StaticLibrary false - v141 + ClangCl true Unicode StaticLibrary true - v142 + ClangCl Unicode + + StaticLibrary + false + ClangCl + Unicode + true + x64 + StaticLibrary false - v142 + ClangCL true Unicode @@ -71,6 +93,10 @@ + + + + @@ -79,6 +105,10 @@ + + + + @@ -98,10 +128,23 @@ Windows + + + + + Level3 + Disabled + WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)Util;%(AdditionalIncludeDirectories) + + + Windows + + NotUsing - Level3 + EnableAllWarnings Disabled _DEBUG;_LIB;%(PreprocessorDefinitions) $(SolutionDir)Util;%(AdditionalIncludeDirectories) @@ -110,6 +153,25 @@ Windows + + + NotUsing + Level3 + MaxSpeed + NDEBUG;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)Util;%(AdditionalIncludeDirectories) + None + AnySuitable + true + Speed + true + true + true + + + Windows + + Level3 From 198f9eda7f5ca418cb6c329a556bf89daf4a034b Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:35:51 +0200 Subject: [PATCH 05/62] improve KeyFinder --- KeyFinder/DeviceManager.cpp | 31 -------- KeyFinder/DeviceManager.h | 5 -- KeyFinder/KeyFinder.vcxproj | 10 +-- KeyFinder/main.cpp | 113 ++++++++++-------------------- KeyFinderLib/KeyFinder.cpp | 15 +--- KeyFinderLib/KeyFinderLib.vcxproj | 45 +++++++++++- KeyFinderLib/KeySearchDevice.h | 6 +- 7 files changed, 92 insertions(+), 133 deletions(-) diff --git a/KeyFinder/DeviceManager.cpp b/KeyFinder/DeviceManager.cpp index cec6c77e..31776aea 100644 --- a/KeyFinder/DeviceManager.cpp +++ b/KeyFinder/DeviceManager.cpp @@ -1,12 +1,5 @@ #include "DeviceManager.h" - -#ifdef BUILD_CUDA -#include "cudaUtil.h" -#endif - -#ifdef BUILD_OPENCL #include "clutil.h" -#endif std::vector DeviceManager::getDevices() { @@ -14,29 +7,6 @@ std::vector DeviceManager::getDevices() std::vector devices; -#ifdef BUILD_CUDA - // Get CUDA devices - try { - std::vector cudaDevices = cuda::getDevices(); - - for(int i = 0; i < 
cudaDevices.size(); i++) { - DeviceManager::DeviceInfo device; - device.name = cudaDevices[i].name; - device.type = DeviceType::CUDA; - device.id = deviceId; - device.physicalId = cudaDevices[i].id; - device.memory = cudaDevices[i].mem; - device.computeUnits = cudaDevices[i].mpCount; - devices.push_back(device); - - deviceId++; - } - } catch(cuda::CudaException ex) { - throw DeviceManager::DeviceManagerException(ex.msg); - } -#endif - -#ifdef BUILD_OPENCL // Get OpenCL devices try { std::vector clDevices = cl::getDevices(); @@ -55,7 +25,6 @@ std::vector DeviceManager::getDevices() } catch(cl::CLException ex) { throw DeviceManager::DeviceManagerException(ex.msg); } -#endif return devices; } \ No newline at end of file diff --git a/KeyFinder/DeviceManager.h b/KeyFinder/DeviceManager.h index 5f76fd41..f906a070 100644 --- a/KeyFinder/DeviceManager.h +++ b/KeyFinder/DeviceManager.h @@ -21,7 +21,6 @@ class DeviceManagerException { class DeviceType { public: enum { - CUDA = 0, OpenCL }; }; @@ -37,10 +36,6 @@ typedef struct { uint64_t memory; int computeUnits; - // CUDA device info - int cudaMajor; - int cudaMinor; - int cudaCores; }DeviceInfo; std::vector getDevices(); diff --git a/KeyFinder/KeyFinder.vcxproj b/KeyFinder/KeyFinder.vcxproj index 2a70f13f..79d133d9 100644 --- a/KeyFinder/KeyFinder.vcxproj +++ b/KeyFinder/KeyFinder.vcxproj @@ -29,26 +29,26 @@ Application true - v141 + ClangCl NotSet Application false - v141 + ClangCl true NotSet Application true - v142 + ClangCl NotSet Application false - v142 + ClangCL true NotSet @@ -109,7 +109,7 @@ NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;BUILD_CUDA;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(SolutionDir)\KeyFinderLib;$(SolutionDir)\util;$(SolutionDir)\AddressUtil;$(SolutionDir)\secp256k1lib;$(SolutionDir)\CmdParse;$(SolutionDir)\cudaDeviceContext;$(CUDA_INCLUDE);$(SolutionDir)\cudaUtil;$(SolutionDir)\Logger;$(SolutionDir)\CudaKeySearchDevice diff --git a/KeyFinder/main.cpp 
b/KeyFinder/main.cpp index 22c699df..f81a2cf2 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -9,18 +9,10 @@ #include "CmdParse.h" #include "Logger.h" #include "ConfigFile.h" - #include "DeviceManager.h" - -#ifdef BUILD_CUDA -#include "CudaKeySearchDevice.h" -#endif - -#ifdef BUILD_OPENCL #include "CLKeySearchDevice.h" -#endif -typedef struct { +struct RunConfig{ // startKey is the first key. We store it so that if the --continue // option is used, the correct progress is displayed. startKey and // nextKey are only equal at the very beginning. nextKey gets saved @@ -55,7 +47,7 @@ typedef struct { secp256k1::uint256 stride = 1; bool follow = false; -}RunConfig; +}; static RunConfig _config; @@ -64,7 +56,6 @@ std::vector _devices; void writeCheckpoint(secp256k1::uint256 nextKey); static uint64_t _lastUpdate = 0; -static uint64_t _runningTime = 0; static uint64_t _startTime = 0; /** @@ -75,30 +66,16 @@ void resultCallback(KeySearchResult info) if(_config.resultsFile.length() != 0) { Logger::log(LogLevel::Info, "Found key for address '" + info.address + "'. 
Written to '" + _config.resultsFile + "'"); - std::string s = info.address + " " + info.privateKey.toString(16) + " " + info.publicKey.toString(info.compressed); + std::string s = info.address + " " + info.privateKey.toString() + " " + info.publicKey.toString(info.compressed); util::appendToFile(_config.resultsFile, s); return; } - std::string logStr = "Address: " + info.address + "\n"; - logStr += "Private key: " + info.privateKey.toString(16) + "\n"; - logStr += "Compressed: "; - - if(info.compressed) { - logStr += "yes\n"; - } else { - logStr += "no\n"; - } - - logStr += "Public key: \n"; - - if(info.compressed) { - logStr += info.publicKey.toString(true) + "\n"; - } else { - logStr += info.publicKey.x.toString(16) + "\n"; - logStr += info.publicKey.y.toString(16) + "\n"; - } + std::string logStr = "\nAddress: " + info.address + "\n"; + logStr += "Private key: " + info.privateKey.toString() + "\n"; + logStr += "Compressed: "; logStr += (info.compressed) ? "yes\n" : "no\n"; + logStr += "Public key: "; logStr += (info.compressed) ? 
info.publicKey.toString(true) + "\n" : info.publicKey.x.toString() + "\n " + info.publicKey.y.toString() + "\n"; Logger::log(LogLevel::Info, logStr); } @@ -205,7 +182,7 @@ void usage() printf("-o, --out FILE Write keys to FILE\n"); printf("-f, --follow Follow text output\n"); printf("--list-devices List available devices\n"); - printf("--keyspace KEYSPACE Specify the keyspace:\n"); + printf("-k, --keyspace KEYSPACE Specify the keyspace:\n"); printf(" START:END\n"); printf(" START:+COUNT\n"); printf(" START\n"); @@ -229,27 +206,19 @@ typedef struct { DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) { - DeviceParameters p; - p.threads = 256; - p.blocks = 32; - p.pointsPerThread = 32; + DeviceParameters parameters; + parameters.threads = 256; + parameters.blocks = 32; + parameters.pointsPerThread = 32; - return p; + return parameters; } static KeySearchDevice *getDeviceContext(DeviceManager::DeviceInfo &device, int blocks, int threads, int pointsPerThread) { -#ifdef BUILD_CUDA - if(device.type == DeviceManager::DeviceType::CUDA) { - return new CudaKeySearchDevice((int)device.physicalId, threads, pointsPerThread, blocks); - } -#endif - -#ifdef BUILD_OPENCL if(device.type == DeviceManager::DeviceType::OpenCL) { return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks); } -#endif return NULL; } @@ -309,19 +278,19 @@ static std::string getCompressionString(int mode) void writeCheckpoint(secp256k1::uint256 nextKey) { - std::ofstream tmp(_config.checkpointFile, std::ios::out); - - tmp << "start=" << _config.startKey.toString() << std::endl; - tmp << "next=" << nextKey.toString() << std::endl; - tmp << "end=" << _config.endKey.toString() << std::endl; - tmp << "blocks=" << _config.blocks << std::endl; - tmp << "threads=" << _config.threads << std::endl; - tmp << "points=" << _config.pointsPerThread << std::endl; - tmp << "compression=" << getCompressionString(_config.compression) << std::endl; - tmp << "device=" << 
_config.device << std::endl; - tmp << "elapsed=" << (_config.elapsed + util::getSystemTime() - _startTime) << std::endl; - tmp << "stride=" << _config.stride.toString(); - tmp.close(); + std::ofstream fileStream(_config.checkpointFile, std::ios::out); + + fileStream << "start=" << _config.startKey.toString() << "\n"; + fileStream << "next=" << nextKey.toString() << "\n"; + fileStream << "end=" << _config.endKey.toString() << "\n"; + fileStream << "blocks=" << _config.blocks << "\n"; + fileStream << "threads=" << _config.threads << "\n"; + fileStream << "points=" << _config.pointsPerThread << "\n"; + fileStream << "compression=" << getCompressionString(_config.compression) << "\n"; + fileStream << "device=" << _config.device << "\n"; + fileStream << "elapsed=" << (_config.elapsed + util::getSystemTime() - _startTime) << "\n"; + fileStream << "stride=" << _config.stride.toString(); + fileStream.close(); } void readCheckpointFile() @@ -399,27 +368,27 @@ int run() } // Get device context - KeySearchDevice *d = getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread); + KeySearchDevice *keySearchDevice = getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread); - KeyFinder f(_config.nextKey, _config.endKey, _config.compression, d, _config.stride); + KeyFinder keyFinder(_config.nextKey, _config.endKey, _config.compression, keySearchDevice, _config.stride); - f.setResultCallback(resultCallback); - f.setStatusInterval(_config.statusInterval); - f.setStatusCallback(statusCallback); + keyFinder.setResultCallback(resultCallback); + keyFinder.setStatusInterval(_config.statusInterval); + keyFinder.setStatusCallback(statusCallback); - f.init(); + keyFinder.init(); if(!_config.targetsFile.empty()) { - f.setTargets(_config.targetsFile); + keyFinder.setTargets(_config.targetsFile); } else { - f.setTargets(_config.targets); + keyFinder.setTargets(_config.targets); } - f.run(); + keyFinder.run(); 
- delete d; + delete keySearchDevice; } catch(KeySearchException ex) { - Logger::log(LogLevel::Info, "Error: " + ex.msg); + Logger::log(LogLevel::Info, "Error: " + ex.msg + ": " + ex.description); return 1; } @@ -465,9 +434,6 @@ int main(int argc, char **argv) bool optUncompressed = false; bool listDevices = false; bool optShares = false; - bool optThreads = false; - bool optBlocks = false; - bool optPoints = false; uint32_t shareIdx = 0; uint32_t numShares = 0; @@ -513,7 +479,7 @@ int main(int argc, char **argv) parser.add("-o", "--out", true); parser.add("-f", "--follow", false); parser.add("", "--list-devices", false); - parser.add("", "--keyspace", true); + parser.add("-k", "--keyspace", true); parser.add("", "--continue", true); parser.add("", "--share", true); parser.add("", "--stride", true); @@ -534,13 +500,10 @@ int main(int argc, char **argv) try { if(optArg.equals("-t", "--threads")) { _config.threads = util::parseUInt32(optArg.arg); - optThreads = true; } else if(optArg.equals("-b", "--blocks")) { _config.blocks = util::parseUInt32(optArg.arg); - optBlocks = true; } else if(optArg.equals("-p", "--points")) { _config.pointsPerThread = util::parseUInt32(optArg.arg); - optPoints = true; } else if(optArg.equals("-d", "--device")) { _config.device = util::parseUInt32(optArg.arg); } else if(optArg.equals("-c", "--compressed")) { diff --git a/KeyFinderLib/KeyFinder.cpp b/KeyFinderLib/KeyFinder.cpp index 19f56cbd..5e396dd5 100644 --- a/KeyFinderLib/KeyFinder.cpp +++ b/KeyFinderLib/KeyFinder.cpp @@ -7,17 +7,6 @@ #include "Logger.h" - -void KeyFinder::defaultResultCallback(KeySearchResult result) -{ - // Do nothing -} - -void KeyFinder::defaultStatusCallback(KeySearchStatus status) -{ - // Do nothing -} - KeyFinder::KeyFinder(const secp256k1::uint256 &startKey, const secp256k1::uint256 &endKey, int compression, KeySearchDevice* device, const secp256k1::uint256 &stride) { _total = 0; @@ -46,7 +35,7 @@ KeyFinder::~KeyFinder() void KeyFinder::setTargets(std::vector 
&targets) { if(targets.size() == 0) { - throw KeySearchException("Requires at least 1 target"); + throw KeySearchException("KEYSEARCH_NO_TARGET", "Requires at least 1 target"); } _targets.clear(); @@ -55,7 +44,7 @@ void KeyFinder::setTargets(std::vector &targets) for(unsigned int i = 0; i < targets.size(); i++) { if(!Address::verifyAddress(targets[i])) { - throw KeySearchException("Invalid address '" + targets[i] + "'"); + throw KeySearchException("KEYSEARCH_INVALID_ADDRESS", "Invalid address '" + targets[i] + "'"); } KeySearchTarget t; diff --git a/KeyFinderLib/KeyFinderLib.vcxproj b/KeyFinderLib/KeyFinderLib.vcxproj index dd022535..7cfe1962 100644 --- a/KeyFinderLib/KeyFinderLib.vcxproj +++ b/KeyFinderLib/KeyFinderLib.vcxproj @@ -5,6 +5,10 @@ Debug x64 + + Performance Release + x64 + Release x64 @@ -13,21 +17,29 @@ {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} KeyFinderLib - 10.0 + 10.0.19041.0 StaticLibrary true MultiByte - v142 + ClangCl + + + StaticLibrary + false + MultiByte + ClangCl + true + x64 StaticLibrary false true MultiByte - v142 + ClangCL @@ -36,6 +48,10 @@ + + + + @@ -44,6 +60,9 @@ true + + true + Level4 @@ -57,6 +76,26 @@ kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + Level4 + WIN32;WIN64;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(SolutionDir)secp256k1lib;$(SolutionDir)AddressUtil;$(SolutionDir)Logger;$(SolutionDir)util;$(SolutionDir)KeySearchDevice;$(SolutionDir)clUtil;%(AdditionalIncludeDirectories) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + true + Console + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + Level4 diff --git a/KeyFinderLib/KeySearchDevice.h b/KeyFinderLib/KeySearchDevice.h index 4139dc9f..82afb967 100644 --- 
a/KeyFinderLib/KeySearchDevice.h +++ b/KeyFinderLib/KeySearchDevice.h @@ -16,12 +16,14 @@ class KeySearchException { } - KeySearchException(const std::string &msg) + KeySearchException(const std::string &msg, const std::string &description) { this->msg = msg; + this->description = description; } std::string msg; + std::string description; }; @@ -38,6 +40,8 @@ class KeySearchDevice { public: + virtual ~KeySearchDevice() {}; + // Initialize the device virtual void init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride) = 0; From b55ed9792ccebe8cb4a7a4f53155af8eabe185f3 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:38:29 +0200 Subject: [PATCH 06/62] improve secp256k1lib --- secp256k1lib/secp256k1.cpp | 52 +++++++++------------- secp256k1lib/secp256k1.h | 22 ++++----- secp256k1lib/secp256k1lib.vcxproj | 74 ++++++++++++++++++++++++++++--- 3 files changed, 100 insertions(+), 48 deletions(-) diff --git a/secp256k1lib/secp256k1.cpp b/secp256k1lib/secp256k1.cpp index 6107ea01..c38f160f 100644 --- a/secp256k1lib/secp256k1.cpp +++ b/secp256k1lib/secp256k1.cpp @@ -1,8 +1,8 @@ -#include -#include -#include -#include"CryptoUtil.h" +#include +#include +#include +#include "CryptoUtil.h" #include "secp256k1.h" @@ -12,28 +12,26 @@ static uint256 _ONE(1); static uint256 _ZERO; static crypto::Rng _rng; -static inline void addc(unsigned int a, unsigned int b, unsigned int carryIn, unsigned int &sum, int &carryOut) +static inline void addc(unsigned int a, unsigned int b, int& carry, unsigned int &sum) { - uint64_t sum64 = (uint64_t)a + b + carryIn; + uint64_t sum64 = (uint64_t)a + b + carry; sum = (unsigned int)sum64; - carryOut = (int)(sum64 >> 32) & 1; + carry = (int)(sum64 >> 32) & 1; } -static inline void subc(unsigned int a, unsigned int b, unsigned int borrowIn, unsigned int &diff, int &borrowOut) +static inline void subc(unsigned int a, unsigned int b, int& borrow, unsigned int &diff) { - uint64_t diff64 = (uint64_t)a - b - 
borrowIn; + uint64_t diff64 = (uint64_t)a - b - borrow; diff = (unsigned int)diff64; - borrowOut = (int)((diff64 >> 32) & 1); + borrow = (int)((diff64 >> 32) & 1); } - - -static bool lessThanEqualTo(const unsigned int *a, const unsigned int *b, int len) +static bool lessThanEqualTo(const unsigned int *a, const unsigned int *b) { - for(int i = len - 1; i >= 0; i--) { + for(int i = 7; i >= 0; i--) { if(a[i] < b[i]) { // is greater than return true; @@ -68,7 +66,7 @@ static int add(const unsigned int *a, const unsigned int *b, unsigned int *c, in int carry = 0; for(int i = 0; i < len; i++) { - addc(a[i], b[i], carry, c[i], carry); + addc(a[i], b[i], carry, c[i]); } return carry; @@ -79,7 +77,7 @@ static int sub(const unsigned int *a, const unsigned int *b, unsigned int *c, in int borrow = 0; for(int i = 0; i < len; i++) { - subc(a[i], b[i], borrow, c[i], borrow); + subc(a[i], b[i], borrow, c[i]); } return borrow & 1; @@ -386,7 +384,7 @@ uint256 secp256k1::invModP(const uint256 &x) } } - if(lessThanEqualTo(v.v, u.v, 8)) { + if(lessThanEqualTo(v.v, u.v)) { sub(u.v, v.v, u.v, 8); // x1 = x1 - x2 @@ -613,7 +611,7 @@ uint256 secp256k1::multiplyModN(const uint256 &a, const uint256 &b) return r; } -std::string secp256k1::uint256::toString(int base) +std::string secp256k1::uint256::toString() { std::string s = ""; @@ -731,17 +729,6 @@ ecpoint secp256k1::multiplyPoint(const uint256 &k, const ecpoint &p) return sum; } -uint256 generatePrivateKey() -{ - uint256 k; - - for(int i = 0; i < 8; i++) { - k.v[i] = ((unsigned int)rand() | ((unsigned int)rand()) << 17); - } - - return k; -} - bool secp256k1::pointExists(const ecpoint &p) { uint256 y = multiplyModP(p.y, p.y); @@ -767,14 +754,15 @@ static void bulkInversionModP(std::vector &in) uint256 inverse = secp256k1::invModP(total); - for(int i = (int)in.size() - 1; i >= 0; i--) { + for(size_t i = in.size() - 1;; i--) { - if(i > 0) { + if(i != 0) { uint256 newValue = secp256k1::multiplyModP(products[i - 1], inverse); inverse = 
multiplyModP(inverse, in[i]); in[i] = newValue; } else { - in[i] = inverse; + in[0] = inverse; + break; } } } diff --git a/secp256k1lib/secp256k1.h b/secp256k1lib/secp256k1.h index fb20619c..0214454f 100644 --- a/secp256k1lib/secp256k1.h +++ b/secp256k1lib/secp256k1.h @@ -1,5 +1,5 @@ -#ifndef _HOST_SECP256K1_H -#define _HOST_SECP256K1_H +#ifndef HOST_SECP256K1_H +#define HOST_SECP256K1_H #include #include @@ -27,7 +27,7 @@ namespace secp256k1 { std::string t = s; // 0x prefix - if(t.length() >= 2 && (t[0] == '0' && t[1] == 'x' || t[1] == 'X')) { + if(t.length() >= 2 && (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))) { t = t.substr(2); } @@ -41,7 +41,7 @@ namespace secp256k1 { } // Verify only valid hex characters - for(int i = 0; i < (int)t.length(); i++) { + for(size_t i = 0, tl = t.length(); i < tl; i++) { if(!((t[i] >= 'a' && t[i] <= 'f') || (t[i] >= 'A' && t[i] <= 'F') || (t[i] >= '0' && t[i] <= '9'))) { throw std::string("Incorrect hex formatting"); } @@ -61,7 +61,7 @@ namespace secp256k1 { int j = 0; for(int i = len - 8; i >= 0; i-= 8) { - std::string sub = t.substr(i, 8); + std::string sub = t.substr((unsigned long long)i, 8); uint32_t val; if(sscanf(sub.c_str(), "%x", &val) != 1) { throw std::string("Incorrect hex formatting"); @@ -261,12 +261,13 @@ namespace secp256k1 { return (this->v[0] & 1) == 0; } - std::string toString(int base = 16); + std::string toString(); uint64_t toUint64() { return ((uint64_t)this->v[1] << 32) | v[0]; } + }; const unsigned int _POINT_AT_INFINITY_WORDS[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; @@ -290,10 +291,10 @@ namespace secp256k1 { this->y = uint256(_POINT_AT_INFINITY_WORDS); } - ecpoint(const uint256 &x, const uint256 &y) + ecpoint(const uint256 &pX, const uint256 &pY) { - this->x = x; - this->y = y; + this->x = pX; + this->y = pY; } ecpoint(const ecpoint &p) @@ -365,7 +366,8 @@ namespace secp256k1 { void generateKeyPairsBulk(unsigned int count, const ecpoint 
&basePoint, std::vector &privKeysOut, std::vector &pubKeysOut); void generateKeyPairsBulk(const ecpoint &basePoint, std::vector &privKeys, std::vector &pubKeysOut); + uint256 generatePrivateKey(); ecpoint parsePublicKey(const std::string &pubKeyString); } -#endif \ No newline at end of file +#endif diff --git a/secp256k1lib/secp256k1lib.vcxproj b/secp256k1lib/secp256k1lib.vcxproj index d9270a94..21aa7f32 100644 --- a/secp256k1lib/secp256k1lib.vcxproj +++ b/secp256k1lib/secp256k1lib.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -33,32 +41,46 @@ {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} Win32Proj secp256k1lib - 10.0 + 10.0.19041.0 StaticLibrary true - v141 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode StaticLibrary false - v141 + ClangCl true Unicode StaticLibrary true - v142 + ClangCl NotSet + + StaticLibrary + false + ClangCl + NotSet + true + x64 + StaticLibrary false - v142 + ClangCL true NotSet @@ -71,6 +93,10 @@ + + + + @@ -79,6 +105,10 @@ + + + + @@ -98,10 +128,23 @@ Windows + + + + + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories) + + + Windows + + NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) $(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories) @@ -110,6 +153,25 @@ Windows + + + NotUsing + Level3 + _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + $(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + Windows + + Level3 From 22cd79f108a60330d08452a761302d5ff5892c1e Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:39:30 +0200 Subject: [PATCH 07/62] improve CryptoUtil --- CryptoUtil/CryptoUtil.h | 6 +-- CryptoUtil/CryptoUtil.vcxproj | 74 +++++++++++++++++++++++++++++++---- CryptoUtil/Rng.cpp | 4 +- 
CryptoUtil/hash.cpp | 7 +--- CryptoUtil/sha256.cpp | 7 +++- 5 files changed, 80 insertions(+), 18 deletions(-) diff --git a/CryptoUtil/CryptoUtil.h b/CryptoUtil/CryptoUtil.h index fdd2b5c1..aab42b66 100644 --- a/CryptoUtil/CryptoUtil.h +++ b/CryptoUtil/CryptoUtil.h @@ -10,7 +10,7 @@ namespace crypto { public: Rng(); - void get(unsigned char *buf, int len); + void get(unsigned char *buf, size_t len); }; @@ -20,6 +20,6 @@ namespace crypto { void sha256(unsigned int *msg, unsigned int *digest); unsigned int checksum(const unsigned int *hash); -}; +} -#endif \ No newline at end of file +#endif diff --git a/CryptoUtil/CryptoUtil.vcxproj b/CryptoUtil/CryptoUtil.vcxproj index 8b0ed8bf..53ab0fc2 100644 --- a/CryptoUtil/CryptoUtil.vcxproj +++ b/CryptoUtil/CryptoUtil.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -31,32 +39,46 @@ {CA46856A-1D1E-4F6F-A69C-6707D540BF36} Win32Proj CryptoUtil - 10.0 + 10.0.19041.0 StaticLibrary true - v141 + ClangCl + Unicode + + + StaticLibrary + ClangCl Unicode StaticLibrary false - v141 + ClangCl true Unicode StaticLibrary - true - v142 + false + ClangCl Unicode + true + + + StaticLibrary + false + ClangCl + Unicode + true + x64 StaticLibrary false - v142 + ClangCL true Unicode @@ -69,6 +91,10 @@ + + + + @@ -77,6 +103,10 @@ + + + + @@ -95,10 +125,22 @@ Windows + + + + + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + + + Windows + + NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) @@ -106,6 +148,24 @@ Windows + + + NotUsing + Level3 + MaxSpeed + _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + None + AnySuitable + true + Speed + true + true + true + + + Windows + + Level3 diff --git a/CryptoUtil/Rng.cpp b/CryptoUtil/Rng.cpp index ada92b16..c07b4ce0 100644 --- a/CryptoUtil/Rng.cpp +++ b/CryptoUtil/Rng.cpp @@ -46,7 +46,7 @@ void crypto::Rng::reseed() 
secureRandom((unsigned char *)_state, 32); } -void crypto::Rng::get(unsigned char *buf, int len) +void crypto::Rng::get(unsigned char *buf, size_t len) { int i = 0; while(len > 0) { @@ -70,4 +70,4 @@ void crypto::Rng::get(unsigned char *buf, int len) len -= len; } } -} \ No newline at end of file +} diff --git a/CryptoUtil/hash.cpp b/CryptoUtil/hash.cpp index 138a562f..5d534aac 100644 --- a/CryptoUtil/hash.cpp +++ b/CryptoUtil/hash.cpp @@ -2,11 +2,6 @@ #include #include -static unsigned int endian(unsigned int x) -{ - return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24); -} - unsigned int crypto::checksum(const unsigned int *hash) { unsigned int msg[16] = { 0 }; @@ -30,6 +25,8 @@ unsigned int crypto::checksum(const unsigned int *hash) // Prepare to make a hash of the digest memset(msg, 0, 16 * sizeof(unsigned int)); + + #pragma clang loop unroll(full) for(int i = 0; i < 8; i++) { msg[i] = digest[i]; } diff --git a/CryptoUtil/sha256.cpp b/CryptoUtil/sha256.cpp index 1f8d8531..6247272a 100644 --- a/CryptoUtil/sha256.cpp +++ b/CryptoUtil/sha256.cpp @@ -50,6 +50,7 @@ static void round(unsigned int a, unsigned int b, unsigned int c, unsigned int & void crypto::sha256Init(unsigned int *digest) { + #pragma clang loop unroll(full) for(int i = 0; i < 8; i++) { digest[i] = _IV[i]; } @@ -70,11 +71,14 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest) h = digest[7]; unsigned int w[80] = { 0 }; + #pragma clang loop unroll(full) for(int i = 0; i < 16; i++) { w[i] = msg[i]; } // Expand 16 words to 64 words + + #pragma clang loop unroll(full) for(int i = 16; i < 64; i++) { unsigned int x = w[i - 15]; unsigned int y = w[i - 2]; @@ -84,6 +88,7 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest) w[i] = w[i - 16] + s0 + w[i - 7] + s1; } + #pragma clang loop unroll(full) for(int i = 0; i < 64; i += 8) { round(a, b, c, d, e, f, g, h, w[i], _K[i]); round(h, a, b, c, d, e, f, g, w[i + 1], _K[i + 1]); @@ -103,4 +108,4 @@ void 
crypto::sha256(unsigned int *msg, unsigned int *digest) digest[5] += f; digest[6] += g; digest[7] += h; -} \ No newline at end of file +} From 4030d2d8b04bae77f932d554207dd34959bab8f3 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:41:31 +0200 Subject: [PATCH 08/62] improve clUtil --- clUtil/clContext.cpp | 2 +- clUtil/clContext.h | 30 +-------- clUtil/clError.cpp | 142 ++++++++++++++++++++++++++++++++++++++++++ clUtil/clUtil.cpp | 6 +- clUtil/clUtil.vcxproj | 69 +++++++++++++++++--- clUtil/clerrors.cpp | 69 -------------------- clUtil/clutil.h | 33 ++++++---- 7 files changed, 231 insertions(+), 120 deletions(-) create mode 100644 clUtil/clError.cpp delete mode 100644 clUtil/clerrors.cpp diff --git a/clUtil/clContext.cpp b/clUtil/clContext.cpp index 0a275182..8d368419 100644 --- a/clUtil/clContext.cpp +++ b/clUtil/clContext.cpp @@ -15,7 +15,7 @@ cl::CLContext::CLContext(cl_device_id device) _ctx = clCreateContext(0, 1, &_device, NULL, NULL, &err); clCall(err); - _queue = clCreateCommandQueue(_ctx, _device, 0, &err); + _queue = clCreateCommandQueueWithProperties(_ctx, _device, 0, &err); clCall(err); } diff --git a/clUtil/clContext.h b/clUtil/clContext.h index dd8f4ec7..5e9e28d5 100644 --- a/clUtil/clContext.h +++ b/clUtil/clContext.h @@ -1,5 +1,5 @@ -#ifndef _CL_CONTEXT_H -#define _CL_CONTEXT_H +#ifndef CL_CONTEXT_H +#define CL_CONTEXT_H #include #include "clutil.h" @@ -115,7 +115,7 @@ class CLKernel { clCall(clSetKernelArg(_kernel, 2, sizeof(arg3), &arg3)); clCall(clSetKernelArg(_kernel, 3, sizeof(arg4), &arg4)); clCall(clSetKernelArg(_kernel, 4, sizeof(arg5), &arg5)); - clCall(clSetKernelArg(_kernel, 4, sizeof(arg6), &arg6)); + clCall(clSetKernelArg(_kernel, 5, sizeof(arg6), &arg6)); } template @@ -245,30 +245,6 @@ class CLKernel { clCall(clSetKernelArg(_kernel, 15, sizeof(T16), &arg16)); } - template - void set_args(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10, T11 arg11, T12 arg12, - T13 arg13, T14 
arg14, T15 arg15, T16 arg16) - { - clCall(clSetKernelArg(_kernel, 0, sizeof(T1), &arg1)); - clCall(clSetKernelArg(_kernel, 1, sizeof(T2), &arg2)); - clCall(clSetKernelArg(_kernel, 2, sizeof(T3), &arg3)); - clCall(clSetKernelArg(_kernel, 3, sizeof(T4), &arg4)); - clCall(clSetKernelArg(_kernel, 4, sizeof(T5), &arg5)); - clCall(clSetKernelArg(_kernel, 5, sizeof(T6), &arg6)); - clCall(clSetKernelArg(_kernel, 6, sizeof(T7), &arg7)); - clCall(clSetKernelArg(_kernel, 7, sizeof(T8), &arg8)); - clCall(clSetKernelArg(_kernel, 8, sizeof(T9), &arg9)); - clCall(clSetKernelArg(_kernel, 9, sizeof(T10), &arg10)); - clCall(clSetKernelArg(_kernel, 10, sizeof(T11), &arg11)); - clCall(clSetKernelArg(_kernel, 11, sizeof(T12), &arg12)); - clCall(clSetKernelArg(_kernel, 12, sizeof(T13), &arg13)); - clCall(clSetKernelArg(_kernel, 13, sizeof(T14), &arg14)); - clCall(clSetKernelArg(_kernel, 14, sizeof(T15), &arg15)); - clCall(clSetKernelArg(_kernel, 15, sizeof(T16), &arg16)); - } - template diff --git a/clUtil/clError.cpp b/clUtil/clError.cpp new file mode 100644 index 00000000..4f7d5306 --- /dev/null +++ b/clUtil/clError.cpp @@ -0,0 +1,142 @@ +#include "clutil.h" + +std::string cl::getOpenCLErrorName(cl_int errorCode) +{ + switch (errorCode) + { + case CL_SUCCESS: return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: 
return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM"; + case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: return 
"CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; + case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; + case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE"; + case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE"; + case CL_INVALID_SPEC_ID: return "CL_INVALID_SPEC_ID"; + case CL_MAX_SIZE_RESTRICTION_EXCEEDED: return "CL_MAX_SIZE_RESTRICTION_EXCEEDED"; + + default: + return "CL_UNKNOWN_ERROR_CODE"; + } +} + +// from http://www.techdarting.com/2014/01/opencl-errors.html +std::string cl::getOpenCLErrorDescription(cl_int err) { + switch (err) { + case CL_SUCCESS: return "Everything is good!"; + case CL_DEVICE_NOT_FOUND: return "No OpenCL devices that matched given device type were found"; + case CL_DEVICE_NOT_AVAILABLE: return "No OpenCL compatible device was found"; + case CL_COMPILER_NOT_AVAILABLE: return "OpenCL Compiler perhaps failed to configure itself, or check your OpenCL installation"; + case 
CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Failed to allocate memory for buffer object"; + case CL_OUT_OF_RESOURCES: return "failure to allocate resources required by the OpenCL implementation on the device"; + case CL_OUT_OF_HOST_MEMORY: return "failure to allocate resources required by the OpenCL implementation on the host"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "returned by clGetEventProfilingInfo, if the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue and if the profiling information is currently not available"; + case CL_MEM_COPY_OVERLAP: return "if source and destination buffers are the same buffer object and the source and destination regions overlap"; + case CL_IMAGE_FORMAT_MISMATCH: return "src and dst image do not use the same image format"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "the image format is not supported."; + case CL_BUILD_PROGRAM_FAILURE: return "program build error for given device, Use clGetProgramBuildInfo API call to get the build log of the kernel compilation."; + case CL_MAP_FAILURE: return "failed to map the requested region into the host address space. This error does not occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "no devices in given context associated with buffer for which the origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "returned by clWaitForEvents(), execution status of any of the events in event list is a negative integer value i.e., error"; + case CL_COMPILE_PROGRAM_FAILURE: return "failed to compile the program source. 
Error occurs if clCompileProgram does not return until the compile has completed"; + case CL_LINKER_NOT_AVAILABLE: return "Linker unavailable"; + case CL_LINK_PROGRAM_FAILURE: return "failed to link the compiled binaries and perhaps libraries"; + case CL_DEVICE_PARTITION_FAILED: return "given partition name is supported by the implementation but input device couldn't be partitioned further"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "argument information is not available for the given kernel"; + case CL_INVALID_VALUE: return "values passed in the flags parameter is not valid"; + case CL_INVALID_DEVICE_TYPE: return "device type specified is not valid, its returned by clCreateContextFromType / clGetDeviceIDs"; + case CL_INVALID_PLATFORM: return "the specified platform is not a valid platform, its returned by clGetPlatformInfo /clGetDeviceIDs / clCreateContext / clCreateContextFromType"; + case CL_INVALID_DEVICE: return "device/s specified are not valid"; + case CL_INVALID_CONTEXT: return "the given context is invalid OpenCL context, or the context associated with certain parameters are not the same"; + case CL_INVALID_QUEUE_PROPERTIES: return "specified properties are valid but are not supported by the device, its returned by clCreateCommandQueue / clSetCommandQueueProperty"; + case CL_INVALID_COMMAND_QUEUE: return "the specified command-queue is not a valid command-queue"; + case CL_INVALID_HOST_PTR: return "host pointer is NULL and CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are set in flags or if host_ptr is not NULL but CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in flags. 
returned by clCreateBuffer / clCreateImage2D / clCreateImage3D"; + case CL_INVALID_MEM_OBJECT: return "the passed parameter is not a valid memory, image, or buffer object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "image format specified is not valid or is NULL, clCreateImage2D /clCreateImage3D returns this."; + case CL_INVALID_IMAGE_SIZE: return "Its returned by create Image functions 2D/3D, if specified image width or height are outbound or 0"; + case CL_INVALID_SAMPLER: return "specified sampler is an invalid sampler object"; + case CL_INVALID_BINARY: return "program binary is not a valid binary for the specified device, returned by clBuildProgram / clCreateProgramWithBinary"; + case CL_INVALID_BUILD_OPTIONS: return "the given build options are not valid"; + case CL_INVALID_PROGRAM: return "the given program is an invalid program object, returned by clRetainProgram / clReleaseProgram / clBuildProgram / clGetProgramInfo / clGetProgramBuildInfo / clCreateKernel / clCreateKernelsInProgram"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "if there is no successfully built executable for program returned by clCreateKernel, there is no device in program then returned by clCreateKernelsInProgram, if no successfully built program executable present for device associated with command queue then returned by clEnqueueNDRangeKernel / clEnqueueTask"; + case CL_INVALID_KERNEL_NAME: return "mentioned kernel name is not found in program"; + case CL_INVALID_KERNEL_DEFINITION: return "arguments mismatch for the __kernel function definition and the passed ones, returned by clCreateKernel"; + case CL_INVALID_KERNEL: return "specified kernel is an invalid kernel object"; + case CL_INVALID_ARG_INDEX: return "clSetKernelArg if an invalid argument index is specified"; + case CL_INVALID_ARG_VALUE: return "the argument value specified is NULL, returned by clSetKernelArg"; + case CL_INVALID_ARG_SIZE: return "the given argument size (arg_size) do not match size of the data type for 
an argument, returned by clSetKernelArg"; + case CL_INVALID_KERNEL_ARGS: return "the kernel argument values have not been specified, returned by clEnqueueNDRangeKernel / clEnqueueTask"; + case CL_INVALID_WORK_DIMENSION: return "given work dimension is an invalid value, returned by clEnqueueNDRangeKernel"; + case CL_INVALID_WORK_GROUP_SIZE: return "the specified local workgroup size and number of workitems specified by global workgroup size is not evenly divisible by local workgroup size"; + case CL_INVALID_WORK_ITEM_SIZE: return "no. of workitems specified in any of local work group sizes is greater than the corresponding values specified by CL_DEVICE_MAX_WORK_ITEM_SIZES in that particular dimension"; + case CL_INVALID_GLOBAL_OFFSET: return "global_work_offset is not NULL. Must currently be a NULL value. In a future revision of OpenCL, global_work_offset can be used but not until OCL 1.2"; + case CL_INVALID_EVENT_WAIT_LIST: return "event wait list is NULL and (no. of events in wait list > 0), or event wait list is not NULL and no. 
of events in wait list is 0, or specified event objects are not valid events"; + case CL_INVALID_EVENT: return "invalid event objects specified"; + case CL_INVALID_GL_OBJECT: return "not a valid GL buffer object"; + case CL_INVALID_BUFFER_SIZE: return "the value of the parameter size is 0 or exceeds CL_DEVICE_MAX_MEM_ALLOC_SIZE for all devices specified in the parameter context, returned by clCreateBuffer"; + case CL_INVALID_GLOBAL_WORK_SIZE: return "specified global work size is NULL, or any of the values specified in global work dimensions are 0 or exceeds the range given by the sizeof(size_t) for the device on which the kernel will be enqueued, returned by clEnqueueNDRangeKernel"; + case CL_INVALID_PROPERTY: return "context property name in properties is not a supported property name, returned by clCreateContext"; + case CL_INVALID_IMAGE_DESCRIPTOR: return "values specified in image description are invalid"; + case CL_INVALID_COMPILER_OPTIONS: return "compiler options specified by options are invalid, returned by clCompileProgram"; + case CL_INVALID_LINKER_OPTIONS: return "linker options specified by options are invalid, returned by clLinkProgram"; + case CL_INVALID_DEVICE_PARTITION_COUNT: return "partition name specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_SUB_DEVICES or the total number of compute units requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device, or the number of compute units requested for one or more sub-devices is less than zero or the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device"; + case CL_INVALID_PIPE_SIZE: return "pipe_packet_size is 0 or the pipe_packet_size exceeds CL_DEVICE_PIPE_MAX_PACKET_SIZE value specified in table 4.3 (see clGetDeviceInfo) for all devices in context or pipe_max_packets is 0"; + case CL_INVALID_SPEC_ID: return "spec_id is not a valid specialization constant identifier"; + 
case CL_MAX_SIZE_RESTRICTION_EXCEEDED: return "the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language- specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration"; + + default: return "No description available"; + } +} \ No newline at end of file diff --git a/clUtil/clUtil.cpp b/clUtil/clUtil.cpp index 02b38353..0a24d2c6 100644 --- a/clUtil/clUtil.cpp +++ b/clUtil/clUtil.cpp @@ -1,6 +1,5 @@ #include "clutil.h" - void cl::clCall(cl_int err) { if(err != CL_SUCCESS) { @@ -8,7 +7,6 @@ void cl::clCall(cl_int err) } } - std::vector cl::getDevices() { std::vector deviceList; @@ -60,10 +58,10 @@ std::vector cl::getDevices() deviceList.push_back(info); } - delete devices; + delete[] devices; } - delete platforms; + delete[] platforms; return deviceList; } \ No newline at end of file diff --git a/clUtil/clUtil.vcxproj b/clUtil/clUtil.vcxproj index 4a66f61d..9d15c2ca 100644 --- a/clUtil/clUtil.vcxproj +++ b/clUtil/clUtil.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -22,32 +30,46 @@ 15.0 {D9A5823D-C472-40AC-B23A-21B1586CEEB0} clUtil - 10.0 + 10.0.19041.0 StaticLibrary true - v141 + ClangCl + MultiByte + + + StaticLibrary + false + ClangCl MultiByte StaticLibrary false - v141 + ClangCl true MultiByte StaticLibrary true - v142 + ClangCl MultiByte + + StaticLibrary + false + ClangCl + MultiByte + true + x64 + StaticLibrary false - v142 + ClangCL true MultiByte @@ -60,6 +82,10 @@ + + + + @@ -68,6 +94,10 @@ + + + + @@ -98,15 +128,40 @@ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories) - + Level3 Disabled true true + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories) + + + + + EnableAllWarnings + Disabled + true + true 
$(OPENCL_INCLUDE);$(SolutionDir)\util;%(AdditionalIncludeDirectories) + + + Level3 + true + true + $(OPENCL_INCLUDE);$(SolutionDir)\util;%(AdditionalIncludeDirectories) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + Level3 @@ -124,7 +179,7 @@ - + diff --git a/clUtil/clerrors.cpp b/clUtil/clerrors.cpp deleted file mode 100644 index 56b15422..00000000 --- a/clUtil/clerrors.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "clutil.h" - - -std::string cl::getErrorString(cl_int err) -{ - switch(err) { - case 0: return "CL_SUCCESS"; - case -1: return "CL_DEVICE_NOT_FOUND"; - case -2: return "CL_DEVICE_NOT_AVAILABLE"; - case -3: return "CL_COMPILER_NOT_AVAILABLE"; - case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - case -5: return "CL_OUT_OF_RESOURCES"; - case -6: return "CL_OUT_OF_HOST_MEMORY"; - case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; - case -8: return "CL_MEM_COPY_OVERLAP"; - case -9: return "CL_IMAGE_FORMAT_MISMATCH"; - case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; - case -11: return "CL_BUILD_PROGRAM_FAILURE"; - case -12: return "CL_MAP_FAILURE"; - case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; - case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; - case -15: return "CL_COMPILE_PROGRAM_FAILURE"; - case -16: return "CL_LINKER_NOT_AVAILABLE"; - case -17: return "CL_LINK_PROGRAM_FAILURE"; - case -18: return "CL_DEVICE_PARTITION_FAILED"; - case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; - case -30: return "CL_INVALID_VALUE"; - case -31: return "CL_INVALID_DEVICE_TYPE"; - case -32: return "CL_INVALID_PLATFORM"; - case -33: return "CL_INVALID_DEVICE"; - case -34: return "CL_INVALID_CONTEXT"; - case -35: return "CL_INVALID_QUEUE_PROPERTIES"; - case -36: return "CL_INVALID_COMMAND_QUEUE"; - case -37: return "CL_INVALID_HOST_PTR"; - case -38: return "CL_INVALID_MEM_OBJECT"; - case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; - case -40: return "CL_INVALID_IMAGE_SIZE"; - case -41: return "CL_INVALID_SAMPLER"; - 
case -42: return "CL_INVALID_BINARY"; - case -43: return "CL_INVALID_BUILD_OPTIONS"; - case -44: return "CL_INVALID_PROGRAM"; - case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; - case -46: return "CL_INVALID_KERNEL_NAME"; - case -47: return "CL_INVALID_KERNEL_DEFINITION"; - case -48: return "CL_INVALID_KERNEL"; - case -49: return "CL_INVALID_ARG_INDEX"; - case -50: return "CL_INVALID_ARG_VALUE"; - case -51: return "CL_INVALID_ARG_SIZE"; - case -52: return "CL_INVALID_KERNEL_ARGS"; - case -53: return "CL_INVALID_WORK_DIMENSION"; - case -54: return "CL_INVALID_WORK_GROUP_SIZE"; - case -55: return "CL_INVALID_WORK_ITEM_SIZE"; - case -56: return "CL_INVALID_GLOBAL_OFFSET"; - case -57: return "CL_INVALID_EVENT_WAIT_LIST"; - case -58: return "CL_INVALID_EVENT"; - case -59: return "CL_INVALID_OPERATION"; - case -60: return "CL_INVALID_GL_OBJECT"; - case -61: return "CL_INVALID_BUFFER_SIZE"; - case -62: return "CL_INVALID_MIP_LEVEL"; - case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; - case -64: return "CL_INVALID_PROPERTY"; - case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; - case -66: return "CL_INVALID_COMPILER_OPTIONS"; - case -67: return "CL_INVALID_LINKER_OPTIONS"; - case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; - default: return "CL_UNKNOWN_ERROR"; - } - -} \ No newline at end of file diff --git a/clUtil/clutil.h b/clUtil/clutil.h index 35591450..c5a8e953 100644 --- a/clUtil/clutil.h +++ b/clUtil/clutil.h @@ -1,5 +1,5 @@ -#ifndef _CL_UTIL_H -#define _CL_UTIL_H +#ifndef CL_UTIL_H +#define CL_UTIL_H #ifdef __APPLE__ #define CL_SILENCE_DEPRECATION @@ -12,7 +12,8 @@ #include namespace cl { - std::string getErrorString(cl_int err); + std::string getOpenCLErrorName(cl_int errorCode); + std::string getOpenCLErrorDescription(cl_int errorCode); typedef struct { cl_device_id id; @@ -26,17 +27,27 @@ namespace cl { public: int error; std::string msg; + std::string description; - CLException(cl_int err) + CLException(cl_int errorCode) { - this->error = err; - this->msg = 
getErrorString(err); + this->error = errorCode; + this->msg = getOpenCLErrorName(errorCode); + this->description = getOpenCLErrorDescription(errorCode); } - CLException(cl_int err, std::string msg) + CLException(cl_int errorCode, std::string pMsg) { - this->error = err; - this->msg = msg; + this->error = errorCode; + this->msg = pMsg; + this->description = getOpenCLErrorDescription(errorCode); + } + + CLException(cl_int errorCode, std::string pMsg, std::string pDescription) + { + this->error = errorCode; + this->msg = pMsg; + this->description = pDescription; } }; @@ -44,10 +55,8 @@ namespace cl { std::vector getDevices(); - int getDeviceCount(); - void clCall(cl_int err); } -#endif \ No newline at end of file +#endif From 8ca20dc82e8d0e777bd10e7024d1922130fdc00f Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:42:01 +0200 Subject: [PATCH 09/62] improve AddrGen --- AddrGen/AddrGen.vcxproj | 10 +++++----- AddrGen/main.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/AddrGen/AddrGen.vcxproj b/AddrGen/AddrGen.vcxproj index d7a25e60..bf50ce3a 100644 --- a/AddrGen/AddrGen.vcxproj +++ b/AddrGen/AddrGen.vcxproj @@ -28,26 +28,26 @@ Application true - v141 + ClangCl Unicode Application false - v141 + ClangCl true Unicode Application true - v142 + ClangCl Unicode Application false - v142 + ClangCL true Unicode @@ -103,7 +103,7 @@ NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(SolutionDir)secp256k1lib;$(SolutionDir)util;$(SolutionDir)AddressUtil;$(SolutionDir)CmdParse;%(AdditionalIncludeDirectories) diff --git a/AddrGen/main.cpp b/AddrGen/main.cpp index 7becf5d6..f5a1089d 100644 --- a/AddrGen/main.cpp +++ b/AddrGen/main.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include "secp256k1.h" #include "util.h" #include "AddressUtil.h" From b4388c396c052db501f80a6eb38a9ba17f90464e Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:43:41 +0200 Subject: [PATCH 
10/62] improve clKeyFinder --- CLKeySearchDevice/CLKeySearchDevice.cpp | 22 +- CLKeySearchDevice/CLKeySearchDevice.vcxproj | 88 +- CLKeySearchDevice/bitcrack.cl | 1178 ++++++++----------- CLKeySearchDevice/keysearch.cl | 133 +-- clKeyFinder/clKeyFinder.vcxproj | 96 +- 5 files changed, 750 insertions(+), 767 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index 4035e642..c3368c1d 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -39,11 +39,11 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer if(threads <= 0 || threads % 32 != 0) { - throw KeySearchException("The number of threads must be a multiple of 32"); + throw KeySearchException("KEYSEARCH_THREAD_MULTIPLE_EXCEPTION", "The number of threads must be a multiple of 32"); } if(pointsPerThread <= 0) { - throw KeySearchException("At least 1 point per thread required"); + throw KeySearchException("KEYSEARCH_MINIMUM_POINT_EXCEPTION", "At least 1 point per thread required"); } try { @@ -63,7 +63,7 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer _deviceName = _clContext->getDeviceName(); } catch(cl::CLException ex) { - throw KeySearchException(ex.msg); + throw KeySearchException(ex.msg, ex.description); } _iterations = 0; @@ -187,7 +187,7 @@ void CLKeySearchDevice::setIncrementor(secp256k1::ecpoint &p) void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride) { if(start.cmp(secp256k1::N) >= 0) { - throw KeySearchException("Starting key is out of range"); + throw KeySearchException("KEYSEARCH_STARTINGKEY_OUT_OF_RANGE", "Starting key is out of range"); } _start = start; @@ -207,7 +207,7 @@ void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, c setIncrementor(p); } catch(cl::CLException ex) { - throw KeySearchException(ex.msg); + throw KeySearchException(ex.msg, 
ex.description); } } @@ -255,7 +255,7 @@ void CLKeySearchDevice::doStep() _iterations++; } catch(cl::CLException ex) { - throw KeySearchException(ex.msg); + throw KeySearchException(ex.msg, ex.description); } } @@ -311,7 +311,7 @@ void CLKeySearchDevice::setTargets(const std::set &targets) setTargetsInternal(); } catch(cl::CLException ex) { - throw KeySearchException(ex.msg); + throw KeySearchException(ex.msg, ex.description); } } @@ -418,6 +418,8 @@ void CLKeySearchDevice::getResultsInternal() _results.push_back(minerResult); } + + delete[] ptr; // Reset device counter numResults = 0; @@ -563,7 +565,7 @@ void CLKeySearchDevice::generateStartingPoints() _initKeysKernel->set_args(_points, i, _privateKeys, _chain, _xTable, _yTable, _x, _y); _initKeysKernel->call(_blocks, _threads); - if(((double)(i+1) / 256.0) * 100.0 >= pct) { + if(((double)(i+1.0) / 256.0) * 100.0 >= pct) { Logger::log(LogLevel::Info, util::format("%.1f%%", pct)); pct += 10.0; } @@ -575,7 +577,5 @@ void CLKeySearchDevice::generateStartingPoints() secp256k1::uint256 CLKeySearchDevice::getNextKey() { - uint64_t totalPoints = (uint64_t)_points * _threads * _blocks; - - return _start + secp256k1::uint256(totalPoints) * _iterations * _stride; + return _start + secp256k1::uint256((uint64_t)_points) * _iterations * _stride; } \ No newline at end of file diff --git a/CLKeySearchDevice/CLKeySearchDevice.vcxproj b/CLKeySearchDevice/CLKeySearchDevice.vcxproj index 4326e0f5..b475054d 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.vcxproj +++ b/CLKeySearchDevice/CLKeySearchDevice.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -22,32 +30,46 @@ 15.0 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9} CLKeySearchDevice - 10.0 + 10.0.19041.0 StaticLibrary true - v141 + ClangCl + MultiByte + + + StaticLibrary + false + ClangCl MultiByte StaticLibrary false - v141 + ClangCl true MultiByte StaticLibrary true - v142 + ClangCl MultiByte + + StaticLibrary + 
false + ClangCl + MultiByte + true + x64 + StaticLibrary false - v142 + ClangCL true MultiByte @@ -60,6 +82,10 @@ + + + + @@ -68,6 +94,10 @@ + + + + @@ -109,7 +139,22 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl +$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl + Embed bitcrack.cl into bitcrack_cl.cpp + + + + + Level3 + Disabled + true + true + $(SolutionDir)\KeyFinderLib;$(SolutionDir)\clUtil;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;$(SolutionDir)\secp256k1lib;$(SolutionDir)\Logger;$(SolutionDir)\util;%(AdditionalIncludeDirectories) + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -128,7 +173,33 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl - type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl +$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl + Embed bitcrack.cl into bitcrack_cl.cpp + + + + + Level3 + true + true + $(SolutionDir)\KeyFinderLib;$(SolutionDir)\clUtil;$(OPENCL_INCLUDE);$(SolutionDir)\secp256k1lib;$(SolutionDir)\Logger;$(SolutionDir)\util;%(AdditionalIncludeDirectories) + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + + + + + type $(SolutionDir)clMath\ripemd160.cl 
$(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -149,7 +220,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cltrue - type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -162,6 +233,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl + diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 974faefa..15639b33 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1,24 +1,11 @@ -#ifndef _RIPEMD160_CL -#define _RIPEMD160_CL +#ifndef RIPEMD160_CL +#define RIPEMD160_CL - -__constant unsigned int _RIPEMD160_IV[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 -}; - -__constant unsigned int _K0 = 0x5a827999; -__constant unsigned int _K1 = 0x6ed9eba1; -__constant unsigned int _K2 = 0x8f1bbcdc; -__constant unsigned int _K3 = 0xa953fd4e; - -__constant unsigned int _K4 = 0x7a6d76e9; -__constant unsigned int _K5 = 0x6d703ef3; -__constant unsigned int _K6 = 0x5c4dd124; -__constant unsigned int _K7 = 0x50a28be6; +#define RIPEMD160_IV_0 (0x67452301) +#define RIPEMD160_IV_1 (0xefcdab89) +#define RIPEMD160_IV_2 (0x98badcfe) +#define RIPEMD160_IV_3 (0x10325476) +#define RIPEMD160_IV_4 (0xc3d2e1f0) #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) @@ -38,22 +25,22 @@ __constant unsigned int _K7 = 0x50a28be6; c = rotl((c), 10) #define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + _K0;\ + a += G((b), (c), (d)) + (x) + 0x5a827999;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HH(a, b, c, d, e, x, s)\ - a += H((b), 
(c), (d)) + (x) + _K1;\ + a += H((b), (c), (d)) + (x) + 0x6ed9eba1;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + _K2;\ + a += I((b), (c), (d)) + (x) + 0x8f1bbcdc;\ a = rotl((a), (s)) + e;\ c = rotl((c), 10) #define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + _K3;\ + a += J((b), (c), (d)) + (x) + 0xa953fd4e;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -63,33 +50,33 @@ __constant unsigned int _K7 = 0x50a28be6; c = rotl((c), 10) #define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + _K4;\ + a += G((b), (c), (d)) + x + 0x7a6d76e9;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + _K5;\ + a += H((b), (c), (d)) + (x) + 0x6d703ef3;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define III(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + _K6;\ + a += I((b), (c), (d)) + (x) + 0x5c4dd124;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + _K7;\ + a += J((b), (c), (d)) + (x) + 0x50a28be6;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; + unsigned int a1 = RIPEMD160_IV_0; + unsigned int b1 = RIPEMD160_IV_1; + unsigned int c1 = RIPEMD160_IV_2; + unsigned int d1 = RIPEMD160_IV_3; + unsigned int e1 = RIPEMD160_IV_4; const unsigned int x8 = 0x00000080; const unsigned int x14 = 256; @@ -184,11 +171,11 @@ void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) JJ(c1, d1, e1, a1, b1, 0, 5); JJ(b1, c1, d1, e1, a1, 0, 6); - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = 
_RIPEMD160_IV[4]; + unsigned int a2 = RIPEMD160_IV_0; + unsigned int b2 = RIPEMD160_IV_1; + unsigned int c2 = RIPEMD160_IV_2; + unsigned int d2 = RIPEMD160_IV_3; + unsigned int e2 = RIPEMD160_IV_4; /* parallel round 1 */ JJJ(a2, b2, c2, d2, e2, x[5], 8); @@ -280,21 +267,21 @@ void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) FFF(c2, d2, e2, a2, b2, 0, 11); FFF(b2, c2, d2, e2, a2, 0, 11); - digest[0] = _RIPEMD160_IV[1] + c1 + d2; - digest[1] = _RIPEMD160_IV[2] + d1 + e2; - digest[2] = _RIPEMD160_IV[3] + e1 + a2; - digest[3] = _RIPEMD160_IV[4] + a1 + b2; - digest[4] = _RIPEMD160_IV[0] + b1 + c2; + digest[0] = RIPEMD160_IV_1 + c1 + d2; + digest[1] = RIPEMD160_IV_2 + d1 + e2; + digest[2] = RIPEMD160_IV_3 + e1 + a2; + digest[3] = RIPEMD160_IV_4 + a1 + b2; + digest[4] = RIPEMD160_IV_0 + b1 + c2; } void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; + unsigned int a1 = RIPEMD160_IV_0; + unsigned int b1 = RIPEMD160_IV_1; + unsigned int c1 = RIPEMD160_IV_2; + unsigned int d1 = RIPEMD160_IV_3; + unsigned int e1 = RIPEMD160_IV_4; const unsigned int x8 = 0x00000080; const unsigned int x14 = 256; @@ -389,11 +376,11 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJ(c1, d1, e1, a1, b1, 0, 5); JJ(b1, c1, d1, e1, a1, 0, 6); - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; + unsigned int a2 = RIPEMD160_IV_0; + unsigned int b2 = RIPEMD160_IV_1; + unsigned int c2 = RIPEMD160_IV_2; + unsigned int d2 = RIPEMD160_IV_3; + unsigned int e2 = RIPEMD160_IV_4; /* parallel round 1 */ JJJ(a2, b2, c2, d2, e2, x[5], 8); @@ -491,89 +478,46 @@ void ripemd160sha256NoFinal(const unsigned int x[8], 
unsigned int digest[5]) digest[3] = a1 + b2; digest[4] = b1 + c2; } -#endif -#ifndef _SECP256K1_CL -#define _SECP256K1_CL -typedef ulong uint64_t; +#endif +#ifndef SECP256K1_CL +#define SECP256K1_CL -typedef struct { - uint v[8]; -}uint256_t; +typedef unsigned long uint64_t; +typedef struct uint256_t { + unsigned int v[8]; +} uint256_t; /** Prime modulus 2^256 - 2^32 - 977 */ -__constant unsigned int _P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F -}; - -__constant unsigned int _P_MINUS1[8] = { +__constant unsigned int P[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F }; -/** - Base point X - */ -__constant unsigned int _GX[8] = { - 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798 -}; - -/** - Base point Y - */ -__constant unsigned int _GY[8] = { - 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8 -}; - - -/** - * Group order - */ -__constant unsigned int _N[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141 -}; - -__constant unsigned int _INFINITY[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF -}; - -void printBigInt(const unsigned int x[8]) -{ - printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", - x[0], x[1], x[2], x[3], - x[4], x[5], x[6], x[7]); -} - // Add with carry -unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry) +void addc(unsigned int *a, unsigned int *b, unsigned int *carry, unsigned int *sum) { - unsigned int sum = a + *carry; + *sum = *a + *carry; - unsigned int c1 = (sum < a) ? 1 : 0; + unsigned int c1 = (*sum < *a) * 1; - sum = sum + b; + *sum = *sum + *b; - unsigned int c2 = (sum < b) ? 
1 : 0; - - *carry = c1 | c2; - - return sum; + *carry = c1 | ((*sum < *b) * 1); } // Subtract with borrow -unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow) +void subc(unsigned int *a, unsigned int *b, unsigned int *borrow, unsigned int *diff) { - unsigned int diff = a - *borrow; - - *borrow = (diff > a) ? 1 : 0; + unsigned int tmp = *a - *borrow; - unsigned int diff2 = diff - b; + *borrow = (tmp > *a) * 1; - *borrow |= (diff2 > diff) ? 1 : 0; + *diff = tmp - *b; - return diff2; + *borrow |= (*diff > tmp) ? 1 : 0; } #ifdef DEVICE_VENDOR_INTEL @@ -588,54 +532,42 @@ unsigned int mul_hi977(unsigned int x) } // 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c) +void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { - *low = a * 977; - unsigned int tmp = *low + c; + *low = *a * 977; + unsigned int tmp = *low + *c; unsigned int carry = tmp < *low ? 1 : 0; *low = tmp; - *high = mul_hi977(a) + carry; + *high = mul_hi977(*a) + carry; } #else // 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c) +void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { - *low = a * 977; - unsigned int tmp = *low + c; + *low = *a * 977; + unsigned int tmp = *low + *c; unsigned int carry = tmp < *low ? 1 : 0; *low = tmp; - *high = mad_hi(a, (unsigned int)977, carry); + *high = mad_hi(*a, (unsigned int)977, carry); } #endif -// 32 x 32 multiply-add -void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c) -{ - *low = a * b; - unsigned int tmp = *low + c; - unsigned int carry = tmp < *low ? 
1 : 0; - *low = tmp; - *high = mad_hi(a, b, carry); -} - -void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b) -{ - *low = a * b; - *high = mul_hi(a, b); -} - - uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr) { unsigned int borrow = 0; uint256_t c; - for(int i = 7; i >= 0; i--) { - c.v[i] = subc(a.v[i], b.v[i], &borrow); - } + subc(&a.v[7], &b.v[7], &borrow, &c.v[7]); + subc(&a.v[6], &b.v[6], &borrow, &c.v[6]); + subc(&a.v[5], &b.v[5], &borrow, &c.v[5]); + subc(&a.v[4], &b.v[4], &borrow, &c.v[4]); + subc(&a.v[3], &b.v[3], &borrow, &c.v[3]); + subc(&a.v[2], &b.v[2], &borrow, &c.v[2]); + subc(&a.v[1], &b.v[1], &borrow, &c.v[1]); + subc(&a.v[0], &b.v[0], &borrow, &c.v[0]); *borrow_ptr = borrow; @@ -644,13 +576,29 @@ uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr) bool greaterThanEqualToP(const unsigned int a[8]) { - for(int i = 0; i < 8; i++) { - if(a[i] > _P_MINUS1[i]) { - return true; - } else if(a[i] < _P_MINUS1[i]) { - return false; - } - } + if(a[0] > P[0]) { return true; } + if(a[0] < P[0]) { return false; } + + if(a[1] > P[1]) { return true; } + if(a[1] < P[1]) { return false; } + + if(a[2] > P[2]) { return true; } + if(a[2] < P[2]) { return false; } + + if(a[3] > P[3]) { return true; } + if(a[3] < P[3]) { return false; } + + if(a[4] > P[4]) { return true; } + if(a[4] < P[4]) { return false; } + + if(a[5] > P[5]) { return true; } + if(a[5] < P[5]) { return false; } + + if(a[6] > P[6]) { return true; } + if(a[6] < P[6]) { return false; } + + if(a[7] > P[7]) { return true; } + if(a[7] < P[7]) { return false; } return true; } @@ -658,15 +606,13 @@ bool greaterThanEqualToP(const unsigned int a[8]) void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { unsigned int z[16]; - unsigned int high = 0; + uint64_t product = 0; // First round, overwrite z for(int j = 7; j >= 0; j--) { - uint64_t product = (uint64_t)x[7] * y[j]; - - product 
= product + high; + product = (uint64_t)x[7] * y[j] + high; z[7 + j + 1] = (unsigned int)product; high = (unsigned int)(product >> 32); @@ -679,9 +625,7 @@ void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int for(int j = 7; j >= 0; j--) { - uint64_t product = (uint64_t)x[i] * y[j]; - - product = product + z[i + j + 1] + high; + product = (uint64_t)x[i] * y[j] + z[i + j + 1] + high; z[i + j + 1] = (unsigned int)product; @@ -697,25 +641,13 @@ void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int } } - -unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - c[i] = addc(a[i], b[i], &carry); - } - - return carry; -} - uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) { uint256_t c; unsigned int carry = 0; for(int i = 7; i >= 0; i--) { - c.v[i] = addc(a.v[i], b.v[i], &carry); + addc(&a.v[i], &b.v[i], &carry, &c.v[i]); } *carry_ptr = carry; @@ -723,65 +655,32 @@ uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) return c; } -bool isInfinity(const unsigned int x[8]) -{ - bool isf = true; - - for(int i = 0; i < 8; i++) { - if(x[i] != 0xffffffff) { - isf = false; - } - } - - return isf; -} - -bool isInfinity256k(const uint256_t x) -{ - bool isf = true; - - for(int i = 0; i < 8; i++) { - if(x.v[i] != 0xffffffff) { - isf = false; - } - } - - return isf; -} - -bool equal(const unsigned int a[8], const unsigned int b[8]) -{ - for(int i = 0; i < 8; i++) { - if(a[i] != b[i]) { - return false; - } - } - - return true; -} - -bool equal256k(uint256_t a, uint256_t b) -{ - for(int i = 0; i < 8; i++) { - if(a.v[i] != b.v[i]) { - return false; - } - } - - return true; -} - -inline uint256_t readInt256(__global const uint256_t* ara, int idx) +bool isInfinity256k(const uint256_t *x) { - return ara[idx]; + return ( + (x->v[0] == 0xffffffff) && + (x->v[1] == 0xffffffff) && + (x->v[2] == 0xffffffff) && + 
(x->v[3] == 0xffffffff) && + (x->v[4] == 0xffffffff) && + (x->v[5] == 0xffffffff) && + (x->v[6] == 0xffffffff) && + (x->v[7] == 0xffffffff) + ); } -/* - * Read least-significant word - */ -unsigned int readLSW(__global const unsigned int *ara, int idx) +bool equal256k(uint256_t *a, uint256_t *b) { - return ara[idx * 8 + 7]; + return ( + (a->v[0] == b->v[0]) && + (a->v[1] == b->v[1]) && + (a->v[2] == b->v[2]) && + (a->v[3] == b->v[3]) && + (a->v[4] == b->v[4]) && + (a->v[5] == b->v[5]) && + (a->v[6] == b->v[6]) && + (a->v[7] == b->v[7]) + ); } unsigned int readLSW256k(__global const uint256_t* ara, int idx) @@ -794,25 +693,36 @@ unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) return ara[idx].v[word]; } -unsigned int addP(const unsigned int a[8], unsigned int c[8]) +void addP(unsigned int a[8], unsigned int c[8]) { unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - c[i] = addc(a[i], _P[i], &carry); - } - - return carry; + unsigned int P[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F + }; + addc(&a[7], &P[7], &carry, &c[7]); + addc(&a[6], &P[6], &carry, &c[6]); + addc(&a[5], &P[5], &carry, &c[5]); + addc(&a[4], &P[4], &carry, &c[4]); + addc(&a[3], &P[3], &carry, &c[3]); + addc(&a[2], &P[2], &carry, &c[2]); + addc(&a[1], &P[1], &carry, &c[1]); + addc(&a[0], &P[0], &carry, &c[0]); } -unsigned int subP(const unsigned int a[8], unsigned int c[8]) +void subP(unsigned int a[8], unsigned int c[8]) { unsigned int borrow = 0; - for(int i = 7; i >= 0; i--) { - c[i] = subc(a[i], _P[i], &borrow); - } - - return borrow; + unsigned int P[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F + }; + subc(&a[7], &P[7], &borrow, &c[7]); + subc(&a[6], &P[6], &borrow, &c[6]); + subc(&a[5], &P[5], &borrow, &c[5]); + subc(&a[4], &P[4], &borrow, &c[4]); + subc(&a[3], &P[3], &borrow, &c[3]); + subc(&a[2], &P[2], &borrow, &c[2]); + subc(&a[1], &P[1], 
&borrow, &c[1]); + subc(&a[0], &P[0], &borrow, &c[0]); } /** @@ -829,60 +739,79 @@ uint256_t subModP256k(uint256_t a, uint256_t b) return c; } - -uint256_t addModP256k(uint256_t a, uint256_t b) +void addModP256k(uint256_t *a, uint256_t *b, uint256_t *cP) { unsigned int carry = 0; - uint256_t c = add256k(a, b, &carry); + uint256_t c = add256k(*a, *b, &carry); - bool gt = false; - for(int i = 0; i < 8; i++) { - if(c.v[i] > _P[i]) { - gt = true; - break; - } else if(c.v[i] < _P[i]) { - break; - } - } + if(carry) { subP(c.v, c.v); *cP = c; } - if(carry || gt) { - subP(c.v, c.v); - } + else if(c.v[0] > P[0]) { subP(c.v, c.v); *cP = c; } + else if(c.v[0] < P[0]) { *cP = c; } - return c; + else if(c.v[1] > P[1]) { subP(c.v, c.v); *cP = c; } + else if(c.v[1] < P[1]) { *cP = c; } + + else if(c.v[2] > P[2]) { subP(c.v, c.v); *cP = c; } + else if(c.v[2] < P[2]) { *cP = c; } + + else if(c.v[3] > P[3]) { subP(c.v, c.v); *cP = c; } + else if(c.v[3] < P[3]) { *cP = c; } + + else if(c.v[4] > P[4]) { subP(c.v, c.v); *cP = c; } + else if(c.v[4] < P[4]) { *cP = c; } + + else if(c.v[5] > P[5]) { subP(c.v, c.v); *cP = c; } + else if(c.v[5] < P[5]) { *cP = c; } + + else if(c.v[6] > P[6]) { subP(c.v, c.v); *cP = c; } + else if(c.v[6] < P[6]) { *cP = c; } + + else if(c.v[7] > P[7]) { subP(c.v, c.v); *cP = c; } + else { *cP = c; } } -void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int product_low[8]) +void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) { + unsigned int ZERO = 0; unsigned int high[8]; unsigned int hWord = 0; unsigned int carry = 0; + unsigned int t = 0; + unsigned int product6 = 0; + unsigned int product7 = 0; + // 256 x 256 multiply multiply256(a, b, high, product_low); // Add 2^32 * high to the low 256 bits (shift left 1 word and add) // Affects product[14] to product[6] - for(int i = 6; i >= 0; i--) { - product_low[i] = addc(product_low[i], high[i + 1], &carry); - } - unsigned int product7 = addc(high[0], 0, &carry); - 
unsigned int product6 = carry; + addc(&product_low[6], &high[7], &carry, &product_low[6]); + addc(&product_low[5], &high[6], &carry, &product_low[5]); + addc(&product_low[4], &high[5], &carry, &product_low[4]); + addc(&product_low[3], &high[4], &carry, &product_low[3]); + addc(&product_low[2], &high[3], &carry, &product_low[2]); + addc(&product_low[1], &high[2], &carry, &product_low[1]); + addc(&product_low[0], &high[1], &carry, &product_low[0]); + + addc(&high[0], &ZERO, &carry, &product7); + product6 = carry; carry = 0; // Multiply high by 977 and add to low // Affects product[15] to product[5] for(int i = 7; i >= 0; i--) { - unsigned int t = 0; - madd977(&hWord, &t, high[i], hWord); - product_low[i] = addc(product_low[i], t, &carry); + madd977(&hWord, &t, &high[i], &hWord); + addc(&product_low[i], &t, &carry, &product_low[i]); + t = 0; } - product7 = addc(product7, hWord, &carry); - product6 = addc(product6, 0, &carry); + addc(&product7, &hWord, &carry, &product7); + addc(&product6, &ZERO, &carry, &product6); // Multiply high 2 words by 2^32 and add to low // Affects product[14] to product[7] @@ -893,30 +822,33 @@ void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int prod product7 = 0; product6 = 0; - product_low[6] = addc(product_low[6], high[7], &carry); - product_low[5] = addc(product_low[5], high[6], &carry); + addc(&product_low[6], &high[7], &carry, &product_low[6]); + addc(&product_low[5], &high[6], &carry, &product_low[5]); + + addc(&product_low[4], &ZERO, &carry, &product_low[4]); + addc(&product_low[3], &ZERO, &carry, &product_low[3]); + addc(&product_low[2], &ZERO, &carry, &product_low[2]); + addc(&product_low[1], &ZERO, &carry, &product_low[1]); + addc(&product_low[0], &ZERO, &carry, &product_low[0]); - // Propagate the carry - for(int i = 4; i >= 0; i--) { - product_low[i] = addc(product_low[i], 0, &carry); - } product7 = carry; // Multiply top 2 words by 977 and add to low // Affects product[15] to product[7] carry = 0; hWord = 
0; - unsigned int t = 0; - madd977(&hWord, &t, high[7], hWord); - product_low[7] = addc(product_low[7], t, &carry); - madd977(&hWord, &t, high[6], hWord); - product_low[6] = addc(product_low[6], t, &carry); - product_low[5] = addc(product_low[5], hWord, &carry); + madd977(&hWord, &t, &high[7], &hWord); + addc(&product_low[7], &t, &carry, &product_low[7]); + madd977(&hWord, &t, &high[6], &hWord); + addc(&product_low[6], &t, &carry, &product_low[6]); + addc(&product_low[5], &hWord, &carry, &product_low[5]); // Propagate carry - for(int i = 4; i >= 0; i--) { - product_low[i] = addc(product_low[i], 0, &carry); - } + addc(&product_low[4], &ZERO, &carry, &product_low[4]); + addc(&product_low[3], &ZERO, &carry, &product_low[3]); + addc(&product_low[2], &ZERO, &carry, &product_low[2]); + addc(&product_low[1], &ZERO, &carry, &product_low[1]); + addc(&product_low[0], &ZERO, &carry, &product_low[0]); product7 = carry; // Reduce if >= P @@ -934,118 +866,50 @@ uint256_t mulModP256k(uint256_t a, uint256_t b) return c; } - -uint256_t squareModP256k(uint256_t a) +void mulModP256kv(uint256_t *a, uint256_t *b, uint256_t *c) { - uint256_t b; - mulModP(a.v, a.v, b.v); - - return b; + mulModP(a->v, b->v, c->v); } +void squareModP256k(uint256_t *a) +{ + mulModP(a->v, a->v, a->v); +} /** * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains */ -uint256_t invModP256k(uint256_t value) +uint256_t invModP256k(uint256_t x) { - uint256_t x = value; + uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + squareModP256k(&x); + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + squareModP256k(&x); + mulModP256kv(&x, &y, &y); - //unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 }; - uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; + for(int i = 0; i < 5; i++) { + squareModP256k(&x); + } + + for(int i = 0; i < 22; i++) { + mulModP256kv(&x, &y, &y); + squareModP256k(&x); + } - // 0xd - 
1101 - y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - - // 0x2 - 0010 - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - - // 0xc = 0x1100 - //y = mulModP256k(x, y); - x = squareModP256k(x); - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - - - // 0xfffff - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - - - // 0xe - 1110 - //y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - y = mulModP256k(x, y); - x = squareModP256k(x); - // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff - for(int i = 0; i < 219; i++) { - y = 
mulModP256k(x, y); - x = squareModP256k(x); + squareModP256k(&x); + + for(int i = 0; i < 222; i++) { + mulModP256kv(&x, &y, &y); + squareModP256k(&x); } - y = mulModP256k(x, y); - return y; + return mulModP256k(x, y); } @@ -1073,8 +937,8 @@ void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* uint256_t x = xPtr[i]; - if(equal256k(px, x)) { - x = addModP256k(py, py); + if(equal256k(&px, &x)) { + addModP256k(&py,&py, &x); } else { // x = Gx - x x = subModP256k(px, x); @@ -1117,8 +981,8 @@ void completeBatchAddWithDouble256k( s = mulModP256k(*inverse, c); uint256_t diff; - if(equal256k(px, x)) { - diff = addModP256k(py, py); + if(equal256k(&px, &x)) { + addModP256k(&py, &py, &diff); } else { diff = subModP256k(px, x); } @@ -1129,32 +993,30 @@ void completeBatchAddWithDouble256k( } - if(equal256k(px, x)) { + if(equal256k(&px, &x)) { // currently s = 1 / 2y uint256_t x2; uint256_t tx2; - uint256_t x3; // 3x^2 - x2 = mulModP256k(x, x); - tx2 = addModP256k(x2, x2); - tx2 = addModP256k(x2, tx2); + mulModP256kv(&x, &x, &x2); + addModP256k(&x2, &x2, &tx2); + addModP256k(&x2, &tx2, &tx2); // s = 3x^2 * 1/2y - s = mulModP256k(tx2, s); + mulModP256kv(&tx2, &s, &s); // s^2 uint256_t s2; - s2 = mulModP256k(s, s); + mulModP256kv(&s, &s, &s2); // Rx = s^2 - 2px *newX = subModP256k(s2, x); *newX = subModP256k(*newX, x); // Ry = s(px - rx) - py - uint256_t k; - k = subModP256k(px, *newX); + uint256_t k = subModP256k(px, *newX); *newY = mulModP256k(s, k); *newY = subModP256k(*newY, py); } else { @@ -1162,11 +1024,11 @@ void completeBatchAddWithDouble256k( uint256_t rise; rise = subModP256k(py, y); - s = mulModP256k(rise, s); + mulModP256kv(&rise, &s, &s); // Rx = s^2 - Gx - Qx uint256_t s2; - s2 = mulModP256k(s, s); + mulModP256kv(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); @@ -1213,24 +1075,21 @@ void completeBatchAdd256k( s = *inverse; } - uint256_t y; - y = yPtr[i]; + uint256_t y = yPtr[i]; - uint256_t rise; - rise = 
subModP256k(py, y); + uint256_t rise = subModP256k(py, y); s = mulModP256k(rise, s); // Rx = s^2 - Gx - Qx uint256_t s2; - s2 = mulModP256k(s, s); + mulModP256kv(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); // Ry = s(px - rx) - py - uint256_t k; - k = subModP256k(px, *newX); + uint256_t k = subModP256k(px, *newX); *newY = mulModP256k(s, k); *newY = subModP256k(*newY, py); } @@ -1279,7 +1138,7 @@ __constant unsigned int _IV[8] = { #define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10)) -#define round(a, b, c, d, e, f, g, h, m, k)\ +#define roundSha(a, b, c, d, e, f, g, h, m, k)\ t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\ (d) += (t) + (h);\ (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22)) @@ -1318,22 +1177,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned g = _IV[6]; h = _IV[7]; - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, w[9], _K[9]); - round(g, h, a, b, c, d, e, f, w[10], _K[10]); - round(f, g, h, a, b, c, d, e, w[11], _K[11]); - round(e, f, g, h, a, b, c, d, w[12], _K[12]); - round(d, e, f, g, h, a, b, c, w[13], _K[13]); - round(c, d, e, f, g, h, a, b, w[14], _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]); + roundSha(c, d, e, f, g, 
h, a, b, w[6], _K[6]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[9]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[10]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[11]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[12]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[13]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[14]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -1352,22 +1211,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]); + roundSha(h, a, b, c, d, e, f, 
g, w[9], _K[25]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -1386,22 +1245,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]); + roundSha(e, f, g, h, a, b, c, 
d, w[12], _K[44]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -1420,22 +1279,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); + roundSha(b, c, d, e, f, g, h, 
a, w[15], _K[63]); a += _IV[0]; b += _IV[1]; @@ -1460,22 +1319,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[0] = (y[7] << 24) | 0x00800000; w[15] = 65 * 8; - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, 0, _K[1]); - round(g, h, a, b, c, d, e, f, 0, _K[2]); - round(f, g, h, a, b, c, d, e, 0, _K[3]); - round(e, f, g, h, a, b, c, d, 0, _K[4]); - round(d, e, f, g, h, a, b, c, 0, _K[5]); - round(c, d, e, f, g, h, a, b, 0, _K[6]); - round(b, c, d, e, f, g, h, a, 0, _K[7]); - round(a, b, c, d, e, f, g, h, 0, _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, _K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); + roundSha(h, a, b, c, d, e, f, g, 0, _K[1]); + roundSha(g, h, a, b, c, d, e, f, 0, _K[2]); + roundSha(f, g, h, a, b, c, d, e, 0, _K[3]); + roundSha(e, f, g, h, a, b, c, d, 0, _K[4]); + roundSha(d, e, f, g, h, a, b, c, 0, _K[5]); + roundSha(c, d, e, f, g, h, a, b, 0, _K[6]); + roundSha(b, c, d, e, f, g, h, a, 0, _K[7]); + roundSha(a, b, c, d, e, f, g, h, 0, _K[8]); + roundSha(h, a, b, c, d, e, f, g, 0, _K[9]); + roundSha(g, h, a, b, c, d, e, f, 0, _K[10]); + roundSha(f, g, h, a, b, c, d, e, 0, _K[11]); + roundSha(e, f, g, h, a, b, c, d, 0, _K[12]); + roundSha(d, e, f, g, h, a, b, c, 0, _K[13]); + roundSha(c, d, e, f, g, h, a, b, 0, _K[14]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]); w[0] = w[0] + s0(0) + 0 + s1(0); w[1] = 0 + s0(0) + 0 + s1(w[15]); @@ -1494,22 +1353,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], 
_K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -1528,22 +1387,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - 
round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -1562,22 +1421,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, 
c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); digest[0] = tmp[0] + a; digest[1] = tmp[1] + b; @@ -1617,22 +1476,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un g = _IV[6]; h = _IV[7]; - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, _K[13]); - round(c, d, 
e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]); + roundSha(h, a, b, c, d, e, f, g, 0, _K[9]); + roundSha(g, h, a, b, c, d, e, f, 0, _K[10]); + roundSha(f, g, h, a, b, c, d, e, 0, _K[11]); + roundSha(e, f, g, h, a, b, c, d, 0, _K[12]); + roundSha(d, e, f, g, h, a, b, c, 0, _K[13]); + roundSha(c, d, e, f, g, h, a, b, 0, _K[14]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]); w[0] = w[0] + s0(w[1]) + 0 + s1(0); w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]); @@ -1651,22 +1510,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]); + roundSha(g, h, a, b, 
c, d, e, f, w[2], _K[18]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); @@ -1685,22 +1544,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]); + roundSha(d, e, f, g, h, 
a, b, c, w[5], _K[37]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]); + roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]); w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); @@ -1720,22 +1579,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); + roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]); + roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]); + roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]); + roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]); + roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]); + roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]); + roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]); + roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]); + roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]); + roundSha(h, a, b, c, 
d, e, f, g, w[9], _K[57]); + roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]); + roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]); + roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]); + roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]); + roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); + roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); a += _IV[0]; b += _IV[1]; @@ -1773,36 +1632,11 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -bool isInList(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets) -{ - bool found = false; - - for(size_t i = 0; i < numTargets; i++) { - int equal = 0; - - for(int j = 0; j < 5; j++) { - if(hash[j] == targetList[5 * i + j]) { - equal++; - } - } - - if(equal == 5) { - found = true; - } - } - - return found; -} - bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask) { - bool foundMatch = true; + bool notFoundMatch = true; - unsigned int h5 = 0; - - for(int i = 0; i < 5; i++) { - h5 += hash[i]; - } + unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; uint64_t idx[5]; @@ -1812,41 +1646,22 @@ bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ul idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask; idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask; - for(int i = 0; i < 5; i++) { - unsigned int j = idx[i]; - unsigned int f = targetList[j / 32]; - - if((f & (0x01 << (j % 32))) == 0) { - foundMatch = false; - } - } - - return foundMatch; -} + notFoundMatch = (targetList[idx[0] / 32] & (0x01 << (idx[0] % 32))) == 0 + || (targetList[idx[1] / 32] & (0x01 << (idx[1] % 32))) == 0 + || (targetList[idx[2] / 32] & (0x01 << (idx[2] % 32))) == 0 + || (targetList[idx[3] / 32] & (0x01 << (idx[3] % 32))) == 0 + || (targetList[idx[4] / 32] & (0x01 << (idx[4] % 32))) == 0; -bool checkHash(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets, ulong mask) -{ - if(numTargets > 16) { - return isInBloomFilter(hash, targetList, 
mask); - } else { - return isInList(hash, targetList, numTargets); - } + return notFoundMatch == false; } - void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { - const unsigned int iv[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 - }; - - for(int i = 0; i < 5; i++) { - hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]); - } + hOut[0] = endian(hIn[0] + 0xefcdab89); + hOut[1] = endian(hIn[1] + 0x98badcfe); + hOut[2] = endian(hIn[2] + 0x10325476); + hOut[3] = endian(hIn[3] + 0xc3d2e1f0); + hOut[4] = endian(hIn[4] + 0x67452301); } @@ -1883,14 +1698,13 @@ __kernel void multiplyStepKernel( uint256_t x = xPtr[i]; if(bit != 0) { - if(!isInfinity256k(x)) { + if(!isInfinity256k(&x)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } } } - //doBatchInverse(inverse); inverse = doBatchInverse256k(inverse); i -= dim; @@ -1903,10 +1717,9 @@ __kernel void multiplyStepKernel( unsigned int bit = p & (1 << (step % 32)); uint256_t x = xPtr[i]; - bool infinity = isInfinity256k(x); if(bit != 0) { - if(!infinity) { + if(!isInfinity256k(&x)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); } else { @@ -1928,9 +1741,14 @@ void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut) sha256PublicKey(x.v, y.v, hash); // Swap to little-endian - for(int i = 0; i < 8; i++) { - hash[i] = endian(hash[i]); - } + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); ripemd160sha256NoFinal(hash, digestOut); } @@ -1942,9 +1760,14 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* di sha256PublicKeyCompressed(x.v, yParity, hash); // Swap to little-endian - for(int i = 0; i < 8; i++) { - hash[i] = endian(hash[i]); - } + hash[0] = endian(hash[0]); + 
hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); ripemd160sha256NoFinal(hash, digestOut); @@ -1964,10 +1787,29 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned r.idx = idx; r.compressed = compressed; - for(int i = 0; i < 8; i++) { - r.x[i] = x.v[i]; - r.y[i] = y.v[i]; - } + r.x[0] = x.v[0]; + r.y[0] = y.v[0]; + + r.x[1] = x.v[1]; + r.y[1] = y.v[1]; + + r.x[2] = x.v[2]; + r.y[2] = y.v[2]; + + r.x[3] = x.v[3]; + r.y[3] = y.v[3]; + + r.x[4] = x.v[4]; + r.y[4] = y.v[4]; + + r.x[5] = x.v[5]; + r.y[5] = y.v[5]; + + r.x[6] = x.v[6]; + r.y[6] = y.v[6]; + + r.x[7] = x.v[7]; + r.y[7] = y.v[7]; doRMD160FinalRound(digest, r.digest); @@ -2011,7 +1853,7 @@ void doIteration( hashPublicKey(x, y, digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { setResultFound(i, false, x, y, digest, results, numResults); } } @@ -2020,7 +1862,7 @@ void doIteration( hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { uint256_t y = yPtr[i]; setResultFound(i, true, x, y, digest, results, numResults); } @@ -2084,7 +1926,7 @@ void doIterationWithDouble( uint256_t y = yPtr[i]; hashPublicKey(x, y, digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { setResultFound(i, false, x, y, digest, results, numResults); } } @@ -2094,7 +1936,7 @@ void doIterationWithDouble( hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { uint256_t y = yPtr[i]; setResultFound(i, true, x, y, digest, results, numResults); diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 
5da94c01..ade126f7 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -15,36 +15,11 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -bool isInList(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets) -{ - bool found = false; - - for(size_t i = 0; i < numTargets; i++) { - int equal = 0; - - for(int j = 0; j < 5; j++) { - if(hash[j] == targetList[5 * i + j]) { - equal++; - } - } - - if(equal == 5) { - found = true; - } - } - - return found; -} - bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask) { - bool foundMatch = true; - - unsigned int h5 = 0; + bool notFoundMatch = true; - for(int i = 0; i < 5; i++) { - h5 += hash[i]; - } + unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; uint64_t idx[5]; @@ -54,41 +29,22 @@ bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ul idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask; idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask; - for(int i = 0; i < 5; i++) { - unsigned int j = idx[i]; - unsigned int f = targetList[j / 32]; + notFoundMatch = (targetList[idx[0] / 32] & (0x01 << (idx[0] % 32))) == 0 + || (targetList[idx[1] / 32] & (0x01 << (idx[1] % 32))) == 0 + || (targetList[idx[2] / 32] & (0x01 << (idx[2] % 32))) == 0 + || (targetList[idx[3] / 32] & (0x01 << (idx[3] % 32))) == 0 + || (targetList[idx[4] / 32] & (0x01 << (idx[4] % 32))) == 0; - if((f & (0x01 << (j % 32))) == 0) { - foundMatch = false; - } - } - - return foundMatch; -} - -bool checkHash(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets, ulong mask) -{ - if(numTargets > 16) { - return isInBloomFilter(hash, targetList, mask); - } else { - return isInList(hash, targetList, numTargets); - } + return notFoundMatch == false; } - void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { - const unsigned int iv[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 
0x10325476, - 0xc3d2e1f0 - }; - - for(int i = 0; i < 5; i++) { - hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]); - } + hOut[0] = endian(hIn[0] + 0xefcdab89); + hOut[1] = endian(hIn[1] + 0x98badcfe); + hOut[2] = endian(hIn[2] + 0x10325476); + hOut[3] = endian(hIn[3] + 0xc3d2e1f0); + hOut[4] = endian(hIn[4] + 0x67452301); } @@ -125,14 +81,13 @@ __kernel void multiplyStepKernel( uint256_t x = xPtr[i]; if(bit != 0) { - if(!isInfinity256k(x)) { + if(!isInfinity256k(&x)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } } } - //doBatchInverse(inverse); inverse = doBatchInverse256k(inverse); i -= dim; @@ -145,10 +100,9 @@ __kernel void multiplyStepKernel( unsigned int bit = p & (1 << (step % 32)); uint256_t x = xPtr[i]; - bool infinity = isInfinity256k(x); if(bit != 0) { - if(!infinity) { + if(!isInfinity256k(&x)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); } else { @@ -170,9 +124,14 @@ void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut) sha256PublicKey(x.v, y.v, hash); // Swap to little-endian - for(int i = 0; i < 8; i++) { - hash[i] = endian(hash[i]); - } + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); ripemd160sha256NoFinal(hash, digestOut); } @@ -184,9 +143,14 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* di sha256PublicKeyCompressed(x.v, yParity, hash); // Swap to little-endian - for(int i = 0; i < 8; i++) { - hash[i] = endian(hash[i]); - } + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); ripemd160sha256NoFinal(hash, digestOut); @@ -206,10 +170,29 @@ void 
setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned r.idx = idx; r.compressed = compressed; - for(int i = 0; i < 8; i++) { - r.x[i] = x.v[i]; - r.y[i] = y.v[i]; - } + r.x[0] = x.v[0]; + r.y[0] = y.v[0]; + + r.x[1] = x.v[1]; + r.y[1] = y.v[1]; + + r.x[2] = x.v[2]; + r.y[2] = y.v[2]; + + r.x[3] = x.v[3]; + r.y[3] = y.v[3]; + + r.x[4] = x.v[4]; + r.y[4] = y.v[4]; + + r.x[5] = x.v[5]; + r.y[5] = y.v[5]; + + r.x[6] = x.v[6]; + r.y[6] = y.v[6]; + + r.x[7] = x.v[7]; + r.y[7] = y.v[7]; doRMD160FinalRound(digest, r.digest); @@ -253,7 +236,7 @@ void doIteration( hashPublicKey(x, y, digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { setResultFound(i, false, x, y, digest, results, numResults); } } @@ -262,7 +245,7 @@ void doIteration( hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { uint256_t y = yPtr[i]; setResultFound(i, true, x, y, digest, results, numResults); } @@ -326,7 +309,7 @@ void doIterationWithDouble( uint256_t y = yPtr[i]; hashPublicKey(x, y, digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { setResultFound(i, false, x, y, digest, results, numResults); } } @@ -336,7 +319,7 @@ void doIterationWithDouble( hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); - if(checkHash(digest, targetList, numTargets, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { uint256_t y = yPtr[i]; setResultFound(i, true, x, y, digest, results, numResults); diff --git a/clKeyFinder/clKeyFinder.vcxproj b/clKeyFinder/clKeyFinder.vcxproj index c73ca1a8..874c5001 100644 --- a/clKeyFinder/clKeyFinder.vcxproj +++ b/clKeyFinder/clKeyFinder.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -22,32 +30,46 @@ 15.0 {36400E8D-3D04-430C-90A4-FC989E460B3C} 
clKeyFinder - 10.0 + 10.0.19041.0 Application true - v141 + ClangCl + MultiByte + + + Application + false + ClangCl MultiByte Application false - v141 + ClangCl true MultiByte Application true - v142 + ClangCl MultiByte + + Application + false + ClangCl + MultiByte + true + x64 + Application false - v142 + ClangCL true MultiByte @@ -60,6 +82,10 @@ + + + + @@ -68,6 +94,10 @@ + + + + @@ -78,6 +108,11 @@ Build false + + clBitCrack + Build + false + clBitCrack Build @@ -95,6 +130,48 @@ OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies) $(OPENCL_LIB);%(AdditionalLibraryDirectories) + Console + + + + + + + + + + + true + + + + + + + + + + + + + Level3 + true + true + $(SolutionDir)\KeyFinderLib;$(SolutionDir)\AddressUtil;$(SolutionDir)\Logger;$(SolutionDir)\CmdParse;$(SolutionDir)\CLKeySearchDevice;$(SolutionDir)\secp256k1lib;$(SolutionDir)\util;$(SolutionDir)\clUtil;$(OPENCL_INCLUDE);%(AdditionalIncludeDirectories) + _CRT_SECURE_NO_WARNINGS;BUILD_OPENCL;%(PreprocessorDefinitions) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies) + $(OPENCL_LIB);%(AdditionalLibraryDirectories) + Console @@ -124,6 +201,14 @@ true + + + Level3 + Disabled + true + true + + Level3 @@ -154,6 +239,7 @@ true OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies) $(OPENCL_LIB);%(AdditionalLibraryDirectories) + Console From eb1933192ef8f8c8dd8c2e6c1ce18dba4a7b0019 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:44:10 +0200 Subject: [PATCH 11/62] add CLUnitTests --- CLUnitTests/CLUnitTests.vcxproj | 10 +++++----- CLUnitTests/main.cpp | 4 ++-- CLUnitTests/secp256k1test.cl | 11 +++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/CLUnitTests/CLUnitTests.vcxproj b/CLUnitTests/CLUnitTests.vcxproj index 6a8e4152..60562855 100644 --- a/CLUnitTests/CLUnitTests.vcxproj +++ b/CLUnitTests/CLUnitTests.vcxproj @@ -28,26 +28,26 @@ Application true - v141 + ClangCl MultiByte 
Application false - v141 + ClangCl true MultiByte Application true - v142 + ClangCl MultiByte Application false - v142 + ClangCL true MultiByte @@ -107,7 +107,7 @@ $(SolutionDir)\tools\embedcl.exe test.cl test.cpp _secp256k1_test_cl - Level3 + EnableAllWarnings Disabled true true diff --git a/CLUnitTests/main.cpp b/CLUnitTests/main.cpp index f221f2fa..43202541 100644 --- a/CLUnitTests/main.cpp +++ b/CLUnitTests/main.cpp @@ -67,7 +67,7 @@ int main(int argc, char **argv) try { devices = cl::getDevices(); }catch(cl::CLException ex) { - std::cout << "Error: " << ex.msg << std::endl; + std::cout << "Error " << ex.msg << ": " << ex.description << std::endl; return 1; } @@ -86,7 +86,7 @@ int main(int argc, char **argv) numErrors += runTest(devices[i].id); } catch(cl::CLException ex) { - std::cout << "Error " << ex.msg << std::endl; + std::cout << "Error " << ex.msg << ": " << ex.description << std::endl; } } diff --git a/CLUnitTests/secp256k1test.cl b/CLUnitTests/secp256k1test.cl index d3119a18..de7e0ba9 100644 --- a/CLUnitTests/secp256k1test.cl +++ b/CLUnitTests/secp256k1test.cl @@ -8,6 +8,17 @@ typedef struct { }CLErrorInfo; +bool equal(const unsigned int a[8], const unsigned int b[8]) +{ + for(int i = 0; i < 8; i++) { + if(a[i] != b[i]) { + return false; + } + } + + return true; +} + bool addTest() { unsigned int x[8] = { 0xa4aea9b8, 0x6fe248f5, 0x1fc74965, 0xe9493264, 0x4e2dff0c, 0x009f7c9c, 0x832fa59b, 0x3361f837 }; From 4f14fe91d744bce6ae27350bd536f85a43a1f176 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:44:33 +0200 Subject: [PATCH 12/62] improve CmdParse --- CmdParse/CmdParse.cpp | 2 +- CmdParse/CmdParse.h | 6 ++-- CmdParse/CmdParse.vcxproj | 70 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 69 insertions(+), 9 deletions(-) diff --git a/CmdParse/CmdParse.cpp b/CmdParse/CmdParse.cpp index adc1c6c1..7855bd26 100644 --- a/CmdParse/CmdParse.cpp +++ b/CmdParse/CmdParse.cpp @@ -80,4 +80,4 @@ std::vector CmdParse::getArgs() std::vector 
CmdParse::getOperands() { return _operands; -} \ No newline at end of file +} diff --git a/CmdParse/CmdParse.h b/CmdParse/CmdParse.h index 3135fbfd..ca85e034 100644 --- a/CmdParse/CmdParse.h +++ b/CmdParse/CmdParse.h @@ -1,5 +1,5 @@ -#ifndef _CMD_PARSE -#define _CMD_PARSE +#ifndef CMD_PARSE_H +#define CMD_PARSE_H #include #include @@ -53,4 +53,4 @@ class CmdParse { std::vector getOperands(); }; -#endif \ No newline at end of file +#endif diff --git a/CmdParse/CmdParse.vcxproj b/CmdParse/CmdParse.vcxproj index e42fc4ce..116946a8 100644 --- a/CmdParse/CmdParse.vcxproj +++ b/CmdParse/CmdParse.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -34,26 +42,40 @@ StaticLibrary true - v141 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode StaticLibrary false - v141 + ClangCl true Unicode StaticLibrary true - v142 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode + true + x64 StaticLibrary false - v142 + ClangCL true Unicode @@ -66,6 +88,10 @@ + + + + @@ -74,6 +100,10 @@ + + + + @@ -92,10 +122,22 @@ Windows + + + + + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + + + Windows + + NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) @@ -103,6 +145,24 @@ Windows + + + NotUsing + Level3 + _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + Windows + + Level3 From bbfbb8137c5576dba18ebfd2fb98e7e29b646630 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:44:55 +0200 Subject: [PATCH 13/62] fix utils --- util/util.cpp | 2 +- util/util.h | 4 +-- util/util.vcxproj | 72 +++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 9 deletions(-) diff --git a/util/util.cpp b/util/util.cpp index b48750b7..2034330f 100644 --- a/util/util.cpp +++ b/util/util.cpp @@ -295,4 
+295,4 @@ namespace util { return s.substr(left, right - left + 1); } -} \ No newline at end of file +} diff --git a/util/util.h b/util/util.h index 83b81b29..4514d2d5 100644 --- a/util/util.h +++ b/util/util.h @@ -1,5 +1,5 @@ -#ifndef _UTIL_H -#define _UTIL_H +#ifndef UTIL_H +#define UTIL_H #include #include diff --git a/util/util.vcxproj b/util/util.vcxproj index 687925a9..5c7800e1 100644 --- a/util/util.vcxproj +++ b/util/util.vcxproj @@ -5,6 +5,14 @@ Debug Win32 + + Performance Release + Win32 + + + Performance Release + x64 + Release Win32 @@ -28,32 +36,46 @@ {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} Win32Proj util - 10.0 + 10.0.19041.0 StaticLibrary true - v141 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode StaticLibrary false - v141 + ClangCl true Unicode StaticLibrary true - v142 + ClangCl + Unicode + + + StaticLibrary + false + ClangCl Unicode + true + x64 StaticLibrary false - v142 + ClangCL true Unicode @@ -66,6 +88,10 @@ + + + + @@ -74,6 +100,10 @@ + + + + @@ -92,10 +122,22 @@ Windows + + + + + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + + + Windows + + NotUsing - Level3 + EnableAllWarnings Disabled _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) @@ -103,6 +145,24 @@ Windows + + + NotUsing + Level3 + _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + None + MaxSpeed + AnySuitable + true + Speed + true + true + true + + + Windows + + Level3 From b82f76be84d36ebd07649b9a267da225a4b130c2 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:45:42 +0200 Subject: [PATCH 14/62] the rest --- .gitignore | 2 +- BitCrack.props | 8 +- BitCrack.sln | 161 ++++++++++++++-------------------------- README.md | 2 +- embedcl/embedcl.vcxproj | 8 +- 5 files changed, 67 insertions(+), 114 deletions(-) diff --git a/.gitignore b/.gitignore index c8adb8a9..53cb3673 100644 --- a/.gitignore +++ b/.gitignore @@ -30,8 +30,8 @@ CLUnitTests/test.cl [Dd]ebug/ [Dd]ebugPublic/ 
[Rr]elease/ +[Pp]erformance Release/ [Rr]eleases/ -Performance Release/ x64/ x86/ bld/ diff --git a/BitCrack.props b/BitCrack.props index 0f980a56..f6a0e5ef 100644 --- a/BitCrack.props +++ b/BitCrack.props @@ -2,10 +2,10 @@ - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64 - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\lib\x64 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\lib\x64 diff --git a/BitCrack.sln b/BitCrack.sln index 9913b171..54ab1112 100644 --- a/BitCrack.sln +++ b/BitCrack.sln @@ -1,31 +1,14 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27703.2018 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.31229.75 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "secp256k1lib", "secp256k1lib\secp256k1lib.vcxproj", "{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CmdParse", "CmdParse\CmdParse.vcxproj", "{F7037134-28C5-4EB9-BE5D-587E79A40628}" -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util", "util\util.vcxproj", "{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddressUtil", "AddressUtil\AddressUtil.vcxproj", "{34042455-D274-432D-9134-C9EA41FD1B54}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuKeyFinder", "KeyFinder\KeyFinder.vcxproj", "{D77642A9-365C-420C-A726-469649D2927E}" - ProjectSection(ProjectDependencies) = postProject - {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} = {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} - 
{CCA3D02C-5E5A-4A24-B34B-5961DFA93946} = {CCA3D02C-5E5A-4A24-B34B-5961DFA93946} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddrGen", "AddrGen\AddrGen.vcxproj", "{7AE5E38D-5731-404E-A4F3-229ADF981EFC}" - ProjectSection(ProjectDependencies) = postProject - {F7037134-28C5-4EB9-BE5D-587E79A40628} = {F7037134-28C5-4EB9-BE5D-587E79A40628} - {34042455-D274-432D-9134-C9EA41FD1B54} = {34042455-D274-432D-9134-C9EA41FD1B54} - {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaUtil", "cudaUtil\cudaUtil.vcxproj", "{EADAAA54-E304-4656-8263-E5E688FF323D}" -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CryptoUtil", "CryptoUtil\CryptoUtil.vcxproj", "{CA46856A-1D1E-4F6F-A69C-6707D540BF36}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Logger", "Logger\Logger.vcxproj", "{150AF404-1F80-4A13-855B-4383C4A3326F}" @@ -41,15 +24,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "KeyFinderLib", "KeyFinderLi {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaInfo", "cudaInfo\cudaInfo.vcxproj", "{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}" - ProjectSection(ProjectDependencies) = postProject - {EADAAA54-E304-4656-8263-E5E688FF323D} = {EADAAA54-E304-4656-8263-E5E688FF323D} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaMath", "cudaMath\cudaMath.vcxproj", "{E1BDB205-8994-4E49-8B35-172A84E7118C}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CudaKeySearchDevice", "CudaKeySearchDevice\CudaKeySearchDevice.vcxproj", "{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}" -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clUtil", "clUtil\clUtil.vcxproj", "{D9A5823D-C472-40AC-B23A-21B1586CEEB0}" 
ProjectSection(ProjectDependencies) = postProject {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} @@ -62,27 +36,21 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clKeyFinder", "clKeyFinder\ {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} = {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} {150AF404-1F80-4A13-855B-4383C4A3326F} = {150AF404-1F80-4A13-855B-4383C4A3326F} {546C8D1F-127F-4EF4-914F-2A7F9367C0F9} = {546C8D1F-127F-4EF4-914F-2A7F9367C0F9} - {F7037134-28C5-4EB9-BE5D-587E79A40628} = {F7037134-28C5-4EB9-BE5D-587E79A40628} {34042455-D274-432D-9134-C9EA41FD1B54} = {34042455-D274-432D-9134-C9EA41FD1B54} {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "embedcl", "embedcl\embedcl.vcxproj", "{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLUnitTests", "CLUnitTests\CLUnitTests.vcxproj", "{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}" - ProjectSection(ProjectDependencies) = postProject - {D9A5823D-C472-40AC-B23A-21B1586CEEB0} = {D9A5823D-C472-40AC-B23A-21B1586CEEB0} - {83327841-C283-4D46-A873-97AC674C68AC} = {83327841-C283-4D46-A873-97AC674C68AC} - {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} - EndProjectSection -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clMath", "clMath\clMath.vcxproj", "{83327841-C283-4D46-A873-97AC674C68AC}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CmdParse", "CmdParse\CmdParse.vcxproj", "{F7037134-28C5-4EB9-BE5D-587E79A40628}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 + Performance Release|x64 = Performance Release|x64 + Performance Release|x86 = Performance Release|x86 Release|x64 = Release|x64 Release|x86 = Release|x86 
EndGlobalSection @@ -91,22 +59,22 @@ Global {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Debug|x64.Build.0 = Debug|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Debug|x86.ActiveCfg = Debug|Win32 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Debug|x86.Build.0 = Debug|Win32 + {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x64.Build.0 = Performance Release|x64 + {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x86.Build.0 = Performance Release|Win32 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x64.ActiveCfg = Release|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x64.Build.0 = Release|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x86.ActiveCfg = Release|Win32 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x86.Build.0 = Release|Win32 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.ActiveCfg = Debug|x64 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.Build.0 = Debug|x64 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.ActiveCfg = Debug|Win32 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.Build.0 = Debug|Win32 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.ActiveCfg = Release|x64 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.Build.0 = Release|x64 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.ActiveCfg = Release|Win32 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.Build.0 = Release|Win32 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x64.ActiveCfg = Debug|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x64.Build.0 = Debug|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x86.ActiveCfg = Debug|Win32 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x86.Build.0 = Debug|Win32 + {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance 
Release|x64.Build.0 = Performance Release|x64 + {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x86.Build.0 = Performance Release|Win32 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x64.ActiveCfg = Release|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x64.Build.0 = Release|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x86.ActiveCfg = Release|Win32 @@ -115,38 +83,22 @@ Global {34042455-D274-432D-9134-C9EA41FD1B54}.Debug|x64.Build.0 = Debug|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Debug|x86.ActiveCfg = Debug|Win32 {34042455-D274-432D-9134-C9EA41FD1B54}.Debug|x86.Build.0 = Debug|Win32 + {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x64.Build.0 = Performance Release|x64 + {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x86.Build.0 = Performance Release|Win32 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x64.ActiveCfg = Release|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x64.Build.0 = Release|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x86.ActiveCfg = Release|Win32 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x86.Build.0 = Release|Win32 - {D77642A9-365C-420C-A726-469649D2927E}.Debug|x64.ActiveCfg = Debug|x64 - {D77642A9-365C-420C-A726-469649D2927E}.Debug|x64.Build.0 = Debug|x64 - {D77642A9-365C-420C-A726-469649D2927E}.Debug|x86.ActiveCfg = Debug|Win32 - {D77642A9-365C-420C-A726-469649D2927E}.Debug|x86.Build.0 = Debug|Win32 - {D77642A9-365C-420C-A726-469649D2927E}.Release|x64.ActiveCfg = Release|x64 - {D77642A9-365C-420C-A726-469649D2927E}.Release|x64.Build.0 = Release|x64 - {D77642A9-365C-420C-A726-469649D2927E}.Release|x86.ActiveCfg = Release|Win32 - 
{D77642A9-365C-420C-A726-469649D2927E}.Release|x86.Build.0 = Release|Win32 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x64.ActiveCfg = Debug|x64 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x64.Build.0 = Debug|x64 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x86.ActiveCfg = Debug|Win32 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x86.Build.0 = Debug|Win32 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x64.ActiveCfg = Release|x64 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x64.Build.0 = Release|x64 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x86.ActiveCfg = Release|Win32 - {7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x86.Build.0 = Release|Win32 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x64.ActiveCfg = Debug|x64 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x64.Build.0 = Debug|x64 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x86.ActiveCfg = Debug|Win32 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x86.Build.0 = Debug|Win32 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x64.ActiveCfg = Release|x64 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x64.Build.0 = Release|x64 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x86.ActiveCfg = Release|Win32 - {EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x86.Build.0 = Release|Win32 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x64.ActiveCfg = Debug|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x64.Build.0 = Debug|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x86.ActiveCfg = Debug|Win32 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x86.Build.0 = Debug|Win32 + {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x64.Build.0 = Performance Release|x64 + {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x86.Build.0 = Performance Release|Win32 
{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x64.ActiveCfg = Release|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x64.Build.0 = Release|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x86.ActiveCfg = Release|Win32 @@ -155,6 +107,10 @@ Global {150AF404-1F80-4A13-855B-4383C4A3326F}.Debug|x64.Build.0 = Debug|x64 {150AF404-1F80-4A13-855B-4383C4A3326F}.Debug|x86.ActiveCfg = Debug|Win32 {150AF404-1F80-4A13-855B-4383C4A3326F}.Debug|x86.Build.0 = Debug|Win32 + {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x64.Build.0 = Performance Release|x64 + {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x86.Build.0 = Performance Release|Win32 {150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x64.ActiveCfg = Release|x64 {150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x64.Build.0 = Release|x64 {150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x86.ActiveCfg = Release|Win32 @@ -162,31 +118,20 @@ Global {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Debug|x64.ActiveCfg = Debug|x64 {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Debug|x64.Build.0 = Debug|x64 {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Debug|x86.ActiveCfg = Debug|x64 + {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Performance Release|x64.Build.0 = Performance Release|x64 + {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Performance Release|x86.ActiveCfg = Performance Release|x64 {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Release|x64.ActiveCfg = Release|x64 {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Release|x64.Build.0 = Release|x64 {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Release|x86.ActiveCfg = Release|x64 - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Debug|x64.ActiveCfg = Debug|x64 - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Debug|x64.Build.0 = 
Debug|x64 - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Debug|x86.ActiveCfg = Debug|x64 - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Release|x64.ActiveCfg = Release|x64 - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Release|x64.Build.0 = Release|x64 - {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Release|x86.ActiveCfg = Release|x64 - {E1BDB205-8994-4E49-8B35-172A84E7118C}.Debug|x64.ActiveCfg = Debug|x64 - {E1BDB205-8994-4E49-8B35-172A84E7118C}.Debug|x64.Build.0 = Debug|x64 - {E1BDB205-8994-4E49-8B35-172A84E7118C}.Debug|x86.ActiveCfg = Debug|x64 - {E1BDB205-8994-4E49-8B35-172A84E7118C}.Release|x64.ActiveCfg = Release|x64 - {E1BDB205-8994-4E49-8B35-172A84E7118C}.Release|x64.Build.0 = Release|x64 - {E1BDB205-8994-4E49-8B35-172A84E7118C}.Release|x86.ActiveCfg = Release|x64 - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Debug|x64.ActiveCfg = Debug|x64 - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Debug|x64.Build.0 = Debug|x64 - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Debug|x86.ActiveCfg = Debug|x64 - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Release|x64.ActiveCfg = Release|x64 - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Release|x64.Build.0 = Release|x64 - {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Release|x86.ActiveCfg = Release|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x64.ActiveCfg = Debug|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x64.Build.0 = Debug|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x86.ActiveCfg = Debug|Win32 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x86.Build.0 = Debug|Win32 + {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x64.Build.0 = Performance Release|x64 + {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x86.Build.0 = Performance Release|Win32 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x64.ActiveCfg = Release|x64 
{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x64.Build.0 = Release|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x86.ActiveCfg = Release|Win32 @@ -195,6 +140,10 @@ Global {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Debug|x64.Build.0 = Debug|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Debug|x86.ActiveCfg = Debug|Win32 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Debug|x86.Build.0 = Debug|Win32 + {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x64.Build.0 = Performance Release|x64 + {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x86.Build.0 = Performance Release|Win32 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x64.ActiveCfg = Release|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x64.Build.0 = Release|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x86.ActiveCfg = Release|Win32 @@ -203,34 +152,38 @@ Global {36400E8D-3D04-430C-90A4-FC989E460B3C}.Debug|x64.Build.0 = Debug|x64 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Debug|x86.ActiveCfg = Debug|Win32 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Debug|x86.Build.0 = Debug|Win32 + {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x64.Build.0 = Performance Release|x64 + {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x86.Build.0 = Performance Release|Win32 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x64.ActiveCfg = Release|x64 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x64.Build.0 = Release|x64 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x86.ActiveCfg = Release|Win32 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x86.Build.0 = Release|Win32 - 
{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x64.ActiveCfg = Debug|x64 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x64.Build.0 = Debug|x64 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x86.ActiveCfg = Debug|Win32 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x86.Build.0 = Debug|Win32 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x64.ActiveCfg = Release|x64 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x64.Build.0 = Release|x64 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x86.ActiveCfg = Release|Win32 - {8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x86.Build.0 = Release|Win32 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x64.ActiveCfg = Debug|x64 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x64.Build.0 = Debug|x64 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x86.ActiveCfg = Debug|Win32 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x86.Build.0 = Debug|Win32 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x64.ActiveCfg = Release|x64 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x64.Build.0 = Release|x64 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x86.ActiveCfg = Release|Win32 - {146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x86.Build.0 = Release|Win32 {83327841-C283-4D46-A873-97AC674C68AC}.Debug|x64.ActiveCfg = Debug|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Debug|x64.Build.0 = Debug|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Debug|x86.ActiveCfg = Debug|Win32 {83327841-C283-4D46-A873-97AC674C68AC}.Debug|x86.Build.0 = Debug|Win32 + {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x64.Build.0 = Performance Release|x64 + {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x86.Build.0 = Performance Release|Win32 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.ActiveCfg = Release|x64 
{83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.Build.0 = Release|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.ActiveCfg = Release|Win32 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.Build.0 = Release|Win32 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.ActiveCfg = Debug|x64 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.Build.0 = Debug|x64 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.ActiveCfg = Debug|Win32 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.Build.0 = Debug|Win32 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x64.ActiveCfg = Performance Release|x64 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x64.Build.0 = Performance Release|x64 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x86.ActiveCfg = Performance Release|Win32 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x86.Build.0 = Performance Release|Win32 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.ActiveCfg = Release|x64 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.Build.0 = Release|x64 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.ActiveCfg = Release|Win32 + {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/README.md b/README.md index 73bebe17..e429308a 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ kernel to run longer, but more keys will be processed. Visual Studio 2019 (if on Windows) -For CUDA: CUDA Toolkit 10.1 +For CUDA: CUDA Toolkit 11.3 For OpenCL: An OpenCL SDK (The CUDA toolkit contains an OpenCL SDK). 
diff --git a/embedcl/embedcl.vcxproj b/embedcl/embedcl.vcxproj index 8d2dd7f6..3013aec3 100644 --- a/embedcl/embedcl.vcxproj +++ b/embedcl/embedcl.vcxproj @@ -28,26 +28,26 @@ Application true - v141 + ClangCl MultiByte Application false - v141 + ClangCl true MultiByte Application true - v142 + ClangCl MultiByte Application false - v142 + ClangCL true MultiByte From 1bd24de226f7c82880bc968ca7e58636d7fbe3cf Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:48:39 +0200 Subject: [PATCH 15/62] rename BitCrack to BitCrackOpenCL --- BitCrack.sln => BitCrackOpenCL.sln | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename BitCrack.sln => BitCrackOpenCL.sln (100%) diff --git a/BitCrack.sln b/BitCrackOpenCL.sln similarity index 100% rename from BitCrack.sln rename to BitCrackOpenCL.sln From 2a041358da9a807fff03cede0e00f57275630ed0 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 25 May 2021 04:49:54 +0200 Subject: [PATCH 16/62] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e429308a..c8f5206b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# BitCrack +# BitCrackOpenCL A tool for brute-forcing Bitcoin private keys. The main purpose of this project is to contribute to the effort of solving the [Bitcoin puzzle transaction](https://blockchain.info/tx/08389f34c98c606322740c0be6a7125d9860bb8d5cb182c02f98461e5fa6cd15): A transaction with 32 addresses that become increasingly difficult to crack. 
From f51a38d08aedacab8d7948a685e02c65890c99df Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 07:01:24 +0200 Subject: [PATCH 17/62] remove unused code in ripemd160.cl to improve on-the-fly-compile time --- clMath/ripemd160.cl | 206 -------------------------------------------- 1 file changed, 206 deletions(-) diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 6e36a63e..e619b7da 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -69,212 +69,6 @@ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) - -void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) -{ - unsigned int a1 = RIPEMD160_IV_0; - unsigned int b1 = RIPEMD160_IV_1; - unsigned int c1 = RIPEMD160_IV_2; - unsigned int d1 = RIPEMD160_IV_3; - unsigned int e1 = RIPEMD160_IV_4; - - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - - /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); - FF(a1, b1, c1, d1, e1, 0, 8); - - /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); - - /* round 
3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); - - /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); - - /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = RIPEMD160_IV_0; - unsigned int b2 = RIPEMD160_IV_1; - unsigned int c2 = RIPEMD160_IV_2; - unsigned int d2 = RIPEMD160_IV_3; - unsigned int e2 = RIPEMD160_IV_4; - - /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - 
JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); - - /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); - - /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); - - /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, 
b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); - - /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); - FFF(e2, a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = RIPEMD160_IV_1 + c1 + d2; - digest[1] = RIPEMD160_IV_2 + d1 + e2; - digest[2] = RIPEMD160_IV_3 + e1 + a2; - digest[3] = RIPEMD160_IV_4 + a1 + b2; - digest[4] = RIPEMD160_IV_0 + b1 + c2; -} - - void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { unsigned int a1 = RIPEMD160_IV_0; From d6e201901c651fc4daf47cf0213c191daa449f8c Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 07:02:47 +0200 Subject: [PATCH 18/62] pass directly the kernel instead of wrapping them in doIteration function --- CLKeySearchDevice/bitcrack.cl | 258 +-------------------------------- CLKeySearchDevice/keysearch.cl | 52 +------ 2 files changed, 14 insertions(+), 296 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 15639b33..a42be15d 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -69,212 +69,6 @@ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) - -void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) -{ - unsigned int a1 = RIPEMD160_IV_0; - unsigned int b1 = RIPEMD160_IV_1; - unsigned int c1 = RIPEMD160_IV_2; - unsigned int d1 = RIPEMD160_IV_3; - 
unsigned int e1 = RIPEMD160_IV_4; - - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - - /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); - FF(a1, b1, c1, d1, e1, 0, 8); - - /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); - - /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); - - /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, 
d1, e1, a1, b1, x8, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); - - /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = RIPEMD160_IV_0; - unsigned int b2 = RIPEMD160_IV_1; - unsigned int c2 = RIPEMD160_IV_2; - unsigned int d2 = RIPEMD160_IV_3; - unsigned int e2 = RIPEMD160_IV_4; - - /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); - - /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, 
a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); - - /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); - - /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); - - /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); - FFF(e2, a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - 
FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = RIPEMD160_IV_1 + c1 + d2; - digest[1] = RIPEMD160_IV_2 + d1 + e2; - digest[2] = RIPEMD160_IV_3 + e1 + a2; - digest[3] = RIPEMD160_IV_4 + a1 + b2; - digest[4] = RIPEMD160_IV_0 + b1 + c2; -} - - void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { unsigned int a1 = RIPEMD160_IV_0; @@ -1816,16 +1610,16 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned atomicListAdd(results, numResults, &r); } -void doIteration( - size_t totalPoints, +__kernel void keyFinderKernel( + unsigned int totalPoints, int compression, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, __global uint256_t* incXPtr, __global uint256_t* incYPtr, - __global unsigned int *targetList, - size_t numTargets, + __global unsigned int* targetList, + ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) @@ -1888,9 +1682,8 @@ void doIteration( } } - -void doIterationWithDouble( - size_t totalPoints, +__kernel void keyFinderKernelWithDouble( + unsigned int totalPoints, int compression, __global uint256_t* chain, __global uint256_t* xPtr, @@ -1898,7 +1691,7 @@ void doIterationWithDouble( __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - size_t numTargets, + ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) @@ -1961,40 +1754,3 @@ void doIterationWithDouble( yPtr[i] = newY; } } - -/** -* Performs a single iteration -*/ -__kernel void keyFinderKernel( - unsigned int totalPoints, - int compression, - __global uint256_t* chain, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - __global uint256_t* incXPtr, - __global uint256_t* incYPtr, - __global unsigned int* targetList, - ulong numTargets, - ulong mask, - __global CLDeviceResult *results, - __global unsigned int *numResults) -{ - 
doIteration(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults); -} - -__kernel void keyFinderKernelWithDouble( - unsigned int totalPoints, - int compression, - __global uint256_t* chain, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - __global uint256_t* incXPtr, - __global uint256_t* incYPtr, - __global unsigned int* targetList, - ulong numTargets, - ulong mask, - __global CLDeviceResult *results, - __global unsigned int *numResults) -{ - doIterationWithDouble(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults); -} diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index ade126f7..d2eb1a7f 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -199,16 +199,16 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned atomicListAdd(results, numResults, &r); } -void doIteration( - size_t totalPoints, +__kernel void keyFinderKernel( + unsigned int totalPoints, int compression, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, __global uint256_t* incXPtr, __global uint256_t* incYPtr, - __global unsigned int *targetList, - size_t numTargets, + __global unsigned int* targetList, + ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) @@ -271,9 +271,8 @@ void doIteration( } } - -void doIterationWithDouble( - size_t totalPoints, +__kernel void keyFinderKernelWithDouble( + unsigned int totalPoints, int compression, __global uint256_t* chain, __global uint256_t* xPtr, @@ -281,7 +280,7 @@ void doIterationWithDouble( __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - size_t numTargets, + ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) @@ -344,40 +343,3 @@ void doIterationWithDouble( yPtr[i] = newY; 
} } - -/** -* Performs a single iteration -*/ -__kernel void keyFinderKernel( - unsigned int totalPoints, - int compression, - __global uint256_t* chain, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - __global uint256_t* incXPtr, - __global uint256_t* incYPtr, - __global unsigned int* targetList, - ulong numTargets, - ulong mask, - __global CLDeviceResult *results, - __global unsigned int *numResults) -{ - doIteration(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults); -} - -__kernel void keyFinderKernelWithDouble( - unsigned int totalPoints, - int compression, - __global uint256_t* chain, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - __global uint256_t* incXPtr, - __global uint256_t* incYPtr, - __global unsigned int* targetList, - ulong numTargets, - ulong mask, - __global CLDeviceResult *results, - __global unsigned int *numResults) -{ - doIterationWithDouble(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults); -} From 147d98d309d65246d9248ef43c8174757c3cb256 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 07:41:55 +0200 Subject: [PATCH 19/62] simplify ripemd160 --- CLKeySearchDevice/bitcrack.cl | 43 ++++++++++++++++------------------- clMath/ripemd160.cl | 43 ++++++++++++++++------------------- 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index a42be15d..6df33099 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -77,9 +77,6 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) unsigned int d1 = RIPEMD160_IV_3; unsigned int e1 = RIPEMD160_IV_4; - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - /* round 1 */ FF(a1, b1, c1, d1, e1, x[0], 11); FF(e1, a1, b1, c1, d1, x[1], 14); @@ -89,13 +86,13 @@ void ripemd160sha256NoFinal(const unsigned int x[8], 
unsigned int digest[5]) FF(a1, b1, c1, d1, e1, x[5], 8); FF(e1, a1, b1, c1, d1, x[6], 7); FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); + FF(c1, d1, e1, a1, b1, 128, 11); FF(b1, c1, d1, e1, a1, 0, 13); FF(a1, b1, c1, d1, e1, 0, 14); FF(e1, a1, b1, c1, d1, 0, 15); FF(d1, e1, a1, b1, c1, 0, 6); FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); + FF(b1, c1, d1, e1, a1, 256, 9); FF(a1, b1, c1, d1, e1, 0, 8); /* round 2 */ @@ -112,18 +109,18 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) GG(e1, a1, b1, c1, d1, 0, 15); GG(d1, e1, a1, b1, c1, x[5], 9); GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); + GG(b1, c1, d1, e1, a1, 256, 7); GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); + GG(e1, a1, b1, c1, d1, 0x80, 12); /* round 3 */ HH(d1, e1, a1, b1, c1, x[3], 11); HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); + HH(b1, c1, d1, e1, a1, 256, 6); HH(a1, b1, c1, d1, e1, x[4], 7); HH(e1, a1, b1, c1, d1, 0, 14); HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); + HH(c1, d1, e1, a1, b1, 0x80, 13); HH(b1, c1, d1, e1, a1, x[1], 15); HH(a1, b1, c1, d1, e1, x[2], 14); HH(e1, a1, b1, c1, d1, x[7], 8); @@ -140,14 +137,14 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) II(a1, b1, c1, d1, e1, 0, 14); II(e1, a1, b1, c1, d1, 0, 15); II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); + II(c1, d1, e1, a1, b1, 0x80, 15); II(b1, c1, d1, e1, a1, 0, 9); II(a1, b1, c1, d1, e1, x[4], 8); II(e1, a1, b1, c1, d1, 0, 9); II(d1, e1, a1, b1, c1, x[3], 14); II(c1, d1, e1, a1, b1, x[7], 5); II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); + II(a1, b1, c1, d1, e1, 256, 8); II(e1, a1, b1, c1, d1, x[5], 6); II(d1, e1, a1, b1, c1, x[6], 5); II(c1, d1, e1, a1, b1, x[2], 12); @@ -161,10 +158,10 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJ(b1, c1, d1, e1, a1, 0, 8); JJ(a1, b1, 
c1, d1, e1, x[2], 13); JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); + JJ(d1, e1, a1, b1, c1, 256, 5); JJ(c1, d1, e1, a1, b1, x[1], 12); JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); + JJ(a1, b1, c1, d1, e1, 0x80, 14); JJ(e1, a1, b1, c1, d1, 0, 11); JJ(d1, e1, a1, b1, c1, x[6], 8); JJ(c1, d1, e1, a1, b1, 0, 5); @@ -178,7 +175,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) /* parallel round 1 */ JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); + JJJ(e2, a2, b2, c2, d2, 256, 9); JJJ(d2, e2, a2, b2, c2, x[7], 9); JJJ(c2, d2, e2, a2, b2, x[0], 11); JJJ(b2, c2, d2, e2, a2, 0, 13); @@ -188,7 +185,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJJ(c2, d2, e2, a2, b2, 0, 7); JJJ(b2, c2, d2, e2, a2, x[6], 7); JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); + JJJ(e2, a2, b2, c2, d2, 0x80, 11); JJJ(d2, e2, a2, b2, c2, x[1], 14); JJJ(c2, d2, e2, a2, b2, 0, 14); JJJ(b2, c2, d2, e2, a2, x[3], 12); @@ -203,9 +200,9 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) III(e2, a2, b2, c2, d2, 0, 8); III(d2, e2, a2, b2, c2, x[5], 9); III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); + III(b2, c2, d2, e2, a2, 256, 7); III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); + III(e2, a2, b2, c2, d2, 0x80, 12); III(d2, e2, a2, b2, c2, 0, 7); III(c2, d2, e2, a2, b2, x[4], 6); III(b2, c2, d2, e2, a2, 0, 15); @@ -218,11 +215,11 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) HHH(b2, c2, d2, e2, a2, x[1], 15); HHH(a2, b2, c2, d2, e2, x[3], 11); HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); + HHH(d2, e2, a2, b2, c2, 256, 6); HHH(c2, d2, e2, a2, b2, x[6], 6); HHH(b2, c2, d2, e2, a2, 0, 14); HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); + HHH(e2, a2, b2, c2, d2, 0x80, 13); HHH(d2, e2, a2, b2, c2, 0, 5); HHH(c2, d2, e2, 
a2, b2, x[2], 14); HHH(b2, c2, d2, e2, a2, 0, 13); @@ -231,7 +228,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) HHH(d2, e2, a2, b2, c2, 0, 5); /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); + GGG(c2, d2, e2, a2, b2, 0x80, 15); GGG(b2, c2, d2, e2, a2, x[6], 5); GGG(a2, b2, c2, d2, e2, x[4], 8); GGG(e2, a2, b2, c2, d2, x[1], 11); @@ -246,7 +243,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) GGG(a2, b2, c2, d2, e2, 0, 12); GGG(e2, a2, b2, c2, d2, x[7], 5); GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); + GGG(c2, d2, e2, a2, b2, 256, 8); /* parallel round 5 */ FFF(b2, c2, d2, e2, a2, 0, 8); @@ -255,12 +252,12 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) FFF(d2, e2, a2, b2, c2, x[4], 9); FFF(c2, d2, e2, a2, b2, x[1], 12); FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); + FFF(a2, b2, c2, d2, e2, 0x80, 14); FFF(e2, a2, b2, c2, d2, x[7], 6); FFF(d2, e2, a2, b2, c2, x[6], 8); FFF(c2, d2, e2, a2, b2, x[2], 13); FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); + FFF(a2, b2, c2, d2, e2, 256, 5); FFF(e2, a2, b2, c2, d2, x[0], 15); FFF(d2, e2, a2, b2, c2, x[3], 13); FFF(c2, d2, e2, a2, b2, 0, 11); diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index e619b7da..2331fd9d 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -77,9 +77,6 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) unsigned int d1 = RIPEMD160_IV_3; unsigned int e1 = RIPEMD160_IV_4; - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - /* round 1 */ FF(a1, b1, c1, d1, e1, x[0], 11); FF(e1, a1, b1, c1, d1, x[1], 14); @@ -89,13 +86,13 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) FF(a1, b1, c1, d1, e1, x[5], 8); FF(e1, a1, b1, c1, d1, x[6], 7); FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); + FF(c1, d1, e1, a1, b1, 128, 11); 
FF(b1, c1, d1, e1, a1, 0, 13); FF(a1, b1, c1, d1, e1, 0, 14); FF(e1, a1, b1, c1, d1, 0, 15); FF(d1, e1, a1, b1, c1, 0, 6); FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); + FF(b1, c1, d1, e1, a1, 256, 9); FF(a1, b1, c1, d1, e1, 0, 8); /* round 2 */ @@ -112,18 +109,18 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) GG(e1, a1, b1, c1, d1, 0, 15); GG(d1, e1, a1, b1, c1, x[5], 9); GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); + GG(b1, c1, d1, e1, a1, 256, 7); GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); + GG(e1, a1, b1, c1, d1, 0x80, 12); /* round 3 */ HH(d1, e1, a1, b1, c1, x[3], 11); HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); + HH(b1, c1, d1, e1, a1, 256, 6); HH(a1, b1, c1, d1, e1, x[4], 7); HH(e1, a1, b1, c1, d1, 0, 14); HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); + HH(c1, d1, e1, a1, b1, 0x80, 13); HH(b1, c1, d1, e1, a1, x[1], 15); HH(a1, b1, c1, d1, e1, x[2], 14); HH(e1, a1, b1, c1, d1, x[7], 8); @@ -140,14 +137,14 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) II(a1, b1, c1, d1, e1, 0, 14); II(e1, a1, b1, c1, d1, 0, 15); II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); + II(c1, d1, e1, a1, b1, 0x80, 15); II(b1, c1, d1, e1, a1, 0, 9); II(a1, b1, c1, d1, e1, x[4], 8); II(e1, a1, b1, c1, d1, 0, 9); II(d1, e1, a1, b1, c1, x[3], 14); II(c1, d1, e1, a1, b1, x[7], 5); II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); + II(a1, b1, c1, d1, e1, 256, 8); II(e1, a1, b1, c1, d1, x[5], 6); II(d1, e1, a1, b1, c1, x[6], 5); II(c1, d1, e1, a1, b1, x[2], 12); @@ -161,10 +158,10 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJ(b1, c1, d1, e1, a1, 0, 8); JJ(a1, b1, c1, d1, e1, x[2], 13); JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); + JJ(d1, e1, a1, b1, c1, 256, 5); JJ(c1, d1, e1, a1, b1, x[1], 12); JJ(b1, c1, d1, e1, a1, x[3], 13); - 
JJ(a1, b1, c1, d1, e1, x8, 14); + JJ(a1, b1, c1, d1, e1, 0x80, 14); JJ(e1, a1, b1, c1, d1, 0, 11); JJ(d1, e1, a1, b1, c1, x[6], 8); JJ(c1, d1, e1, a1, b1, 0, 5); @@ -178,7 +175,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) /* parallel round 1 */ JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); + JJJ(e2, a2, b2, c2, d2, 256, 9); JJJ(d2, e2, a2, b2, c2, x[7], 9); JJJ(c2, d2, e2, a2, b2, x[0], 11); JJJ(b2, c2, d2, e2, a2, 0, 13); @@ -188,7 +185,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJJ(c2, d2, e2, a2, b2, 0, 7); JJJ(b2, c2, d2, e2, a2, x[6], 7); JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); + JJJ(e2, a2, b2, c2, d2, 0x80, 11); JJJ(d2, e2, a2, b2, c2, x[1], 14); JJJ(c2, d2, e2, a2, b2, 0, 14); JJJ(b2, c2, d2, e2, a2, x[3], 12); @@ -203,9 +200,9 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) III(e2, a2, b2, c2, d2, 0, 8); III(d2, e2, a2, b2, c2, x[5], 9); III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); + III(b2, c2, d2, e2, a2, 256, 7); III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); + III(e2, a2, b2, c2, d2, 0x80, 12); III(d2, e2, a2, b2, c2, 0, 7); III(c2, d2, e2, a2, b2, x[4], 6); III(b2, c2, d2, e2, a2, 0, 15); @@ -218,11 +215,11 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) HHH(b2, c2, d2, e2, a2, x[1], 15); HHH(a2, b2, c2, d2, e2, x[3], 11); HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); + HHH(d2, e2, a2, b2, c2, 256, 6); HHH(c2, d2, e2, a2, b2, x[6], 6); HHH(b2, c2, d2, e2, a2, 0, 14); HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); + HHH(e2, a2, b2, c2, d2, 0x80, 13); HHH(d2, e2, a2, b2, c2, 0, 5); HHH(c2, d2, e2, a2, b2, x[2], 14); HHH(b2, c2, d2, e2, a2, 0, 13); @@ -231,7 +228,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) HHH(d2, e2, a2, b2, c2, 0, 5); /* parallel 
round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); + GGG(c2, d2, e2, a2, b2, 0x80, 15); GGG(b2, c2, d2, e2, a2, x[6], 5); GGG(a2, b2, c2, d2, e2, x[4], 8); GGG(e2, a2, b2, c2, d2, x[1], 11); @@ -246,7 +243,7 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) GGG(a2, b2, c2, d2, e2, 0, 12); GGG(e2, a2, b2, c2, d2, x[7], 5); GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); + GGG(c2, d2, e2, a2, b2, 256, 8); /* parallel round 5 */ FFF(b2, c2, d2, e2, a2, 0, 8); @@ -255,12 +252,12 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) FFF(d2, e2, a2, b2, c2, x[4], 9); FFF(c2, d2, e2, a2, b2, x[1], 12); FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); + FFF(a2, b2, c2, d2, e2, 0x80, 14); FFF(e2, a2, b2, c2, d2, x[7], 6); FFF(d2, e2, a2, b2, c2, x[6], 8); FFF(c2, d2, e2, a2, b2, x[2], 13); FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); + FFF(a2, b2, c2, d2, e2, 256, 5); FFF(e2, a2, b2, c2, d2, x[0], 15); FFF(d2, e2, a2, b2, c2, x[3], 13); FFF(c2, d2, e2, a2, b2, 0, 11); From e2bef00b35e7e88ef398193afa30f31643e6138b Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 07:57:52 +0200 Subject: [PATCH 20/62] simplify ripemd160 by using only one set of scalar variables --- CLKeySearchDevice/bitcrack.cl | 360 +++++++++++++++++----------------- clMath/ripemd160.cl | 360 +++++++++++++++++----------------- 2 files changed, 366 insertions(+), 354 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 6df33099..a5eb47ca 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -71,203 +71,209 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a1 = RIPEMD160_IV_0; - unsigned int b1 = RIPEMD160_IV_1; - unsigned int c1 = RIPEMD160_IV_2; - unsigned int d1 = RIPEMD160_IV_3; - unsigned int e1 = RIPEMD160_IV_4; + unsigned int a = RIPEMD160_IV_0; + unsigned int b = 
RIPEMD160_IV_1; + unsigned int c = RIPEMD160_IV_2; + unsigned int d = RIPEMD160_IV_3; + unsigned int e = RIPEMD160_IV_4; /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, 128, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, 256, 9); - FF(a1, b1, c1, d1, e1, 0, 8); + FF(a, b, c, d, e, x[0], 11); + FF(e, a, b, c, d, x[1], 14); + FF(d, e, a, b, c, x[2], 15); + FF(c, d, e, a, b, x[3], 12); + FF(b, c, d, e, a, x[4], 5); + FF(a, b, c, d, e, x[5], 8); + FF(e, a, b, c, d, x[6], 7); + FF(d, e, a, b, c, x[7], 9); + FF(c, d, e, a, b, 128, 11); + FF(b, c, d, e, a, 0, 13); + FF(a, b, c, d, e, 0, 14); + FF(e, a, b, c, d, 0, 15); + FF(d, e, a, b, c, 0, 6); + FF(c, d, e, a, b, 0, 7); + FF(b, c, d, e, a, 256, 9); + FF(a, b, c, d, e, 0, 8); /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, 256, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, 0x80, 12); + GG(e, a, b, c, d, x[7], 7); + GG(d, e, a, b, c, x[4], 6); + GG(c, d, e, a, b, 0, 8); + GG(b, c, d, e, a, x[1], 13); + GG(a, b, c, d, e, 0, 11); + GG(e, a, b, c, d, x[6], 9); + GG(d, e, a, b, c, 0, 7); + GG(c, d, e, a, b, x[3], 15); + GG(b, c, d, e, a, 0, 7); + GG(a, b, c, d, e, 
x[0], 12); + GG(e, a, b, c, d, 0, 15); + GG(d, e, a, b, c, x[5], 9); + GG(c, d, e, a, b, x[2], 11); + GG(b, c, d, e, a, 256, 7); + GG(a, b, c, d, e, 0, 13); + GG(e, a, b, c, d, 0x80, 12); /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, 256, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, 0x80, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); + HH(d, e, a, b, c, x[3], 11); + HH(c, d, e, a, b, 0, 13); + HH(b, c, d, e, a, 256, 6); + HH(a, b, c, d, e, x[4], 7); + HH(e, a, b, c, d, 0, 14); + HH(d, e, a, b, c, 0, 9); + HH(c, d, e, a, b, 0x80, 13); + HH(b, c, d, e, a, x[1], 15); + HH(a, b, c, d, e, x[2], 14); + HH(e, a, b, c, d, x[7], 8); + HH(d, e, a, b, c, x[0], 13); + HH(c, d, e, a, b, x[6], 6); + HH(b, c, d, e, a, 0, 5); + HH(a, b, c, d, e, 0, 12); + HH(e, a, b, c, d, x[5], 7); + HH(d, e, a, b, c, 0, 5); /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, 0x80, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, 256, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); + II(c, d, e, a, b, x[1], 11); + II(b, c, d, e, a, 0, 12); + II(a, b, c, d, e, 0, 14); + II(e, a, b, c, d, 0, 15); + II(d, e, a, b, c, x[0], 14); + II(c, d, e, a, b, 0x80, 15); + II(b, c, d, e, a, 0, 9); + II(a, b, 
c, d, e, x[4], 8); + II(e, a, b, c, d, 0, 9); + II(d, e, a, b, c, x[3], 14); + II(c, d, e, a, b, x[7], 5); + II(b, c, d, e, a, 0, 6); + II(a, b, c, d, e, 256, 8); + II(e, a, b, c, d, x[5], 6); + II(d, e, a, b, c, x[6], 5); + II(c, d, e, a, b, x[2], 12); /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, 256, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, 0x80, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = RIPEMD160_IV_0; - unsigned int b2 = RIPEMD160_IV_1; - unsigned int c2 = RIPEMD160_IV_2; - unsigned int d2 = RIPEMD160_IV_3; - unsigned int e2 = RIPEMD160_IV_4; + JJ(b, c, d, e, a, x[4], 9); + JJ(a, b, c, d, e, x[0], 15); + JJ(e, a, b, c, d, x[5], 5); + JJ(d, e, a, b, c, 0, 11); + JJ(c, d, e, a, b, x[7], 6); + JJ(b, c, d, e, a, 0, 8); + JJ(a, b, c, d, e, x[2], 13); + JJ(e, a, b, c, d, 0, 12); + JJ(d, e, a, b, c, 256, 5); + JJ(c, d, e, a, b, x[1], 12); + JJ(b, c, d, e, a, x[3], 13); + JJ(a, b, c, d, e, 0x80, 14); + JJ(e, a, b, c, d, 0, 11); + JJ(d, e, a, b, c, x[6], 8); + JJ(c, d, e, a, b, 0, 5); + JJ(b, c, d, e, a, 0, 6); + + digest[0] += c; + digest[1] += d; + digest[2] += e; + digest[3] += a; + digest[4] += b; + + a = RIPEMD160_IV_0; + b = RIPEMD160_IV_1; + c = RIPEMD160_IV_2; + d = RIPEMD160_IV_3; + e = RIPEMD160_IV_4; /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, 256, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, 
a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, 0x80, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); + JJJ(a, b, c, d, e, x[5], 8); + JJJ(e, a, b, c, d, 256, 9); + JJJ(d, e, a, b, c, x[7], 9); + JJJ(c, d, e, a, b, x[0], 11); + JJJ(b, c, d, e, a, 0, 13); + JJJ(a, b, c, d, e, x[2], 15); + JJJ(e, a, b, c, d, 0, 15); + JJJ(d, e, a, b, c, x[4], 5); + JJJ(c, d, e, a, b, 0, 7); + JJJ(b, c, d, e, a, x[6], 7); + JJJ(a, b, c, d, e, 0, 8); + JJJ(e, a, b, c, d, 0x80, 11); + JJJ(d, e, a, b, c, x[1], 14); + JJJ(c, d, e, a, b, 0, 14); + JJJ(b, c, d, e, a, x[3], 12); + JJJ(a, b, c, d, e, 0, 6); /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, 256, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, 0x80, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); + III(e, a, b, c, d, x[6], 9); + III(d, e, a, b, c, 0, 13); + III(c, d, e, a, b, x[3], 15); + III(b, c, d, e, a, x[7], 7); + III(a, b, c, d, e, x[0], 12); + III(e, a, b, c, d, 0, 8); + III(d, e, a, b, c, x[5], 9); + III(c, d, e, a, b, 0, 11); + III(b, c, d, e, a, 256, 7); + III(a, b, c, d, e, 0, 7); + III(e, a, b, c, d, 0x80, 12); + III(d, e, a, b, c, 0, 7); + III(c, d, e, a, b, x[4], 6); + III(b, c, d, e, a, 0, 15); + III(a, b, c, d, e, x[1], 13); + III(e, a, b, c, d, x[2], 11); /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, 
b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, 256, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, 0x80, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); + HHH(d, e, a, b, c, 0, 9); + HHH(c, d, e, a, b, x[5], 7); + HHH(b, c, d, e, a, x[1], 15); + HHH(a, b, c, d, e, x[3], 11); + HHH(e, a, b, c, d, x[7], 8); + HHH(d, e, a, b, c, 256, 6); + HHH(c, d, e, a, b, x[6], 6); + HHH(b, c, d, e, a, 0, 14); + HHH(a, b, c, d, e, 0, 12); + HHH(e, a, b, c, d, 0x80, 13); + HHH(d, e, a, b, c, 0, 5); + HHH(c, d, e, a, b, x[2], 14); + HHH(b, c, d, e, a, 0, 13); + HHH(a, b, c, d, e, x[0], 13); + HHH(e, a, b, c, d, x[4], 7); + HHH(d, e, a, b, c, 0, 5); /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, 0x80, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, 256, 8); + GGG(c, d, e, a, b, 0x80, 15); + GGG(b, c, d, e, a, x[6], 5); + GGG(a, b, c, d, e, x[4], 8); + GGG(e, a, b, c, d, x[1], 11); + GGG(d, e, a, b, c, x[3], 14); + GGG(c, d, e, a, b, 0, 14); + GGG(b, c, d, e, a, 0, 6); + GGG(a, b, c, d, e, x[0], 14); + GGG(e, a, b, c, d, x[5], 6); + GGG(d, e, a, b, c, 0, 9); + GGG(c, d, e, a, b, x[2], 12); + GGG(b, c, d, e, a, 0, 9); + GGG(a, b, c, d, e, 0, 12); + GGG(e, a, b, c, d, x[7], 5); + GGG(d, e, a, b, c, 0, 15); + GGG(c, d, e, a, b, 256, 8); /* parallel round 5 */ - FFF(b2, 
c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, 0x80, 14); - FFF(e2, a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, 256, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = c1 + d2; - digest[1] = d1 + e2; - digest[2] = e1 + a2; - digest[3] = a1 + b2; - digest[4] = b1 + c2; + FFF(b, c, d, e, a, 0, 8); + FFF(a, b, c, d, e, 0, 5); + FFF(e, a, b, c, d, 0, 12); + FFF(d, e, a, b, c, x[4], 9); + FFF(c, d, e, a, b, x[1], 12); + FFF(b, c, d, e, a, x[5], 5); + FFF(a, b, c, d, e, 0x80, 14); + FFF(e, a, b, c, d, x[7], 6); + FFF(d, e, a, b, c, x[6], 8); + FFF(c, d, e, a, b, x[2], 13); + FFF(b, c, d, e, a, 0, 6); + FFF(a, b, c, d, e, 256, 5); + FFF(e, a, b, c, d, x[0], 15); + FFF(d, e, a, b, c, x[3], 13); + FFF(c, d, e, a, b, 0, 11); + FFF(b, c, d, e, a, 0, 11); + + digest[0] += d; + digest[1] += e; + digest[2] += a; + digest[3] += b; + digest[4] += c; } #endif diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 2331fd9d..cbd757a4 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -71,203 +71,209 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a1 = RIPEMD160_IV_0; - unsigned int b1 = RIPEMD160_IV_1; - unsigned int c1 = RIPEMD160_IV_2; - unsigned int d1 = RIPEMD160_IV_3; - unsigned int e1 = RIPEMD160_IV_4; + unsigned int a = RIPEMD160_IV_0; + unsigned int b = RIPEMD160_IV_1; + unsigned int c = RIPEMD160_IV_2; + unsigned int d = RIPEMD160_IV_3; + unsigned int e = RIPEMD160_IV_4; /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, 
x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, 128, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, 256, 9); - FF(a1, b1, c1, d1, e1, 0, 8); + FF(a, b, c, d, e, x[0], 11); + FF(e, a, b, c, d, x[1], 14); + FF(d, e, a, b, c, x[2], 15); + FF(c, d, e, a, b, x[3], 12); + FF(b, c, d, e, a, x[4], 5); + FF(a, b, c, d, e, x[5], 8); + FF(e, a, b, c, d, x[6], 7); + FF(d, e, a, b, c, x[7], 9); + FF(c, d, e, a, b, 128, 11); + FF(b, c, d, e, a, 0, 13); + FF(a, b, c, d, e, 0, 14); + FF(e, a, b, c, d, 0, 15); + FF(d, e, a, b, c, 0, 6); + FF(c, d, e, a, b, 0, 7); + FF(b, c, d, e, a, 256, 9); + FF(a, b, c, d, e, 0, 8); /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, 256, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, 0x80, 12); + GG(e, a, b, c, d, x[7], 7); + GG(d, e, a, b, c, x[4], 6); + GG(c, d, e, a, b, 0, 8); + GG(b, c, d, e, a, x[1], 13); + GG(a, b, c, d, e, 0, 11); + GG(e, a, b, c, d, x[6], 9); + GG(d, e, a, b, c, 0, 7); + GG(c, d, e, a, b, x[3], 15); + GG(b, c, d, e, a, 0, 7); + GG(a, b, c, d, e, x[0], 12); + GG(e, a, b, c, d, 0, 15); + GG(d, e, a, b, c, x[5], 9); + GG(c, d, e, a, b, x[2], 11); + GG(b, c, d, e, a, 256, 7); + GG(a, b, c, d, e, 0, 13); + GG(e, a, b, c, d, 0x80, 12); /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - 
HH(b1, c1, d1, e1, a1, 256, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, 0x80, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); + HH(d, e, a, b, c, x[3], 11); + HH(c, d, e, a, b, 0, 13); + HH(b, c, d, e, a, 256, 6); + HH(a, b, c, d, e, x[4], 7); + HH(e, a, b, c, d, 0, 14); + HH(d, e, a, b, c, 0, 9); + HH(c, d, e, a, b, 0x80, 13); + HH(b, c, d, e, a, x[1], 15); + HH(a, b, c, d, e, x[2], 14); + HH(e, a, b, c, d, x[7], 8); + HH(d, e, a, b, c, x[0], 13); + HH(c, d, e, a, b, x[6], 6); + HH(b, c, d, e, a, 0, 5); + HH(a, b, c, d, e, 0, 12); + HH(e, a, b, c, d, x[5], 7); + HH(d, e, a, b, c, 0, 5); /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, 0x80, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, 256, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); + II(c, d, e, a, b, x[1], 11); + II(b, c, d, e, a, 0, 12); + II(a, b, c, d, e, 0, 14); + II(e, a, b, c, d, 0, 15); + II(d, e, a, b, c, x[0], 14); + II(c, d, e, a, b, 0x80, 15); + II(b, c, d, e, a, 0, 9); + II(a, b, c, d, e, x[4], 8); + II(e, a, b, c, d, 0, 9); + II(d, e, a, b, c, x[3], 14); + II(c, d, e, a, b, x[7], 5); + II(b, c, d, e, a, 0, 6); + II(a, b, c, d, e, 256, 8); + II(e, a, b, c, d, x[5], 6); + II(d, e, a, b, c, x[6], 5); + II(c, d, e, a, b, x[2], 12); /* round 5 */ - 
JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, 256, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, 0x80, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = RIPEMD160_IV_0; - unsigned int b2 = RIPEMD160_IV_1; - unsigned int c2 = RIPEMD160_IV_2; - unsigned int d2 = RIPEMD160_IV_3; - unsigned int e2 = RIPEMD160_IV_4; + JJ(b, c, d, e, a, x[4], 9); + JJ(a, b, c, d, e, x[0], 15); + JJ(e, a, b, c, d, x[5], 5); + JJ(d, e, a, b, c, 0, 11); + JJ(c, d, e, a, b, x[7], 6); + JJ(b, c, d, e, a, 0, 8); + JJ(a, b, c, d, e, x[2], 13); + JJ(e, a, b, c, d, 0, 12); + JJ(d, e, a, b, c, 256, 5); + JJ(c, d, e, a, b, x[1], 12); + JJ(b, c, d, e, a, x[3], 13); + JJ(a, b, c, d, e, 0x80, 14); + JJ(e, a, b, c, d, 0, 11); + JJ(d, e, a, b, c, x[6], 8); + JJ(c, d, e, a, b, 0, 5); + JJ(b, c, d, e, a, 0, 6); + + digest[0] = c; + digest[1] = d; + digest[2] = e; + digest[3] = a; + digest[4] = b; + + a = RIPEMD160_IV_0; + b = RIPEMD160_IV_1; + c = RIPEMD160_IV_2; + d = RIPEMD160_IV_3; + e = RIPEMD160_IV_4; /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, 256, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, 0x80, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); + JJJ(a, b, c, 
d, e, x[5], 8); + JJJ(e, a, b, c, d, 256, 9); + JJJ(d, e, a, b, c, x[7], 9); + JJJ(c, d, e, a, b, x[0], 11); + JJJ(b, c, d, e, a, 0, 13); + JJJ(a, b, c, d, e, x[2], 15); + JJJ(e, a, b, c, d, 0, 15); + JJJ(d, e, a, b, c, x[4], 5); + JJJ(c, d, e, a, b, 0, 7); + JJJ(b, c, d, e, a, x[6], 7); + JJJ(a, b, c, d, e, 0, 8); + JJJ(e, a, b, c, d, 0x80, 11); + JJJ(d, e, a, b, c, x[1], 14); + JJJ(c, d, e, a, b, 0, 14); + JJJ(b, c, d, e, a, x[3], 12); + JJJ(a, b, c, d, e, 0, 6); /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, 256, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, 0x80, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); + III(e, a, b, c, d, x[6], 9); + III(d, e, a, b, c, 0, 13); + III(c, d, e, a, b, x[3], 15); + III(b, c, d, e, a, x[7], 7); + III(a, b, c, d, e, x[0], 12); + III(e, a, b, c, d, 0, 8); + III(d, e, a, b, c, x[5], 9); + III(c, d, e, a, b, 0, 11); + III(b, c, d, e, a, 256, 7); + III(a, b, c, d, e, 0, 7); + III(e, a, b, c, d, 0x80, 12); + III(d, e, a, b, c, 0, 7); + III(c, d, e, a, b, x[4], 6); + III(b, c, d, e, a, 0, 15); + III(a, b, c, d, e, x[1], 13); + III(e, a, b, c, d, x[2], 11); /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, 256, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, 0x80, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - 
HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); + HHH(d, e, a, b, c, 0, 9); + HHH(c, d, e, a, b, x[5], 7); + HHH(b, c, d, e, a, x[1], 15); + HHH(a, b, c, d, e, x[3], 11); + HHH(e, a, b, c, d, x[7], 8); + HHH(d, e, a, b, c, 256, 6); + HHH(c, d, e, a, b, x[6], 6); + HHH(b, c, d, e, a, 0, 14); + HHH(a, b, c, d, e, 0, 12); + HHH(e, a, b, c, d, 0x80, 13); + HHH(d, e, a, b, c, 0, 5); + HHH(c, d, e, a, b, x[2], 14); + HHH(b, c, d, e, a, 0, 13); + HHH(a, b, c, d, e, x[0], 13); + HHH(e, a, b, c, d, x[4], 7); + HHH(d, e, a, b, c, 0, 5); /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, 0x80, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, 256, 8); + GGG(c, d, e, a, b, 0x80, 15); + GGG(b, c, d, e, a, x[6], 5); + GGG(a, b, c, d, e, x[4], 8); + GGG(e, a, b, c, d, x[1], 11); + GGG(d, e, a, b, c, x[3], 14); + GGG(c, d, e, a, b, 0, 14); + GGG(b, c, d, e, a, 0, 6); + GGG(a, b, c, d, e, x[0], 14); + GGG(e, a, b, c, d, x[5], 6); + GGG(d, e, a, b, c, 0, 9); + GGG(c, d, e, a, b, x[2], 12); + GGG(b, c, d, e, a, 0, 9); + GGG(a, b, c, d, e, 0, 12); + GGG(e, a, b, c, d, x[7], 5); + GGG(d, e, a, b, c, 0, 15); + GGG(c, d, e, a, b, 256, 8); /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, 0x80, 14); - FFF(e2, a2, b2, c2, d2, x[7], 
6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, 256, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = c1 + d2; - digest[1] = d1 + e2; - digest[2] = e1 + a2; - digest[3] = a1 + b2; - digest[4] = b1 + c2; + FFF(b, c, d, e, a, 0, 8); + FFF(a, b, c, d, e, 0, 5); + FFF(e, a, b, c, d, 0, 12); + FFF(d, e, a, b, c, x[4], 9); + FFF(c, d, e, a, b, x[1], 12); + FFF(b, c, d, e, a, x[5], 5); + FFF(a, b, c, d, e, 0x80, 14); + FFF(e, a, b, c, d, x[7], 6); + FFF(d, e, a, b, c, x[6], 8); + FFF(c, d, e, a, b, x[2], 13); + FFF(b, c, d, e, a, 0, 6); + FFF(a, b, c, d, e, 256, 5); + FFF(e, a, b, c, d, x[0], 15); + FFF(d, e, a, b, c, x[3], 13); + FFF(c, d, e, a, b, 0, 11); + FFF(b, c, d, e, a, 0, 11); + + digest[0] += d; + digest[1] += e; + digest[2] += a; + digest[3] += b; + digest[4] += c; } #endif From 49e67de8d4d02c7181ee42f8851128727152163e Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 10:29:54 +0200 Subject: [PATCH 21/62] separate runs in ripemd160.cl --- CLKeySearchDevice/bitcrack.cl | 36 ++++++++++++++++++++++++----------- clMath/ripemd160.cl | 26 +++++++++++++++++++------ 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index a5eb47ca..bae5923f 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -69,7 +69,7 @@ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) -void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) +void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) { unsigned int a = RIPEMD160_IV_0; unsigned int b = RIPEMD160_IV_1; @@ -167,17 +167,20 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) JJ(c, d, e, a, b, 0, 5); JJ(b, c, d, e, a, 0, 6); - digest[0] += c; - digest[1] += d; - digest[2] += 
e; - digest[3] += a; - digest[4] += b; + digest[0] = c; + digest[1] = d; + digest[2] = e; + digest[3] = a; + digest[4] = b; +} - a = RIPEMD160_IV_0; - b = RIPEMD160_IV_1; - c = RIPEMD160_IV_2; - d = RIPEMD160_IV_3; - e = RIPEMD160_IV_4; +void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) +{ + unsigned int a = RIPEMD160_IV_0; + unsigned int b = RIPEMD160_IV_1; + unsigned int c = RIPEMD160_IV_2; + unsigned int d = RIPEMD160_IV_3; + unsigned int e = RIPEMD160_IV_4; /* parallel round 1 */ JJJ(a, b, c, d, e, x[5], 8); @@ -276,6 +279,17 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) digest[4] += c; } +void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) +{ + digest[0] = 0; + digest[1] = 0; + digest[2] = 0; + digest[3] = 0; + digest[4] = 0; + ripemd160p1(x, digest); + ripemd160p2(x, digest); +} + #endif #ifndef SECP256K1_CL #define SECP256K1_CL diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index cbd757a4..acffd44a 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -69,7 +69,7 @@ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) -void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) +void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) { unsigned int a = RIPEMD160_IV_0; unsigned int b = RIPEMD160_IV_1; @@ -172,12 +172,15 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) digest[2] = e; digest[3] = a; digest[4] = b; +} - a = RIPEMD160_IV_0; - b = RIPEMD160_IV_1; - c = RIPEMD160_IV_2; - d = RIPEMD160_IV_3; - e = RIPEMD160_IV_4; +void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) +{ + unsigned int a = RIPEMD160_IV_0; + unsigned int b = RIPEMD160_IV_1; + unsigned int c = RIPEMD160_IV_2; + unsigned int d = RIPEMD160_IV_3; + unsigned int e = RIPEMD160_IV_4; /* parallel round 1 */ JJJ(a, b, c, d, e, x[5], 8); @@ -276,4 +279,15 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) digest[4] 
+= c; } +void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) +{ + digest[0] = 0; + digest[1] = 0; + digest[2] = 0; + digest[3] = 0; + digest[4] = 0; + ripemd160p1(x, digest); + ripemd160p2(x, digest); +} + #endif From 9ac843f14f6541b7fdd2817ce2ba20cb1050f790 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 10:30:34 +0200 Subject: [PATCH 22/62] make error log correct --- KeyFinderLib/KeyFinder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeyFinderLib/KeyFinder.cpp b/KeyFinderLib/KeyFinder.cpp index 5e396dd5..58df9da9 100644 --- a/KeyFinderLib/KeyFinder.cpp +++ b/KeyFinderLib/KeyFinder.cpp @@ -63,7 +63,7 @@ void KeyFinder::setTargets(std::string targetsFile) if(!inFile.is_open()) { Logger::log(LogLevel::Error, "Unable to open '" + targetsFile + "'"); - throw KeySearchException(); + throw KeySearchException("FILE", "Unable to open '" + targetsFile + "'"); } _targets.clear(); From 6b71a75ee28fefe7193ed6633e35bf9f2d9b3d92 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 13:32:21 +0200 Subject: [PATCH 23/62] prepare --- CLKeySearchDevice/bitcrack.cl | 29 +++++++++++++++++------------ clMath/ripemd160.cl | 29 +++++++++++++++++------------ 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index bae5923f..8f0838c9 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -272,22 +272,27 @@ void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) FFF(c, d, e, a, b, 0, 11); FFF(b, c, d, e, a, 0, 11); - digest[0] += d; - digest[1] += e; - digest[2] += a; - digest[3] += b; - digest[4] += c; + digest[0] = d; + digest[1] = e; + digest[2] = a; + digest[3] = b; + digest[4] = c; } void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - digest[0] = 0; - digest[1] = 0; - digest[2] = 0; - digest[3] = 0; - digest[4] = 0; - ripemd160p1(x, digest); - ripemd160p2(x, digest); + 
unsigned int digest1[5]; + unsigned int digest2[5]; + + ripemd160p1(x, &digest1); + ripemd160p2(x, &digest2); + + digest[0] = digest1[0] + digest2[0]; + digest[1] = digest1[1] + digest2[1]; + digest[2] = digest1[2] + digest2[2]; + digest[3] = digest1[3] + digest2[3]; + digest[4] = digest1[4] + digest2[4]; + } #endif diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index acffd44a..dda1f523 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -272,22 +272,27 @@ void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) FFF(c, d, e, a, b, 0, 11); FFF(b, c, d, e, a, 0, 11); - digest[0] += d; - digest[1] += e; - digest[2] += a; - digest[3] += b; - digest[4] += c; + digest[0] = d; + digest[1] = e; + digest[2] = a; + digest[3] = b; + digest[4] = c; } void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - digest[0] = 0; - digest[1] = 0; - digest[2] = 0; - digest[3] = 0; - digest[4] = 0; - ripemd160p1(x, digest); - ripemd160p2(x, digest); + unsigned int digest1[5]; + unsigned int digest2[5]; + + ripemd160p1(x, &digest1); + ripemd160p2(x, &digest2); + + digest[0] = digest1[0] + digest2[0]; + digest[1] = digest1[1] + digest2[1]; + digest[2] = digest1[2] + digest2[2]; + digest[3] = digest1[3] + digest2[3]; + digest[4] = digest1[4] + digest2[4]; + } #endif From b37f2ef5b29cef3707a2103befe26cdb118e4d3f Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 13:42:42 +0200 Subject: [PATCH 24/62] fix ripemd160 --- clMath/ripemd160.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index dda1f523..23ceefd8 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -284,8 +284,8 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) unsigned int digest1[5]; unsigned int digest2[5]; - ripemd160p1(x, &digest1); - ripemd160p2(x, &digest2); + ripemd160p1(x, digest1); + ripemd160p2(x, digest2); digest[0] = digest1[0] + digest2[0]; digest[1] = 
digest1[1] + digest2[1]; From 800de6fc7bd1070f67ee565333b40f0829c78af7 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 14:43:31 +0200 Subject: [PATCH 25/62] fix mulModP256k --- CLKeySearchDevice/bitcrack.cl | 62 +++++++++++++++-------------------- clMath/secp256k1.cl | 58 ++++++++++++++------------------ 2 files changed, 52 insertions(+), 68 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 8f0838c9..468052ee 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -284,8 +284,8 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) unsigned int digest1[5]; unsigned int digest2[5]; - ripemd160p1(x, &digest1); - ripemd160p2(x, &digest2); + ripemd160p1(x, digest1); + ripemd160p2(x, digest2); digest[0] = digest1[0] + digest2[0]; digest[1] = digest1[1] + digest2[1]; @@ -673,16 +673,7 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) } } -uint256_t mulModP256k(uint256_t a, uint256_t b) -{ - uint256_t c; - - mulModP(a.v, b.v, c.v); - - return c; -} - -void mulModP256kv(uint256_t *a, uint256_t *b, uint256_t *c) +void mulModP256k(uint256_t *a, uint256_t *b, uint256_t *c) { mulModP(a->v, b->v, c->v); } @@ -699,33 +690,34 @@ uint256_t invModP256k(uint256_t x) { uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); squareModP256k(&x); - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); squareModP256k(&x); - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); for(int i = 0; i < 5; i++) { squareModP256k(&x); } for(int i = 0; i < 22; i++) { - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); } squareModP256k(&x); for(int i = 0; i < 222; i++) { - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); } - return mulModP256k(x, y); + 
mulModP256k(&x, &y, &x); + return x; } @@ -740,7 +732,7 @@ void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - *inverse = mulModP256k(*inverse, t); + mulModP256k(inverse, &t, inverse); chain[batchIdx * dim + gid] = *inverse; } @@ -762,7 +754,7 @@ void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - *inverse = mulModP256k(x, *inverse); + mulModP256k(&x, inverse, inverse); chain[batchIdx * dim + gid] = *inverse; } @@ -794,7 +786,7 @@ void completeBatchAddWithDouble256k( uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; - s = mulModP256k(*inverse, c); + mulModP256k(inverse, &c, &s); uint256_t diff; if(equal256k(&px, &x)) { @@ -803,7 +795,7 @@ void completeBatchAddWithDouble256k( diff = subModP256k(px, x); } - *inverse = mulModP256k(diff, *inverse); + mulModP256k(&diff, inverse, inverse); } else { s = *inverse; } @@ -816,16 +808,16 @@ void completeBatchAddWithDouble256k( uint256_t tx2; // 3x^2 - mulModP256kv(&x, &x, &x2); + mulModP256k(&x, &x, &x2); addModP256k(&x2, &x2, &tx2); addModP256k(&x2, &tx2, &tx2); // s = 3x^2 * 1/2y - mulModP256kv(&tx2, &s, &s); + mulModP256k(&tx2, &s, &s); // s^2 uint256_t s2; - mulModP256kv(&s, &s, &s2); + mulModP256k(&s, &s, &s2); // Rx = s^2 - 2px *newX = subModP256k(s2, x); @@ -833,18 +825,18 @@ void completeBatchAddWithDouble256k( // Ry = s(px - rx) - py uint256_t k = subModP256k(px, *newX); - *newY = mulModP256k(s, k); + mulModP256k(&s, &k, newY); *newY = subModP256k(*newY, py); } else { uint256_t rise; rise = subModP256k(py, y); - mulModP256kv(&rise, &s, &s); + mulModP256k(&rise, &s, &s); // Rx = s^2 - Gx - Qx uint256_t s2; - mulModP256kv(&s, &s, &s2); + mulModP256k(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, 
x); @@ -852,7 +844,7 @@ void completeBatchAddWithDouble256k( // Ry = s(px - rx) - py uint256_t k; k = subModP256k(px, *newX); - *newY = mulModP256k(s, k); + mulModP256k(&s, &k, newY); *newY = subModP256k(*newY, py); } } @@ -882,11 +874,11 @@ void completeBatchAdd256k( uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; - s = mulModP256k(*inverse, c); + mulModP256k(inverse, &c, &s); uint256_t diff; diff = subModP256k(px, x); - *inverse = mulModP256k(diff, *inverse); + mulModP256k(&diff, inverse, inverse); } else { s = *inverse; } @@ -895,18 +887,18 @@ void completeBatchAdd256k( uint256_t rise = subModP256k(py, y); - s = mulModP256k(rise, s); + mulModP256k(&rise, &s, &s); // Rx = s^2 - Gx - Qx uint256_t s2; - mulModP256kv(&s, &s, &s2); + mulModP256k(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); // Ry = s(px - rx) - py uint256_t k = subModP256k(px, *newX); - *newY = mulModP256k(s, k); + mulModP256k(&s, &k, newY); *newY = subModP256k(*newY, py); } diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index c9471fc5..0b29c2a1 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -375,16 +375,7 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) } } -uint256_t mulModP256k(uint256_t a, uint256_t b) -{ - uint256_t c; - - mulModP(a.v, b.v, c.v); - - return c; -} - -void mulModP256kv(uint256_t *a, uint256_t *b, uint256_t *c) +void mulModP256k(uint256_t *a, uint256_t *b, uint256_t *c) { mulModP(a->v, b->v, c->v); } @@ -401,33 +392,34 @@ uint256_t invModP256k(uint256_t x) { uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); squareModP256k(&x); - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); squareModP256k(&x); - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); for(int i = 0; i < 5; i++) { squareModP256k(&x); } for(int i = 0; i < 22; i++) { - 
mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); } squareModP256k(&x); for(int i = 0; i < 222; i++) { - mulModP256kv(&x, &y, &y); + mulModP256k(&x, &y, &y); squareModP256k(&x); } - return mulModP256k(x, y); + mulModP256k(&x, &y, &x); + return x; } @@ -442,7 +434,7 @@ void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - *inverse = mulModP256k(*inverse, t); + mulModP256k(inverse, &t, inverse); chain[batchIdx * dim + gid] = *inverse; } @@ -464,7 +456,7 @@ void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - *inverse = mulModP256k(x, *inverse); + mulModP256k(&x, inverse, inverse); chain[batchIdx * dim + gid] = *inverse; } @@ -496,7 +488,7 @@ void completeBatchAddWithDouble256k( uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; - s = mulModP256k(*inverse, c); + mulModP256k(inverse, &c, &s); uint256_t diff; if(equal256k(&px, &x)) { @@ -505,7 +497,7 @@ void completeBatchAddWithDouble256k( diff = subModP256k(px, x); } - *inverse = mulModP256k(diff, *inverse); + mulModP256k(&diff, inverse, inverse); } else { s = *inverse; } @@ -518,16 +510,16 @@ void completeBatchAddWithDouble256k( uint256_t tx2; // 3x^2 - mulModP256kv(&x, &x, &x2); + mulModP256k(&x, &x, &x2); addModP256k(&x2, &x2, &tx2); addModP256k(&x2, &tx2, &tx2); // s = 3x^2 * 1/2y - mulModP256kv(&tx2, &s, &s); + mulModP256k(&tx2, &s, &s); // s^2 uint256_t s2; - mulModP256kv(&s, &s, &s2); + mulModP256k(&s, &s, &s2); // Rx = s^2 - 2px *newX = subModP256k(s2, x); @@ -535,18 +527,18 @@ void completeBatchAddWithDouble256k( // Ry = s(px - rx) - py uint256_t k = subModP256k(px, *newX); - *newY = mulModP256k(s, k); + mulModP256k(&s, &k, newY); *newY = subModP256k(*newY, py); } else { uint256_t rise; rise = 
subModP256k(py, y); - mulModP256kv(&rise, &s, &s); + mulModP256k(&rise, &s, &s); // Rx = s^2 - Gx - Qx uint256_t s2; - mulModP256kv(&s, &s, &s2); + mulModP256k(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); @@ -554,7 +546,7 @@ void completeBatchAddWithDouble256k( // Ry = s(px - rx) - py uint256_t k; k = subModP256k(px, *newX); - *newY = mulModP256k(s, k); + mulModP256k(&s, &k, newY); *newY = subModP256k(*newY, py); } } @@ -584,11 +576,11 @@ void completeBatchAdd256k( uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; - s = mulModP256k(*inverse, c); + mulModP256k(inverse, &c, &s); uint256_t diff; diff = subModP256k(px, x); - *inverse = mulModP256k(diff, *inverse); + mulModP256k(&diff, inverse, inverse); } else { s = *inverse; } @@ -597,18 +589,18 @@ void completeBatchAdd256k( uint256_t rise = subModP256k(py, y); - s = mulModP256k(rise, s); + mulModP256k(&rise, &s, &s); // Rx = s^2 - Gx - Qx uint256_t s2; - mulModP256kv(&s, &s, &s2); + mulModP256k(&s, &s, &s2); *newX = subModP256k(s2, px); *newX = subModP256k(*newX, x); // Ry = s(px - rx) - py uint256_t k = subModP256k(px, *newX); - *newY = mulModP256k(s, k); + mulModP256k(&s, &k, newY); *newY = subModP256k(*newY, py); } From 3cdf089f31fa5ed27b70014c9c8ee92ba37fa5bc Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 15:15:26 +0200 Subject: [PATCH 26/62] fix project files --- CLKeySearchDevice/CLKeySearchDevice.vcxproj | 2 +- KeyFinderLib/KeyFinderLib.vcxproj | 2 +- clUtil/clUtil.vcxproj | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.vcxproj b/CLKeySearchDevice/CLKeySearchDevice.vcxproj index b475054d..1226d6eb 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.vcxproj +++ b/CLKeySearchDevice/CLKeySearchDevice.vcxproj @@ -30,7 +30,7 @@ 15.0 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9} CLKeySearchDevice - 10.0.19041.0 + 10.0 diff --git a/KeyFinderLib/KeyFinderLib.vcxproj b/KeyFinderLib/KeyFinderLib.vcxproj index 7cfe1962..6eb479ae 
100644 --- a/KeyFinderLib/KeyFinderLib.vcxproj +++ b/KeyFinderLib/KeyFinderLib.vcxproj @@ -17,7 +17,7 @@ {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} KeyFinderLib - 10.0.19041.0 + 10.0 diff --git a/clUtil/clUtil.vcxproj b/clUtil/clUtil.vcxproj index 9d15c2ca..055f977b 100644 --- a/clUtil/clUtil.vcxproj +++ b/clUtil/clUtil.vcxproj @@ -30,7 +30,7 @@ 15.0 {D9A5823D-C472-40AC-B23A-21B1586CEEB0} clUtil - 10.0.19041.0 + 10.0 From 93397f44b9670205f3ec99c478ddc8e8e7a47807 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 26 May 2021 15:25:52 +0200 Subject: [PATCH 27/62] fix --- BitCrackOpenCL.sln | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/BitCrackOpenCL.sln b/BitCrackOpenCL.sln index 54ab1112..59355f0d 100644 --- a/BitCrackOpenCL.sln +++ b/BitCrackOpenCL.sln @@ -62,7 +62,6 @@ Global {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x64.ActiveCfg = Performance Release|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x64.Build.0 = Performance Release|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x86.Build.0 = Performance Release|Win32 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x64.ActiveCfg = Release|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x64.Build.0 = Release|x64 {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x86.ActiveCfg = Release|Win32 @@ -74,7 +73,6 @@ Global {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x64.ActiveCfg = Performance Release|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x64.Build.0 = Performance Release|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x86.Build.0 = Performance Release|Win32 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x64.ActiveCfg = Release|x64 {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x64.Build.0 = Release|x64 
{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x86.ActiveCfg = Release|Win32 @@ -86,7 +84,6 @@ Global {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x64.ActiveCfg = Performance Release|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x64.Build.0 = Performance Release|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x86.Build.0 = Performance Release|Win32 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x64.ActiveCfg = Release|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x64.Build.0 = Release|x64 {34042455-D274-432D-9134-C9EA41FD1B54}.Release|x86.ActiveCfg = Release|Win32 @@ -98,7 +95,6 @@ Global {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x64.ActiveCfg = Performance Release|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x64.Build.0 = Performance Release|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x86.Build.0 = Performance Release|Win32 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x64.ActiveCfg = Release|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x64.Build.0 = Release|x64 {CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x86.ActiveCfg = Release|Win32 @@ -110,7 +106,6 @@ Global {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x64.ActiveCfg = Performance Release|x64 {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x64.Build.0 = Performance Release|x64 {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x86.Build.0 = Performance Release|Win32 {150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x64.ActiveCfg = Release|x64 {150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x64.Build.0 = Release|x64 
{150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x86.ActiveCfg = Release|Win32 @@ -131,7 +126,6 @@ Global {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x64.ActiveCfg = Performance Release|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x64.Build.0 = Performance Release|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x86.Build.0 = Performance Release|Win32 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x64.ActiveCfg = Release|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x64.Build.0 = Release|x64 {D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x86.ActiveCfg = Release|Win32 @@ -143,7 +137,6 @@ Global {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x64.ActiveCfg = Performance Release|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x64.Build.0 = Performance Release|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x86.Build.0 = Performance Release|Win32 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x64.ActiveCfg = Release|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x64.Build.0 = Release|x64 {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x86.ActiveCfg = Release|Win32 @@ -155,7 +148,6 @@ Global {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x64.ActiveCfg = Performance Release|x64 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x64.Build.0 = Performance Release|x64 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x86.Build.0 = Performance Release|Win32 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x64.ActiveCfg = Release|x64 {36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x64.Build.0 = Release|x64 
{36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x86.ActiveCfg = Release|Win32 @@ -167,7 +159,6 @@ Global {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x64.ActiveCfg = Performance Release|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x64.Build.0 = Performance Release|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x86.Build.0 = Performance Release|Win32 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.ActiveCfg = Release|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.Build.0 = Release|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.ActiveCfg = Release|Win32 @@ -179,7 +170,6 @@ Global {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x64.ActiveCfg = Performance Release|x64 {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x64.Build.0 = Performance Release|x64 {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x86.ActiveCfg = Performance Release|Win32 - {F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x86.Build.0 = Performance Release|Win32 {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.ActiveCfg = Release|x64 {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.Build.0 = Release|x64 {F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.ActiveCfg = Release|Win32 From e4f92d69830ef768106f0c83d5f4d77916781ed2 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Thu, 27 May 2021 08:19:04 +0200 Subject: [PATCH 28/62] improve a little --- CLKeySearchDevice/CLKeySearchDevice.cpp | 17 ++- CLKeySearchDevice/CLKeySearchDevice.h | 2 +- CLKeySearchDevice/bitcrack.cl | 104 +++++++++------ CLKeySearchDevice/keysearch.cl | 104 +++++++++------ KeyFinder/main.cpp | 26 ++-- addresses.txt | 160 ++++++++++++++++++++++++ clUtil/clContext.cpp | 5 +- 7 files changed, 320 insertions(+), 98 deletions(-) create mode 100644 addresses.txt diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp 
b/CLKeySearchDevice/CLKeySearchDevice.cpp index c3368c1d..bc752bf0 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -30,7 +30,7 @@ static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5] } } -CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks) +CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks, int compressionMode) { _threads = threads; _blocks = blocks; @@ -46,11 +46,24 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer throw KeySearchException("KEYSEARCH_MINIMUM_POINT_EXCEPTION", "At least 1 point per thread required"); } + std::string options = ""; + + switch (compressionMode) { + case PointCompressionType::COMPRESSED: + options += " -DCOMPRESSION_COMPRESSED"; + break; + case PointCompressionType::UNCOMPRESSED: + options += " -DCOMPRESSION_UNCOMPRESSED"; + break; + case PointCompressionType::BOTH: + options += " -DCOMPRESSION_BOTH"; + break; + } try { // Create the context _clContext = new cl::CLContext(_device); Logger::log(LogLevel::Info, "Compiling OpenCL kernels..."); - _clProgram = new cl::CLProgram(*_clContext, _bitcrack_cl); + _clProgram = new cl::CLProgram(*_clContext, _bitcrack_cl, options); // Load the kernels _initKeysKernel = new cl::CLKernel(*_clProgram, "multiplyStepKernel"); diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h index 8a5497d6..0905f40e 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.h +++ b/CLKeySearchDevice/CLKeySearchDevice.h @@ -109,7 +109,7 @@ class CLKeySearchDevice : public KeySearchDevice { public: - CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks = 0); + CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks = 0, int compressionMode = PointCompressionType::COMPRESSED); ~CLKeySearchDevice(); diff --git 
a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 468052ee..a458e161 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1496,14 +1496,15 @@ __kernel void multiplyStepKernel( int batchIdx = 0; int i = gid; + unsigned int p; + unsigned int bit; + uint256_t x; + for(; i < totalPoints; i += dim) { - unsigned int p; p = readWord256k(privateKeys, i, 7 - step / 32); - - unsigned int bit = p & (1 << (step % 32)); - - uint256_t x = xPtr[i]; + bit = p & (1 << (step % 32)); + x = xPtr[i]; if(bit != 0) { if(!isInfinity256k(&x)) { @@ -1649,36 +1650,48 @@ __kernel void keyFinderKernel( int i = gid; int batchIdx = 0; + unsigned int digest[5]; + +#ifdef COMPRESSION_UNCOMPRESSED for(; i < totalPoints; i += dim) { - uint256_t x; + hashPublicKey(xPtr[i], yPtr[i], digest); - unsigned int digest[5]; + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); + } - x = xPtr[i]; + beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); + batchIdx++; + } +#elif COMPRESSION_BOTH + for(; i < totalPoints; i += dim) { + hashPublicKey(xPtr[i], yPtr[i], digest); - if((compression == UNCOMPRESSED) || (compression == BOTH)) { - uint256_t y = yPtr[i]; + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); + } - hashPublicKey(x, y, digest); + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { - setResultFound(i, false, x, y, digest, results, numResults); - } + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } - if((compression == COMPRESSED) || (compression == BOTH)) { - - hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); + beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); + batchIdx++; + } +#else + for(; i < totalPoints; i += dim) { + 
hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { - uint256_t y = yPtr[i]; - setResultFound(i, true, x, y, digest, results, numResults); - } + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } - beginBatchAdd256k(incX, x, chain, i, batchIdx, &inverse); + beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); batchIdx++; } +#endif inverse = doBatchInverse256k(inverse); @@ -1721,38 +1734,47 @@ __kernel void keyFinderKernelWithDouble( int i = gid; int batchIdx = 0; - for(; i < totalPoints; i += dim) { - uint256_t x; + unsigned int digest[5]; - unsigned int digest[5]; +#ifdef COMPRESSION_UNCOMPRESSED + for(; i < totalPoints; i += dim) { + hashPublicKey(xPtr[i], yPtr[i], digest); - x = xPtr[i]; + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); + } - // uncompressed - if((compression == UNCOMPRESSED) || (compression == BOTH)) { - uint256_t y = yPtr[i]; - hashPublicKey(x, y, digest); + beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); + batchIdx++; + } +#elif COMPRESSION_BOTH + for(; i < totalPoints; i += dim) { + hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { - setResultFound(i, false, x, y, digest, results, numResults); - } + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } - // compressed - if((compression == COMPRESSED) || (compression == BOTH)) { - - hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); + } - uint256_t y = yPtr[i]; - setResultFound(i, true, x, y, 
digest, results, numResults); - } + beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); + batchIdx++; + } +#else + for(; i < totalPoints; i += dim) { + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } +#endif inverse = doBatchInverse256k(inverse); diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index d2eb1a7f..f6bb2043 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -71,14 +71,15 @@ __kernel void multiplyStepKernel( int batchIdx = 0; int i = gid; + unsigned int p; + unsigned int bit; + uint256_t x; + for(; i < totalPoints; i += dim) { - unsigned int p; p = readWord256k(privateKeys, i, 7 - step / 32); - - unsigned int bit = p & (1 << (step % 32)); - - uint256_t x = xPtr[i]; + bit = p & (1 << (step % 32)); + x = xPtr[i]; if(bit != 0) { if(!isInfinity256k(&x)) { @@ -224,36 +225,48 @@ __kernel void keyFinderKernel( int i = gid; int batchIdx = 0; + unsigned int digest[5]; + +#ifdef COMPRESSION_UNCOMPRESSED for(; i < totalPoints; i += dim) { - uint256_t x; + hashPublicKey(xPtr[i], yPtr[i], digest); - unsigned int digest[5]; + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); + } - x = xPtr[i]; + beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); + batchIdx++; + } +#elif COMPRESSION_BOTH + for(; i < totalPoints; i += dim) { + hashPublicKey(xPtr[i], yPtr[i], digest); - if((compression == UNCOMPRESSED) || (compression == BOTH)) { - uint256_t y = yPtr[i]; + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); + } - hashPublicKey(x, y, digest); + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), 
digest); - if(isInBloomFilter(digest, targetList, mask)) { - setResultFound(i, false, x, y, digest, results, numResults); - } + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } - if((compression == COMPRESSED) || (compression == BOTH)) { - - hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); + beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); + batchIdx++; + } +#else + for(; i < totalPoints; i += dim) { + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { - uint256_t y = yPtr[i]; - setResultFound(i, true, x, y, digest, results, numResults); - } + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } - beginBatchAdd256k(incX, x, chain, i, batchIdx, &inverse); + beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); batchIdx++; } +#endif inverse = doBatchInverse256k(inverse); @@ -296,38 +309,47 @@ __kernel void keyFinderKernelWithDouble( int i = gid; int batchIdx = 0; - for(; i < totalPoints; i += dim) { - uint256_t x; + unsigned int digest[5]; - unsigned int digest[5]; +#ifdef COMPRESSION_UNCOMPRESSED + for(; i < totalPoints; i += dim) { + hashPublicKey(xPtr[i], yPtr[i], digest); - x = xPtr[i]; + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); + } - // uncompressed - if((compression == UNCOMPRESSED) || (compression == BOTH)) { - uint256_t y = yPtr[i]; - hashPublicKey(x, y, digest); + beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); + batchIdx++; + } +#elif COMPRESSION_BOTH + for(; i < totalPoints; i += dim) { + hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { - setResultFound(i, false, x, y, digest, results, numResults); - } + if(isInBloomFilter(digest, targetList, mask)) { + 
setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } - // compressed - if((compression == COMPRESSED) || (compression == BOTH)) { - - hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest); + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); + } - uint256_t y = yPtr[i]; - setResultFound(i, true, x, y, digest, results, numResults); - } + beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); + batchIdx++; + } +#else + for(; i < totalPoints; i += dim) { + hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); + if(isInBloomFilter(digest, targetList, mask)) { + setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } +#endif inverse = doBatchInverse256k(inverse); diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index f81a2cf2..bad1a215 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -30,7 +30,7 @@ struct RunConfig{ unsigned int blocks = 0; unsigned int pointsPerThread = 0; - int compression = PointCompressionType::COMPRESSED; + int compressionMode = PointCompressionType::COMPRESSED; std::vector targets; @@ -202,6 +202,7 @@ typedef struct { int threads; int blocks; int pointsPerThread; + int compressionMode; }DeviceParameters; DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) @@ -210,14 +211,15 @@ DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) parameters.threads = 256; parameters.blocks = 32; parameters.pointsPerThread = 32; + parameters.compressionMode = PointCompressionType::COMPRESSED; return parameters; } -static KeySearchDevice *getDeviceContext(DeviceManager::DeviceInfo &device, int blocks, int threads, int pointsPerThread) +static 
KeySearchDevice *getDeviceContext(DeviceManager::DeviceInfo &device, int blocks, int threads, int pointsPerThread, int compressionMode) { if(device.type == DeviceManager::DeviceType::OpenCL) { - return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks); + return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks, compressionMode); } return NULL; @@ -286,7 +288,7 @@ void writeCheckpoint(secp256k1::uint256 nextKey) fileStream << "blocks=" << _config.blocks << "\n"; fileStream << "threads=" << _config.threads << "\n"; fileStream << "points=" << _config.pointsPerThread << "\n"; - fileStream << "compression=" << getCompressionString(_config.compression) << "\n"; + fileStream << "compression=" << getCompressionString(_config.compressionMode) << "\n"; fileStream << "device=" << _config.device << "\n"; fileStream << "elapsed=" << (_config.elapsed + util::getSystemTime() - _startTime) << "\n"; fileStream << "stride=" << _config.stride.toString(); @@ -323,7 +325,7 @@ void readCheckpointFile() _config.pointsPerThread = util::parseUInt32(entries["points"].value); } if(entries.find("compression") != entries.end()) { - _config.compression = parseCompressionString(entries["compression"].value); + _config.compressionMode = parseCompressionString(entries["compression"].value); } if(entries.find("elapsed") != entries.end()) { _config.elapsed = util::parseUInt32(entries["elapsed"].value); @@ -342,7 +344,7 @@ int run() return 1; } - Logger::log(LogLevel::Info, "Compression: " + getCompressionString(_config.compression)); + Logger::log(LogLevel::Info, "Compression: " + getCompressionString(_config.compressionMode)); Logger::log(LogLevel::Info, "Starting at: " + _config.nextKey.toString()); Logger::log(LogLevel::Info, "Ending at: " + _config.endKey.toString()); Logger::log(LogLevel::Info, "Counting by: " + _config.stride.toString()); @@ -368,9 +370,9 @@ int run() } // Get device context - KeySearchDevice *keySearchDevice = 
getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread); + KeySearchDevice *keySearchDevice = getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread, _config.compressionMode); - KeyFinder keyFinder(_config.nextKey, _config.endKey, _config.compression, keySearchDevice, _config.stride); + KeyFinder keyFinder(_config.nextKey, _config.endKey, _config.compressionMode, keySearchDevice, _config.stride); keyFinder.setResultCallback(resultCallback); keyFinder.setStatusInterval(_config.statusInterval); @@ -511,7 +513,7 @@ int main(int argc, char **argv) } else if(optArg.equals("-u", "--uncompressed")) { optUncompressed = true; } else if(optArg.equals("", "--compression")) { - _config.compression = parseCompressionString(optArg.arg); + _config.compressionMode = parseCompressionString(optArg.arg); } else if(optArg.equals("-i", "--in")) { _config.targetsFile = optArg.arg; } else if(optArg.equals("-o", "--out")) { @@ -628,11 +630,11 @@ int main(int argc, char **argv) // Check option for compressed, uncompressed, or both if(optCompressed && optUncompressed) { - _config.compression = PointCompressionType::BOTH; + _config.compressionMode = PointCompressionType::BOTH; } else if(optCompressed) { - _config.compression = PointCompressionType::COMPRESSED; + _config.compressionMode = PointCompressionType::COMPRESSED; } else if(optUncompressed) { - _config.compression = PointCompressionType::UNCOMPRESSED; + _config.compressionMode = PointCompressionType::UNCOMPRESSED; } if(_config.checkpointFile.length() > 0) { diff --git a/addresses.txt b/addresses.txt new file mode 100644 index 00000000..71c20322 --- /dev/null +++ b/addresses.txt @@ -0,0 +1,160 @@ +1BgGZ9tcN4rm9KBzDn7KprQz87SZ26SAMH +1CUNEBjYrCn2y1SdiUMohaKUi4wpP326Lb +19ZewH8Kk1PDbSNdJ97FP4EiCjTRaZMZQA +1EhqbyUMvvs7BfL8goY6qcPbD6YKfPqb7e +1E6NuFjCi27W5zoXg8TRdcSRq84zJeBW3k +1PitScNLyp2HCygzadCh7FveTnfmpPbfp8 +1McVt1vMtCC7yn5b9wgX1833yCcLXzueeC 
+1M92tSqNmQLYw33fuBvjmeadirh1ysMBxK +1CQFwcjw1dwhtkVWBttNLDtqL7ivBonGPV +1LeBZP5QCwwgXRtmVUvTVrraqPUokyLHqe +1PgQVLmst3Z314JrQn5TNiys8Hc38TcXJu +1DBaumZxUkM4qMQRt2LVWyFJq5kDtSZQot +1Pie8JkxBT6MGPz9Nvi3fsPkr2D8q3GBc1 +1ErZWg5cFCe4Vw5BzgfzB74VNLaXEiEkhk +1QCbW9HWnwQWiQqVo5exhAnmfqKRrCRsvW +1BDyrQ6WoF8VN3g9SAS1iKZcPzFfnDVieY +1HduPEXZRdG26SUT5Yk83mLkPyjnZuJ7Bm +1GnNTmTVLZiqQfLbAdp9DVdicEnB5GoERE +1NWmZRpHH4XSPwsW6dsS3nrNWfL1yrJj4w +1HsMJxNiV7TLxmoF6uJNkydxPFDog4NQum +14oFNXucftsHiUMY8uctg6N487riuyXs4h +1CfZWK1QTQE3eS9qn61dQjV89KDjZzfNcv +1L2GM8eE7mJWLdo3HZS6su1832NX2txaac +1rSnXMr63jdCuegJFuidJqWxUPV7AtUf7 +15JhYXn6Mx3oF4Y7PcTAv2wVVAuCFFQNiP +1JVnST957hGztonaWK6FougdtjxzHzRMMg +128z5d7nN7PkCuX5qoA4Ys6pmxUYnEy86k +12jbtzBb54r97TCwW3G1gCFoumpckRAPdY +19EEC52krRUK1RkUAEZmQdjTyHT7Gp1TYT +1LHtnpd8nU5VHEMkG2TMYYNUjjLc992bps +1LhE6sCTuGae42Axu1L1ZB7L96yi9irEBE +1FRoHA9xewq7DjrZ1psWJVeTer8gHRqEvR +187swFMjz1G54ycVU56B7jZFHFTNVQFDiu +1PWABE7oUahG2AFFQhhvViQovnCr4rEv7Q +1PWCx5fovoEaoBowAvF5k91m2Xat9bMgwb +1Be2UF9NLfyLFbtm3TCbmuocc9N1Kduci1 +14iXhn8bGajVWegZHJ18vJLHhntcpL4dex +1HBtApAFA9B2YZw3G2YKSMCtb3dVnjuNe2 +122AJhKLEfkFBaGAd84pLp1kfE7xK3GdT8 +1EeAxcprB2PpCnr34VfZdFrkUWuxyiNEFv +1L5sU9qvJeuwQUdt4y1eiLmquFxKjtHr3E +1E32GPWgDyeyQac4aJxm9HVoLrrEYPnM4N +1PiFuqGpG8yGM5v6rNHWS3TjsG6awgEGA1 +1CkR2uS7LmFwc3T2jV8C1BhWb5mQaoxedF +1NtiLNGegHWE3Mp9g2JPkgx6wUg4TW7bbk +1F3JRMWudBaj48EhwcHDdpeuy2jwACNxjP +1Pd8VvT49sHKsmqrQiP61RsVwmXCZ6ay7Z +1DFYhaB2J9q1LLZJWKTnscPWos9VBqDHzv +12CiUhYVTTH33w3SPUBqcpMoqnApAV4WCF +1MEzite4ReNuWaL5Ds17ePKt2dCxWEofwk +1NpnQyZ7x24ud82b7WiRNvPm6N8bqGQnaS +15z9c9sVpu6fwNiK7dMAFgMYSK4GqsGZim +15K1YKJMiJ4fpesTVUcByoz334rHmknxmT +1KYUv7nSvXx4642TKeuC2SNdTk326uUpFy +1LzhS3k3e9Ub8i2W1V8xQFdB8n2MYCHPCa +17aPYR1m6pVAacXg1PTDDU7XafvK1dxvhi +15c9mPGLku1HuW9LRtBf4jcHVpBUt8txKz +1Dn8NF8qDyyfHMktmuoQLGyjWmZXgvosXf +1HAX2n9Uruu9YDt4cqRgYcvtGvZj1rbUyt +1Kn5h2qpgw9mWE5jKpk8PP4qvvJ1QVy8su +1AVJKwzs9AskraJLGHAZPiaZcrpDr1U6AB +1Me6EfpwZK5kQziBwBfvLiHjaPGxCKLoJi 
+1NpYjtLira16LfGbGwZJ5JbDPh3ai9bjf4 +16jY7qLJnxb7CHZyqBP8qca9d51gAjyXQN +18ZMbwUFLMHoZBbfpCjUJQTCMCbktshgpe +13zb1hQbWVsc2S7ZTZnP2G4undNNpdh5so +1BY8GQbnueYofwSuFAT3USAhGjPrkxDdW9 +1MVDYgVaSN6iKKEsbzRUAYFrYJadLYZvvZ +19vkiEajfhuZ8bs8Zu2jgmC6oqZbWqhxhG +19YZECXj3SxEZMoUeJ1yiPsw8xANe7M7QR +1PWo3JeB9jrGwfHDNpdGK54CRas7fsVzXU +1JTK7s9YVYywfm5XUH7RNhHJH1LshCaRFR +12VVRNPi4SJqUTsp6FmqDqY5sGosDtysn4 +1FWGcVDK3JGzCC3WtkYetULPszMaK2Jksv +1J36UjUByGroXcCvmj13U6uwaVv9caEeAt +1DJh2eHFYQfACPmrvpyWc8MSTYKh7w9eRF +1Bxk4CQdqL9p22JEtDfdXMsng1XacifUtE +15qF6X51huDjqTmF9BJgxXdt1xcj46Jmhb +1ARk8HWJMn8js8tQmGUJeQHjSE7KRkn2t8 +1BCf6rHUW6m3iH2ptsvnjgLruAiPQQepLe +15qsCm78whspNQFydGJQk5rexzxTQopnHZ +13zYrYhhJxp6Ui1VV7pqa5WDhNWM45ARAC +14MdEb4eFcT3MVG5sPFG4jGLuHJSnt1Dk2 +1CMq3SvFcVEcpLMuuH8PUcNiqsK1oicG2D +1Kh22PvXERd2xpTQk3ur6pPEqFeckCJfAr +1K3x5L6G57Y494fDqBfrojD28UJv4s5JcK +1PxH3K1Shdjb7gSEoTX7UPDZ6SH4qGPrvq +16AbnZjZZipwHMkYKBSfswGWKDmXHjEpSf +19QciEHbGVNY4hrhfKXmcBBCrJSBZ6TaVt +1L12FHH2FHjvTviyanuiFVfmzCy46RRATU +1EzVHtmbN4fs4MiNk3ppEnKKhsmXYJ4s74 +1AE8NzzgKE7Yhz7BWtAcAAxiFMbPo82NB5 +17Q7tuG2JwFFU9rXVj3uZqRtioH3mx2Jad +1K6xGMUbs6ZTXBnhw1pippqwK6wjBWtNpL +19eVSDuizydXxhohGh8Ki9WY9KsHdSwoQC +15ANYzzCp5BFHcCnVFzXqyibpzgPLWaD8b +18ywPwj39nGjqBrQJSzZVq2izR12MDpDr8 +1CaBVPrwUxbQYYswu32w7Mj4HR4maNoJSX +1JWnE6p6UN7ZJBN7TtcbNDoRcjFtuDWoNL +1KCgMv8fo2TPBpddVi9jqmMmcne9uSNJ5F +1CKCVdbDJasYmhswB6HKZHEAnNaDpK7W4n +1PXv28YxmYMaB8zxrKeZBW8dt2HK7RkRPX +1AcAmB6jmtU6AiEcXkmiNE9TNVPsj9DULf +1EQJvpsmhazYCcKX5Au6AZmZKRnzarMVZu +1CMjscKB3QW7SDyQ4c3C3DEUHiHRhiZVib +18KsfuHuzQaBTNLASyj15hy4LuqPUo1FNB +15EJFC5ZTs9nhsdvSUeBXjLAuYq3SWaxTc +1HB1iKUqeffnVsvQsbpC6dNi1XKbyNuqao +1GvgAXVCbA8FBjXfWiAms4ytFeJcKsoyhL +12JzYkkN76xkwvcPT6AWKZtGX6w2LAgsJg +1824ZJQ7nKJ9QFTRBqn7z7dHV5EGpzUpH3 +18A7NA9FTsnJxWgkoFfPAFbQzuQxpRtCos +1NeGn21dUDDeqFQ63xb2SpgUuXuBLA4WT4 +174SNxfqpdMGYy5YQcfLbSTK3MRNZEePoy +1NLbHuJebVwUZ1XqDjsAyfTRUPwDQbemfv +1MnJ6hdhvK37VLmqcdEwqC3iFxyWH2PHUV +1KNRfGWw7Q9Rmwsc6NT5zsdvEb9M2Wkj5Z 
+1PJZPzvGX19a7twf5HyD2VvNiPdHLzm9F6 +1GuBBhf61rnvRe4K8zu8vdQB3kHzwFqSy7 +17s2b9ksz5y7abUm92cHwG8jEPCzK3dLnT +1GDSuiThEV64c166LUFC9uDcVdGjqkxKyh +1Me3ASYt5JCTAK2XaC32RMeH34PdprrfDx +1CdufMQL892A69KXgv6UNBD17ywWqYpKut +1BkkGsX9ZM6iwL3zbqs7HWBV7SvosR6m8N +1PXAyUB8ZoH3WD8n5zoAthYjN15yN5CVq5 +1AWCLZAjKbV1P7AHvaPNCKiB7ZWVDMxFiz +1G6EFyBRU86sThN3SSt3GrHu1sA7w7nzi4 +1MZ2L1gFrCtkkn6DnTT2e4PFUTHw9gNwaj +1Hz3uv3nNZzBVMXLGadCucgjiCs5W9vaGz +1Fo65aKq8s8iquMt6weF1rku1moWVEd5Ua +16zRPnT8znwq42q7XeMkZUhb1bKqgRogyy +1KrU4dHE5WrW8rhWDsTRjR21r8t3dsrS3R +17uDfp5r4n441xkgLFmhNoSW1KWp6xVLD +13A3JrvXmvg5w9XGvyyR4JEJqiLz8ZySY3 +16RGFo6hjq9ym6Pj7N5H7L1NR1rVPJyw2v +1UDHPdovvR985NrWSkdWQDEQ1xuRiTALq +15nf31J46iLuK1ZkTnqHo7WgN5cARFK3RA +1Ab4vzG6wEQBDNQM1B2bvUz4fqXXdFk2WT +1Fz63c775VV9fNyj25d9Xfw3YHE6sKCxbt +1QKBaU6WAeycb3DbKbLBkX7vJiaS8r42Xo +1CD91Vm97mLQvXhrnoMChhJx4TP9MaQkJo +15MnK2jXPqTMURX4xC3h4mAZxyCcaWWEDD +13N66gCzWWHEZBxhVxG18P8wyjEWF9Yoi1 +1NevxKDYuDcCh1ZMMi6ftmWwGrZKC6j7Ux +19GpszRNUej5yYqxXoLnbZWKew3KdVLkXg +1M7ipcdYHey2Y5RZM34MBbpugghmjaV89P +18aNhurEAJsw6BAgtANpexk5ob1aGTwSeL +1FwZXt6EpRT7Fkndzv6K4b4DFoT4trbMrV +1CXvTzR6qv8wJ7eprzUKeWxyGcHwDYP1i2 +1MUJSJYtGPVGkBCTqGspnxyHahpt5Te8jy +13Q84TNNvgcL3HJiqQPvyBb9m4hxjS3jkV +1LuUHyrQr8PKSvbcY1v1PiuGuqFjWpDumN +18192XpzzdDi2K11QVHR7td2HcPS6Qs5vg +1NgVmsCCJaKLzGyKLFJfVequnFW9ZvnMLN +1AoeP37TmHdFh8uN72fu9AqgtLrUwcv2wJ +1FTpAbQa4h8trvhQXjXnmNhqdiGBd1oraE +14JHoRAdmJg3XR4RjMDh6Wed6ft6hzbQe9 +19z6waranEf8CcP8FqNgdwUe1QRxvUNKBG +14u4nA5sugaswb6SZgn5av2vuChdMnD9E5 +1NBC8uXJy1GiJ6drkiZa1WuKn51ps7EPTv diff --git a/clUtil/clContext.cpp b/clUtil/clContext.cpp index 8d368419..9ebe54e5 100644 --- a/clUtil/clContext.cpp +++ b/clUtil/clContext.cpp @@ -94,9 +94,12 @@ cl::CLProgram::CLProgram(cl::CLContext &ctx, std::string srcFile, std::string op cl_int err; if(util::toLower(_ctx.getDeviceVendor()).find("intel") != std::string::npos) { - options += "-DDEVICE_VENDOR_INTEL"; + options += " -DDEVICE_VENDOR_INTEL"; } + // disable optimization as codeXL 
shows it will result in higher throughput + options += " -O0"; + _prog = clCreateProgramWithSource(ctx.getContext(), 1, &ptr, &len, &err); clCall(err); From 998e4d60e0e98c404de6a66e7450c9927df77fae Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Thu, 27 May 2021 22:39:38 +0200 Subject: [PATCH 29/62] reduce cycles in isInBloomfilter --- CLKeySearchDevice/bitcrack.cl | 44 ++++++++++++++-------------------- CLKeySearchDevice/keysearch.cl | 44 ++++++++++++++-------------------- 2 files changed, 36 insertions(+), 52 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index a458e161..a9b7c3a9 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1440,27 +1440,19 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask) +bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) { - bool notFoundMatch = true; - unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; - uint64_t idx[5]; - - idx[0] = ((hash[0] << 6) | (h5 & 0x3f)) & mask; - idx[1] = ((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & mask; - idx[2] = ((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & mask; - idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask; - idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask; - - notFoundMatch = (targetList[idx[0] / 32] & (0x01 << (idx[0] % 32))) == 0 - || (targetList[idx[1] / 32] & (0x01 << (idx[1] % 32))) == 0 - || (targetList[idx[2] / 32] & (0x01 << (idx[2] % 32))) == 0 - || (targetList[idx[3] / 32] & (0x01 << (idx[3] % 32))) == 0 - || (targetList[idx[4] / 32] & (0x01 << (idx[4] % 32))) == 0; - - return notFoundMatch == false; + return (false == + ( + (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 
0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0 + ) + ); } void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) @@ -1656,7 +1648,7 @@ __kernel void keyFinderKernel( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } @@ -1667,13 +1659,13 @@ __kernel void keyFinderKernel( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -1684,7 +1676,7 @@ __kernel void keyFinderKernel( for(; i < totalPoints; i += dim) { hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -1740,7 +1732,7 @@ __kernel void keyFinderKernelWithDouble( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, 
xPtr[i], yPtr[i], digest, results, numResults); } @@ -1751,13 +1743,13 @@ __kernel void keyFinderKernelWithDouble( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -1767,7 +1759,7 @@ __kernel void keyFinderKernelWithDouble( #else for(; i < totalPoints; i += dim) { hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index f6bb2043..53a19cd0 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -15,27 +15,19 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask) +bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) { - bool notFoundMatch = true; - unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; - uint64_t idx[5]; - - idx[0] = ((hash[0] << 6) | (h5 & 0x3f)) & mask; - idx[1] = ((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & mask; - idx[2] = ((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & mask; - idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask; - idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask; - - notFoundMatch = (targetList[idx[0] / 32] & (0x01 << (idx[0] % 32))) == 0 - || (targetList[idx[1] / 32] & (0x01 << (idx[1] % 32))) == 0 - || (targetList[idx[2] / 32] & (0x01 << (idx[2] % 32))) == 0 - 
|| (targetList[idx[3] / 32] & (0x01 << (idx[3] % 32))) == 0 - || (targetList[idx[4] / 32] & (0x01 << (idx[4] % 32))) == 0; - - return notFoundMatch == false; + return (false == + ( + (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0 + ) + ); } void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) @@ -231,7 +223,7 @@ __kernel void keyFinderKernel( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } @@ -242,13 +234,13 @@ __kernel void keyFinderKernel( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -259,7 +251,7 @@ __kernel void keyFinderKernel( for(; i < totalPoints; i += dim) { hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - 
if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -315,7 +307,7 @@ __kernel void keyFinderKernelWithDouble( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } @@ -326,13 +318,13 @@ __kernel void keyFinderKernelWithDouble( for(; i < totalPoints; i += dim) { hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -342,7 +334,7 @@ __kernel void keyFinderKernelWithDouble( #else for(; i < totalPoints; i += dim) { hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, mask)) { + if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } From 279bb242cf3a5556ec84ff031300138e85486cdb Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Fri, 28 May 2021 02:05:48 +0200 Subject: [PATCH 30/62] some minor change --- CLKeySearchDevice/bitcrack.cl | 45 ++++++++++++---------------------- CLKeySearchDevice/keysearch.cl | 32 +++++++++--------------- clMath/secp256k1.cl | 13 +++------- clMath/sha256.cl | 4 +-- 4 files changed, 34 insertions(+), 60 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index a9b7c3a9..a613f0e7 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -866,26 +866,21 @@ void 
completeBatchAdd256k( int dim = get_global_size(0); uint256_t s; - uint256_t x; - - x = xPtr[i]; - if(batchIdx >= 1) { + if(batchIdx != 0) { uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; mulModP256k(inverse, &c, &s); uint256_t diff; - diff = subModP256k(px, x); + diff = subModP256k(px, xPtr[i]); mulModP256k(&diff, inverse, inverse); } else { s = *inverse; } - uint256_t y = yPtr[i]; - - uint256_t rise = subModP256k(py, y); + uint256_t rise = subModP256k(py, yPtr[i]); mulModP256k(&rise, &s, &s); @@ -894,7 +889,7 @@ void completeBatchAdd256k( mulModP256k(&s, &s, &s2); *newX = subModP256k(s2, px); - *newX = subModP256k(*newX, x); + *newX = subModP256k(*newX, xPtr[i]); // Ry = s(px - rx) - py uint256_t k = subModP256k(px, *newX); @@ -1427,10 +1422,9 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un #define UNCOMPRESSED 1 #define BOTH 2 -unsigned int endian(unsigned int x) -{ - return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24); -} +#ifndef endian +#define endian(x) (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24) +#endif typedef struct { int idx; @@ -1477,7 +1471,7 @@ __kernel void multiplyStepKernel( { uint256_t gx; uint256_t gy; - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int i = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); gx = gxPtr[step]; @@ -1487,18 +1481,15 @@ __kernel void multiplyStepKernel( uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; - int i = gid; unsigned int p; - unsigned int bit; uint256_t x; for(; i < totalPoints; i += dim) { p = readWord256k(privateKeys, i, 7 - step / 32); - bit = p & (1 << (step % 32)); x = xPtr[i]; - if(bit != 0) { + if(( p & (1 << (step % 32))) != 0) { if(!isInfinity256k(&x)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; @@ -1515,11 +1506,10 @@ __kernel void multiplyStepKernel( unsigned int p; p = 
readWord256k(privateKeys, i, 7 - step / 32); - unsigned int bit = p & (1 << (step % 32)); uint256_t x = xPtr[i]; - if(bit != 0) { + if((p & (1 << (step % 32))) != 0) { if(!isInfinity256k(&x)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); @@ -1631,7 +1621,7 @@ __kernel void keyFinderKernel( __global CLDeviceResult *results, __global unsigned int *numResults) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int i = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); uint256_t incX = *incXPtr; @@ -1639,7 +1629,6 @@ __kernel void keyFinderKernel( // Multiply together all (_Gx - x) and then invert uint256_t inverse = { {0,0,0,0,0,0,0,1} }; - int i = gid; int batchIdx = 0; unsigned int digest[5]; @@ -1688,11 +1677,10 @@ __kernel void keyFinderKernel( inverse = doBatchInverse256k(inverse); i -= dim; - + uint256_t newX; + uint256_t newY; for(; i >= 0; i -= dim) { - uint256_t newX; - uint256_t newY; batchIdx--; completeBatchAdd256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); @@ -1715,7 +1703,7 @@ __kernel void keyFinderKernelWithDouble( __global CLDeviceResult *results, __global unsigned int *numResults) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int i = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); uint256_t incX = *incXPtr; @@ -1724,7 +1712,6 @@ __kernel void keyFinderKernelWithDouble( // Multiply together all (_Gx - x) and then invert uint256_t inverse = { {0,0,0,0,0,0,0,1} }; - int i = gid; int batchIdx = 0; unsigned int digest[5]; @@ -1772,9 +1759,9 @@ __kernel void keyFinderKernelWithDouble( i -= dim; + uint256_t newX; + uint256_t newY; for(; i >= 0; i -= dim) { - uint256_t newX; - uint256_t newY; batchIdx--; completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); diff --git a/CLKeySearchDevice/keysearch.cl 
b/CLKeySearchDevice/keysearch.cl index 53a19cd0..c97e8180 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -2,10 +2,9 @@ #define UNCOMPRESSED 1 #define BOTH 2 -unsigned int endian(unsigned int x) -{ - return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24); -} +#ifndef endian +#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) +#endif typedef struct { int idx; @@ -52,7 +51,7 @@ __kernel void multiplyStepKernel( { uint256_t gx; uint256_t gy; - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int i = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); gx = gxPtr[step]; @@ -62,18 +61,15 @@ __kernel void multiplyStepKernel( uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; - int i = gid; unsigned int p; - unsigned int bit; uint256_t x; for(; i < totalPoints; i += dim) { p = readWord256k(privateKeys, i, 7 - step / 32); - bit = p & (1 << (step % 32)); x = xPtr[i]; - if(bit != 0) { + if(( p & (1 << (step % 32))) != 0) { if(!isInfinity256k(&x)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; @@ -90,11 +86,10 @@ __kernel void multiplyStepKernel( unsigned int p; p = readWord256k(privateKeys, i, 7 - step / 32); - unsigned int bit = p & (1 << (step % 32)); uint256_t x = xPtr[i]; - if(bit != 0) { + if((p & (1 << (step % 32))) != 0) { if(!isInfinity256k(&x)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); @@ -206,7 +201,7 @@ __kernel void keyFinderKernel( __global CLDeviceResult *results, __global unsigned int *numResults) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int i = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); uint256_t incX = *incXPtr; @@ -214,7 +209,6 @@ __kernel void keyFinderKernel( // Multiply together all (_Gx - x) and then invert 
uint256_t inverse = { {0,0,0,0,0,0,0,1} }; - int i = gid; int batchIdx = 0; unsigned int digest[5]; @@ -263,11 +257,10 @@ __kernel void keyFinderKernel( inverse = doBatchInverse256k(inverse); i -= dim; - + uint256_t newX; + uint256_t newY; for(; i >= 0; i -= dim) { - uint256_t newX; - uint256_t newY; batchIdx--; completeBatchAdd256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); @@ -290,7 +283,7 @@ __kernel void keyFinderKernelWithDouble( __global CLDeviceResult *results, __global unsigned int *numResults) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int i = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); uint256_t incX = *incXPtr; @@ -299,7 +292,6 @@ __kernel void keyFinderKernelWithDouble( // Multiply together all (_Gx - x) and then invert uint256_t inverse = { {0,0,0,0,0,0,0,1} }; - int i = gid; int batchIdx = 0; unsigned int digest[5]; @@ -347,9 +339,9 @@ __kernel void keyFinderKernelWithDouble( i -= dim; + uint256_t newX; + uint256_t newY; for(; i >= 0; i -= dim) { - uint256_t newX; - uint256_t newY; batchIdx--; completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 0b29c2a1..53e825b1 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -568,26 +568,21 @@ void completeBatchAdd256k( int dim = get_global_size(0); uint256_t s; - uint256_t x; - - x = xPtr[i]; - if(batchIdx >= 1) { + if(batchIdx != 0) { uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; mulModP256k(inverse, &c, &s); uint256_t diff; - diff = subModP256k(px, x); + diff = subModP256k(px, xPtr[i]); mulModP256k(&diff, inverse, inverse); } else { s = *inverse; } - uint256_t y = yPtr[i]; - - uint256_t rise = subModP256k(py, y); + uint256_t rise = subModP256k(py, yPtr[i]); mulModP256k(&rise, &s, &s); @@ -596,7 +591,7 @@ void completeBatchAdd256k( mulModP256k(&s, &s, &s2); *newX = subModP256k(s2, px); - *newX 
= subModP256k(*newX, x); + *newX = subModP256k(*newX, xPtr[i]); // Ry = s(px - rx) - py uint256_t k = subModP256k(px, *newX); diff --git a/clMath/sha256.cl b/clMath/sha256.cl index 0168959a..83b8bd65 100644 --- a/clMath/sha256.cl +++ b/clMath/sha256.cl @@ -214,7 +214,7 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned tmp[7] = h; w[0] = (y[7] << 24) | 0x00800000; - w[15] = 65 * 8; + w[15] = 520; // 65 * 8 roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); roundSha(h, a, b, c, d, e, f, g, 0, _K[1]); @@ -362,7 +362,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[6] = (x[6] >> 8) | (x[5] << 24); w[7] = (x[7] >> 8) | (x[6] << 24); w[8] = (x[7] << 24) | 0x00800000; - w[15] = 33 * 8; + w[15] = 264; // 33 * 8 a = _IV[0]; b = _IV[1]; From 366dd0142eea91731d267062bd5b72cbf97cf002 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Fri, 28 May 2021 02:43:33 +0200 Subject: [PATCH 31/62] fix warnings --- CryptoUtil/ripemd160.cpp | 4 ++-- clUtil/clUtil.cpp | 2 +- util/util.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CryptoUtil/ripemd160.cpp b/CryptoUtil/ripemd160.cpp index f442f157..790a804c 100644 --- a/CryptoUtil/ripemd160.cpp +++ b/CryptoUtil/ripemd160.cpp @@ -25,7 +25,7 @@ static unsigned int endian(unsigned int x) return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24); } -static unsigned int rotl(unsigned int x, int n) +static unsigned int rotl(unsigned int x, unsigned int n) { return (x << n) | (x >> (32 - n)); } @@ -325,4 +325,4 @@ void crypto::ripemd160(unsigned int *x, unsigned int *digest) digest[2] = endian(_IV[3] + e1 + a2); digest[3] = endian(_IV[4] + a1 + b2); digest[4] = endian(_IV[0] + b1 + c2); -} \ No newline at end of file +} diff --git a/clUtil/clUtil.cpp b/clUtil/clUtil.cpp index 0a24d2c6..dfea8f1c 100644 --- a/clUtil/clUtil.cpp +++ b/clUtil/clUtil.cpp @@ -64,4 +64,4 @@ std::vector cl::getDevices() delete[] platforms; return deviceList; -} \ No 
newline at end of file +} diff --git a/util/util.h b/util/util.h index 4514d2d5..239e7af3 100644 --- a/util/util.h +++ b/util/util.h @@ -43,4 +43,4 @@ std::string trim(const std::string &s, char c=' '); } -#endif \ No newline at end of file +#endif From 569d854cf1e0366c56d715b3c0e0d985c3e5d786 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Fri, 28 May 2021 03:26:28 +0200 Subject: [PATCH 32/62] minor patch --- CLKeySearchDevice/CLKeySearchDevice.vcxproj | 12 ++++++------ CLKeySearchDevice/bitcrack.cl | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.vcxproj b/CLKeySearchDevice/CLKeySearchDevice.vcxproj index 1226d6eb..43b1a785 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.vcxproj +++ b/CLKeySearchDevice/CLKeySearchDevice.vcxproj @@ -124,7 +124,7 @@ - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -139,7 +139,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -154,7 +154,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - type $(SolutionDir)clMath\ripemd160.cl 
$(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -173,7 +173,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -199,7 +199,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -220,7 +220,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cltrue - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index a613f0e7..33f826e9 100644 --- 
a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1120,7 +1120,7 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned tmp[7] = h; w[0] = (y[7] << 24) | 0x00800000; - w[15] = 65 * 8; + w[15] = 520; // 65 * 8 roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); roundSha(h, a, b, c, d, e, f, g, 0, _K[1]); @@ -1268,7 +1268,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[6] = (x[6] >> 8) | (x[5] << 24); w[7] = (x[7] >> 8) | (x[6] << 24); w[8] = (x[7] << 24) | 0x00800000; - w[15] = 33 * 8; + w[15] = 264; // 33 * 8 a = _IV[0]; b = _IV[1]; @@ -1423,7 +1423,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un #define BOTH 2 #ifndef endian -#define endian(x) (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24) +#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) #endif typedef struct { From 2bac19f4d896c8ef91185def2b89e75d44934eab Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Fri, 28 May 2021 03:33:23 +0200 Subject: [PATCH 33/62] minor fix --- CLKeySearchDevice/CLKeySearchDevice.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index bc752bf0..93a4ad4f 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -67,8 +67,6 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer // Load the kernels _initKeysKernel = new cl::CLKernel(*_clProgram, "multiplyStepKernel"); - _initKeysKernel->getWorkGroupSize(); - _stepKernel = new cl::CLKernel(*_clProgram, "keyFinderKernel"); _stepKernelWithDouble = new cl::CLKernel(*_clProgram, "keyFinderKernelWithDouble"); From 5a3900904dc6d702b4805af4301f9fdb28fb06aa Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Mon, 31 May 2021 05:52:38 +0200 Subject: [PATCH 34/62] improve multiply --- clMath/ripemd160.cl | 
25 +++-- clMath/secp256k1.cl | 236 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 232 insertions(+), 29 deletions(-) diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 23ceefd8..390615ab 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -7,6 +7,15 @@ #define RIPEMD160_IV_3 (0x10325476) #define RIPEMD160_IV_4 (0xc3d2e1f0) +#define K0 (0x5a827999); +#define K1 (0x6ed9eba1); +#define K2 (0x8f1bbcdc); +#define K3 (0xa953fd4e); +#define K4 (0x7a6d76e9); +#define K5 (0x6d703ef3); +#define K6 (0x5c4dd124); +#define K7 (0x50a28be6); + #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define F(x, y, z) ((x) ^ (y) ^ (z)) @@ -25,22 +34,22 @@ c = rotl((c), 10) #define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + 0x5a827999;\ + a += G((b), (c), (d)) + (x) + K0;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + 0x6ed9eba1;\ + a += H((b), (c), (d)) + (x) + K1;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + 0x8f1bbcdc;\ + a += I((b), (c), (d)) + (x) + K2;\ a = rotl((a), (s)) + e;\ c = rotl((c), 10) #define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + 0xa953fd4e;\ + a += J((b), (c), (d)) + (x) + K3;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -50,22 +59,22 @@ c = rotl((c), 10) #define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + 0x7a6d76e9;\ + a += G((b), (c), (d)) + x + K4;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + 0x6d703ef3;\ + a += H((b), (c), (d)) + (x) + K5;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define III(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + 0x5c4dd124;\ + a += I((b), (c), (d)) + (x) + K6;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + 0x50a28be6;\ + a += J((b), (c), (d)) + (x) + K7;\ a = rotl((a), (s)) + (e);\ 
c = rotl((c), 10) diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 53e825b1..2fd83b1d 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -121,42 +121,236 @@ bool greaterThanEqualToP(const unsigned int a[8]) return true; } + void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { unsigned int z[16]; - unsigned int high = 0; - uint64_t product = 0; + unsigned long product; // First round, overwrite z - for(int j = 7; j >= 0; j--) { + product = (unsigned long)x[7] * y[7]; + z[15] = (unsigned int)product; + + product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32); + z[14] = (unsigned int)product; + + product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32); + z[13] = (unsigned int)product; + + product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32); + z[12] = (unsigned int)product; + + product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32); + z[11] = (unsigned int)product; + + product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32); + z[10] = (unsigned int)product; + + product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32); + z[9] = (unsigned int)product; + + product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32); + z[8] = (unsigned int)product; + z[7] = (unsigned int)(product >> 32); + + product = (unsigned long)x[6] * y[7] + z[14]; + z[14] = (unsigned int)product; + + /** round6 */ + product = (unsigned long)x[6] * y[6] + z[13] + (product >> 32); + z[13] = (unsigned int)product; + + product = (unsigned long)x[6] * y[5] + z[12] + (product >> 32); + z[12] = (unsigned int)product; + + product = (unsigned long)x[6] * y[4] + z[11] + (product >> 32); + z[11] = (unsigned int)product; + + product = (unsigned long)x[6] * y[3] + z[10] + (product >> 32); + z[10] = (unsigned int)product; + + product = (unsigned long)x[6] * y[2] + z[9] + (product >> 32); + z[9] = (unsigned int)product; + + product = 
(unsigned long)x[6] * y[1] + z[8] + (product >> 32); + z[8] = (unsigned int)product; + + product = (unsigned long)x[6] * y[0] + z[7] + (product >> 32); + z[7] = (unsigned int)product; + z[6] = product >> 32; - product = (uint64_t)x[7] * y[j] + high; + /** round 5 */ + product = (unsigned long)x[5] * y[7] + z[13]; + z[13] = (unsigned int)product; - z[7 + j + 1] = (unsigned int)product; - high = (unsigned int)(product >> 32); - } - z[7] = high; + product = (unsigned long)x[5] * y[6] + z[12] + (product >> 32); + z[12] = (unsigned int)product; - for(int i = 6; i >= 0; i--) { + product = (unsigned long)x[5] * y[5] + z[11] + (product >> 32); + z[11] = (unsigned int)product; - high = 0; + product = (unsigned long)x[5] * y[4] + z[10] + (product >> 32); + z[10] = (unsigned int)product; - for(int j = 7; j >= 0; j--) { + product = (unsigned long)x[5] * y[3] + z[9] + (product >> 32); + z[9] = (unsigned int)product; - product = (uint64_t)x[i] * y[j] + z[i + j + 1] + high; + product = (unsigned long)x[5] * y[2] + z[8] + (product >> 32); + z[8] = (unsigned int)product; + + product = (unsigned long)x[5] * y[1] + z[7] + (product >> 32); + z[7] = (unsigned int)product; + + product = (unsigned long)x[5] * y[0] + z[6] + (product >> 32); + z[6] = (unsigned int)product; + z[5] = product >> 32; - z[i + j + 1] = (unsigned int)product; + /** round 4 */ + product = (unsigned long)x[4] * y[7] + z[12]; + z[12] = (unsigned int)product; - high = product >> 32; - } + product = (unsigned long)x[4] * y[6] + z[11] + (product >> 32); + z[11] = (unsigned int)product; - z[i] = high; - } + product = (unsigned long)x[4] * y[5] + z[10] + (product >> 32); + z[10] = (unsigned int)product; - for(int i = 0; i < 8; i++) { - out_high[i] = z[i]; - out_low[i] = z[8 + i]; - } + product = (unsigned long)x[4] * y[4] + z[9] + (product >> 32); + z[9] = (unsigned int)product; + + product = (unsigned long)x[4] * y[3] + z[8] + (product >> 32); + z[8] = (unsigned int)product; + + product = (unsigned long)x[4] * y[2] + 
z[7] + (product >> 32); + z[7] = (unsigned int)product; + + product = (unsigned long)x[4] * y[1] + z[6] + (product >> 32); + z[6] = (unsigned int)product; + + product = (unsigned long)x[4] * y[0] + z[5] + (product >> 32); + z[5] = (unsigned int)product; + z[4] = product >> 32; + + /** round 3 */ + product = (unsigned long)x[3] * y[7] + z[11]; + z[11] = (unsigned int)product; + + product = (unsigned long)x[3] * y[6] + z[10] + (product >> 32); + z[10] = (unsigned int)product; + + product = (unsigned long)x[3] * y[5] + z[9] + (product >> 32); + z[9] = (unsigned int)product; + + product = (unsigned long)x[3] * y[4] + z[8] + (product >> 32); + z[8] = (unsigned int)product; + + product = (unsigned long)x[3] * y[3] + z[7] + (product >> 32); + z[7] = (unsigned int)product; + + product = (unsigned long)x[3] * y[2] + z[6] + (product >> 32); + z[6] = (unsigned int)product; + + product = (unsigned long)x[3] * y[1] + z[5] + (product >> 32); + z[5] = (unsigned int)product; + + product = (unsigned long)x[3] * y[0] + z[4] + (product >> 32); + z[4] = (unsigned int)product; + z[3] = product >> 32; + + /** round 2 */ + product = (unsigned long)x[2] * y[7] + z[10]; + z[10] = (unsigned int)product; + + product = (unsigned long)x[2] * y[6] + z[9] + (product >> 32); + z[9] = (unsigned int)product; + + product = (unsigned long)x[2] * y[5] + z[8] + (product >> 32); + z[8] = (unsigned int)product; + + product = (unsigned long)x[2] * y[4] + z[7] + (product >> 32); + z[7] = (unsigned int)product; + + product = (unsigned long)x[2] * y[3] + z[6] + (product >> 32); + z[6] = (unsigned int)product; + + product = (unsigned long)x[2] * y[2] + z[5] + (product >> 32); + z[5] = (unsigned int)product; + + product = (unsigned long)x[2] * y[1] + z[4] + (product >> 32); + z[4] = (unsigned int)product; + + product = (unsigned long)x[2] * y[0] + z[3] + (product >> 32); + z[3] = (unsigned int)product; + z[2] = product >> 32; + + /** round 1 */ + product = (unsigned long)x[1] * y[7] + z[9]; + z[9] = (unsigned 
int)product; + + product = (unsigned long)x[1] * y[6] + z[8] + (product >> 32); + z[8] = (unsigned int)product; + + product = (unsigned long)x[1] * y[5] + z[7] + (product >> 32); + z[7] = (unsigned int)product; + + product = (unsigned long)x[1] * y[4] + z[6] + (product >> 32); + z[6] = (unsigned int)product; + + product = (unsigned long)x[1] * y[3] + z[5] + (product >> 32); + z[5] = (unsigned int)product; + + product = (unsigned long)x[1] * y[2] + z[4] + (product >> 32); + z[4] = (unsigned int)product; + + product = (unsigned long)x[1] * y[1] + z[3] + (product >> 32); + z[3] = (unsigned int)product; + + product = (unsigned long)x[1] * y[0] + z[2] + (product >> 32); + z[2] = (unsigned int)product; + z[1] = product >> 32; + + /** round 0 */ + product = (unsigned long)x[0] * y[7] + z[8]; + z[8] = (unsigned int)product; + + product = (unsigned long)x[0] * y[6] + z[7] + (product >> 32); + z[7] = (unsigned int)product; + + product = (unsigned long)x[0] * y[5] + z[6] + (product >> 32); + z[6] = (unsigned int)product; + + product = (unsigned long)x[0] * y[4] + z[5] + (product >> 32); + z[5] = (unsigned int)product; + + product = (unsigned long)x[0] * y[3] + z[4] + (product >> 32); + z[4] = (unsigned int)product; + + product = (unsigned long)x[0] * y[2] + z[3] + (product >> 32); + z[3] = (unsigned int)product; + + product = (unsigned long)x[0] * y[1] + z[2] + (product >> 32); + z[2] = (unsigned int)product; + + product = (unsigned long)x[0] * y[0] + z[1] + (product >> 32); + z[1] = (unsigned int)product; + out_high[0] = product >> 32; + + out_high[1] = z[1]; + out_high[2] = z[2]; + out_high[3] = z[3]; + out_high[4] = z[4]; + out_high[5] = z[5]; + out_high[6] = z[6]; + out_high[7] = z[7]; + + out_low[0] = z[8]; + out_low[1] = z[9]; + out_low[2] = z[10]; + out_low[3] = z[11]; + out_low[4] = z[12]; + out_low[5] = z[13]; + out_low[6] = z[14]; + out_low[7] = z[15]; } uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) From 
48e9d5307553a3c6d175bd7ee8ae6d9dfa92bebf Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 1 Jun 2021 04:11:34 +0200 Subject: [PATCH 35/62] improve performance --- BitCrackOpenCL.sln | 1 + CLKeySearchDevice/bitcrack.cl | 838 +++++++++++++++++------------- CLKeySearchDevice/keysearch.cl | 10 +- clMath/secp256k1.cl | 911 +++++++++++++++------------------ 4 files changed, 891 insertions(+), 869 deletions(-) diff --git a/BitCrackOpenCL.sln b/BitCrackOpenCL.sln index 59355f0d..e048ccce 100644 --- a/BitCrackOpenCL.sln +++ b/BitCrackOpenCL.sln @@ -36,6 +36,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clKeyFinder", "clKeyFinder\ {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} = {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} {150AF404-1F80-4A13-855B-4383C4A3326F} = {150AF404-1F80-4A13-855B-4383C4A3326F} {546C8D1F-127F-4EF4-914F-2A7F9367C0F9} = {546C8D1F-127F-4EF4-914F-2A7F9367C0F9} + {83327841-C283-4D46-A873-97AC674C68AC} = {83327841-C283-4D46-A873-97AC674C68AC} {34042455-D274-432D-9134-C9EA41FD1B54} = {34042455-D274-432D-9134-C9EA41FD1B54} {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 33f826e9..d6f7b737 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -7,6 +7,15 @@ #define RIPEMD160_IV_3 (0x10325476) #define RIPEMD160_IV_4 (0xc3d2e1f0) +#define K0 (0x5a827999); +#define K1 (0x6ed9eba1); +#define K2 (0x8f1bbcdc); +#define K3 (0xa953fd4e); +#define K4 (0x7a6d76e9); +#define K5 (0x6d703ef3); +#define K6 (0x5c4dd124); +#define K7 (0x50a28be6); + #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define F(x, y, z) ((x) ^ (y) ^ (z)) @@ -25,22 +34,22 @@ c = rotl((c), 10) #define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + 0x5a827999;\ + a += G((b), (c), (d)) + (x) + K0;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HH(a, b, c, d, e, 
x, s)\ - a += H((b), (c), (d)) + (x) + 0x6ed9eba1;\ + a += H((b), (c), (d)) + (x) + K1;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + 0x8f1bbcdc;\ + a += I((b), (c), (d)) + (x) + K2;\ a = rotl((a), (s)) + e;\ c = rotl((c), 10) #define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + 0xa953fd4e;\ + a += J((b), (c), (d)) + (x) + K3;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -50,22 +59,22 @@ c = rotl((c), 10) #define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + 0x7a6d76e9;\ + a += G((b), (c), (d)) + x + K4;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + 0x6d703ef3;\ + a += H((b), (c), (d)) + (x) + K5;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define III(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + 0x5c4dd124;\ + a += I((b), (c), (d)) + (x) + K6;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + 0x50a28be6;\ + a += J((b), (c), (d)) + (x) + K7;\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -299,45 +308,42 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) #ifndef SECP256K1_CL #define SECP256K1_CL -typedef unsigned long uint64_t; - typedef struct uint256_t { unsigned int v[8]; } uint256_t; /** - Prime modulus 2^256 - 2^32 - 977 + * Base point X */ -__constant unsigned int P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F +__constant unsigned int GX[8] = { + 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798 }; -// Add with carry -void addc(unsigned int *a, unsigned int *b, unsigned int *carry, unsigned int *sum) -{ - *sum = *a + *carry; - - unsigned int c1 = (*sum < *a) * 1; - - *sum = *sum + *b; - - *carry = c1 | ((*sum < *b) * 1); -} - -// Subtract with borrow -void subc(unsigned int *a, unsigned int *b, 
unsigned int *borrow, unsigned int *diff) -{ - unsigned int tmp = *a - *borrow; +/** + * Base point Y + */ +__constant unsigned int GY[8] = { + 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8 +}; - *borrow = (tmp > *a) * 1; +/** + * Group order + */ +__constant unsigned int N[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141 +}; - *diff = tmp - *b; +/** + * Prime modulus 2^256 - 2^32 - 977 + */ +__constant unsigned int P[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F +}; - *borrow |= (*diff > tmp) ? 1 : 0; -} +#define P6 (0xFFFFFFFE) +#define P7 (0xFFFFFC2F) #ifdef DEVICE_VENDOR_INTEL - // Intel devices have a mul_hi bug unsigned int mul_hi977(unsigned int x) { @@ -356,11 +362,9 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in *low = tmp; *high = mul_hi977(*a) + carry; } - #else -// 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +__inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -371,250 +375,316 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in #endif -uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr) -{ - unsigned int borrow = 0; - uint256_t c; - - subc(&a.v[7], &b.v[7], &borrow, &c.v[7]); - subc(&a.v[6], &b.v[6], &borrow, &c.v[6]); - subc(&a.v[5], &b.v[5], &borrow, &c.v[5]); - subc(&a.v[4], &b.v[4], &borrow, &c.v[4]); - subc(&a.v[3], &b.v[3], &borrow, &c.v[3]); - subc(&a.v[2], &b.v[2], &borrow, &c.v[2]); - subc(&a.v[1], &b.v[1], &borrow, &c.v[1]); - subc(&a.v[0], &b.v[0], &borrow, &c.v[0]); - - *borrow_ptr = borrow; - - return c; -} +// Add with carry +#define addc(a, b, sum, carry, tmp) \ + sum = (a) + (carry); \ + tmp = ((sum) < (a)) * 1; \ + sum = (sum) + (b); \ 
+ carry = (tmp) | (((sum) < (b)) * 1); + +// subtract with borrow +#define subc(a, b, diff, borrow, tmp) \ + tmp = (a) - (borrow); \ + borrow = ((tmp) > (a)) * 1; \ + diff = (tmp) - (b); \ + borrow |= ((diff) > (tmp)) ? 1 : 0; + +#define add256k(a, b, c, carry, tmp) \ + addc(a[7], b[7], c[7], carry, tmp); \ + addc(a[6], b[6], c[6], carry, tmp); \ + addc(a[5], b[5], c[5], carry, tmp); \ + addc(a[4], b[4], c[4], carry, tmp); \ + addc(a[3], b[3], c[3], carry, tmp); \ + addc(a[2], b[2], c[2], carry, tmp); \ + addc(a[1], b[1], c[1], carry, tmp); \ + addc(a[0], b[0], c[0], carry, tmp); + +#define sub256k( a, b, c, borrow, tmp) \ + subc(a[7], b[7], c[7], borrow, tmp); \ + subc(a[6], b[6], c[6], borrow, tmp); \ + subc(a[5], b[5], c[5], borrow, tmp); \ + subc(a[4], b[4], c[4], borrow, tmp); \ + subc(a[3], b[3], c[3], borrow, tmp); \ + subc(a[2], b[2], c[2], borrow, tmp); \ + subc(a[1], b[1], c[1], borrow, tmp); \ + subc(a[0], b[0], c[0], borrow, tmp); + +#define isInfinity256k(a) \ + ( \ + (a[0] == 0xffffffff) && \ + (a[1] == 0xffffffff) && \ + (a[2] == 0xffffffff) && \ + (a[3] == 0xffffffff) && \ + (a[4] == 0xffffffff) && \ + (a[5] == 0xffffffff) && \ + (a[6] == 0xffffffff) && \ + (a[7] == 0xffffffff) \ + ) + +#define greaterOrEqualToP(a) \ + (a[6] >= P6 || a[7] >= P7) + +#define equal256k(a, b) \ + ( \ + (a[0] == b[0]) && \ + (a[1] == b[1]) && \ + (a[2] == b[2]) && \ + (a[3] == b[3]) && \ + (a[4] == b[4]) && \ + (a[5] == b[5]) && \ + (a[6] == b[6]) && \ + (a[7] == b[7]) \ + ) -bool greaterThanEqualToP(const unsigned int a[8]) +void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { - if(a[0] > P[0]) { return true; } - if(a[0] < P[0]) { return false; } + unsigned long product; - if(a[1] > P[1]) { return true; } - if(a[1] < P[1]) { return false; } + // First round, overwrite z + product = (unsigned long)x[7] * y[7]; + out_low[7] = (unsigned int)product; - if(a[2] > P[2]) { return true; } - if(a[2] < P[2]) { 
return false; } + product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32); + out_low[6] = (unsigned int)product; - if(a[3] > P[3]) { return true; } - if(a[3] < P[3]) { return false; } + product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32); + out_low[5] = (unsigned int)product; - if(a[4] > P[4]) { return true; } - if(a[4] < P[4]) { return false; } + product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32); + out_low[4] = (unsigned int)product; - if(a[5] > P[5]) { return true; } - if(a[5] < P[5]) { return false; } + product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32); + out_low[3] = (unsigned int)product; - if(a[6] > P[6]) { return true; } - if(a[6] < P[6]) { return false; } + product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32); + out_low[2] = (unsigned int)product; + + product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32); + out_low[1] = (unsigned int)product; + + product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32); + out_low[0] = (unsigned int)product; + out_high[7] = (unsigned int)(product >> 32); + + product = (unsigned long)x[6] * y[7] + out_low[6]; + out_low[6] = (unsigned int)product; + + /** round6 */ + product = (unsigned long)x[6] * y[6] + out_low[5] + (product >> 32); + out_low[5] = (unsigned int)product; + + product = (unsigned long)x[6] * y[5] + out_low[4] + (product >> 32); + out_low[4] = (unsigned int)product; + + product = (unsigned long)x[6] * y[4] + out_low[3] + (product >> 32); + out_low[3] = (unsigned int)product; + + product = (unsigned long)x[6] * y[3] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; + + product = (unsigned long)x[6] * y[2] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - if(a[7] > P[7]) { return true; } - if(a[7] < P[7]) { return false; } - - return true; -} - -void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) -{ 
- unsigned int z[16]; - unsigned int high = 0; - uint64_t product = 0; + product = (unsigned long)x[6] * y[1] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; + + product = (unsigned long)x[6] * y[0] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; + out_high[6] = product >> 32; - // First round, overwrite z - for(int j = 7; j >= 0; j--) { + /** round 5 */ + product = (unsigned long)x[5] * y[7] + out_low[5]; + out_low[5] = (unsigned int)product; - product = (uint64_t)x[7] * y[j] + high; + product = (unsigned long)x[5] * y[6] + out_low[4] + (product >> 32); + out_low[4] = (unsigned int)product; - z[7 + j + 1] = (unsigned int)product; - high = (unsigned int)(product >> 32); - } - z[7] = high; + product = (unsigned long)x[5] * y[5] + out_low[3] + (product >> 32); + out_low[3] = (unsigned int)product; - for(int i = 6; i >= 0; i--) { + product = (unsigned long)x[5] * y[4] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - high = 0; + product = (unsigned long)x[5] * y[3] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - for(int j = 7; j >= 0; j--) { + product = (unsigned long)x[5] * y[2] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; + + product = (unsigned long)x[5] * y[1] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; + + product = (unsigned long)x[5] * y[0] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; + out_high[5] = product >> 32; - product = (uint64_t)x[i] * y[j] + z[i + j + 1] + high; + /** round 4 */ + product = (unsigned long)x[4] * y[7] + out_low[4]; + out_low[4] = (unsigned int)product; - z[i + j + 1] = (unsigned int)product; + product = (unsigned long)x[4] * y[6] + out_low[3] + (product >> 32); + out_low[3] = (unsigned int)product; - high = product >> 32; - } + product = (unsigned long)x[4] * y[5] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - z[i] = high; - } + product = 
(unsigned long)x[4] * y[4] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - for(int i = 0; i < 8; i++) { - out_high[i] = z[i]; - out_low[i] = z[8 + i]; - } -} + product = (unsigned long)x[4] * y[3] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; -uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) -{ - uint256_t c; - unsigned int carry = 0; + product = (unsigned long)x[4] * y[2] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; + + product = (unsigned long)x[4] * y[1] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; + + product = (unsigned long)x[4] * y[0] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; + out_high[4] = product >> 32; - for(int i = 7; i >= 0; i--) { - addc(&a.v[i], &b.v[i], &carry, &c.v[i]); - } + /** round 3 */ + product = (unsigned long)x[3] * y[7] + out_low[3]; + out_low[3] = (unsigned int)product; - *carry_ptr = carry; + product = (unsigned long)x[3] * y[6] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - return c; -} + product = (unsigned long)x[3] * y[5] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; -bool isInfinity256k(const uint256_t *x) -{ - return ( - (x->v[0] == 0xffffffff) && - (x->v[1] == 0xffffffff) && - (x->v[2] == 0xffffffff) && - (x->v[3] == 0xffffffff) && - (x->v[4] == 0xffffffff) && - (x->v[5] == 0xffffffff) && - (x->v[6] == 0xffffffff) && - (x->v[7] == 0xffffffff) - ); -} + product = (unsigned long)x[3] * y[4] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; -bool equal256k(uint256_t *a, uint256_t *b) -{ - return ( - (a->v[0] == b->v[0]) && - (a->v[1] == b->v[1]) && - (a->v[2] == b->v[2]) && - (a->v[3] == b->v[3]) && - (a->v[4] == b->v[4]) && - (a->v[5] == b->v[5]) && - (a->v[6] == b->v[6]) && - (a->v[7] == b->v[7]) - ); -} + product = (unsigned long)x[3] * y[3] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; 
-unsigned int readLSW256k(__global const uint256_t* ara, int idx) -{ - return ara[idx].v[7]; -} + product = (unsigned long)x[3] * y[2] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; + + product = (unsigned long)x[3] * y[1] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; + + product = (unsigned long)x[3] * y[0] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; + out_high[3] = product >> 32; -unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) -{ - return ara[idx].v[word]; -} + /** round 2 */ + product = (unsigned long)x[2] * y[7] + out_low[2]; + out_low[2] = (unsigned int)product; -void addP(unsigned int a[8], unsigned int c[8]) -{ - unsigned int carry = 0; - unsigned int P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F - }; - addc(&a[7], &P[7], &carry, &c[7]); - addc(&a[6], &P[6], &carry, &c[6]); - addc(&a[5], &P[5], &carry, &c[5]); - addc(&a[4], &P[4], &carry, &c[4]); - addc(&a[3], &P[3], &carry, &c[3]); - addc(&a[2], &P[2], &carry, &c[2]); - addc(&a[1], &P[1], &carry, &c[1]); - addc(&a[0], &P[0], &carry, &c[0]); -} + product = (unsigned long)x[2] * y[6] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; -void subP(unsigned int a[8], unsigned int c[8]) -{ - unsigned int borrow = 0; - unsigned int P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F - }; - subc(&a[7], &P[7], &borrow, &c[7]); - subc(&a[6], &P[6], &borrow, &c[6]); - subc(&a[5], &P[5], &borrow, &c[5]); - subc(&a[4], &P[4], &borrow, &c[4]); - subc(&a[3], &P[3], &borrow, &c[3]); - subc(&a[2], &P[2], &borrow, &c[2]); - subc(&a[1], &P[1], &borrow, &c[1]); - subc(&a[0], &P[0], &borrow, &c[0]); -} + product = (unsigned long)x[2] * y[5] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; -/** - * Subtraction mod p - */ -uint256_t subModP256k(uint256_t a, uint256_t b) -{ 
- unsigned int borrow = 0; - uint256_t c = sub256k(a, b, &borrow); - if(borrow) { - addP(c.v, c.v); - } + product = (unsigned long)x[2] * y[4] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - return c; -} + product = (unsigned long)x[2] * y[3] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; -void addModP256k(uint256_t *a, uint256_t *b, uint256_t *cP) -{ - unsigned int carry = 0; + product = (unsigned long)x[2] * y[2] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; + + product = (unsigned long)x[2] * y[1] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; + + product = (unsigned long)x[2] * y[0] + out_high[3] + (product >> 32); + out_high[3] = (unsigned int)product; + out_high[2] = product >> 32; + + /** round 1 */ + product = (unsigned long)x[1] * y[7] + out_low[1]; + out_low[1] = (unsigned int)product; - uint256_t c = add256k(*a, *b, &carry); + product = (unsigned long)x[1] * y[6] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; - if(carry) { subP(c.v, c.v); *cP = c; } + product = (unsigned long)x[1] * y[5] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - else if(c.v[0] > P[0]) { subP(c.v, c.v); *cP = c; } - else if(c.v[0] < P[0]) { *cP = c; } + product = (unsigned long)x[1] * y[4] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; - else if(c.v[1] > P[1]) { subP(c.v, c.v); *cP = c; } - else if(c.v[1] < P[1]) { *cP = c; } + product = (unsigned long)x[1] * y[3] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; - else if(c.v[2] > P[2]) { subP(c.v, c.v); *cP = c; } - else if(c.v[2] < P[2]) { *cP = c; } + product = (unsigned long)x[1] * y[2] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; - else if(c.v[3] > P[3]) { subP(c.v, c.v); *cP = c; } - else if(c.v[3] < P[3]) { *cP = c; } + product = (unsigned long)x[1] * y[1] + out_high[3] + (product >> 32); + out_high[3] = 
(unsigned int)product; - else if(c.v[4] > P[4]) { subP(c.v, c.v); *cP = c; } - else if(c.v[4] < P[4]) { *cP = c; } + product = (unsigned long)x[1] * y[0] + out_high[2] + (product >> 32); + out_high[2] = (unsigned int)product; + out_high[1] = product >> 32; + + /** round 0 */ + product = (unsigned long)x[0] * y[7] + out_low[0]; + out_low[0] = (unsigned int)product; + + product = (unsigned long)x[0] * y[6] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; + + product = (unsigned long)x[0] * y[5] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; + + product = (unsigned long)x[0] * y[4] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; + + product = (unsigned long)x[0] * y[3] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; + + product = (unsigned long)x[0] * y[2] + out_high[3] + (product >> 32); + out_high[3] = (unsigned int)product; - else if(c.v[5] > P[5]) { subP(c.v, c.v); *cP = c; } - else if(c.v[5] < P[5]) { *cP = c; } + product = (unsigned long)x[0] * y[1] + out_high[2] + (product >> 32); + out_high[2] = (unsigned int)product; - else if(c.v[6] > P[6]) { subP(c.v, c.v); *cP = c; } - else if(c.v[6] < P[6]) { *cP = c; } - - else if(c.v[7] > P[7]) { subP(c.v, c.v); *cP = c; } - else { *cP = c; } + product = (unsigned long)x[0] * y[0] + out_high[1] + (product >> 32); + out_high[1] = (unsigned int)product; + out_high[0] = product >> 32; } - void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) { - unsigned int ZERO = 0; unsigned int high[8]; + unsigned int low[8]; unsigned int hWord = 0; + unsigned int borrow = 0; unsigned int carry = 0; unsigned int t = 0; unsigned int product6 = 0; unsigned int product7 = 0; + unsigned int tmp; // 256 x 256 multiply - multiply256(a, b, high, product_low); + multiply256(a, b, high, low); + product_low[7] = low[7]; + product_low[6] = low[6]; + product_low[5] = low[5]; + product_low[4] = low[4]; + product_low[3] = low[3]; 
+ product_low[2] = low[2]; + product_low[1] = low[1]; + product_low[0] = low[0]; // Add 2^32 * high to the low 256 bits (shift left 1 word and add) // Affects product[14] to product[6] - addc(&product_low[6], &high[7], &carry, &product_low[6]); - addc(&product_low[5], &high[6], &carry, &product_low[5]); - addc(&product_low[4], &high[5], &carry, &product_low[4]); - addc(&product_low[3], &high[4], &carry, &product_low[3]); - addc(&product_low[2], &high[3], &carry, &product_low[2]); - addc(&product_low[1], &high[2], &carry, &product_low[1]); - addc(&product_low[0], &high[1], &carry, &product_low[0]); - - addc(&high[0], &ZERO, &carry, &product7); + addc(product_low[6], high[7], product_low[6], carry, tmp); + addc(product_low[5], high[6], product_low[5], carry, tmp); + addc(product_low[4], high[5], product_low[4], carry, tmp); + addc(product_low[3], high[4], product_low[3], carry, tmp); + addc(product_low[2], high[3], product_low[2], carry, tmp); + addc(product_low[1], high[2], product_low[1], carry, tmp); + addc(product_low[0], high[1], product_low[0], carry, tmp); + + addc(high[0], 0, product7, carry, tmp); product6 = carry; carry = 0; @@ -623,142 +693,227 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) // Affects product[15] to product[5] for(int i = 7; i >= 0; i--) { madd977(&hWord, &t, &high[i], &hWord); - addc(&product_low[i], &t, &carry, &product_low[i]); + addc(product_low[i], t, product_low[i], carry, tmp); t = 0; } - addc(&product7, &hWord, &carry, &product7); - addc(&product6, &ZERO, &carry, &product6); + addc(product7, hWord, high[7], carry, tmp); + addc(product6, 0, high[6], carry, tmp); // Multiply high 2 words by 2^32 and add to low // Affects product[14] to product[7] carry = 0; - high[7] = product7; - high[6] = product6; - - product7 = 0; - product6 = 0; - addc(&product_low[6], &high[7], &carry, &product_low[6]); - addc(&product_low[5], &high[6], &carry, &product_low[5]); + addc(product_low[6], high[7], product_low[6], 
carry, tmp); + addc(product_low[5], high[6], product_low[5], carry, tmp); - addc(&product_low[4], &ZERO, &carry, &product_low[4]); - addc(&product_low[3], &ZERO, &carry, &product_low[3]); - addc(&product_low[2], &ZERO, &carry, &product_low[2]); - addc(&product_low[1], &ZERO, &carry, &product_low[1]); - addc(&product_low[0], &ZERO, &carry, &product_low[0]); - - product7 = carry; + addc(product_low[4], 0, product_low[4], carry, tmp); + addc(product_low[3], 0, product_low[3], carry, tmp); + addc(product_low[2], 0, product_low[2], carry, tmp); + addc(product_low[1], 0, product_low[1], carry, tmp); + addc(product_low[0], 0, product_low[0], carry, tmp); // Multiply top 2 words by 977 and add to low // Affects product[15] to product[7] carry = 0; hWord = 0; madd977(&hWord, &t, &high[7], &hWord); - addc(&product_low[7], &t, &carry, &product_low[7]); + addc(product_low[7], t, product_low[7], carry, tmp); madd977(&hWord, &t, &high[6], &hWord); - addc(&product_low[6], &t, &carry, &product_low[6]); - addc(&product_low[5], &hWord, &carry, &product_low[5]); - + addc(product_low[6], t, product_low[6], carry, tmp); + addc(product_low[5], hWord, product_low[5], carry, tmp); // Propagate carry - addc(&product_low[4], &ZERO, &carry, &product_low[4]); - addc(&product_low[3], &ZERO, &carry, &product_low[3]); - addc(&product_low[2], &ZERO, &carry, &product_low[2]); - addc(&product_low[1], &ZERO, &carry, &product_low[1]); - addc(&product_low[0], &ZERO, &carry, &product_low[0]); - product7 = carry; + addc(product_low[4], 0, product_low[4], carry, tmp); + addc(product_low[3], 0, product_low[3], carry, tmp); + addc(product_low[2], 0, product_low[2], carry, tmp); + addc(product_low[1], 0, product_low[1], carry, tmp); + addc(product_low[0], 0, product_low[0], carry, tmp); // Reduce if >= P - if(product7 || greaterThanEqualToP(product_low)) { - subP(product_low, product_low); + if(carry || greaterOrEqualToP(product_low)) { + sub256k(product_low, P, product_low, borrow, tmp); } } -void 
mulModP256k(uint256_t *a, uint256_t *b, uint256_t *c) -{ - mulModP(a->v, b->v, c->v); -} - -void squareModP256k(uint256_t *a) +/** + * Subtraction mod p + */ +void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) { - mulModP(a->v, a->v, a->v); + unsigned int borrow = 0; + unsigned int tmp; + + sub256k(a, b, c, borrow, tmp); + + if (borrow) { + unsigned carry = 0; + add256k(c, P, c, carry, tmp); + } } /** * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains */ -uint256_t invModP256k(uint256_t x) +void invModP256k(unsigned int x[8], unsigned int result[8]) { - uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; - - mulModP256k(&x, &y, &y); - squareModP256k(&x); - squareModP256k(&x); - mulModP256k(&x, &y, &y); - squareModP256k(&x); - mulModP256k(&x, &y, &y); - squareModP256k(&x); - squareModP256k(&x); - mulModP256k(&x, &y, &y); + unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + + mulModP(x, y, y); + mulModP(x, x, x); + mulModP(x, x, x); + mulModP(x, y, y); + mulModP(x, x, x); + mulModP(x, y, y); + mulModP(x, x, x); + mulModP(x, x, x); + mulModP(x, y, y); for(int i = 0; i < 5; i++) { - squareModP256k(&x); + mulModP(x, x, x); } for(int i = 0; i < 22; i++) { - mulModP256k(&x, &y, &y); - squareModP256k(&x); + mulModP(x, y, y); + mulModP(x, x, x); } - squareModP256k(&x); + mulModP(x, x, x); for(int i = 0; i < 222; i++) { - mulModP256k(&x, &y, &y); - squareModP256k(&x); + mulModP(x, y, y); + mulModP(x, x, x); } - mulModP256k(&x, &y, &x); - return x; + mulModP(x, y, result); +} + +void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) +{ + unsigned int borrow = 0; + unsigned int carry = 0; + unsigned int tmp = 0; + + add256k(a, b, c, carry, tmp); + + if(carry) { sub256k(c, P, c, borrow, tmp); } + + else if(c[0] > P[0]) { sub256k(c, P, c, borrow, tmp); } + else if(c[0] < P[0]) { } + + else if(c[1] > P[1]) { sub256k(c, P, c, borrow, tmp); } + else if(c[1] < P[1]) { } + + else if(c[2] > P[2]) { sub256k(c, P, c, 
borrow, tmp); } + else if(c[2] < P[2]) { } + + else if(c[3] > P[3]) { sub256k(c, P, c, borrow, tmp); } + else if(c[3] < P[3]) { } + + else if(c[4] > P[4]) { sub256k(c, P, c, borrow, tmp); } + else if(c[4] < P[4]) { } + + else if(c[5] > P[5]) { sub256k(c, P, c, borrow, tmp); } + else if(c[5] < P[5]) { } + + else if(c[6] > P[6]) { sub256k(c, P, c, borrow, tmp); } + else if(c[6] < P[6]) { } + + else if(c[7] > P[7]) { sub256k(c, P, c, borrow, tmp); } } +void doBatchInverse256k(unsigned int x[8], unsigned int result[8]) +{ + invModP256k(x, result); +} void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) { int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); + unsigned int t[8]; + // x = Gx - x - uint256_t t = subModP256k(px, x); + subModP256k(px.v, x.v, t); // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - mulModP256k(inverse, &t, inverse); + mulModP(inverse->v, t, inverse->v); chain[batchIdx * dim + gid] = *inverse; } - void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) { int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); - uint256_t x = xPtr[i]; - if(equal256k(&px, &x)) { - addModP256k(&py,&py, &x); + if(equal256k(px.v, x.v)) { + addModP256k(py.v,py.v, x.v); } else { // x = Gx - x - x = subModP256k(px, x); + subModP256k(px.v, x.v, x.v); } // Keep a chain of multiples of the diff, i.e. 
c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - mulModP256k(&x, inverse, inverse); + mulModP(x.v, inverse->v, inverse->v); chain[batchIdx * dim + gid] = *inverse; } +void completeBatchAdd256k( + uint256_t px, + uint256_t py, + __global uint256_t* xPtr, + __global uint256_t* yPtr, + int i, + int batchIdx, + __global uint256_t* chain, + uint256_t* inverse, + uint256_t* newX, + uint256_t* newY) +{ + int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int dim = get_global_size(0); + uint256_t x = xPtr[i]; + uint256_t y = yPtr[i]; + + uint256_t s; + + if(batchIdx != 0) { + uint256_t c; + + c = chain[(batchIdx - 1) * dim + gid]; + mulModP(inverse->v, c.v, s.v); + + uint256_t diff; + subModP256k(px.v, x.v, diff.v); + mulModP(diff.v, inverse->v, inverse->v); + } else { + s = *inverse; + } + + uint256_t rise; + subModP256k(py.v, y.v, rise.v); + + mulModP(rise.v, s.v, s.v); + + // Rx = s^2 - Gx - Qx + uint256_t s2; + mulModP(s.v, s.v, s2.v); + + subModP256k(s2.v, px.v, newX->v); + subModP256k(newX->v, x.v, newX->v); + + // Ry = s(px - rx) - py + uint256_t k; + subModP256k(px.v, newX->v, k.v); + mulModP(s.v, k.v, newY->v); + subModP256k(newY->v, py.v, newY->v); +} + void completeBatchAddWithDouble256k( uint256_t px, @@ -786,121 +941,78 @@ void completeBatchAddWithDouble256k( uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; - mulModP256k(inverse, &c, &s); + mulModP(inverse->v, c.v, s.v); uint256_t diff; - if(equal256k(&px, &x)) { - addModP256k(&py, &py, &diff); + if(equal256k(px.v, x.v)) { + addModP256k(py.v, py.v, diff.v); } else { - diff = subModP256k(px, x); + subModP256k(px.v, x.v, diff.v); } - mulModP256k(&diff, inverse, inverse); + mulModP(diff.v, inverse->v, inverse->v); } else { s = *inverse; } - if(equal256k(&px, &x)) { + if(equal256k(px.v, x.v)) { // currently s = 1 / 2y uint256_t x2; uint256_t tx2; // 3x^2 - mulModP256k(&x, &x, &x2); - addModP256k(&x2, &x2, &tx2); - addModP256k(&x2, &tx2, &tx2); + mulModP(x.v, x.v, x2.v); 
+ addModP256k(x2.v, x2.v, tx2.v); + addModP256k(x2.v, tx2.v, tx2.v); // s = 3x^2 * 1/2y - mulModP256k(&tx2, &s, &s); + mulModP(tx2.v, s.v, s.v); // s^2 uint256_t s2; - mulModP256k(&s, &s, &s2); + mulModP(s.v, s.v, s2.v); // Rx = s^2 - 2px - *newX = subModP256k(s2, x); - *newX = subModP256k(*newX, x); + subModP256k(s2.v, x.v, newX->v); + subModP256k(newX->v, x.v, newX->v); // Ry = s(px - rx) - py - uint256_t k = subModP256k(px, *newX); - mulModP256k(&s, &k, newY); - *newY = subModP256k(*newY, py); + uint256_t k; + subModP256k(px.v, newX->v, k.v); + mulModP(s.v, k.v, newY->v); + subModP256k(newY->v, py.v,newY->v); } else { uint256_t rise; - rise = subModP256k(py, y); + subModP256k(py.v, y.v, rise.v); - mulModP256k(&rise, &s, &s); + mulModP(rise.v, s.v, s.v); // Rx = s^2 - Gx - Qx uint256_t s2; - mulModP256k(&s, &s, &s2); + mulModP(s.v, s.v, s2.v); - *newX = subModP256k(s2, px); - *newX = subModP256k(*newX, x); + subModP256k(s2.v, px.v, newX->v); + subModP256k(newX->v, x.v,newX->v); // Ry = s(px - rx) - py uint256_t k; - k = subModP256k(px, *newX); - mulModP256k(&s, &k, newY); - *newY = subModP256k(*newY, py); + subModP256k(px.v, newX->v, k.v); + mulModP(s.v, k.v, newY->v); + subModP256k(newY->v, py.v, newY->v); } } - -void completeBatchAdd256k( - uint256_t px, - uint256_t py, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - int i, - int batchIdx, - __global uint256_t* chain, - uint256_t* inverse, - uint256_t* newX, - uint256_t* newY) +unsigned int readLSW256k(__global const uint256_t* ara, int idx) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - - uint256_t s; - - if(batchIdx != 0) { - uint256_t c; - - c = chain[(batchIdx - 1) * dim + gid]; - mulModP256k(inverse, &c, &s); - - uint256_t diff; - diff = subModP256k(px, xPtr[i]); - mulModP256k(&diff, inverse, inverse); - } else { - s = *inverse; - } - - uint256_t rise = subModP256k(py, yPtr[i]); - - mulModP256k(&rise, &s, &s); - - // Rx = s^2 - Gx - Qx - 
uint256_t s2; - mulModP256k(&s, &s, &s2); - - *newX = subModP256k(s2, px); - *newX = subModP256k(*newX, xPtr[i]); - - // Ry = s(px - rx) - py - uint256_t k = subModP256k(px, *newX); - mulModP256k(&s, &k, newY); - *newY = subModP256k(*newY, py); + return ara[idx].v[7]; } - -uint256_t doBatchInverse256k(uint256_t x) +unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) { - return invModP256k(x); + return ara[idx].v[word]; } #endif @@ -1490,14 +1602,14 @@ __kernel void multiplyStepKernel( x = xPtr[i]; if(( p & (1 << (step % 32))) != 0) { - if(!isInfinity256k(&x)) { + if(!isInfinity256k(x.v)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } } } - inverse = doBatchInverse256k(inverse); + doBatchInverse256k(inverse.v, inverse.v); i -= dim; for(; i >= 0; i -= dim) { @@ -1510,7 +1622,7 @@ __kernel void multiplyStepKernel( uint256_t x = xPtr[i]; if((p & (1 << (step % 32))) != 0) { - if(!isInfinity256k(&x)) { + if(!isInfinity256k(x.v)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); } else { @@ -1674,7 +1786,7 @@ __kernel void keyFinderKernel( } #endif - inverse = doBatchInverse256k(inverse); + doBatchInverse256k(inverse.v, inverse.v); i -= dim; uint256_t newX; @@ -1755,7 +1867,7 @@ __kernel void keyFinderKernelWithDouble( } #endif - inverse = doBatchInverse256k(inverse); + doBatchInverse256k(inverse.v, inverse.v); i -= dim; diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index c97e8180..86d50b68 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -70,14 +70,14 @@ __kernel void multiplyStepKernel( x = xPtr[i]; if(( p & (1 << (step % 32))) != 0) { - if(!isInfinity256k(&x)) { + if(!isInfinity256k(x.v)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } } } - inverse = doBatchInverse256k(inverse); + doBatchInverse256k(inverse.v, inverse.v); i -= dim; for(; i 
>= 0; i -= dim) { @@ -90,7 +90,7 @@ __kernel void multiplyStepKernel( uint256_t x = xPtr[i]; if((p & (1 << (step % 32))) != 0) { - if(!isInfinity256k(&x)) { + if(!isInfinity256k(x.v)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); } else { @@ -254,7 +254,7 @@ __kernel void keyFinderKernel( } #endif - inverse = doBatchInverse256k(inverse); + doBatchInverse256k(inverse.v, inverse.v); i -= dim; uint256_t newX; @@ -335,7 +335,7 @@ __kernel void keyFinderKernelWithDouble( } #endif - inverse = doBatchInverse256k(inverse); + doBatchInverse256k(inverse.v, inverse.v); i -= dim; diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 2fd83b1d..285d5056 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -1,45 +1,42 @@ #ifndef SECP256K1_CL #define SECP256K1_CL -typedef unsigned long uint64_t; - typedef struct uint256_t { unsigned int v[8]; } uint256_t; /** - Prime modulus 2^256 - 2^32 - 977 + * Base point X */ -__constant unsigned int P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F +__constant unsigned int GX[8] = { + 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798 }; -// Add with carry -void addc(unsigned int *a, unsigned int *b, unsigned int *carry, unsigned int *sum) -{ - *sum = *a + *carry; - - unsigned int c1 = (*sum < *a) * 1; - - *sum = *sum + *b; - - *carry = c1 | ((*sum < *b) * 1); -} - -// Subtract with borrow -void subc(unsigned int *a, unsigned int *b, unsigned int *borrow, unsigned int *diff) -{ - unsigned int tmp = *a - *borrow; +/** + * Base point Y + */ +__constant unsigned int GY[8] = { + 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8 +}; - *borrow = (tmp > *a) * 1; +/** + * Group order + */ +__constant unsigned int N[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141 +}; - *diff 
= tmp - *b; +/** + * Prime modulus 2^256 - 2^32 - 977 + */ +__constant unsigned int P[8] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F +}; - *borrow |= (*diff > tmp) ? 1 : 0; -} +#define P6 (0xFFFFFFFE) +#define P7 (0xFFFFFC2F) #ifdef DEVICE_VENDOR_INTEL - // Intel devices have a mul_hi bug unsigned int mul_hi977(unsigned int x) { @@ -58,11 +55,9 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in *low = tmp; *high = mul_hi977(*a) + carry; } - #else -// 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +__inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -73,444 +68,316 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in #endif -uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr) -{ - unsigned int borrow = 0; - uint256_t c; - - subc(&a.v[7], &b.v[7], &borrow, &c.v[7]); - subc(&a.v[6], &b.v[6], &borrow, &c.v[6]); - subc(&a.v[5], &b.v[5], &borrow, &c.v[5]); - subc(&a.v[4], &b.v[4], &borrow, &c.v[4]); - subc(&a.v[3], &b.v[3], &borrow, &c.v[3]); - subc(&a.v[2], &b.v[2], &borrow, &c.v[2]); - subc(&a.v[1], &b.v[1], &borrow, &c.v[1]); - subc(&a.v[0], &b.v[0], &borrow, &c.v[0]); - - *borrow_ptr = borrow; - - return c; -} - -bool greaterThanEqualToP(const unsigned int a[8]) -{ - if(a[0] > P[0]) { return true; } - if(a[0] < P[0]) { return false; } - - if(a[1] > P[1]) { return true; } - if(a[1] < P[1]) { return false; } - - if(a[2] > P[2]) { return true; } - if(a[2] < P[2]) { return false; } - - if(a[3] > P[3]) { return true; } - if(a[3] < P[3]) { return false; } - - if(a[4] > P[4]) { return true; } - if(a[4] < P[4]) { return false; } - - if(a[5] > P[5]) { return true; } - if(a[5] < P[5]) { return false; } - - if(a[6] > P[6]) { return true; } - if(a[6] < P[6]) { return false; } - - if(a[7] > 
P[7]) { return true; } - if(a[7] < P[7]) { return false; } - - return true; -} - +// Add with carry +#define addc(a, b, sum, carry, tmp) \ + sum = (a) + (carry); \ + tmp = ((sum) < (a)) * 1; \ + sum = (sum) + (b); \ + carry = (tmp) | (((sum) < (b)) * 1); + +// subtract with borrow +#define subc(a, b, diff, borrow, tmp) \ + tmp = (a) - (borrow); \ + borrow = ((tmp) > (a)) * 1; \ + diff = (tmp) - (b); \ + borrow |= ((diff) > (tmp)) ? 1 : 0; + +#define add256k(a, b, c, carry, tmp) \ + addc(a[7], b[7], c[7], carry, tmp); \ + addc(a[6], b[6], c[6], carry, tmp); \ + addc(a[5], b[5], c[5], carry, tmp); \ + addc(a[4], b[4], c[4], carry, tmp); \ + addc(a[3], b[3], c[3], carry, tmp); \ + addc(a[2], b[2], c[2], carry, tmp); \ + addc(a[1], b[1], c[1], carry, tmp); \ + addc(a[0], b[0], c[0], carry, tmp); + +#define sub256k( a, b, c, borrow, tmp) \ + subc(a[7], b[7], c[7], borrow, tmp); \ + subc(a[6], b[6], c[6], borrow, tmp); \ + subc(a[5], b[5], c[5], borrow, tmp); \ + subc(a[4], b[4], c[4], borrow, tmp); \ + subc(a[3], b[3], c[3], borrow, tmp); \ + subc(a[2], b[2], c[2], borrow, tmp); \ + subc(a[1], b[1], c[1], borrow, tmp); \ + subc(a[0], b[0], c[0], borrow, tmp); + +#define isInfinity256k(a) \ + ( \ + (a[0] == 0xffffffff) && \ + (a[1] == 0xffffffff) && \ + (a[2] == 0xffffffff) && \ + (a[3] == 0xffffffff) && \ + (a[4] == 0xffffffff) && \ + (a[5] == 0xffffffff) && \ + (a[6] == 0xffffffff) && \ + (a[7] == 0xffffffff) \ + ) + +#define greaterOrEqualToP(a) \ + (a[6] >= P6 || a[7] >= P7) + +#define equal256k(a, b) \ + ( \ + (a[0] == b[0]) && \ + (a[1] == b[1]) && \ + (a[2] == b[2]) && \ + (a[3] == b[3]) && \ + (a[4] == b[4]) && \ + (a[5] == b[5]) && \ + (a[6] == b[6]) && \ + (a[7] == b[7]) \ + ) void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { - unsigned int z[16]; unsigned long product; // First round, overwrite z product = (unsigned long)x[7] * y[7]; - z[15] = (unsigned int)product; + out_low[7] = (unsigned 
int)product; product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32); - z[14] = (unsigned int)product; + out_low[6] = (unsigned int)product; product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32); - z[13] = (unsigned int)product; + out_low[5] = (unsigned int)product; product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32); - z[12] = (unsigned int)product; + out_low[4] = (unsigned int)product; product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32); - z[11] = (unsigned int)product; + out_low[3] = (unsigned int)product; product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32); - z[10] = (unsigned int)product; + out_low[2] = (unsigned int)product; product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32); - z[9] = (unsigned int)product; + out_low[1] = (unsigned int)product; product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32); - z[8] = (unsigned int)product; - z[7] = (unsigned int)(product >> 32); + out_low[0] = (unsigned int)product; + out_high[7] = (unsigned int)(product >> 32); - product = (unsigned long)x[6] * y[7] + z[14]; - z[14] = (unsigned int)product; + product = (unsigned long)x[6] * y[7] + out_low[6]; + out_low[6] = (unsigned int)product; /** round6 */ - product = (unsigned long)x[6] * y[6] + z[13] + (product >> 32); - z[13] = (unsigned int)product; + product = (unsigned long)x[6] * y[6] + out_low[5] + (product >> 32); + out_low[5] = (unsigned int)product; - product = (unsigned long)x[6] * y[5] + z[12] + (product >> 32); - z[12] = (unsigned int)product; + product = (unsigned long)x[6] * y[5] + out_low[4] + (product >> 32); + out_low[4] = (unsigned int)product; - product = (unsigned long)x[6] * y[4] + z[11] + (product >> 32); - z[11] = (unsigned int)product; + product = (unsigned long)x[6] * y[4] + out_low[3] + (product >> 32); + out_low[3] = (unsigned int)product; - product = (unsigned long)x[6] * y[3] + z[10] + (product >> 32); - z[10] = (unsigned int)product; + 
product = (unsigned long)x[6] * y[3] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - product = (unsigned long)x[6] * y[2] + z[9] + (product >> 32); - z[9] = (unsigned int)product; + product = (unsigned long)x[6] * y[2] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - product = (unsigned long)x[6] * y[1] + z[8] + (product >> 32); - z[8] = (unsigned int)product; + product = (unsigned long)x[6] * y[1] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; - product = (unsigned long)x[6] * y[0] + z[7] + (product >> 32); - z[7] = (unsigned int)product; - z[6] = product >> 32; + product = (unsigned long)x[6] * y[0] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; + out_high[6] = product >> 32; /** round 5 */ - product = (unsigned long)x[5] * y[7] + z[13]; - z[13] = (unsigned int)product; + product = (unsigned long)x[5] * y[7] + out_low[5]; + out_low[5] = (unsigned int)product; - product = (unsigned long)x[5] * y[6] + z[12] + (product >> 32); - z[12] = (unsigned int)product; + product = (unsigned long)x[5] * y[6] + out_low[4] + (product >> 32); + out_low[4] = (unsigned int)product; - product = (unsigned long)x[5] * y[5] + z[11] + (product >> 32); - z[11] = (unsigned int)product; + product = (unsigned long)x[5] * y[5] + out_low[3] + (product >> 32); + out_low[3] = (unsigned int)product; - product = (unsigned long)x[5] * y[4] + z[10] + (product >> 32); - z[10] = (unsigned int)product; + product = (unsigned long)x[5] * y[4] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - product = (unsigned long)x[5] * y[3] + z[9] + (product >> 32); - z[9] = (unsigned int)product; + product = (unsigned long)x[5] * y[3] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - product = (unsigned long)x[5] * y[2] + z[8] + (product >> 32); - z[8] = (unsigned int)product; + product = (unsigned long)x[5] * y[2] + out_low[0] + (product >> 32); + out_low[0] = (unsigned 
int)product; - product = (unsigned long)x[5] * y[1] + z[7] + (product >> 32); - z[7] = (unsigned int)product; + product = (unsigned long)x[5] * y[1] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - product = (unsigned long)x[5] * y[0] + z[6] + (product >> 32); - z[6] = (unsigned int)product; - z[5] = product >> 32; + product = (unsigned long)x[5] * y[0] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; + out_high[5] = product >> 32; /** round 4 */ - product = (unsigned long)x[4] * y[7] + z[12]; - z[12] = (unsigned int)product; + product = (unsigned long)x[4] * y[7] + out_low[4]; + out_low[4] = (unsigned int)product; - product = (unsigned long)x[4] * y[6] + z[11] + (product >> 32); - z[11] = (unsigned int)product; + product = (unsigned long)x[4] * y[6] + out_low[3] + (product >> 32); + out_low[3] = (unsigned int)product; - product = (unsigned long)x[4] * y[5] + z[10] + (product >> 32); - z[10] = (unsigned int)product; + product = (unsigned long)x[4] * y[5] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - product = (unsigned long)x[4] * y[4] + z[9] + (product >> 32); - z[9] = (unsigned int)product; + product = (unsigned long)x[4] * y[4] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - product = (unsigned long)x[4] * y[3] + z[8] + (product >> 32); - z[8] = (unsigned int)product; + product = (unsigned long)x[4] * y[3] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; - product = (unsigned long)x[4] * y[2] + z[7] + (product >> 32); - z[7] = (unsigned int)product; + product = (unsigned long)x[4] * y[2] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - product = (unsigned long)x[4] * y[1] + z[6] + (product >> 32); - z[6] = (unsigned int)product; + product = (unsigned long)x[4] * y[1] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; - product = (unsigned long)x[4] * y[0] + z[5] + (product >> 32); - z[5] = (unsigned 
int)product; - z[4] = product >> 32; + product = (unsigned long)x[4] * y[0] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; + out_high[4] = product >> 32; /** round 3 */ - product = (unsigned long)x[3] * y[7] + z[11]; - z[11] = (unsigned int)product; + product = (unsigned long)x[3] * y[7] + out_low[3]; + out_low[3] = (unsigned int)product; - product = (unsigned long)x[3] * y[6] + z[10] + (product >> 32); - z[10] = (unsigned int)product; + product = (unsigned long)x[3] * y[6] + out_low[2] + (product >> 32); + out_low[2] = (unsigned int)product; - product = (unsigned long)x[3] * y[5] + z[9] + (product >> 32); - z[9] = (unsigned int)product; + product = (unsigned long)x[3] * y[5] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - product = (unsigned long)x[3] * y[4] + z[8] + (product >> 32); - z[8] = (unsigned int)product; + product = (unsigned long)x[3] * y[4] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; - product = (unsigned long)x[3] * y[3] + z[7] + (product >> 32); - z[7] = (unsigned int)product; + product = (unsigned long)x[3] * y[3] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - product = (unsigned long)x[3] * y[2] + z[6] + (product >> 32); - z[6] = (unsigned int)product; + product = (unsigned long)x[3] * y[2] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; - product = (unsigned long)x[3] * y[1] + z[5] + (product >> 32); - z[5] = (unsigned int)product; + product = (unsigned long)x[3] * y[1] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; - product = (unsigned long)x[3] * y[0] + z[4] + (product >> 32); - z[4] = (unsigned int)product; - z[3] = product >> 32; + product = (unsigned long)x[3] * y[0] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; + out_high[3] = product >> 32; /** round 2 */ - product = (unsigned long)x[2] * y[7] + z[10]; - z[10] = (unsigned int)product; + product = (unsigned 
long)x[2] * y[7] + out_low[2]; + out_low[2] = (unsigned int)product; - product = (unsigned long)x[2] * y[6] + z[9] + (product >> 32); - z[9] = (unsigned int)product; + product = (unsigned long)x[2] * y[6] + out_low[1] + (product >> 32); + out_low[1] = (unsigned int)product; - product = (unsigned long)x[2] * y[5] + z[8] + (product >> 32); - z[8] = (unsigned int)product; + product = (unsigned long)x[2] * y[5] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; - product = (unsigned long)x[2] * y[4] + z[7] + (product >> 32); - z[7] = (unsigned int)product; + product = (unsigned long)x[2] * y[4] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - product = (unsigned long)x[2] * y[3] + z[6] + (product >> 32); - z[6] = (unsigned int)product; + product = (unsigned long)x[2] * y[3] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; - product = (unsigned long)x[2] * y[2] + z[5] + (product >> 32); - z[5] = (unsigned int)product; + product = (unsigned long)x[2] * y[2] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; - product = (unsigned long)x[2] * y[1] + z[4] + (product >> 32); - z[4] = (unsigned int)product; + product = (unsigned long)x[2] * y[1] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; - product = (unsigned long)x[2] * y[0] + z[3] + (product >> 32); - z[3] = (unsigned int)product; - z[2] = product >> 32; + product = (unsigned long)x[2] * y[0] + out_high[3] + (product >> 32); + out_high[3] = (unsigned int)product; + out_high[2] = product >> 32; /** round 1 */ - product = (unsigned long)x[1] * y[7] + z[9]; - z[9] = (unsigned int)product; + product = (unsigned long)x[1] * y[7] + out_low[1]; + out_low[1] = (unsigned int)product; - product = (unsigned long)x[1] * y[6] + z[8] + (product >> 32); - z[8] = (unsigned int)product; + product = (unsigned long)x[1] * y[6] + out_low[0] + (product >> 32); + out_low[0] = (unsigned int)product; - product = (unsigned long)x[1] 
* y[5] + z[7] + (product >> 32); - z[7] = (unsigned int)product; + product = (unsigned long)x[1] * y[5] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - product = (unsigned long)x[1] * y[4] + z[6] + (product >> 32); - z[6] = (unsigned int)product; + product = (unsigned long)x[1] * y[4] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; - product = (unsigned long)x[1] * y[3] + z[5] + (product >> 32); - z[5] = (unsigned int)product; + product = (unsigned long)x[1] * y[3] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; - product = (unsigned long)x[1] * y[2] + z[4] + (product >> 32); - z[4] = (unsigned int)product; + product = (unsigned long)x[1] * y[2] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; - product = (unsigned long)x[1] * y[1] + z[3] + (product >> 32); - z[3] = (unsigned int)product; + product = (unsigned long)x[1] * y[1] + out_high[3] + (product >> 32); + out_high[3] = (unsigned int)product; - product = (unsigned long)x[1] * y[0] + z[2] + (product >> 32); - z[2] = (unsigned int)product; - z[1] = product >> 32; + product = (unsigned long)x[1] * y[0] + out_high[2] + (product >> 32); + out_high[2] = (unsigned int)product; + out_high[1] = product >> 32; /** round 0 */ - product = (unsigned long)x[0] * y[7] + z[8]; - z[8] = (unsigned int)product; + product = (unsigned long)x[0] * y[7] + out_low[0]; + out_low[0] = (unsigned int)product; - product = (unsigned long)x[0] * y[6] + z[7] + (product >> 32); - z[7] = (unsigned int)product; + product = (unsigned long)x[0] * y[6] + out_high[7] + (product >> 32); + out_high[7] = (unsigned int)product; - product = (unsigned long)x[0] * y[5] + z[6] + (product >> 32); - z[6] = (unsigned int)product; + product = (unsigned long)x[0] * y[5] + out_high[6] + (product >> 32); + out_high[6] = (unsigned int)product; - product = (unsigned long)x[0] * y[4] + z[5] + (product >> 32); - z[5] = (unsigned int)product; + product = (unsigned 
long)x[0] * y[4] + out_high[5] + (product >> 32); + out_high[5] = (unsigned int)product; - product = (unsigned long)x[0] * y[3] + z[4] + (product >> 32); - z[4] = (unsigned int)product; + product = (unsigned long)x[0] * y[3] + out_high[4] + (product >> 32); + out_high[4] = (unsigned int)product; - product = (unsigned long)x[0] * y[2] + z[3] + (product >> 32); - z[3] = (unsigned int)product; + product = (unsigned long)x[0] * y[2] + out_high[3] + (product >> 32); + out_high[3] = (unsigned int)product; - product = (unsigned long)x[0] * y[1] + z[2] + (product >> 32); - z[2] = (unsigned int)product; + product = (unsigned long)x[0] * y[1] + out_high[2] + (product >> 32); + out_high[2] = (unsigned int)product; - product = (unsigned long)x[0] * y[0] + z[1] + (product >> 32); - z[1] = (unsigned int)product; + product = (unsigned long)x[0] * y[0] + out_high[1] + (product >> 32); + out_high[1] = (unsigned int)product; out_high[0] = product >> 32; - - out_high[1] = z[1]; - out_high[2] = z[2]; - out_high[3] = z[3]; - out_high[4] = z[4]; - out_high[5] = z[5]; - out_high[6] = z[6]; - out_high[7] = z[7]; - - out_low[0] = z[8]; - out_low[1] = z[9]; - out_low[2] = z[10]; - out_low[3] = z[11]; - out_low[4] = z[12]; - out_low[5] = z[13]; - out_low[6] = z[14]; - out_low[7] = z[15]; } -uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr) -{ - uint256_t c; - unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - addc(&a.v[i], &b.v[i], &carry, &c.v[i]); - } - - *carry_ptr = carry; - - return c; -} - -bool isInfinity256k(const uint256_t *x) -{ - return ( - (x->v[0] == 0xffffffff) && - (x->v[1] == 0xffffffff) && - (x->v[2] == 0xffffffff) && - (x->v[3] == 0xffffffff) && - (x->v[4] == 0xffffffff) && - (x->v[5] == 0xffffffff) && - (x->v[6] == 0xffffffff) && - (x->v[7] == 0xffffffff) - ); -} - -bool equal256k(uint256_t *a, uint256_t *b) -{ - return ( - (a->v[0] == b->v[0]) && - (a->v[1] == b->v[1]) && - (a->v[2] == b->v[2]) && - (a->v[3] == b->v[3]) && - (a->v[4] == 
b->v[4]) && - (a->v[5] == b->v[5]) && - (a->v[6] == b->v[6]) && - (a->v[7] == b->v[7]) - ); -} - -unsigned int readLSW256k(__global const uint256_t* ara, int idx) -{ - return ara[idx].v[7]; -} - -unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) -{ - return ara[idx].v[word]; -} - -void addP(unsigned int a[8], unsigned int c[8]) -{ - unsigned int carry = 0; - unsigned int P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F - }; - addc(&a[7], &P[7], &carry, &c[7]); - addc(&a[6], &P[6], &carry, &c[6]); - addc(&a[5], &P[5], &carry, &c[5]); - addc(&a[4], &P[4], &carry, &c[4]); - addc(&a[3], &P[3], &carry, &c[3]); - addc(&a[2], &P[2], &carry, &c[2]); - addc(&a[1], &P[1], &carry, &c[1]); - addc(&a[0], &P[0], &carry, &c[0]); -} - -void subP(unsigned int a[8], unsigned int c[8]) -{ - unsigned int borrow = 0; - unsigned int P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F - }; - subc(&a[7], &P[7], &borrow, &c[7]); - subc(&a[6], &P[6], &borrow, &c[6]); - subc(&a[5], &P[5], &borrow, &c[5]); - subc(&a[4], &P[4], &borrow, &c[4]); - subc(&a[3], &P[3], &borrow, &c[3]); - subc(&a[2], &P[2], &borrow, &c[2]); - subc(&a[1], &P[1], &borrow, &c[1]); - subc(&a[0], &P[0], &borrow, &c[0]); -} - -/** - * Subtraction mod p - */ -uint256_t subModP256k(uint256_t a, uint256_t b) -{ - unsigned int borrow = 0; - uint256_t c = sub256k(a, b, &borrow); - if(borrow) { - addP(c.v, c.v); - } - - return c; -} - -void addModP256k(uint256_t *a, uint256_t *b, uint256_t *cP) -{ - unsigned int carry = 0; - - uint256_t c = add256k(*a, *b, &carry); - - if(carry) { subP(c.v, c.v); *cP = c; } - - else if(c.v[0] > P[0]) { subP(c.v, c.v); *cP = c; } - else if(c.v[0] < P[0]) { *cP = c; } - - else if(c.v[1] > P[1]) { subP(c.v, c.v); *cP = c; } - else if(c.v[1] < P[1]) { *cP = c; } - - else if(c.v[2] > P[2]) { subP(c.v, c.v); *cP = c; } - else if(c.v[2] < P[2]) { *cP = c; } - - else 
if(c.v[3] > P[3]) { subP(c.v, c.v); *cP = c; } - else if(c.v[3] < P[3]) { *cP = c; } - - else if(c.v[4] > P[4]) { subP(c.v, c.v); *cP = c; } - else if(c.v[4] < P[4]) { *cP = c; } - - else if(c.v[5] > P[5]) { subP(c.v, c.v); *cP = c; } - else if(c.v[5] < P[5]) { *cP = c; } - - else if(c.v[6] > P[6]) { subP(c.v, c.v); *cP = c; } - else if(c.v[6] < P[6]) { *cP = c; } - - else if(c.v[7] > P[7]) { subP(c.v, c.v); *cP = c; } - else { *cP = c; } -} - - void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) { - unsigned int ZERO = 0; unsigned int high[8]; + unsigned int low[8]; unsigned int hWord = 0; + unsigned int borrow = 0; unsigned int carry = 0; unsigned int t = 0; unsigned int product6 = 0; unsigned int product7 = 0; + unsigned int tmp; // 256 x 256 multiply - multiply256(a, b, high, product_low); + multiply256(a, b, high, low); + product_low[7] = low[7]; + product_low[6] = low[6]; + product_low[5] = low[5]; + product_low[4] = low[4]; + product_low[3] = low[3]; + product_low[2] = low[2]; + product_low[1] = low[1]; + product_low[0] = low[0]; // Add 2^32 * high to the low 256 bits (shift left 1 word and add) // Affects product[14] to product[6] - addc(&product_low[6], &high[7], &carry, &product_low[6]); - addc(&product_low[5], &high[6], &carry, &product_low[5]); - addc(&product_low[4], &high[5], &carry, &product_low[4]); - addc(&product_low[3], &high[4], &carry, &product_low[3]); - addc(&product_low[2], &high[3], &carry, &product_low[2]); - addc(&product_low[1], &high[2], &carry, &product_low[1]); - addc(&product_low[0], &high[1], &carry, &product_low[0]); - - addc(&high[0], &ZERO, &carry, &product7); + addc(product_low[6], high[7], product_low[6], carry, tmp); + addc(product_low[5], high[6], product_low[5], carry, tmp); + addc(product_low[4], high[5], product_low[4], carry, tmp); + addc(product_low[3], high[4], product_low[3], carry, tmp); + addc(product_low[2], high[3], product_low[2], carry, tmp); + addc(product_low[1], high[2], 
product_low[1], carry, tmp); + addc(product_low[0], high[1], product_low[0], carry, tmp); + + addc(high[0], 0, product7, carry, tmp); product6 = carry; carry = 0; @@ -519,142 +386,227 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) // Affects product[15] to product[5] for(int i = 7; i >= 0; i--) { madd977(&hWord, &t, &high[i], &hWord); - addc(&product_low[i], &t, &carry, &product_low[i]); + addc(product_low[i], t, product_low[i], carry, tmp); t = 0; } - addc(&product7, &hWord, &carry, &product7); - addc(&product6, &ZERO, &carry, &product6); + addc(product7, hWord, high[7], carry, tmp); + addc(product6, 0, high[6], carry, tmp); // Multiply high 2 words by 2^32 and add to low // Affects product[14] to product[7] carry = 0; - high[7] = product7; - high[6] = product6; - - product7 = 0; - product6 = 0; - addc(&product_low[6], &high[7], &carry, &product_low[6]); - addc(&product_low[5], &high[6], &carry, &product_low[5]); + addc(product_low[6], high[7], product_low[6], carry, tmp); + addc(product_low[5], high[6], product_low[5], carry, tmp); - addc(&product_low[4], &ZERO, &carry, &product_low[4]); - addc(&product_low[3], &ZERO, &carry, &product_low[3]); - addc(&product_low[2], &ZERO, &carry, &product_low[2]); - addc(&product_low[1], &ZERO, &carry, &product_low[1]); - addc(&product_low[0], &ZERO, &carry, &product_low[0]); - - product7 = carry; + addc(product_low[4], 0, product_low[4], carry, tmp); + addc(product_low[3], 0, product_low[3], carry, tmp); + addc(product_low[2], 0, product_low[2], carry, tmp); + addc(product_low[1], 0, product_low[1], carry, tmp); + addc(product_low[0], 0, product_low[0], carry, tmp); // Multiply top 2 words by 977 and add to low // Affects product[15] to product[7] carry = 0; hWord = 0; madd977(&hWord, &t, &high[7], &hWord); - addc(&product_low[7], &t, &carry, &product_low[7]); + addc(product_low[7], t, product_low[7], carry, tmp); madd977(&hWord, &t, &high[6], &hWord); - addc(&product_low[6], &t, &carry, 
&product_low[6]); - addc(&product_low[5], &hWord, &carry, &product_low[5]); - + addc(product_low[6], t, product_low[6], carry, tmp); + addc(product_low[5], hWord, product_low[5], carry, tmp); // Propagate carry - addc(&product_low[4], &ZERO, &carry, &product_low[4]); - addc(&product_low[3], &ZERO, &carry, &product_low[3]); - addc(&product_low[2], &ZERO, &carry, &product_low[2]); - addc(&product_low[1], &ZERO, &carry, &product_low[1]); - addc(&product_low[0], &ZERO, &carry, &product_low[0]); - product7 = carry; + addc(product_low[4], 0, product_low[4], carry, tmp); + addc(product_low[3], 0, product_low[3], carry, tmp); + addc(product_low[2], 0, product_low[2], carry, tmp); + addc(product_low[1], 0, product_low[1], carry, tmp); + addc(product_low[0], 0, product_low[0], carry, tmp); // Reduce if >= P - if(product7 || greaterThanEqualToP(product_low)) { - subP(product_low, product_low); + if(carry || greaterOrEqualToP(product_low)) { + sub256k(product_low, P, product_low, borrow, tmp); } } -void mulModP256k(uint256_t *a, uint256_t *b, uint256_t *c) -{ - mulModP(a->v, b->v, c->v); -} - -void squareModP256k(uint256_t *a) +/** + * Subtraction mod p + */ +void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) { - mulModP(a->v, a->v, a->v); + unsigned int borrow = 0; + unsigned int tmp; + + sub256k(a, b, c, borrow, tmp); + + if (borrow) { + unsigned carry = 0; + add256k(c, P, c, carry, tmp); + } } /** * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains */ -uint256_t invModP256k(uint256_t x) +void invModP256k(unsigned int x[8], unsigned int result[8]) { - uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}}; - - mulModP256k(&x, &y, &y); - squareModP256k(&x); - squareModP256k(&x); - mulModP256k(&x, &y, &y); - squareModP256k(&x); - mulModP256k(&x, &y, &y); - squareModP256k(&x); - squareModP256k(&x); - mulModP256k(&x, &y, &y); + unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + + mulModP(x, y, y); + mulModP(x, x, x); + mulModP(x, x, 
x); + mulModP(x, y, y); + mulModP(x, x, x); + mulModP(x, y, y); + mulModP(x, x, x); + mulModP(x, x, x); + mulModP(x, y, y); for(int i = 0; i < 5; i++) { - squareModP256k(&x); + mulModP(x, x, x); } for(int i = 0; i < 22; i++) { - mulModP256k(&x, &y, &y); - squareModP256k(&x); + mulModP(x, y, y); + mulModP(x, x, x); } - squareModP256k(&x); + mulModP(x, x, x); for(int i = 0; i < 222; i++) { - mulModP256k(&x, &y, &y); - squareModP256k(&x); + mulModP(x, y, y); + mulModP(x, x, x); } - mulModP256k(&x, &y, &x); - return x; + mulModP(x, y, result); +} + +void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) +{ + unsigned int borrow = 0; + unsigned int carry = 0; + unsigned int tmp = 0; + + add256k(a, b, c, carry, tmp); + + if(carry) { sub256k(c, P, c, borrow, tmp); } + + else if(c[0] > P[0]) { sub256k(c, P, c, borrow, tmp); } + else if(c[0] < P[0]) { } + + else if(c[1] > P[1]) { sub256k(c, P, c, borrow, tmp); } + else if(c[1] < P[1]) { } + + else if(c[2] > P[2]) { sub256k(c, P, c, borrow, tmp); } + else if(c[2] < P[2]) { } + + else if(c[3] > P[3]) { sub256k(c, P, c, borrow, tmp); } + else if(c[3] < P[3]) { } + + else if(c[4] > P[4]) { sub256k(c, P, c, borrow, tmp); } + else if(c[4] < P[4]) { } + + else if(c[5] > P[5]) { sub256k(c, P, c, borrow, tmp); } + else if(c[5] < P[5]) { } + + else if(c[6] > P[6]) { sub256k(c, P, c, borrow, tmp); } + else if(c[6] < P[6]) { } + + else if(c[7] > P[7]) { sub256k(c, P, c, borrow, tmp); } } +void doBatchInverse256k(unsigned int x[8], unsigned int result[8]) +{ + invModP256k(x, result); +} void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) { int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); + unsigned int t[8]; + // x = Gx - x - uint256_t t = subModP256k(px, x); + subModP256k(px.v, x.v, t); // Keep a chain of multiples of the diff, i.e. 
c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - mulModP256k(inverse, &t, inverse); + mulModP(inverse->v, t, inverse->v); chain[batchIdx * dim + gid] = *inverse; } - void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) { int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); - uint256_t x = xPtr[i]; - if(equal256k(&px, &x)) { - addModP256k(&py,&py, &x); + if(equal256k(px.v, x.v)) { + addModP256k(py.v,py.v, x.v); } else { // x = Gx - x - x = subModP256k(px, x); + subModP256k(px.v, x.v, x.v); } // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc - mulModP256k(&x, inverse, inverse); + mulModP(x.v, inverse->v, inverse->v); chain[batchIdx * dim + gid] = *inverse; } +void completeBatchAdd256k( + uint256_t px, + uint256_t py, + __global uint256_t* xPtr, + __global uint256_t* yPtr, + int i, + int batchIdx, + __global uint256_t* chain, + uint256_t* inverse, + uint256_t* newX, + uint256_t* newY) +{ + int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + int dim = get_global_size(0); + uint256_t x = xPtr[i]; + uint256_t y = yPtr[i]; + + uint256_t s; + + if(batchIdx != 0) { + uint256_t c; + + c = chain[(batchIdx - 1) * dim + gid]; + mulModP(inverse->v, c.v, s.v); + + uint256_t diff; + subModP256k(px.v, x.v, diff.v); + mulModP(diff.v, inverse->v, inverse->v); + } else { + s = *inverse; + } + + uint256_t rise; + subModP256k(py.v, y.v, rise.v); + + mulModP(rise.v, s.v, s.v); + + // Rx = s^2 - Gx - Qx + uint256_t s2; + mulModP(s.v, s.v, s2.v); + + subModP256k(s2.v, px.v, newX->v); + subModP256k(newX->v, x.v, newX->v); + + // Ry = s(px - rx) - py + uint256_t k; + subModP256k(px.v, newX->v, k.v); + mulModP(s.v, k.v, newY->v); + subModP256k(newY->v, py.v, newY->v); +} + void completeBatchAddWithDouble256k( uint256_t px, @@ -682,121 +634,78 
@@ void completeBatchAddWithDouble256k( uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; - mulModP256k(inverse, &c, &s); + mulModP(inverse->v, c.v, s.v); uint256_t diff; - if(equal256k(&px, &x)) { - addModP256k(&py, &py, &diff); + if(equal256k(px.v, x.v)) { + addModP256k(py.v, py.v, diff.v); } else { - diff = subModP256k(px, x); + subModP256k(px.v, x.v, diff.v); } - mulModP256k(&diff, inverse, inverse); + mulModP(diff.v, inverse->v, inverse->v); } else { s = *inverse; } - if(equal256k(&px, &x)) { + if(equal256k(px.v, x.v)) { // currently s = 1 / 2y uint256_t x2; uint256_t tx2; // 3x^2 - mulModP256k(&x, &x, &x2); - addModP256k(&x2, &x2, &tx2); - addModP256k(&x2, &tx2, &tx2); + mulModP(x.v, x.v, x2.v); + addModP256k(x2.v, x2.v, tx2.v); + addModP256k(x2.v, tx2.v, tx2.v); // s = 3x^2 * 1/2y - mulModP256k(&tx2, &s, &s); + mulModP(tx2.v, s.v, s.v); // s^2 uint256_t s2; - mulModP256k(&s, &s, &s2); + mulModP(s.v, s.v, s2.v); // Rx = s^2 - 2px - *newX = subModP256k(s2, x); - *newX = subModP256k(*newX, x); + subModP256k(s2.v, x.v, newX->v); + subModP256k(newX->v, x.v, newX->v); // Ry = s(px - rx) - py - uint256_t k = subModP256k(px, *newX); - mulModP256k(&s, &k, newY); - *newY = subModP256k(*newY, py); + uint256_t k; + subModP256k(px.v, newX->v, k.v); + mulModP(s.v, k.v, newY->v); + subModP256k(newY->v, py.v,newY->v); } else { uint256_t rise; - rise = subModP256k(py, y); + subModP256k(py.v, y.v, rise.v); - mulModP256k(&rise, &s, &s); + mulModP(rise.v, s.v, s.v); // Rx = s^2 - Gx - Qx uint256_t s2; - mulModP256k(&s, &s, &s2); + mulModP(s.v, s.v, s2.v); - *newX = subModP256k(s2, px); - *newX = subModP256k(*newX, x); + subModP256k(s2.v, px.v, newX->v); + subModP256k(newX->v, x.v,newX->v); // Ry = s(px - rx) - py uint256_t k; - k = subModP256k(px, *newX); - mulModP256k(&s, &k, newY); - *newY = subModP256k(*newY, py); + subModP256k(px.v, newX->v, k.v); + mulModP(s.v, k.v, newY->v); + subModP256k(newY->v, py.v, newY->v); } } - -void completeBatchAdd256k( - uint256_t px, - 
uint256_t py, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - int i, - int batchIdx, - __global uint256_t* chain, - uint256_t* inverse, - uint256_t* newX, - uint256_t* newY) +unsigned int readLSW256k(__global const uint256_t* ara, int idx) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - - uint256_t s; - - if(batchIdx != 0) { - uint256_t c; - - c = chain[(batchIdx - 1) * dim + gid]; - mulModP256k(inverse, &c, &s); - - uint256_t diff; - diff = subModP256k(px, xPtr[i]); - mulModP256k(&diff, inverse, inverse); - } else { - s = *inverse; - } - - uint256_t rise = subModP256k(py, yPtr[i]); - - mulModP256k(&rise, &s, &s); - - // Rx = s^2 - Gx - Qx - uint256_t s2; - mulModP256k(&s, &s, &s2); - - *newX = subModP256k(s2, px); - *newX = subModP256k(*newX, xPtr[i]); - - // Ry = s(px - rx) - py - uint256_t k = subModP256k(px, *newX); - mulModP256k(&s, &k, newY); - *newY = subModP256k(*newY, py); + return ara[idx].v[7]; } - -uint256_t doBatchInverse256k(uint256_t x) +unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) { - return invModP256k(x); + return ara[idx].v[word]; } #endif From 48c93be869045e460e6bf0b270c841f58bee0437 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 1 Jun 2021 05:11:34 +0200 Subject: [PATCH 36/62] improve further --- CLKeySearchDevice/bitcrack.cl | 14 +++++++------- CLKeySearchDevice/keysearch.cl | 6 +++--- clMath/secp256k1.cl | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index d6f7b737..38ede30f 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -753,7 +753,7 @@ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) /** * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains */ -void invModP256k(unsigned int x[8], unsigned int result[8]) +void invModP256k(unsigned int x[8]) { unsigned int y[8] 
= {0, 0, 0, 0, 0, 0, 0, 1}; @@ -783,7 +783,7 @@ void invModP256k(unsigned int x[8], unsigned int result[8]) mulModP(x, x, x); } - mulModP(x, y, result); + mulModP(x, y, x); } void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) @@ -820,9 +820,9 @@ void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) else if(c[7] > P[7]) { sub256k(c, P, c, borrow, tmp); } } -void doBatchInverse256k(unsigned int x[8], unsigned int result[8]) +void doBatchInverse256k(unsigned int x[8]) { - invModP256k(x, result); + invModP256k(x); } void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) @@ -1609,7 +1609,7 @@ __kernel void multiplyStepKernel( } } - doBatchInverse256k(inverse.v, inverse.v); + doBatchInverse256k(inverse.v); i -= dim; for(; i >= 0; i -= dim) { @@ -1786,7 +1786,7 @@ __kernel void keyFinderKernel( } #endif - doBatchInverse256k(inverse.v, inverse.v); + doBatchInverse256k(inverse.v); i -= dim; uint256_t newX; @@ -1867,7 +1867,7 @@ __kernel void keyFinderKernelWithDouble( } #endif - doBatchInverse256k(inverse.v, inverse.v); + doBatchInverse256k(inverse.v); i -= dim; diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 86d50b68..ae3fbe6e 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -77,7 +77,7 @@ __kernel void multiplyStepKernel( } } - doBatchInverse256k(inverse.v, inverse.v); + doBatchInverse256k(inverse.v); i -= dim; for(; i >= 0; i -= dim) { @@ -254,7 +254,7 @@ __kernel void keyFinderKernel( } #endif - doBatchInverse256k(inverse.v, inverse.v); + doBatchInverse256k(inverse.v); i -= dim; uint256_t newX; @@ -335,7 +335,7 @@ __kernel void keyFinderKernelWithDouble( } #endif - doBatchInverse256k(inverse.v, inverse.v); + doBatchInverse256k(inverse.v); i -= dim; diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 285d5056..06fc1d53 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -446,7 
+446,7 @@ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) /** * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains */ -void invModP256k(unsigned int x[8], unsigned int result[8]) +void invModP256k(unsigned int x[8]) { unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; @@ -476,7 +476,7 @@ void invModP256k(unsigned int x[8], unsigned int result[8]) mulModP(x, x, x); } - mulModP(x, y, result); + mulModP(x, y, x); } void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) @@ -513,9 +513,9 @@ void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) else if(c[7] > P[7]) { sub256k(c, P, c, borrow, tmp); } } -void doBatchInverse256k(unsigned int x[8], unsigned int result[8]) +void doBatchInverse256k(unsigned int x[8]) { - invModP256k(x, result); + invModP256k(x); } void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) From 09a2ce6c1479f3ec3911f3e38562883a815c16fa Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 1 Jun 2021 06:10:29 +0200 Subject: [PATCH 37/62] more improvements --- CLKeySearchDevice/bitcrack.cl | 72 +++++++++++++++-------------------- clMath/ripemd160.cl | 57 ++++++++++++--------------- clMath/secp256k1.cl | 15 +++----- 3 files changed, 62 insertions(+), 82 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 38ede30f..00e77487 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1,20 +1,13 @@ #ifndef RIPEMD160_CL #define RIPEMD160_CL -#define RIPEMD160_IV_0 (0x67452301) -#define RIPEMD160_IV_1 (0xefcdab89) -#define RIPEMD160_IV_2 (0x98badcfe) -#define RIPEMD160_IV_3 (0x10325476) -#define RIPEMD160_IV_4 (0xc3d2e1f0) - -#define K0 (0x5a827999); -#define K1 (0x6ed9eba1); -#define K2 (0x8f1bbcdc); -#define K3 (0xa953fd4e); -#define K4 (0x7a6d76e9); -#define K5 (0x6d703ef3); -#define K6 (0x5c4dd124); -#define K7 (0x50a28be6); +__constant 
unsigned int RIPEMD160_IV[5] = { + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0, +}; + +__constant unsigned int K[8] = { + 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e, 0x7a6d76e9, 0x6d703ef3, 0x5c4dd124, 0x50a28be6 +}; #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) @@ -34,22 +27,22 @@ c = rotl((c), 10) #define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + K0;\ + a += G((b), (c), (d)) + (x) + K[0];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + K1;\ + a += H((b), (c), (d)) + (x) + K[1];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + K2;\ + a += I((b), (c), (d)) + (x) + K[2];\ a = rotl((a), (s)) + e;\ c = rotl((c), 10) #define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + K3;\ + a += J((b), (c), (d)) + (x) + K[3];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -59,32 +52,32 @@ c = rotl((c), 10) #define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + K4;\ + a += G((b), (c), (d)) + x + K[4];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + K5;\ + a += H((b), (c), (d)) + (x) + K[5];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define III(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + K6;\ + a += I((b), (c), (d)) + (x) + K[6];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + K7;\ + a += J((b), (c), (d)) + (x) + K[7];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV_0; - unsigned int b = RIPEMD160_IV_1; - unsigned int c = RIPEMD160_IV_2; - unsigned int d = RIPEMD160_IV_3; - unsigned int e = RIPEMD160_IV_4; + unsigned int a = RIPEMD160_IV[0]; + unsigned int b = RIPEMD160_IV[1]; + unsigned int c = RIPEMD160_IV[2]; + unsigned int d = RIPEMD160_IV[3]; + unsigned int 
e = RIPEMD160_IV[4]; /* round 1 */ FF(a, b, c, d, e, x[0], 11); @@ -185,11 +178,11 @@ void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV_0; - unsigned int b = RIPEMD160_IV_1; - unsigned int c = RIPEMD160_IV_2; - unsigned int d = RIPEMD160_IV_3; - unsigned int e = RIPEMD160_IV_4; + unsigned int a = RIPEMD160_IV[0]; + unsigned int b = RIPEMD160_IV[1]; + unsigned int c = RIPEMD160_IV[2]; + unsigned int d = RIPEMD160_IV[3]; + unsigned int e = RIPEMD160_IV[4]; /* parallel round 1 */ JJJ(a, b, c, d, e, x[5], 8); @@ -340,9 +333,6 @@ __constant unsigned int P[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F }; -#define P6 (0xFFFFFFFE) -#define P7 (0xFFFFFC2F) - #ifdef DEVICE_VENDOR_INTEL // Intel devices have a mul_hi bug unsigned int mul_hi977(unsigned int x) @@ -364,7 +354,7 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in } #else -__inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -422,7 +412,7 @@ __inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, un ) #define greaterOrEqualToP(a) \ - (a[6] >= P6 || a[7] >= P7) + (a[6] >= P[6] || a[7] >= P[7]) #define equal256k(a, b) \ ( \ @@ -655,7 +645,6 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) unsigned int low[8]; unsigned int hWord = 0; - unsigned int borrow = 0; unsigned int carry = 0; unsigned int t = 0; unsigned int product6 = 0; @@ -730,7 +719,8 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) // Reduce if >= P if(carry || greaterOrEqualToP(product_low)) { - sub256k(product_low, P, product_low, borrow, tmp); + carry = 0; + sub256k(product_low, P, 
product_low, carry, tmp); } } @@ -745,8 +735,8 @@ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) sub256k(a, b, c, borrow, tmp); if (borrow) { - unsigned carry = 0; - add256k(c, P, c, carry, tmp); + borrow = 0; + add256k(c, P, c, borrow, tmp); } } diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 390615ab..828fd3fb 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -1,20 +1,13 @@ #ifndef RIPEMD160_CL #define RIPEMD160_CL -#define RIPEMD160_IV_0 (0x67452301) -#define RIPEMD160_IV_1 (0xefcdab89) -#define RIPEMD160_IV_2 (0x98badcfe) -#define RIPEMD160_IV_3 (0x10325476) -#define RIPEMD160_IV_4 (0xc3d2e1f0) - -#define K0 (0x5a827999); -#define K1 (0x6ed9eba1); -#define K2 (0x8f1bbcdc); -#define K3 (0xa953fd4e); -#define K4 (0x7a6d76e9); -#define K5 (0x6d703ef3); -#define K6 (0x5c4dd124); -#define K7 (0x50a28be6); +__constant unsigned int RIPEMD160_IV[5] = { + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0, +}; + +__constant unsigned int K[8] = { + 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e, 0x7a6d76e9, 0x6d703ef3, 0x5c4dd124, 0x50a28be6 +}; #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) @@ -34,22 +27,22 @@ c = rotl((c), 10) #define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + K0;\ + a += G((b), (c), (d)) + (x) + K[0];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define HH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + K1;\ + a += H((b), (c), (d)) + (x) + K[1];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + K2;\ + a += I((b), (c), (d)) + (x) + K[2];\ a = rotl((a), (s)) + e;\ c = rotl((c), 10) #define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + K3;\ + a += J((b), (c), (d)) + (x) + K[3];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) @@ -59,32 +52,32 @@ c = rotl((c), 10) #define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + K4;\ + a += G((b), (c), (d)) + x + K[4];\ a = rotl((a), (s)) + (e);\ c = 
rotl((c), 10) #define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + K5;\ + a += H((b), (c), (d)) + (x) + K[5];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define III(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + K6;\ + a += I((b), (c), (d)) + (x) + K[6];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) #define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + K7;\ + a += J((b), (c), (d)) + (x) + K[7];\ a = rotl((a), (s)) + (e);\ c = rotl((c), 10) void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV_0; - unsigned int b = RIPEMD160_IV_1; - unsigned int c = RIPEMD160_IV_2; - unsigned int d = RIPEMD160_IV_3; - unsigned int e = RIPEMD160_IV_4; + unsigned int a = RIPEMD160_IV[0]; + unsigned int b = RIPEMD160_IV[1]; + unsigned int c = RIPEMD160_IV[2]; + unsigned int d = RIPEMD160_IV[3]; + unsigned int e = RIPEMD160_IV[4]; /* round 1 */ FF(a, b, c, d, e, x[0], 11); @@ -185,11 +178,11 @@ void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV_0; - unsigned int b = RIPEMD160_IV_1; - unsigned int c = RIPEMD160_IV_2; - unsigned int d = RIPEMD160_IV_3; - unsigned int e = RIPEMD160_IV_4; + unsigned int a = RIPEMD160_IV[0]; + unsigned int b = RIPEMD160_IV[1]; + unsigned int c = RIPEMD160_IV[2]; + unsigned int d = RIPEMD160_IV[3]; + unsigned int e = RIPEMD160_IV[4]; /* parallel round 1 */ JJJ(a, b, c, d, e, x[5], 8); diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 06fc1d53..ded5a8aa 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -33,9 +33,6 @@ __constant unsigned int P[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F }; -#define P6 (0xFFFFFFFE) -#define P7 (0xFFFFFC2F) - #ifdef DEVICE_VENDOR_INTEL // Intel devices have a mul_hi bug unsigned int mul_hi977(unsigned int x) @@ -57,7 +54,7 @@ void madd977(unsigned int *high, 
unsigned int *low, unsigned int *a, unsigned in } #else -__inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -115,7 +112,7 @@ __inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, un ) #define greaterOrEqualToP(a) \ - (a[6] >= P6 || a[7] >= P7) + (a[6] >= P[6] || a[7] >= P[7]) #define equal256k(a, b) \ ( \ @@ -348,7 +345,6 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) unsigned int low[8]; unsigned int hWord = 0; - unsigned int borrow = 0; unsigned int carry = 0; unsigned int t = 0; unsigned int product6 = 0; @@ -423,7 +419,8 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) // Reduce if >= P if(carry || greaterOrEqualToP(product_low)) { - sub256k(product_low, P, product_low, borrow, tmp); + carry = 0; + sub256k(product_low, P, product_low, carry, tmp); } } @@ -438,8 +435,8 @@ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) sub256k(a, b, c, borrow, tmp); if (borrow) { - unsigned carry = 0; - add256k(c, P, c, carry, tmp); + borrow = 0; + add256k(c, P, c, borrow, tmp); } } From ed303fa08ed75e1888cebfa7a79b33c1411d470a Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Tue, 1 Jun 2021 21:27:50 +0200 Subject: [PATCH 38/62] improve more --- CLKeySearchDevice/bitcrack.cl | 46 +++++++++++++--------------------- CLKeySearchDevice/keysearch.cl | 20 +++++---------- clMath/secp256k1.cl | 25 ++++++++---------- 3 files changed, 34 insertions(+), 57 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 00e77487..e8a0b9a9 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -825,7 +825,6 @@ void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int // x = Gx - x subModP256k(px.v, x.v, t); - // Keep a 
chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc mulModP(inverse->v, t, inverse->v); @@ -867,10 +866,11 @@ void completeBatchAdd256k( { int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); - uint256_t x = xPtr[i]; - uint256_t y = yPtr[i]; + uint256_t x = xPtr[i]; + uint256_t y = yPtr[i]; uint256_t s; + unsigned int tmp[8]; if(batchIdx != 0) { uint256_t c; @@ -878,29 +878,25 @@ void completeBatchAdd256k( c = chain[(batchIdx - 1) * dim + gid]; mulModP(inverse->v, c.v, s.v); - uint256_t diff; - subModP256k(px.v, x.v, diff.v); - mulModP(diff.v, inverse->v, inverse->v); + subModP256k(px.v, x.v, tmp); + mulModP(tmp, inverse->v, inverse->v); } else { s = *inverse; } - uint256_t rise; - subModP256k(py.v, y.v, rise.v); + subModP256k(py.v, y.v, tmp); - mulModP(rise.v, s.v, s.v); + mulModP(tmp, s.v, s.v); // Rx = s^2 - Gx - Qx - uint256_t s2; - mulModP(s.v, s.v, s2.v); + mulModP(s.v, s.v, tmp); - subModP256k(s2.v, px.v, newX->v); + subModP256k(tmp, px.v, newX->v); subModP256k(newX->v, x.v, newX->v); // Ry = s(px - rx) - py - uint256_t k; - subModP256k(px.v, newX->v, k.v); - mulModP(s.v, k.v, newY->v); + subModP256k(px.v, newX->v, tmp); + mulModP(s.v, tmp, newY->v); subModP256k(newY->v, py.v, newY->v); } @@ -1583,15 +1579,11 @@ __kernel void multiplyStepKernel( uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; - unsigned int p; uint256_t x; for(; i < totalPoints; i += dim) { - - p = readWord256k(privateKeys, i, 7 - step / 32); - x = xPtr[i]; - - if(( p & (1 << (step % 32))) != 0) { + if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { + x = xPtr[i]; if(!isInfinity256k(x.v)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; @@ -1601,17 +1593,13 @@ __kernel void multiplyStepKernel( doBatchInverse256k(inverse.v); + uint256_t newX; + uint256_t newY; i -= dim; for(; i >= 0; i -= dim) { - uint256_t newX; - 
uint256_t newY; - - unsigned int p; - p = readWord256k(privateKeys, i, 7 - step / 32); - - uint256_t x = xPtr[i]; + x = xPtr[i]; - if((p & (1 << (step % 32))) != 0) { + if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { if(!isInfinity256k(x.v)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index ae3fbe6e..9209a319 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -61,15 +61,11 @@ __kernel void multiplyStepKernel( uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; - unsigned int p; uint256_t x; for(; i < totalPoints; i += dim) { - - p = readWord256k(privateKeys, i, 7 - step / 32); - x = xPtr[i]; - - if(( p & (1 << (step % 32))) != 0) { + if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { + x = xPtr[i]; if(!isInfinity256k(x.v)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; @@ -79,17 +75,13 @@ __kernel void multiplyStepKernel( doBatchInverse256k(inverse.v); + uint256_t newX; + uint256_t newY; i -= dim; for(; i >= 0; i -= dim) { - uint256_t newX; - uint256_t newY; - - unsigned int p; - p = readWord256k(privateKeys, i, 7 - step / 32); - - uint256_t x = xPtr[i]; + x = xPtr[i]; - if((p & (1 << (step % 32))) != 0) { + if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { if(!isInfinity256k(x.v)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index ded5a8aa..f9562541 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -567,10 +567,11 @@ void completeBatchAdd256k( { int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); int dim = get_global_size(0); - uint256_t x = xPtr[i]; - uint256_t y = yPtr[i]; + uint256_t x = xPtr[i]; + uint256_t 
y = yPtr[i]; uint256_t s; + unsigned int tmp[8]; if(batchIdx != 0) { uint256_t c; @@ -578,29 +579,25 @@ void completeBatchAdd256k( c = chain[(batchIdx - 1) * dim + gid]; mulModP(inverse->v, c.v, s.v); - uint256_t diff; - subModP256k(px.v, x.v, diff.v); - mulModP(diff.v, inverse->v, inverse->v); + subModP256k(px.v, x.v, tmp); + mulModP(tmp, inverse->v, inverse->v); } else { s = *inverse; } - uint256_t rise; - subModP256k(py.v, y.v, rise.v); + subModP256k(py.v, y.v, tmp); - mulModP(rise.v, s.v, s.v); + mulModP(tmp, s.v, s.v); // Rx = s^2 - Gx - Qx - uint256_t s2; - mulModP(s.v, s.v, s2.v); + mulModP(s.v, s.v, tmp); - subModP256k(s2.v, px.v, newX->v); + subModP256k(tmp, px.v, newX->v); subModP256k(newX->v, x.v, newX->v); // Ry = s(px - rx) - py - uint256_t k; - subModP256k(px.v, newX->v, k.v); - mulModP(s.v, k.v, newY->v); + subModP256k(px.v, newX->v, tmp); + mulModP(s.v, tmp, newY->v); subModP256k(newY->v, py.v, newY->v); } From 8085f13cb5ce73f78958e94b4ce7d49d65c15490 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 2 Jun 2021 17:24:57 +0200 Subject: [PATCH 39/62] reduce complexity --- CLKeySearchDevice/bitcrack.cl | 99 +++++++++++++++-------------------- clMath/sha256.cl | 79 ++++++++++++---------------- 2 files changed, 76 insertions(+), 102 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index e8a0b9a9..bfdffb15 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -825,6 +825,7 @@ void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int // x = Gx - x subModP256k(px.v, x.v, t); + // Keep a chain of multiples of the diff, i.e.
c[0] = diff0, c[1] = diff0 * diff1, // c[2] = diff2 * diff1 * diff0, etc mulModP(inverse->v, t, inverse->v); @@ -1005,7 +1006,6 @@ unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) #ifndef _SHA256_CL #define _SHA256_CL - __constant unsigned int _K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, @@ -1030,7 +1030,6 @@ __constant unsigned int _IV[8] = { #define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n))) - #define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c))) #define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g))) @@ -1044,13 +1043,21 @@ __constant unsigned int _IV[8] = { (d) += (t) + (h);\ (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22)) - void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8]) { unsigned int a, b, c, d, e, f, g, h; unsigned int w[16]; unsigned int t; + a = _IV[0]; + b = _IV[1]; + c = _IV[2]; + d = _IV[3]; + e = _IV[4]; + f = _IV[5]; + g = _IV[6]; + h = _IV[7]; + // 0x04 || x || y w[0] = (x[0] >> 8) | 0x04000000; w[1] = (x[1] >> 8) | (x[0] << 24); @@ -1069,15 +1076,6 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = (y[6] >> 8) | (y[5] << 24); w[15] = (y[7] >> 8) | (y[6] << 24); - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); @@ -1207,15 +1205,14 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned h += _IV[7]; // store the intermediate hash value - unsigned int tmp[8]; - tmp[0] = a; - tmp[1] = b; - tmp[2] = c; - tmp[3] = d; - tmp[4] = e; - tmp[5] = f; - tmp[6] = g; - tmp[7] = h; + digest[0] = a; + digest[1] = b; + digest[2] = c; 
+ digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; w[0] = (y[7] << 24) | 0x00800000; w[15] = 520; // 65 * 8 @@ -1339,19 +1336,27 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); - digest[0] = tmp[0] + a; - digest[1] = tmp[1] + b; - digest[2] = tmp[2] + c; - digest[3] = tmp[3] + d; - digest[4] = tmp[4] + e; - digest[5] = tmp[5] + f; - digest[6] = tmp[6] + g; - digest[7] = tmp[7] + h; + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; } void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) { - unsigned int a, b, c, d, e, f, g, h; + unsigned int a = _IV[0]; + unsigned int b = _IV[1]; + unsigned int c = _IV[2]; + unsigned int d = _IV[3]; + unsigned int e = _IV[4]; + unsigned int f = _IV[5]; + unsigned int g = _IV[6]; + unsigned int h = _IV[7]; + unsigned int w[16]; unsigned int t; @@ -1368,15 +1373,6 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[8] = (x[7] << 24) | 0x00800000; w[15] = 264; // 33 * 8 - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); @@ -1497,23 +1493,14 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); - a += _IV[0]; - b += _IV[1]; - c += _IV[2]; - d += _IV[3]; - e += _IV[4]; - f += _IV[5]; - g += _IV[6]; - h += _IV[7]; - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; + digest[0] = a + _IV[0]; + 
digest[1] = b + _IV[1]; + digest[2] = c + _IV[2]; + digest[3] = d + _IV[3]; + digest[4] = e + _IV[4]; + digest[5] = f + _IV[5]; + digest[6] = g + _IV[6]; + digest[7] = h + _IV[7]; } #endif #define COMPRESSED 0 diff --git a/clMath/sha256.cl b/clMath/sha256.cl index 83b8bd65..2ec89c05 100644 --- a/clMath/sha256.cl +++ b/clMath/sha256.cl @@ -1,7 +1,6 @@ #ifndef _SHA256_CL #define _SHA256_CL - __constant unsigned int _K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, @@ -26,7 +25,6 @@ __constant unsigned int _IV[8] = { #define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n))) - #define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c))) #define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g))) @@ -40,13 +38,21 @@ __constant unsigned int _IV[8] = { (d) += (t) + (h);\ (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22)) - void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8]) { unsigned int a, b, c, d, e, f, g, h; unsigned int w[16]; unsigned int t; + a = _IV[0]; + b = _IV[1]; + c = _IV[2]; + d = _IV[3]; + e = _IV[4]; + f = _IV[5]; + g = _IV[6]; + h = _IV[7]; + // 0x04 || x || y w[0] = (x[0] >> 8) | 0x04000000; w[1] = (x[1] >> 8) | (x[0] << 24); @@ -65,15 +71,6 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned w[14] = (y[6] >> 8) | (y[5] << 24); w[15] = (y[7] >> 8) | (y[6] << 24); - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); @@ -203,15 +200,14 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned h += _IV[7]; // store the intermediate hash value - unsigned int tmp[8]; - tmp[0] = a; - tmp[1] = b; - 
tmp[2] = c; - tmp[3] = d; - tmp[4] = e; - tmp[5] = f; - tmp[6] = g; - tmp[7] = h; + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; w[0] = (y[7] << 24) | 0x00800000; w[15] = 520; // 65 * 8 @@ -335,14 +331,14 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); - digest[0] = tmp[0] + a; - digest[1] = tmp[1] + b; - digest[2] = tmp[2] + c; - digest[3] = tmp[3] + d; - digest[4] = tmp[4] + e; - digest[5] = tmp[5] + f; - digest[6] = tmp[6] + g; - digest[7] = tmp[7] + h; + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; } void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) @@ -493,22 +489,13 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]); roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]); - a += _IV[0]; - b += _IV[1]; - c += _IV[2]; - d += _IV[3]; - e += _IV[4]; - f += _IV[5]; - g += _IV[6]; - h += _IV[7]; - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; + digest[0] = a + _IV[0]; + digest[1] = b + _IV[1]; + digest[2] = c + _IV[2]; + digest[3] = d + _IV[3]; + digest[4] = e + _IV[4]; + digest[5] = f + _IV[5]; + digest[6] = g + _IV[6]; + digest[7] = h + _IV[7]; } #endif From 07848c782c3ac1e7432c5deabaad78390baba610 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Wed, 2 Jun 2021 21:13:20 +0200 Subject: [PATCH 40/62] add Notify to have sound when we find a key --- CLKeySearchDevice/bitcrack.cl | 19 ++++++++++--------- KeyFinder/main.cpp | 4 ++-- Logger/Logger.cpp | 7 ++++++- Logger/Logger.h | 3 ++- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git 
a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index bfdffb15..f22a7d76 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1348,15 +1348,7 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) { - unsigned int a = _IV[0]; - unsigned int b = _IV[1]; - unsigned int c = _IV[2]; - unsigned int d = _IV[3]; - unsigned int e = _IV[4]; - unsigned int f = _IV[5]; - unsigned int g = _IV[6]; - unsigned int h = _IV[7]; - + unsigned int a, b, c, d, e, f, g, h; unsigned int w[16]; unsigned int t; @@ -1373,6 +1365,15 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un w[8] = (x[7] << 24) | 0x00800000; w[15] = 264; // 33 * 8 + a = _IV[0]; + b = _IV[1]; + c = _IV[2]; + d = _IV[3]; + e = _IV[4]; + f = _IV[5]; + g = _IV[6]; + h = _IV[7]; + roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]); roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]); roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]); diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index bad1a215..a13ab79d 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -72,12 +72,12 @@ void resultCallback(KeySearchResult info) return; } - std::string logStr = "\nAddress: " + info.address + "\n"; + std::string logStr = "\n\nAddress: " + info.address + "\n"; logStr += "Private key: " + info.privateKey.toString() + "\n"; logStr += "Compressed: "; logStr += (info.compressed) ? "yes\n" : "no\n"; logStr += "Public key: "; logStr += (info.compressed) ? 
info.publicKey.toString(true) + "\n" : info.publicKey.x.toString() + "\n " + info.publicKey.y.toString() + "\n"; - Logger::log(LogLevel::Info, logStr); + Logger::log(LogLevel::Notify, logStr); } /** diff --git a/Logger/Logger.cpp b/Logger/Logger.cpp index c0d57ab6..b38a3cfe 100644 --- a/Logger/Logger.cpp +++ b/Logger/Logger.cpp @@ -27,6 +27,7 @@ bool LogLevel::isValid(int level) case Error: case Debug: case Warning: + case Notify: return true; default: return false; @@ -44,6 +45,8 @@ std::string LogLevel::toString(int level) return "Debug"; case Warning: return "Warning"; + case Notify: + return "Notify"; default: return ""; } @@ -92,6 +95,8 @@ std::string Logger::formatLog(int logLevel, std::string msg) void Logger::log(int logLevel, std::string msg) { std::string str = formatLog(logLevel, msg); - + if (logLevel == LogLevel::Level::Notify) { + fprintf(stdout, "\a"); + } fprintf(stderr, "%s\n", str.c_str()); } diff --git a/Logger/Logger.h b/Logger/Logger.h index 2bedc89a..849de52e 100644 --- a/Logger/Logger.h +++ b/Logger/Logger.h @@ -8,7 +8,8 @@ namespace LogLevel { Info = 1, Error = 2, Debug = 4, - Warning = 8 + Warning = 8, + Notify = 16, }; bool isValid(int level); From 42c3e06d5425ca99acee1dbd046c30004f876981 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Thu, 3 Jun 2021 01:53:34 +0200 Subject: [PATCH 41/62] minor optimizations --- KeyFinder/DeviceManager.cpp | 7 ++----- KeyFinder/main.cpp | 8 ++++---- Logger/Logger.cpp | 10 +++------- Logger/Logger.h | 4 ++-- clUtil/clError.cpp | 2 +- 5 files changed, 12 insertions(+), 19 deletions(-) diff --git a/KeyFinder/DeviceManager.cpp b/KeyFinder/DeviceManager.cpp index 31776aea..ff5cf21b 100644 --- a/KeyFinder/DeviceManager.cpp +++ b/KeyFinder/DeviceManager.cpp @@ -3,24 +3,21 @@ std::vector DeviceManager::getDevices() { - int deviceId = 0; std::vector devices; - // Get OpenCL devices try { std::vector clDevices = cl::getDevices(); - for(int i = 0; i < clDevices.size(); i++) { + for(size_t i = 0; i < clDevices.size(); 
i++) { DeviceManager::DeviceInfo device; device.name = clDevices[i].name; device.type = DeviceType::OpenCL; - device.id = deviceId; + device.id = i; device.physicalId = (uint64_t)clDevices[i].id; device.memory = clDevices[i].mem; device.computeUnits = clDevices[i].cores; devices.push_back(device); - deviceId++; } } catch(cl::CLException ex) { throw DeviceManager::DeviceManagerException(ex.msg); diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index a13ab79d..8cc790dc 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -199,10 +199,10 @@ void usage() Finds default parameters depending on the device */ typedef struct { - int threads; - int blocks; - int pointsPerThread; - int compressionMode; + unsigned int threads; + unsigned int blocks; + unsigned int pointsPerThread; + unsigned int compressionMode; }DeviceParameters; DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) diff --git a/Logger/Logger.cpp b/Logger/Logger.cpp index b38a3cfe..dd378b57 100644 --- a/Logger/Logger.cpp +++ b/Logger/Logger.cpp @@ -6,7 +6,7 @@ inline tm localtime_xp(time_t timer) { - tm bt{}; + tm bt; #if defined(__unix__) localtime_r(&timer, &bt); #elif defined(_MSC_VER) @@ -19,7 +19,6 @@ inline tm localtime_xp(time_t timer) return bt; } - bool LogLevel::isValid(int level) { switch(level) { @@ -70,9 +69,7 @@ std::string Logger::formatLog(int logLevel, std::string msg) std::string prefix = "[" + dateTime + "] [" + LogLevel::toString(logLevel) + "] "; - size_t prefixLen = prefix.length(); - - std::string padding(prefixLen, ' '); + std::string padding(prefix.length(), ' '); if(msg.find('\n', 0) != std::string::npos) { size_t pos = 0; @@ -91,12 +88,11 @@ std::string Logger::formatLog(int logLevel, std::string msg) return prefix; } - void Logger::log(int logLevel, std::string msg) { std::string str = formatLog(logLevel, msg); if (logLevel == LogLevel::Level::Notify) { - fprintf(stdout, "\a"); + fprintf(stderr, "\a"); } fprintf(stderr, "%s\n", str.c_str()); } 
diff --git a/Logger/Logger.h b/Logger/Logger.h index 849de52e..0675ba83 100644 --- a/Logger/Logger.h +++ b/Logger/Logger.h @@ -9,7 +9,7 @@ namespace LogLevel { Error = 2, Debug = 4, Warning = 8, - Notify = 16, + Notify = 16 }; bool isValid(int level); @@ -36,4 +36,4 @@ class Logger { }; -#endif \ No newline at end of file +#endif diff --git a/clUtil/clError.cpp b/clUtil/clError.cpp index 4f7d5306..c5e7f575 100644 --- a/clUtil/clError.cpp +++ b/clUtil/clError.cpp @@ -139,4 +139,4 @@ std::string cl::getOpenCLErrorDescription(cl_int err) { default: return "No description available"; } -} \ No newline at end of file +} From 5a47e33c84286f96abcb3ed7be29d21cfaa96e9d Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Thu, 3 Jun 2021 02:22:47 +0200 Subject: [PATCH 42/62] minor optimizations --- CLKeySearchDevice/CLKeySearchDevice.h | 5 ++--- CLKeySearchDevice/bitcrack.cl | 4 ++-- KeyFinder/ConfigFile.cpp | 2 +- KeyFinder/ConfigFile.h | 6 +++--- KeyFinder/DeviceManager.cpp | 2 +- KeyFinder/DeviceManager.h | 8 +++----- KeyFinder/main.cpp | 2 +- KeyFinderLib/KeyFinder.h | 4 ++-- KeyFinderLib/KeyFinderShared.h | 16 +++------------- KeyFinderLib/KeySearchDevice.h | 6 +++--- KeyFinderLib/KeySearchTypes.h | 8 ++++---- Logger/Logger.cpp | 8 ++++---- Logger/Logger.h | 4 ++-- clMath/sha256.cl | 4 ++-- clUtil/clContext.cpp | 4 ++-- clUtil/clContext.h | 5 +---- util/util.h | 2 ++ 17 files changed, 38 insertions(+), 52 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h index 0905f40e..14b3723b 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.h +++ b/CLKeySearchDevice/CLKeySearchDevice.h @@ -1,5 +1,5 @@ -#ifndef _CL_KEYSEARCH_DEVICE_H -#define _CL_KEYSEARCH_DEVICE_H +#ifndef CL_KEYSEARCH_DEVICE_H +#define CL_KEYSEARCH_DEVICE_H #include "KeySearchDevice.h" #include "clContext.h" @@ -138,4 +138,3 @@ class CLKeySearchDevice : public KeySearchDevice { }; #endif - diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 
f22a7d76..1a2ecdc0 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1003,8 +1003,8 @@ unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) } #endif -#ifndef _SHA256_CL -#define _SHA256_CL +#ifndef SHA256_CL +#define SHA256_CL __constant unsigned int _K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, diff --git a/KeyFinder/ConfigFile.cpp b/KeyFinder/ConfigFile.cpp index 83098ae2..4cc7fc29 100644 --- a/KeyFinder/ConfigFile.cpp +++ b/KeyFinder/ConfigFile.cpp @@ -43,4 +43,4 @@ std::map ConfigFileReader::read() } return entries; -} \ No newline at end of file +} diff --git a/KeyFinder/ConfigFile.h b/KeyFinder/ConfigFile.h index ed481167..312d21bc 100644 --- a/KeyFinder/ConfigFile.h +++ b/KeyFinder/ConfigFile.h @@ -1,5 +1,5 @@ -#ifndef _CONFIG_FILE_H -#define _CONFIG_FILE_H +#ifndef CONFIG_FILE_H +#define CONFIG_FILE_H #include #include @@ -34,4 +34,4 @@ class ConfigFileReader { std::map read(); }; -#endif \ No newline at end of file +#endif diff --git a/KeyFinder/DeviceManager.cpp b/KeyFinder/DeviceManager.cpp index ff5cf21b..79ed9119 100644 --- a/KeyFinder/DeviceManager.cpp +++ b/KeyFinder/DeviceManager.cpp @@ -24,4 +24,4 @@ std::vector DeviceManager::getDevices() } return devices; -} \ No newline at end of file +} diff --git a/KeyFinder/DeviceManager.h b/KeyFinder/DeviceManager.h index f906a070..122be59f 100644 --- a/KeyFinder/DeviceManager.h +++ b/KeyFinder/DeviceManager.h @@ -1,5 +1,5 @@ -#ifndef _DEVICE_MANAGER_H -#define _DEVICE_MANAGER_H +#ifndef DEVICE_MANAGER_H +#define DEVICE_MANAGER_H #include #include @@ -25,7 +25,6 @@ class DeviceType { }; }; - typedef struct { int type; int id; @@ -42,5 +41,4 @@ std::vector getDevices(); } - -#endif \ No newline at end of file +#endif diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index 8cc790dc..42ff338b 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -642,4 +642,4 @@ int main(int argc, char 
**argv) } return run(); -} \ No newline at end of file +} diff --git a/KeyFinderLib/KeyFinder.h b/KeyFinderLib/KeyFinder.h index c8150f7d..1bc67cde 100644 --- a/KeyFinderLib/KeyFinder.h +++ b/KeyFinderLib/KeyFinder.h @@ -1,5 +1,5 @@ -#ifndef _KEY_FINDER_H -#define _KEY_FINDER_H +#ifndef KEY_FINDER_H +#define KEY_FINDER_H #include #include diff --git a/KeyFinderLib/KeyFinderShared.h b/KeyFinderLib/KeyFinderShared.h index a6973f1a..049d426f 100644 --- a/KeyFinderLib/KeyFinderShared.h +++ b/KeyFinderLib/KeyFinderShared.h @@ -1,5 +1,5 @@ -#ifndef _KEY_FINDER_SHARED_H -#define _KEY_FINDER_SHARED_H +#ifndef KEY_FINDER_SHARED_H +#define KEY_FINDER_SHARED_H namespace PointCompressionType { enum Value { @@ -20,14 +20,4 @@ struct KeyFinderDeviceResult { unsigned int digest[5]; }; -//typedef struct hash160 { -// -// unsigned int h[5]; -// -// hash160(const unsigned int hash[5]) -// { -// memcpy(h, hash, sizeof(unsigned int) * 5); -// } -//}hash160; - -#endif \ No newline at end of file +#endif diff --git a/KeyFinderLib/KeySearchDevice.h b/KeyFinderLib/KeySearchDevice.h index 82afb967..26621d24 100644 --- a/KeyFinderLib/KeySearchDevice.h +++ b/KeyFinderLib/KeySearchDevice.h @@ -1,5 +1,5 @@ -#ifndef _KEY_SEARCH_DEVICE_H -#define _KEY_SEARCH_DEVICE_H +#ifndef KEY_SEARCH_DEVICE_H +#define KEY_SEARCH_DEVICE_H #include #include @@ -66,4 +66,4 @@ class KeySearchDevice { virtual secp256k1::uint256 getNextKey() = 0; }; -#endif \ No newline at end of file +#endif diff --git a/KeyFinderLib/KeySearchTypes.h b/KeyFinderLib/KeySearchTypes.h index 9ff12d3f..61dea7b1 100644 --- a/KeyFinderLib/KeySearchTypes.h +++ b/KeyFinderLib/KeySearchTypes.h @@ -1,8 +1,8 @@ -#ifndef _KEY_FINDER_TYPES -#define _KEY_FINDER_TYPES +#ifndef KEY_FINDER_TYPES +#define KEY_FINDER_TYPES -#include -#include +#include +#include #include "secp256k1.h" namespace PointCompressionType { diff --git a/Logger/Logger.cpp b/Logger/Logger.cpp index dd378b57..59af7242 100644 --- a/Logger/Logger.cpp +++ b/Logger/Logger.cpp @@ 
-63,7 +63,7 @@ std::string Logger::getDateTimeString() return std::string(buf); } -std::string Logger::formatLog(int logLevel, std::string msg) +std::string Logger::formatLog(LogLevel::Level logLevel, std::string msg) { std::string dateTime = getDateTimeString(); @@ -88,10 +88,10 @@ std::string Logger::formatLog(int logLevel, std::string msg) return prefix; } -void Logger::log(int logLevel, std::string msg) +void Logger::log(LogLevel::Level level, std::string msg) { - std::string str = formatLog(logLevel, msg); - if (logLevel == LogLevel::Level::Notify) { + std::string str = formatLog(level, msg); + if (level == LogLevel::Level::Notify) { fprintf(stderr, "\a"); } fprintf(stderr, "%s\n", str.c_str()); diff --git a/Logger/Logger.h b/Logger/Logger.h index 0675ba83..34221190 100644 --- a/Logger/Logger.h +++ b/Logger/Logger.h @@ -22,7 +22,7 @@ class Logger { private: static std::string _logFile; - static std::string formatLog(int logLevel, std::string msg); + static std::string formatLog(LogLevel::Level logLevel, std::string msg); static std::string getDateTimeString(); @@ -32,7 +32,7 @@ class Logger { { } - static void log(int logLevel, std::string msg); + static void log(LogLevel::Level level, std::string msg); }; diff --git a/clMath/sha256.cl b/clMath/sha256.cl index 2ec89c05..0e4833f0 100644 --- a/clMath/sha256.cl +++ b/clMath/sha256.cl @@ -1,5 +1,5 @@ -#ifndef _SHA256_CL -#define _SHA256_CL +#ifndef SHA256_CL +#define SHA256_CL __constant unsigned int _K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, diff --git a/clUtil/clContext.cpp b/clUtil/clContext.cpp index 9ebe54e5..b31b4823 100644 --- a/clUtil/clContext.cpp +++ b/clUtil/clContext.cpp @@ -15,7 +15,7 @@ cl::CLContext::CLContext(cl_device_id device) _ctx = clCreateContext(0, 1, &_device, NULL, NULL, &err); clCall(err); - _queue = clCreateCommandQueueWithProperties(_ctx, _device, 0, &err); + _queue = clCreateCommandQueueWithProperties(_ctx, _device, NULL, 
&err); clCall(err); } @@ -249,4 +249,4 @@ size_t cl::CLKernel::getWorkGroupSize() cl::CLKernel::~CLKernel() { clReleaseKernel(_kernel); -} \ No newline at end of file +} diff --git a/clUtil/clContext.h b/clUtil/clContext.h index 5e9e28d5..4b280279 100644 --- a/clUtil/clContext.h +++ b/clUtil/clContext.h @@ -57,9 +57,6 @@ class CLProgram { cl_program getProgram(); CLContext& getContext(); - - std::string getBuildLog(); - }; @@ -306,4 +303,4 @@ class CLKernel { } -#endif \ No newline at end of file +#endif diff --git a/util/util.h b/util/util.h index 239e7af3..5a0b4e39 100644 --- a/util/util.h +++ b/util/util.h @@ -27,6 +27,8 @@ std::string formatSeconds(unsigned int seconds); uint32_t parseUInt32(std::string s); uint64_t parseUInt64(std::string s); bool isHex(const std::string &s); + +long getFileSize(const std::string& fileName); bool appendToFile(const std::string &fileName, const std::string &s); bool readLinesFromStream(std::istream &in, std::vector &lines); bool readLinesFromStream(const std::string &fileName, std::vector &lines); From c58fa3da28dc5d16e0fbc49f744b7ca6df3c950f Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Thu, 3 Jun 2021 15:56:03 +0200 Subject: [PATCH 43/62] minor improvements --- CLKeySearchDevice/CLKeySearchDevice.cpp | 2 -- CLKeySearchDevice/keysearch.cl | 22 ++++++---------------- clUtil/clContext.cpp | 3 --- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index 93a4ad4f..cd4e2e47 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -231,7 +231,6 @@ void CLKeySearchDevice::doStep() _stepKernelWithDouble->set_args( _points, - _compression, _chain, _x, _y, @@ -247,7 +246,6 @@ void CLKeySearchDevice::doStep() _stepKernel->set_args( _points, - _compression, _chain, _x, _y, diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 9209a319..1629baae 100644 --- 
a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -29,7 +29,7 @@ bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ul ); } -void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) +inline void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { hOut[0] = endian(hIn[0] + 0xefcdab89); hOut[1] = endian(hIn[1] + 0x98badcfe); @@ -97,7 +97,7 @@ __kernel void multiplyStepKernel( } -void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut) +void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) { unsigned int hash[8]; @@ -113,10 +113,10 @@ void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut) hash[6] = endian(hash[6]); hash[7] = endian(hash[7]); - ripemd160sha256NoFinal(hash, digestOut); + ripemd160sha256NoFinal(hash, digest); } -void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* digestOut) +void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) { unsigned int hash[8]; @@ -132,15 +132,7 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* di hash[6] = endian(hash[6]); hash[7] = endian(hash[7]); - ripemd160sha256NoFinal(hash, digestOut); - -} - -void atomicListAdd(__global CLDeviceResult *results, __global unsigned int *numResults, CLDeviceResult *r) -{ - unsigned int count = atomic_add(numResults, 1); - - results[count] = *r; + ripemd160sha256NoFinal(hash, digest); } void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) @@ -176,12 +168,11 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned doRMD160FinalRound(digest, r.digest); - atomicListAdd(results, numResults, &r); + results[atomic_add(numResults, 1)] = r; } __kernel void keyFinderKernel( unsigned int totalPoints, - int compression, __global uint256_t* chain, __global 
uint256_t* xPtr, __global uint256_t* yPtr, @@ -263,7 +254,6 @@ __kernel void keyFinderKernel( __kernel void keyFinderKernelWithDouble( unsigned int totalPoints, - int compression, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, diff --git a/clUtil/clContext.cpp b/clUtil/clContext.cpp index b31b4823..a50eedb6 100644 --- a/clUtil/clContext.cpp +++ b/clUtil/clContext.cpp @@ -97,9 +97,6 @@ cl::CLProgram::CLProgram(cl::CLContext &ctx, std::string srcFile, std::string op options += " -DDEVICE_VENDOR_INTEL"; } - // disable optimization as codeXL shows it will result in higher throughput - options += " -O0"; - _prog = clCreateProgramWithSource(ctx.getContext(), 1, &ptr, &len, &err); clCall(err); From 3070ee397f80782a9e98eda8bd4a6ab3b90c2573 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Thu, 3 Jun 2021 17:05:09 +0200 Subject: [PATCH 44/62] more info --- KeyFinder/main.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index 42ff338b..c56a0a06 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -1,6 +1,8 @@ #include #include #include +#include + #include "KeyFinder.h" #include "AddressUtil.h" @@ -339,6 +341,8 @@ void readCheckpointFile() int run() { + Logger::log(LogLevel::Info, "BitCrackOpenCL"); + if(_config.device < 0 || _config.device >= _devices.size()) { Logger::log(LogLevel::Error, "device " + util::format(_config.device) + " does not exist"); return 1; @@ -369,6 +373,10 @@ int run() _config.pointsPerThread = params.pointsPerThread; } + Logger::log(LogLevel::Info, "Threads: " + std::to_string(_config.threads)); + Logger::log(LogLevel::Info, "Blocks: " + std::to_string(_config.blocks)); + Logger::log(LogLevel::Info, "Points per Thread: " + std::to_string(_config.pointsPerThread)); + // Get device context KeySearchDevice *keySearchDevice = getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread, _config.compressionMode); From 
daff067e1bab9b221f7adbb1aafa50e9e6207cca Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Fri, 4 Jun 2021 02:38:30 +0200 Subject: [PATCH 45/62] separate some structures --- CLKeySearchDevice/CLKeySearchDevice.vcxproj | 12 +- CLKeySearchDevice/Makefile | 2 +- CLKeySearchDevice/bitcoin.cl | 41 ++++++ CLKeySearchDevice/bitcrack.cl | 133 ++++++++++---------- CLKeySearchDevice/bloomfilter.cl | 14 +++ CLKeySearchDevice/keysearch.cl | 64 ---------- KeyFinder/main.cpp | 4 +- clMath/ripemd160.cl | 13 ++ clMath/secp256k1.cl | 1 - 9 files changed, 140 insertions(+), 144 deletions(-) create mode 100644 CLKeySearchDevice/bitcoin.cl create mode 100644 CLKeySearchDevice/bloomfilter.cl diff --git a/CLKeySearchDevice/CLKeySearchDevice.vcxproj b/CLKeySearchDevice/CLKeySearchDevice.vcxproj index 43b1a785..6ea2683d 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.vcxproj +++ b/CLKeySearchDevice/CLKeySearchDevice.vcxproj @@ -124,7 +124,7 @@ - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -139,7 +139,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl 
$(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -154,7 +154,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -173,7 +173,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -199,7 +199,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl 
$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp @@ -220,7 +220,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cltrue - type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl + type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl Embed bitcrack.cl into bitcrack_cl.cpp diff --git a/CLKeySearchDevice/Makefile b/CLKeySearchDevice/Makefile index 2da1746c..41b85dc0 100644 --- a/CLKeySearchDevice/Makefile +++ b/CLKeySearchDevice/Makefile @@ -2,7 +2,7 @@ NAME=CLKeySearchDevice CPPSRC:=$(wildcard *.cpp) all: - cat ../clMath/sha256.cl ../clMath/secp256k1.cl ../clMath/ripemd160.cl keysearch.cl > bitcrack.cl + cat ../clMath/sha256.cl ../clMath/secp256k1.cl ../clMath/ripemd160.cl bloomfilter.cl bitcoin.cl keysearch.cl > bitcrack.cl ${BINDIR}/embedcl bitcrack.cl bitcrack_cl.cpp _bitcrack_cl for file in ${CPPSRC} bitcrack_cl.cpp; do\ diff --git a/CLKeySearchDevice/bitcoin.cl b/CLKeySearchDevice/bitcoin.cl new file mode 100644 index 00000000..377dc30f --- /dev/null +++ b/CLKeySearchDevice/bitcoin.cl @@ -0,0 +1,41 @@ +#ifndef endian +#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) +#endif + +void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) +{ + unsigned int hash[8]; + + sha256PublicKeyCompressed(x.v, yParity, hash); + + // Swap to little-endian + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] 
= endian(hash[6]); + hash[7] = endian(hash[7]); + + ripemd160sha256NoFinal(hash, digest); +} + +void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) +{ + unsigned int hash[8]; + + sha256PublicKey(x.v, y.v, hash); + + // Swap to little-endian + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); + + ripemd160sha256NoFinal(hash, digest); +} diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 1a2ecdc0..6f875297 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1,6 +1,10 @@ #ifndef RIPEMD160_CL #define RIPEMD160_CL +#ifndef endian +#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) +#endif + __constant unsigned int RIPEMD160_IV[5] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0, }; @@ -297,6 +301,15 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) } +void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) +{ + hOut[0] = endian(hIn[0] + 0xefcdab89); + hOut[1] = endian(hIn[1] + 0x98badcfe); + hOut[2] = endian(hIn[2] + 0x10325476); + hOut[3] = endian(hIn[3] + 0xc3d2e1f0); + hOut[4] = endian(hIn[4] + 0x67452301); +} + #endif #ifndef SECP256K1_CL #define SECP256K1_CL @@ -651,7 +664,6 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) unsigned int product7 = 0; unsigned int tmp; - // 256 x 256 multiply multiply256(a, b, high, low); product_low[7] = low[7]; @@ -1504,22 +1516,47 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un digest[7] = h + _IV[7]; } #endif -#define COMPRESSED 0 -#define UNCOMPRESSED 1 -#define BOTH 2 - #ifndef endian #define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) #endif -typedef 
struct { - int idx; - bool compressed; - unsigned int x[8]; - unsigned int y[8]; - unsigned int digest[5]; -}CLDeviceResult; +void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) +{ + unsigned int hash[8]; + sha256PublicKeyCompressed(x.v, yParity, hash); + + // Swap to little-endian + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); + + ripemd160sha256NoFinal(hash, digest); +} + +void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) +{ + unsigned int hash[8]; + + sha256PublicKey(x.v, y.v, hash); + + // Swap to little-endian + hash[0] = endian(hash[0]); + hash[1] = endian(hash[1]); + hash[2] = endian(hash[2]); + hash[3] = endian(hash[3]); + hash[4] = endian(hash[4]); + hash[5] = endian(hash[5]); + hash[6] = endian(hash[6]); + hash[7] = endian(hash[7]); + + ripemd160sha256NoFinal(hash, digest); +} bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) { unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; @@ -1534,16 +1571,21 @@ bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ul ) ); } +#define COMPRESSED 0 +#define UNCOMPRESSED 1 +#define BOTH 2 -void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) -{ - hOut[0] = endian(hIn[0] + 0xefcdab89); - hOut[1] = endian(hIn[1] + 0x98badcfe); - hOut[2] = endian(hIn[2] + 0x10325476); - hOut[3] = endian(hIn[3] + 0xc3d2e1f0); - hOut[4] = endian(hIn[4] + 0x67452301); -} +#ifndef endian +#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) +#endif +typedef struct { + int idx; + bool compressed; + unsigned int x[8]; + unsigned int y[8]; + unsigned int digest[5]; +}CLDeviceResult; __kernel void multiplyStepKernel( int totalPoints, @@ -1602,53 +1644,6 @@ __kernel 
void multiplyStepKernel( } } - -void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut) -{ - unsigned int hash[8]; - - sha256PublicKey(x.v, y.v, hash); - - // Swap to little-endian - hash[0] = endian(hash[0]); - hash[1] = endian(hash[1]); - hash[2] = endian(hash[2]); - hash[3] = endian(hash[3]); - hash[4] = endian(hash[4]); - hash[5] = endian(hash[5]); - hash[6] = endian(hash[6]); - hash[7] = endian(hash[7]); - - ripemd160sha256NoFinal(hash, digestOut); -} - -void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* digestOut) -{ - unsigned int hash[8]; - - sha256PublicKeyCompressed(x.v, yParity, hash); - - // Swap to little-endian - hash[0] = endian(hash[0]); - hash[1] = endian(hash[1]); - hash[2] = endian(hash[2]); - hash[3] = endian(hash[3]); - hash[4] = endian(hash[4]); - hash[5] = endian(hash[5]); - hash[6] = endian(hash[6]); - hash[7] = endian(hash[7]); - - ripemd160sha256NoFinal(hash, digestOut); - -} - -void atomicListAdd(__global CLDeviceResult *results, __global unsigned int *numResults, CLDeviceResult *r) -{ - unsigned int count = atomic_add(numResults, 1); - - results[count] = *r; -} - void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) { CLDeviceResult r; @@ -1682,12 +1677,11 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned doRMD160FinalRound(digest, r.digest); - atomicListAdd(results, numResults, &r); + results[atomic_add(numResults, 1)] = r; } __kernel void keyFinderKernel( unsigned int totalPoints, - int compression, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, @@ -1769,7 +1763,6 @@ __kernel void keyFinderKernel( __kernel void keyFinderKernelWithDouble( unsigned int totalPoints, - int compression, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, diff --git a/CLKeySearchDevice/bloomfilter.cl 
b/CLKeySearchDevice/bloomfilter.cl new file mode 100644 index 00000000..278199ea --- /dev/null +++ b/CLKeySearchDevice/bloomfilter.cl @@ -0,0 +1,14 @@ +bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) +{ + unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; + + return (false == + ( + (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 || + (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0 + ) + ); +} diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 1629baae..69ccc516 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -14,31 +14,6 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) -{ - unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; - - return (false == - ( - (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 || - (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) % 32))) == 0 || - (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 || - (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 
32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 || - (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0 - ) - ); -} - -inline void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) -{ - hOut[0] = endian(hIn[0] + 0xefcdab89); - hOut[1] = endian(hIn[1] + 0x98badcfe); - hOut[2] = endian(hIn[2] + 0x10325476); - hOut[3] = endian(hIn[3] + 0xc3d2e1f0); - hOut[4] = endian(hIn[4] + 0x67452301); -} - - __kernel void multiplyStepKernel( int totalPoints, int step, @@ -96,45 +71,6 @@ __kernel void multiplyStepKernel( } } - -void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) -{ - unsigned int hash[8]; - - sha256PublicKey(x.v, y.v, hash); - - // Swap to little-endian - hash[0] = endian(hash[0]); - hash[1] = endian(hash[1]); - hash[2] = endian(hash[2]); - hash[3] = endian(hash[3]); - hash[4] = endian(hash[4]); - hash[5] = endian(hash[5]); - hash[6] = endian(hash[6]); - hash[7] = endian(hash[7]); - - ripemd160sha256NoFinal(hash, digest); -} - -void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) -{ - unsigned int hash[8]; - - sha256PublicKeyCompressed(x.v, yParity, hash); - - // Swap to little-endian - hash[0] = endian(hash[0]); - hash[1] = endian(hash[1]); - hash[2] = endian(hash[2]); - hash[3] = endian(hash[3]); - hash[4] = endian(hash[4]); - hash[5] = endian(hash[5]); - hash[6] = endian(hash[6]); - hash[7] = endian(hash[7]); - - ripemd160sha256NoFinal(hash, digest); -} - void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) { CLDeviceResult r; diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index c56a0a06..70638f2a 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -341,7 +341,7 @@ void readCheckpointFile() int run() { - Logger::log(LogLevel::Info, "BitCrackOpenCL"); 
+ Logger::log(LogLevel::Info, "BitCrackOpenCL\n"); if(_config.device < 0 || _config.device >= _devices.size()) { Logger::log(LogLevel::Error, "device " + util::format(_config.device) + " does not exist"); @@ -351,7 +351,7 @@ int run() Logger::log(LogLevel::Info, "Compression: " + getCompressionString(_config.compressionMode)); Logger::log(LogLevel::Info, "Starting at: " + _config.nextKey.toString()); Logger::log(LogLevel::Info, "Ending at: " + _config.endKey.toString()); - Logger::log(LogLevel::Info, "Counting by: " + _config.stride.toString()); + Logger::log(LogLevel::Info, "Counting by: " + _config.stride.toString() + "\n"); try { diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 828fd3fb..f3700216 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -1,6 +1,10 @@ #ifndef RIPEMD160_CL #define RIPEMD160_CL +#ifndef endian +#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) +#endif + __constant unsigned int RIPEMD160_IV[5] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0, }; @@ -297,4 +301,13 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) } +void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) +{ + hOut[0] = endian(hIn[0] + 0xefcdab89); + hOut[1] = endian(hIn[1] + 0x98badcfe); + hOut[2] = endian(hIn[2] + 0x10325476); + hOut[3] = endian(hIn[3] + 0xc3d2e1f0); + hOut[4] = endian(hIn[4] + 0x67452301); +} + #endif diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index f9562541..417b308c 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -351,7 +351,6 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) unsigned int product7 = 0; unsigned int tmp; - // 256 x 256 multiply multiply256(a, b, high, low); product_low[7] = low[7]; From 3828b813699ac53a8e996b4a4431ff33fc632e9d Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Fri, 4 Jun 2021 14:17:57 +0200 Subject: [PATCH 46/62] minor changes --- 
CLKeySearchDevice/CLKeySearchDevice.h | 5 ---- CLKeySearchDevice/bitcoin.cl | 9 +++++-- CLKeySearchDevice/bitcrack.cl | 35 +++++++++++++++------------ CLKeySearchDevice/bloomfilter.cl | 5 ++++ CLKeySearchDevice/keysearch.cl | 5 ---- clMath/ripemd160.cl | 10 ++++---- clMath/secp256k1.cl | 6 ++--- 7 files changed, 40 insertions(+), 35 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h index 14b3723b..5fa98448 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.h +++ b/CLKeySearchDevice/CLKeySearchDevice.h @@ -81,9 +81,6 @@ class CLKeySearchDevice : public KeySearchDevice { void initializeBasePoints(); - int getIndex(int block, int thread, int idx); - - void splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i); void splatBigInt(unsigned int *dest, int idx, secp256k1::uint256 &k); secp256k1::uint256 readBigInt(unsigned int *src, int idx); @@ -101,8 +98,6 @@ class CLKeySearchDevice : public KeySearchDevice { void removeTargetFromList(const unsigned int hash[5]); - uint32_t getPrivateKeyOffset(int thread, int block, int idx); - void initializeBloomFilter(const std::vector &targets, uint64_t mask); uint64_t getOptimalBloomFilterMask(double p, size_t n); diff --git a/CLKeySearchDevice/bitcoin.cl b/CLKeySearchDevice/bitcoin.cl index 377dc30f..8863dff0 100644 --- a/CLKeySearchDevice/bitcoin.cl +++ b/CLKeySearchDevice/bitcoin.cl @@ -1,10 +1,13 @@ +#ifndef BITCOIN_CL +#define BITCOIN_CL + #ifndef endian #define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) #endif void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) { - unsigned int hash[8]; + __private unsigned int hash[8]; sha256PublicKeyCompressed(x.v, yParity, hash); @@ -23,7 +26,7 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int dig void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) { - unsigned int 
hash[8]; + __private unsigned int hash[8]; sha256PublicKey(x.v, y.v, hash); @@ -39,3 +42,5 @@ void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) ripemd160sha256NoFinal(hash, digest); } + +#endif diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 6f875297..c762988c 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -303,11 +303,11 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { - hOut[0] = endian(hIn[0] + 0xefcdab89); - hOut[1] = endian(hIn[1] + 0x98badcfe); - hOut[2] = endian(hIn[2] + 0x10325476); - hOut[3] = endian(hIn[3] + 0xc3d2e1f0); - hOut[4] = endian(hIn[4] + 0x67452301); + hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]); + hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]); + hOut[2] = endian(hIn[2] + RIPEMD160_IV[3]); + hOut[3] = endian(hIn[3] + RIPEMD160_IV[4]); + hOut[4] = endian(hIn[4] + RIPEMD160_IV[0]); } #endif @@ -348,7 +348,7 @@ __constant unsigned int P[8] = { #ifdef DEVICE_VENDOR_INTEL // Intel devices have a mul_hi bug -unsigned int mul_hi977(unsigned int x) +inline unsigned int mul_hi977(unsigned int x) { unsigned int high = x >> 16; unsigned int low = x & 0xffff; @@ -357,7 +357,7 @@ unsigned int mul_hi977(unsigned int x) } // 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -367,7 +367,7 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in } #else -void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -1516,13 +1516,16 @@ void sha256PublicKeyCompressed(const unsigned 
int x[8], unsigned int yParity, un digest[7] = h + _IV[7]; } #endif +#ifndef BITCOIN_CL +#define BITCOIN_CL + #ifndef endian #define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) #endif void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) { - unsigned int hash[8]; + __private unsigned int hash[8]; sha256PublicKeyCompressed(x.v, yParity, hash); @@ -1541,7 +1544,7 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int dig void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) { - unsigned int hash[8]; + __private unsigned int hash[8]; sha256PublicKey(x.v, y.v, hash); @@ -1557,6 +1560,11 @@ void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) ripemd160sha256NoFinal(hash, digest); } + +#endif +#ifndef BLOOMFILTER_CL +#define BLOOMFILTER_CL + bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) { unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; @@ -1571,14 +1579,12 @@ bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ul ) ); } + +#endif #define COMPRESSED 0 #define UNCOMPRESSED 1 #define BOTH 2 -#ifndef endian -#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) -#endif - typedef struct { int idx; bool compressed; @@ -1605,7 +1611,6 @@ __kernel void multiplyStepKernel( gx = gxPtr[step]; gy = gyPtr[step]; - // Multiply together all (_Gx - x) and then invert uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; diff --git a/CLKeySearchDevice/bloomfilter.cl b/CLKeySearchDevice/bloomfilter.cl index 278199ea..5e0c715c 100644 --- a/CLKeySearchDevice/bloomfilter.cl +++ b/CLKeySearchDevice/bloomfilter.cl @@ -1,3 +1,6 @@ +#ifndef BLOOMFILTER_CL +#define BLOOMFILTER_CL + bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) { unsigned int h5 = hash[0] + hash[1] + hash[2] + 
hash[3] + hash[4]; @@ -12,3 +15,5 @@ bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ul ) ); } + +#endif diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 69ccc516..6aa50a44 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -2,10 +2,6 @@ #define UNCOMPRESSED 1 #define BOTH 2 -#ifndef endian -#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) -#endif - typedef struct { int idx; bool compressed; @@ -32,7 +28,6 @@ __kernel void multiplyStepKernel( gx = gxPtr[step]; gy = gyPtr[step]; - // Multiply together all (_Gx - x) and then invert uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index f3700216..001b3f67 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -303,11 +303,11 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { - hOut[0] = endian(hIn[0] + 0xefcdab89); - hOut[1] = endian(hIn[1] + 0x98badcfe); - hOut[2] = endian(hIn[2] + 0x10325476); - hOut[3] = endian(hIn[3] + 0xc3d2e1f0); - hOut[4] = endian(hIn[4] + 0x67452301); + hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]); + hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]); + hOut[2] = endian(hIn[2] + RIPEMD160_IV[3]); + hOut[3] = endian(hIn[3] + RIPEMD160_IV[4]); + hOut[4] = endian(hIn[4] + RIPEMD160_IV[0]); } #endif diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 417b308c..e91962db 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -35,7 +35,7 @@ __constant unsigned int P[8] = { #ifdef DEVICE_VENDOR_INTEL // Intel devices have a mul_hi bug -unsigned int mul_hi977(unsigned int x) +inline unsigned int mul_hi977(unsigned int x) { unsigned int high = x >> 16; unsigned int low = x & 0xffff; @@ -44,7 +44,7 @@ unsigned int mul_hi977(unsigned int x) } // 32 x 32 multiply-add -void 
madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; @@ -54,7 +54,7 @@ void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned in } #else -void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) +inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c) { *low = *a * 977; unsigned int tmp = *low + *c; From faa8505fb1afc36bb98f254b65658769fa12e40a Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 00:02:28 +0200 Subject: [PATCH 47/62] remove DeviceType --- KeyFinder/DeviceManager.cpp | 1 - KeyFinder/DeviceManager.h | 8 -------- KeyFinder/main.cpp | 6 +----- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/KeyFinder/DeviceManager.cpp b/KeyFinder/DeviceManager.cpp index 79ed9119..2497876c 100644 --- a/KeyFinder/DeviceManager.cpp +++ b/KeyFinder/DeviceManager.cpp @@ -12,7 +12,6 @@ std::vector DeviceManager::getDevices() for(size_t i = 0; i < clDevices.size(); i++) { DeviceManager::DeviceInfo device; device.name = clDevices[i].name; - device.type = DeviceType::OpenCL; device.id = i; device.physicalId = (uint64_t)clDevices[i].id; device.memory = clDevices[i].mem; diff --git a/KeyFinder/DeviceManager.h b/KeyFinder/DeviceManager.h index 122be59f..7d0693f1 100644 --- a/KeyFinder/DeviceManager.h +++ b/KeyFinder/DeviceManager.h @@ -18,15 +18,7 @@ class DeviceManagerException { } }; -class DeviceType { -public: - enum { - OpenCL - }; -}; - typedef struct { - int type; int id; // General device info diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index 70638f2a..c7d675e3 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -220,11 +220,7 @@ DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) static KeySearchDevice *getDeviceContext(DeviceManager::DeviceInfo 
&device, int blocks, int threads, int pointsPerThread, int compressionMode) { - if(device.type == DeviceManager::DeviceType::OpenCL) { - return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks, compressionMode); - } - - return NULL; + return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks, compressionMode); } static void printDeviceList(const std::vector &devices) From d4f4c6e0f2b13439deb893bafa5ecec53de88456 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 00:28:36 +0200 Subject: [PATCH 48/62] remove useless memory information --- KeyFinder/main.cpp | 14 +++----------- KeyFinderLib/KeyFinder.cpp | 9 --------- KeyFinderLib/KeySearchTypes.h | 3 --- 3 files changed, 3 insertions(+), 23 deletions(-) diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index c7d675e3..e5aebcd4 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -99,26 +99,18 @@ void statusCallback(KeySearchStatus info) std::string timeStr = "[" + util::formatSeconds((unsigned int)((_config.elapsed + info.totalTime) / 1000)) + "]"; - std::string usedMemStr = util::format((info.deviceMemory - info.freeMemory) /(1024 * 1024)); - - std::string totalMemStr = util::format(info.deviceMemory / (1024 * 1024)); - std::string targetStr = util::format(info.targets) + " target" + (info.targets > 1 ? 
"s" : ""); - // Fit device name in 16 characters, pad with spaces if less - std::string devName = info.deviceName.substr(0, 16); - devName += std::string(16 - devName.length(), ' '); - const char *formatStr = NULL; if(_config.follow) { - formatStr = "%s %s/%sMB | %s %s %s %s\n"; + formatStr = "%s %s %s %s\n"; } else { - formatStr = "\r%s %s / %sMB | %s %s %s %s"; + formatStr = "\r%s %s %s %s"; } - printf(formatStr, devName.c_str(), usedMemStr.c_str(), totalMemStr.c_str(), targetStr.c_str(), speedStr.c_str(), totalStr.c_str(), timeStr.c_str()); + printf(formatStr, targetStr.c_str(), speedStr.c_str(), totalStr.c_str(), timeStr.c_str()); if(_config.checkpointFile.length() > 0) { uint64_t t = util::getSystemTime(); diff --git a/KeyFinderLib/KeyFinder.cpp b/KeyFinderLib/KeyFinder.cpp index 58df9da9..0e41c076 100644 --- a/KeyFinderLib/KeyFinder.cpp +++ b/KeyFinderLib/KeyFinder.cpp @@ -186,15 +186,6 @@ void KeyFinder::run() info.totalTime = _totalTime; - uint64_t freeMem = 0; - - uint64_t totalMem = 0; - - _device->getMemoryInfo(freeMem, totalMem); - - info.freeMemory = freeMem; - info.deviceMemory = totalMem; - info.deviceName = _device->getDeviceName(); info.targets = _targets.size(); info.nextKey = getNextKey(); diff --git a/KeyFinderLib/KeySearchTypes.h b/KeyFinderLib/KeySearchTypes.h index 61dea7b1..cfe8a890 100644 --- a/KeyFinderLib/KeySearchTypes.h +++ b/KeyFinderLib/KeySearchTypes.h @@ -29,9 +29,6 @@ typedef struct { double speed; uint64_t total; uint64_t totalTime; - std::string deviceName; - uint64_t freeMemory; - uint64_t deviceMemory; uint64_t targets; secp256k1::uint256 nextKey; }KeySearchStatus; From 5e2e8d514afc3b95e174f1eba1cfbd9f2948f8c0 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 02:39:43 +0200 Subject: [PATCH 49/62] make it more userfriendly --- KeyFinder/main.cpp | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index 
e5aebcd4..8695f98c 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -163,29 +163,29 @@ void usage() printf("BitCrack OPTIONS [TARGETS]\n"); printf("Where TARGETS is one or more addresses\n\n"); - printf("--help Display this message\n"); - printf("-c, --compressed Use compressed points\n"); - printf("-u, --uncompressed Use Uncompressed points\n"); - printf("--compression MODE Specify compression where MODE is\n"); - printf(" COMPRESSED or UNCOMPRESSED or BOTH\n"); - printf("-d, --device ID Use device ID\n"); - printf("-b, --blocks N N blocks\n"); - printf("-t, --threads N N threads per block\n"); - printf("-p, --points N N points per thread\n"); - printf("-i, --in FILE Read addresses from FILE, one per line\n"); - printf("-o, --out FILE Write keys to FILE\n"); - printf("-f, --follow Follow text output\n"); - printf("--list-devices List available devices\n"); - printf("-k, --keyspace KEYSPACE Specify the keyspace:\n"); - printf(" START:END\n"); - printf(" START:+COUNT\n"); - printf(" START\n"); - printf(" :END\n"); - printf(" :+COUNT\n"); - printf(" Where START, END, COUNT are in hex format\n"); - printf("--stride N Increment by N keys at a time\n"); - printf("--share M/N Divide the keyspace into N equal shares, process the Mth share\n"); - printf("--continue FILE Save/load progress from FILE\n"); + printf("-?, -h, --help Display this message\n"); + printf("-c, --compressed Use compressed points\n"); + printf("-u, --uncompressed Use Uncompressed points\n"); + printf("--compression MODE Specify compression where MODE is\n"); + printf(" COMPRESSED or UNCOMPRESSED or BOTH\n"); + printf("-d, --device ID Use device ID\n"); + printf("-b, --blocks N N blocks\n"); + printf("-t, --threads N N threads per block\n"); + printf("-p, --points N N points per thread\n"); + printf("-i, --in FILE Read addresses from FILE, one per line\n"); + printf("-o, --out FILE Write keys to FILE\n"); + printf("-f, --follow Follow text output\n"); + printf("--list-devices List available 
devices\n"); + printf("-k, --keyspace KEYSPACE Specify the keyspace:\n"); + printf(" START:END\n"); + printf(" START:+COUNT\n"); + printf(" START\n"); + printf(" :END\n"); + printf(" :+COUNT\n"); + printf(" Where START, END, COUNT are in hex format\n"); + printf("--stride N Increment by N keys at a time\n"); + printf("--share M/N Divide the keyspace into N equal shares, process the Mth share\n"); + printf("--continue FILE Save/load progress from FILE\n"); } @@ -235,7 +235,7 @@ bool readAddressesFromFile(const std::string &fileName, std::vector } } -int parseCompressionString(const std::string &s) +PointCompressionType::Value parseCompressionString(const std::string &s) { std::string comp = util::toLower(s); @@ -263,9 +263,9 @@ static std::string getCompressionString(int mode) return "uncompressed"; case PointCompressionType::COMPRESSED: return "compressed"; + default: + throw std::string("Invalid compression setting '" + util::format(mode) + "'"); } - - throw std::string("Invalid compression setting '" + util::format(mode) + "'"); } void writeCheckpoint(secp256k1::uint256 nextKey) @@ -436,9 +436,12 @@ int main(int argc, char **argv) uint32_t shareIdx = 0; uint32_t numShares = 0; - // Catch --help first for(int i = 1; i < argc; i++) { - if(std::string(argv[i]) == "--help") { + if( + std::string(argv[i]) == "--help" || + std::string(argv[i]) == "-h" || + std::string(argv[i]) == "-?" 
+ ) { usage(); return 0; } @@ -463,7 +466,6 @@ int main(int argc, char **argv) return 0; } - CmdParse parser; parser.add("-d", "--device", true); parser.add("-t", "--threads", true); @@ -518,7 +520,7 @@ int main(int argc, char **argv) listDevices = true; } else if(optArg.equals("", "--continue")) { _config.checkpointFile = optArg.arg; - } else if(optArg.equals("", "--keyspace")) { + } else if(optArg.equals("-k", "--keyspace")) { secp256k1::uint256 start; secp256k1::uint256 end; @@ -633,7 +635,7 @@ int main(int argc, char **argv) _config.compressionMode = PointCompressionType::UNCOMPRESSED; } - if(_config.checkpointFile.length() > 0) { + if(_config.checkpointFile.length() != 0) { readCheckpointFile(); } From 2ad6449c75f45898bb12503ff5449797b9ede2fc Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 04:20:41 +0200 Subject: [PATCH 50/62] make threads depending on maxWorkingGroupSize --- KeyFinder/DeviceManager.cpp | 1 + KeyFinder/DeviceManager.h | 1 + KeyFinder/main.cpp | 2 +- clUtil/clUtil.cpp | 4 ++++ clUtil/clutil.h | 2 +- 5 files changed, 8 insertions(+), 2 deletions(-) diff --git a/KeyFinder/DeviceManager.cpp b/KeyFinder/DeviceManager.cpp index 2497876c..9e9e0817 100644 --- a/KeyFinder/DeviceManager.cpp +++ b/KeyFinder/DeviceManager.cpp @@ -16,6 +16,7 @@ std::vector DeviceManager::getDevices() device.physicalId = (uint64_t)clDevices[i].id; device.memory = clDevices[i].mem; device.computeUnits = clDevices[i].cores; + device.maxWorkingGroupSize = clDevices[i].maxWorkingGroupSize; devices.push_back(device); } } catch(cl::CLException ex) { diff --git a/KeyFinder/DeviceManager.h b/KeyFinder/DeviceManager.h index 7d0693f1..8a367ba3 100644 --- a/KeyFinder/DeviceManager.h +++ b/KeyFinder/DeviceManager.h @@ -26,6 +26,7 @@ typedef struct { std::string name; uint64_t memory; int computeUnits; + int maxWorkingGroupSize; }DeviceInfo; diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index 8695f98c..e069c938 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp 
@@ -202,7 +202,7 @@ typedef struct { DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) { DeviceParameters parameters; - parameters.threads = 256; + parameters.threads = device.maxWorkingGroupSize; parameters.blocks = 32; parameters.pointsPerThread = 32; parameters.compressionMode = PointCompressionType::COMPRESSED; diff --git a/clUtil/clUtil.cpp b/clUtil/clUtil.cpp index dfea8f1c..0f161522 100644 --- a/clUtil/clUtil.cpp +++ b/clUtil/clUtil.cpp @@ -50,6 +50,10 @@ std::vector cl::getDevices() info.cores = cores; + size_t maxWorkingGroupSize = 0; + clCall(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxWorkingGroupSize, NULL)); + info.maxWorkingGroupSize = maxWorkingGroupSize; + cl_ulong mem; clCall(clGetDeviceInfo(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem), &mem, NULL)); diff --git a/clUtil/clutil.h b/clUtil/clutil.h index c5a8e953..617a2b59 100644 --- a/clUtil/clutil.h +++ b/clUtil/clutil.h @@ -20,7 +20,7 @@ namespace cl { int cores; uint64_t mem; std::string name; - + size_t maxWorkingGroupSize; }CLDeviceInfo; class CLException { From ca5d1b5b61f2c92544ab3e02181fd7796913a520 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 05:19:37 +0200 Subject: [PATCH 51/62] remove selfTest --- CLKeySearchDevice/CLKeySearchDevice.cpp | 64 ++----------------------- CLKeySearchDevice/CLKeySearchDevice.h | 2 - 2 files changed, 5 insertions(+), 61 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index cd4e2e47..33caa905 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -14,7 +14,6 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; - static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { unsigned int iv[5] = { @@ -436,52 +435,6 @@ void CLKeySearchDevice::getResultsInternal() } } -void CLKeySearchDevice::selfTest() -{ - uint64_t numPoints = (uint64_t)_points; 
- std::vector privateKeys; - - // Generate key pairs for k, k+1, k+2 ... k + - secp256k1::uint256 privKey = _start; - - privateKeys.push_back(_start); - - for(uint64_t i = 1; i < numPoints; i++) { - privKey = privKey.add(_stride); - privateKeys.push_back(privKey); - } - - unsigned int *xBuf = new unsigned int[numPoints * 8]; - unsigned int *yBuf = new unsigned int[numPoints * 8]; - - _clContext->copyDeviceToHost(_x, xBuf, sizeof(unsigned int) * 8 * numPoints); - _clContext->copyDeviceToHost(_y, yBuf, sizeof(unsigned int) * 8 * numPoints); - - for(int index = 0; index < _points; index++) { - secp256k1::uint256 privateKey = privateKeys[index]; - - secp256k1::uint256 x = readBigInt(xBuf, index); - secp256k1::uint256 y = readBigInt(yBuf, index); - - secp256k1::ecpoint p1(x, y); - secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G()); - - if(!secp256k1::pointExists(p1)) { - throw std::string("Validation failed: invalid point"); - } - - if(!secp256k1::pointExists(p2)) { - throw std::string("Validation failed: invalid point"); - } - - if(!(p1 == p2)) { - throw std::string("Validation failed: points do not match"); - } - } -} - - - secp256k1::uint256 CLKeySearchDevice::readBigInt(unsigned int *src, int idx) { unsigned int value[8] = {0}; @@ -539,28 +492,21 @@ void CLKeySearchDevice::generateStartingPoints() uint64_t totalPoints = (uint64_t)_points; uint64_t totalMemory = totalPoints * 40; - std::vector exponents; - initializeBasePoints(); _pointsMemSize = totalPoints * sizeof(unsigned int) * 16 + _points * sizeof(unsigned int) * 8; Logger::log(LogLevel::Info, "Generating " + util::formatThousands(totalPoints) + " starting points (" + util::format("%.1f", (double)totalMemory / (double)(1024 * 1024)) + "MB)"); + unsigned int *privateKeys = new unsigned int[8 * totalPoints]; + // Generate key pairs for k, k+1, k+2 ... 
k + secp256k1::uint256 privKey = _start; - - exponents.push_back(privKey); - + splatBigInt(privateKeys, 0, privKey); + for(uint64_t i = 1; i < totalPoints; i++) { privKey = privKey.add(_stride); - exponents.push_back(privKey); - } - - unsigned int *privateKeys = new unsigned int[8 * totalPoints]; - - for(int index = 0; index < _points; index++) { - splatBigInt(privateKeys, index, exponents[index]); + splatBigInt(privateKeys, i, privKey); } // Copy to device diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h index 5fa98448..64db008e 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.h +++ b/CLKeySearchDevice/CLKeySearchDevice.h @@ -84,8 +84,6 @@ class CLKeySearchDevice : public KeySearchDevice { void splatBigInt(unsigned int *dest, int idx, secp256k1::uint256 &k); secp256k1::uint256 readBigInt(unsigned int *src, int idx); - void selfTest(); - bool _useBloomFilter = false; void setTargetsInternal(); From 6502006cab5c98710d439c9e093b69176320bba3 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 06:21:43 +0200 Subject: [PATCH 52/62] reduce memory footprint --- CLKeySearchDevice/CLKeySearchDevice.cpp | 23 ++++++++++++++--------- CLKeySearchDevice/bitcrack.cl | 2 +- CLKeySearchDevice/keysearch.cl | 2 +- secp256k1lib/secp256k1.cpp | 18 ++++++++++++------ 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index 33caa905..f1adfdaf 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -65,7 +65,7 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer _clProgram = new cl::CLProgram(*_clContext, _bitcrack_cl, options); // Load the kernels - _initKeysKernel = new cl::CLKernel(*_clProgram, "multiplyStepKernel"); + _initKeysKernel = new cl::CLKernel(*_clProgram, "_initKeysKernel"); _stepKernel = new cl::CLKernel(*_clProgram, "keyFinderKernel"); 
_stepKernelWithDouble = new cl::CLKernel(*_clProgram, "keyFinderKernelWithDouble"); @@ -454,39 +454,44 @@ void CLKeySearchDevice::initializeBasePoints() std::vector table; table.push_back(secp256k1::G()); - for(uint64_t i = 1; i < 256; i++) { + for (uint64_t i = 1; i < 256; i++) { secp256k1::ecpoint p = doublePoint(table[i - 1]); - if(!pointExists(p)) { +#ifdef DEBUG + if (!pointExists(p)) { throw std::string("Point does not exist!"); } +#endif table.push_back(p); } size_t count = 256; - unsigned int *tmpX = new unsigned int[count * 8]; - unsigned int *tmpY = new unsigned int[count * 8]; + unsigned int* tmpX = new unsigned int[count * 8]; + unsigned int* tmpY = new unsigned int[count * 8]; - for(int i = 0; i < 256; i++) { + for (int i = 0; i < 256; i++) { unsigned int bufX[8]; unsigned int bufY[8]; table[i].x.exportWords(bufX, 8, secp256k1::uint256::BigEndian); table[i].y.exportWords(bufY, 8, secp256k1::uint256::BigEndian); - for(int j = 0; j < 8; j++) { + for (int j = 0; j < 8; j++) { tmpX[i * 8 + j] = bufX[j]; tmpY[i * 8 + j] = bufY[j]; } } + table.clear(); + table.shrink_to_fit(); + _clContext->copyHostToDevice(tmpX, _xTable, count * 8 * sizeof(unsigned int)); + delete[] tmpX; _clContext->copyHostToDevice(tmpY, _yTable, count * 8 * sizeof(unsigned int)); + delete[] tmpY; } - - void CLKeySearchDevice::generateStartingPoints() { uint64_t totalPoints = (uint64_t)_points; diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index c762988c..3392464d 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1593,7 +1593,7 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -__kernel void multiplyStepKernel( +__kernel void _initKeysKernel( int totalPoints, int step, __global uint256_t* privateKeys, diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 6aa50a44..1cac7040 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -10,7 +10,7 @@ typedef struct { 
unsigned int digest[5]; }CLDeviceResult; -__kernel void multiplyStepKernel( +__kernel void _initKeysKernel( int totalPoints, int step, __global uint256_t* privateKeys, diff --git a/secp256k1lib/secp256k1.cpp b/secp256k1lib/secp256k1.cpp index c38f160f..e24fbe32 100644 --- a/secp256k1lib/secp256k1.cpp +++ b/secp256k1lib/secp256k1.cpp @@ -5,7 +5,6 @@ #include "CryptoUtil.h" #include "secp256k1.h" - using namespace secp256k1; static uint256 _ONE(1); @@ -780,7 +779,7 @@ void secp256k1::generateKeyPairsBulk(unsigned int count, const ecpoint &basePoin void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector &privKeys, std::vector &pubKeysOut) { - unsigned int count = (unsigned int)privKeys.size(); + size_t count = privKeys.size(); //privKeysOut.clear(); pubKeysOut.clear(); @@ -789,12 +788,14 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector table; table.push_back(basePoint); - for(int i = 1; i < 256; i++) { + for(size_t i = 1; i < 256; i++) { ecpoint p = doublePoint(table[i-1]); +#ifdef DEBUG if(!pointExists(p)) { throw std::string("Point does not exist!"); } +#endif table.push_back(p); } @@ -848,13 +849,18 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector Date: Sat, 5 Jun 2021 06:25:50 +0200 Subject: [PATCH 53/62] remove numTargets, rename kernels --- CLKeySearchDevice/CLKeySearchDevice.cpp | 6 ++---- CLKeySearchDevice/bitcrack.cl | 6 ++---- CLKeySearchDevice/keysearch.cl | 6 ++---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index f1adfdaf..ed66d63a 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -66,8 +66,8 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer // Load the kernels _initKeysKernel = new cl::CLKernel(*_clProgram, "_initKeysKernel"); - _stepKernel = new cl::CLKernel(*_clProgram, "keyFinderKernel"); - 
_stepKernelWithDouble = new cl::CLKernel(*_clProgram, "keyFinderKernelWithDouble"); + _stepKernel = new cl::CLKernel(*_clProgram, "_stepKernel"); + _stepKernelWithDouble = new cl::CLKernel(*_clProgram, "_stepKernelWithDouble"); _globalMemSize = _clContext->getGlobalMemorySize(); @@ -236,7 +236,6 @@ void CLKeySearchDevice::doStep() _xInc, _yInc, _deviceTargetList.ptr, - _deviceTargetList.size, _deviceTargetList.mask, _deviceResults, _deviceResultsCount); @@ -251,7 +250,6 @@ void CLKeySearchDevice::doStep() _xInc, _yInc, _deviceTargetList.ptr, - _deviceTargetList.size, _deviceTargetList.mask, _deviceResults, _deviceResultsCount); diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 3392464d..50f85d0e 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1685,7 +1685,7 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned results[atomic_add(numResults, 1)] = r; } -__kernel void keyFinderKernel( +__kernel void _stepKernel( unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, @@ -1693,7 +1693,6 @@ __kernel void keyFinderKernel( __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) @@ -1766,7 +1765,7 @@ __kernel void keyFinderKernel( } } -__kernel void keyFinderKernelWithDouble( +__kernel void _stepKernelWithDouble( unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, @@ -1774,7 +1773,6 @@ __kernel void keyFinderKernelWithDouble( __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 1cac7040..d53bda4b 100644 --- a/CLKeySearchDevice/keysearch.cl +++ 
b/CLKeySearchDevice/keysearch.cl @@ -102,7 +102,7 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned results[atomic_add(numResults, 1)] = r; } -__kernel void keyFinderKernel( +__kernel void _stepKernel( unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, @@ -110,7 +110,6 @@ __kernel void keyFinderKernel( __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) @@ -183,7 +182,7 @@ __kernel void keyFinderKernel( } } -__kernel void keyFinderKernelWithDouble( +__kernel void _stepKernelWithDouble( unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, @@ -191,7 +190,6 @@ __kernel void keyFinderKernelWithDouble( __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong numTargets, ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) From 1144ad6c00f81879feb1fb1731c839ce284ab4e1 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 08:18:24 +0200 Subject: [PATCH 54/62] modify a little --- CLKeySearchDevice/bitcrack.cl | 69 +++++++++++++++------------------- CLKeySearchDevice/keysearch.cl | 66 +++++++++++++++----------------- clMath/ripemd160.cl | 3 +- 3 files changed, 62 insertions(+), 76 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 50f85d0e..56fe396b 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -298,10 +298,9 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) digest[2] = digest1[2] + digest2[2]; digest[3] = digest1[3] + digest2[3]; digest[4] = digest1[4] + digest2[4]; - } -void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) +void ripemd160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]); 
hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]); @@ -1593,6 +1592,36 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; +void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) +{ + CLDeviceResult r; + + r.idx = idx; + r.compressed = compressed; + + r.x[0] = x.v[0]; + r.x[1] = x.v[1]; + r.x[2] = x.v[2]; + r.x[3] = x.v[3]; + r.x[4] = x.v[4]; + r.x[5] = x.v[5]; + r.x[6] = x.v[6]; + r.x[7] = x.v[7]; + + r.y[0] = y.v[0]; + r.y[1] = y.v[1]; + r.y[2] = y.v[2]; + r.y[3] = y.v[3]; + r.y[4] = y.v[4]; + r.y[5] = y.v[5]; + r.y[6] = y.v[6]; + r.y[7] = y.v[7]; + + ripemd160FinalRound(digest, r.digest); + + results[atomic_add(numResults, 1)] = r; +} + __kernel void _initKeysKernel( int totalPoints, int step, @@ -1649,42 +1678,6 @@ __kernel void _initKeysKernel( } } -void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) -{ - CLDeviceResult r; - - r.idx = idx; - r.compressed = compressed; - - r.x[0] = x.v[0]; - r.y[0] = y.v[0]; - - r.x[1] = x.v[1]; - r.y[1] = y.v[1]; - - r.x[2] = x.v[2]; - r.y[2] = y.v[2]; - - r.x[3] = x.v[3]; - r.y[3] = y.v[3]; - - r.x[4] = x.v[4]; - r.y[4] = y.v[4]; - - r.x[5] = x.v[5]; - r.y[5] = y.v[5]; - - r.x[6] = x.v[6]; - r.y[6] = y.v[6]; - - r.x[7] = x.v[7]; - r.y[7] = y.v[7]; - - doRMD160FinalRound(digest, r.digest); - - results[atomic_add(numResults, 1)] = r; -} - __kernel void _stepKernel( unsigned int totalPoints, __global uint256_t* chain, diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index d53bda4b..0618de3c 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -10,6 +10,36 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; +void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, 
__global unsigned int* numResults) +{ + CLDeviceResult r; + + r.idx = idx; + r.compressed = compressed; + + r.x[0] = x.v[0]; + r.x[1] = x.v[1]; + r.x[2] = x.v[2]; + r.x[3] = x.v[3]; + r.x[4] = x.v[4]; + r.x[5] = x.v[5]; + r.x[6] = x.v[6]; + r.x[7] = x.v[7]; + + r.y[0] = y.v[0]; + r.y[1] = y.v[1]; + r.y[2] = y.v[2]; + r.y[3] = y.v[3]; + r.y[4] = y.v[4]; + r.y[5] = y.v[5]; + r.y[6] = y.v[6]; + r.y[7] = y.v[7]; + + ripemd160FinalRound(digest, r.digest); + + results[atomic_add(numResults, 1)] = r; +} + __kernel void _initKeysKernel( int totalPoints, int step, @@ -66,42 +96,6 @@ __kernel void _initKeysKernel( } } -void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) -{ - CLDeviceResult r; - - r.idx = idx; - r.compressed = compressed; - - r.x[0] = x.v[0]; - r.y[0] = y.v[0]; - - r.x[1] = x.v[1]; - r.y[1] = y.v[1]; - - r.x[2] = x.v[2]; - r.y[2] = y.v[2]; - - r.x[3] = x.v[3]; - r.y[3] = y.v[3]; - - r.x[4] = x.v[4]; - r.y[4] = y.v[4]; - - r.x[5] = x.v[5]; - r.y[5] = y.v[5]; - - r.x[6] = x.v[6]; - r.y[6] = y.v[6]; - - r.x[7] = x.v[7]; - r.y[7] = y.v[7]; - - doRMD160FinalRound(digest, r.digest); - - results[atomic_add(numResults, 1)] = r; -} - __kernel void _stepKernel( unsigned int totalPoints, __global uint256_t* chain, diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 001b3f67..3799a957 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -298,10 +298,9 @@ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) digest[2] = digest1[2] + digest2[2]; digest[3] = digest1[3] + digest2[3]; digest[4] = digest1[4] + digest2[4]; - } -void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) +void ripemd160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) { hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]); hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]); From 2a9a5bd9b7842db2bf1a2b655e51113522adbf54 Mon Sep 17 00:00:00 
2001 From: Uzlopak Date: Sat, 5 Jun 2021 09:45:51 +0200 Subject: [PATCH 55/62] minor changes --- CLKeySearchDevice/CLKeySearchDevice.cpp | 4 +- CLKeySearchDevice/bitcoin.cl | 4 +- CLKeySearchDevice/bitcrack.cl | 186 +++++++++++++----------- CLKeySearchDevice/bloomfilter.cl | 2 +- CLKeySearchDevice/keysearch.cl | 23 ++- clMath/ripemd160.cl | 24 +-- clMath/secp256k1.cl | 121 ++++++++------- clMath/sha256.cl | 12 +- 8 files changed, 207 insertions(+), 169 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index ed66d63a..6d45bf56 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -212,9 +212,7 @@ void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, c generateStartingPoints(); // Set the incrementor - secp256k1::ecpoint g = secp256k1::G(); - secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_points ) * _stride, g); - + secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_points ) * _stride, secp256k1::G()); setIncrementor(p); } catch(cl::CLException ex) { throw KeySearchException(ex.msg, ex.description); diff --git a/CLKeySearchDevice/bitcoin.cl b/CLKeySearchDevice/bitcoin.cl index 8863dff0..3da6dbb3 100644 --- a/CLKeySearchDevice/bitcoin.cl +++ b/CLKeySearchDevice/bitcoin.cl @@ -5,7 +5,7 @@ #define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) #endif -void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) +void hashPublicKeyCompressed(const uint256_t x, const unsigned int yParity, unsigned int digest[5]) { __private unsigned int hash[8]; @@ -24,7 +24,7 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int dig ripemd160sha256NoFinal(hash, digest); } -void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) +void hashPublicKey(const uint256_t x, const uint256_t y, unsigned 
int digest[5]) { __private unsigned int hash[8]; diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 56fe396b..81ced315 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -77,11 +77,11 @@ __constant unsigned int K[8] = { void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV[0]; - unsigned int b = RIPEMD160_IV[1]; - unsigned int c = RIPEMD160_IV[2]; - unsigned int d = RIPEMD160_IV[3]; - unsigned int e = RIPEMD160_IV[4]; + __private unsigned int a = RIPEMD160_IV[0]; + __private unsigned int b = RIPEMD160_IV[1]; + __private unsigned int c = RIPEMD160_IV[2]; + __private unsigned int d = RIPEMD160_IV[3]; + __private unsigned int e = RIPEMD160_IV[4]; /* round 1 */ FF(a, b, c, d, e, x[0], 11); @@ -182,11 +182,11 @@ void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV[0]; - unsigned int b = RIPEMD160_IV[1]; - unsigned int c = RIPEMD160_IV[2]; - unsigned int d = RIPEMD160_IV[3]; - unsigned int e = RIPEMD160_IV[4]; + __private unsigned int a = RIPEMD160_IV[0]; + __private unsigned int b = RIPEMD160_IV[1]; + __private unsigned int c = RIPEMD160_IV[2]; + __private unsigned int d = RIPEMD160_IV[3]; + __private unsigned int e = RIPEMD160_IV[4]; /* parallel round 1 */ JJJ(a, b, c, d, e, x[5], 8); @@ -287,8 +287,8 @@ void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - unsigned int digest1[5]; - unsigned int digest2[5]; + __private unsigned int digest1[5]; + __private unsigned int digest2[5]; ripemd160p1(x, digest1); ripemd160p2(x, digest2); @@ -440,7 +440,7 @@ inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsi void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { - unsigned long 
product; + __private unsigned long product; // First round, overwrite z product = (unsigned long)x[7] * y[7]; @@ -653,15 +653,15 @@ void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) { - unsigned int high[8]; - unsigned int low[8]; + __private unsigned int high[8]; + __private unsigned int low[8]; - unsigned int hWord = 0; - unsigned int carry = 0; - unsigned int t = 0; - unsigned int product6 = 0; - unsigned int product7 = 0; - unsigned int tmp; + __private unsigned int hWord = 0; + __private unsigned int carry = 0; + __private unsigned int t = 0; + __private unsigned int product6 = 0; + __private unsigned int product7 = 0; + __private unsigned int tmp; // 256 x 256 multiply multiply256(a, b, high, low); @@ -740,8 +740,8 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) */ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) { - unsigned int borrow = 0; - unsigned int tmp; + __private unsigned int borrow = 0; + __private unsigned int tmp; sub256k(a, b, c, borrow, tmp); @@ -756,7 +756,7 @@ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) */ void invModP256k(unsigned int x[8]) { - unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + __private unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; mulModP(x, y, y); mulModP(x, x, x); @@ -787,11 +787,11 @@ void invModP256k(unsigned int x[8]) mulModP(x, y, x); } -void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) +void addModP256k(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) { - unsigned int borrow = 0; - unsigned int carry = 0; - unsigned int tmp = 0; + __private unsigned int borrow = 0; + __private unsigned int carry = 0; + __private unsigned int tmp = 0; add256k(a, b, c, carry, tmp); @@ -826,12 +826,18 @@ void doBatchInverse256k(unsigned int x[8]) invModP256k(x); } -void beginBatchAdd256k(uint256_t 
px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) -{ - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); +void beginBatchAdd256k( + const uint256_t px, + const uint256_t x, + __global uint256_t* chain, + const int i, + const int batchIdx, + uint256_t* inverse +) { + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); - unsigned int t[8]; + __private unsigned int t[8]; // x = Gx - x subModP256k(px.v, x.v, t); @@ -844,11 +850,18 @@ void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int chain[batchIdx * dim + gid] = *inverse; } -void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) -{ - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - uint256_t x = xPtr[i]; +void beginBatchAddWithDouble256k( + const uint256_t px, + const uint256_t py, + __global uint256_t* xPtr, + __global uint256_t* chain, + const int i, + const int batchIdx, + uint256_t* inverse +) { + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); + __private uint256_t x = xPtr[i]; if(equal256k(px.v, x.v)) { addModP256k(py.v,py.v, x.v); @@ -865,24 +878,24 @@ void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* } void completeBatchAdd256k( - uint256_t px, - uint256_t py, + const uint256_t px, + const uint256_t py, __global uint256_t* xPtr, __global uint256_t* yPtr, - int i, - int batchIdx, + const int i, + const int batchIdx, __global uint256_t* chain, uint256_t* inverse, uint256_t* newX, uint256_t* newY) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - uint256_t x = xPtr[i]; - uint256_t y = yPtr[i]; + __private int gid = get_local_size(0) * 
get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); + __private uint256_t x = xPtr[i]; + __private uint256_t y = yPtr[i]; uint256_t s; - unsigned int tmp[8]; + __private unsigned int tmp[8]; if(batchIdx != 0) { uint256_t c; @@ -914,29 +927,29 @@ void completeBatchAdd256k( void completeBatchAddWithDouble256k( - uint256_t px, - uint256_t py, + const uint256_t px, + const uint256_t py, __global const uint256_t* xPtr, __global const uint256_t* yPtr, - int i, - int batchIdx, + const int i, + const int batchIdx, __global uint256_t* chain, uint256_t* inverse, uint256_t* newX, uint256_t* newY) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - uint256_t s; - uint256_t x; - uint256_t y; + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); + __private uint256_t s; + __private uint256_t x; + __private uint256_t y; x = xPtr[i]; y = yPtr[i]; if(batchIdx >= 1) { - uint256_t c; + __private uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; mulModP(inverse->v, c.v, s.v); @@ -957,8 +970,8 @@ void completeBatchAddWithDouble256k( if(equal256k(px.v, x.v)) { // currently s = 1 / 2y - uint256_t x2; - uint256_t tx2; + __private uint256_t x2; + __private uint256_t tx2; // 3x^2 mulModP(x.v, x.v, x2.v); @@ -969,7 +982,7 @@ void completeBatchAddWithDouble256k( mulModP(tx2.v, s.v, s.v); // s^2 - uint256_t s2; + __private uint256_t s2; mulModP(s.v, s.v, s2.v); // Rx = s^2 - 2px @@ -977,38 +990,38 @@ void completeBatchAddWithDouble256k( subModP256k(newX->v, x.v, newX->v); // Ry = s(px - rx) - py - uint256_t k; + __private uint256_t k; subModP256k(px.v, newX->v, k.v); mulModP(s.v, k.v, newY->v); subModP256k(newY->v, py.v,newY->v); } else { - uint256_t rise; + __private uint256_t rise; subModP256k(py.v, y.v, rise.v); mulModP(rise.v, s.v, s.v); // Rx = s^2 - Gx - Qx - uint256_t s2; + __private uint256_t s2; mulModP(s.v, s.v, s2.v); subModP256k(s2.v, px.v, 
newX->v); subModP256k(newX->v, x.v,newX->v); // Ry = s(px - rx) - py - uint256_t k; + __private uint256_t k; subModP256k(px.v, newX->v, k.v); mulModP(s.v, k.v, newY->v); subModP256k(newY->v, py.v, newY->v); } } -unsigned int readLSW256k(__global const uint256_t* ara, int idx) +unsigned int readLSW256k(__global const uint256_t* ara, const int idx) { return ara[idx].v[7]; } -unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) +unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word) { return ara[idx].v[word]; } @@ -1056,9 +1069,9 @@ __constant unsigned int _IV[8] = { void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8]) { - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - unsigned int t; + __private unsigned int a, b, c, d, e, f, g, h; + __private unsigned int w[16]; + __private unsigned int t; a = _IV[0]; b = _IV[1]; @@ -1359,9 +1372,9 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) { - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - unsigned int t; + __private unsigned int a, b, c, d, e, f, g, h; + __private unsigned int w[16]; + __private unsigned int t; // 0x03 || x or 0x02 || x w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8); @@ -1522,7 +1535,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un #define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) #endif -void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int digest[5]) +void hashPublicKeyCompressed(const uint256_t x, const unsigned int yParity, unsigned int digest[5]) { __private unsigned int hash[8]; @@ -1541,7 +1554,7 @@ void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int dig ripemd160sha256NoFinal(hash, digest); } -void 
hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) +void hashPublicKey(const uint256_t x, const uint256_t y, unsigned int digest[5]) { __private unsigned int hash[8]; @@ -1564,7 +1577,7 @@ void hashPublicKey(uint256_t x, uint256_t y, unsigned int digest[5]) #ifndef BLOOMFILTER_CL #define BLOOMFILTER_CL -bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) +bool isInBloomFilter(const unsigned int hash[5], __global unsigned int *targetList, const ulong *mask) { unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; @@ -1592,8 +1605,15 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) -{ +void setResultFound( + const int idx, + const bool compressed, + const uint256_t x, + const uint256_t y, + const unsigned int digest[5], + __global CLDeviceResult* results, + __global unsigned int* numResults +) { CLDeviceResult r; r.idx = idx; @@ -1623,8 +1643,8 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned } __kernel void _initKeysKernel( - int totalPoints, - int step, + const unsigned int totalPoints, + const unsigned int step, __global uint256_t* privateKeys, __global uint256_t* chain, __global uint256_t* gxPtr, @@ -1679,14 +1699,14 @@ __kernel void _initKeysKernel( } __kernel void _stepKernel( - unsigned int totalPoints, + const unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong mask, + const ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) { @@ -1759,14 +1779,14 @@ __kernel void _stepKernel( } __kernel void _stepKernelWithDouble( - unsigned int totalPoints, + const unsigned int totalPoints, __global uint256_t* chain, 
__global uint256_t* xPtr, __global uint256_t* yPtr, __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong mask, + const ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) { diff --git a/CLKeySearchDevice/bloomfilter.cl b/CLKeySearchDevice/bloomfilter.cl index 5e0c715c..3e6265f1 100644 --- a/CLKeySearchDevice/bloomfilter.cl +++ b/CLKeySearchDevice/bloomfilter.cl @@ -1,7 +1,7 @@ #ifndef BLOOMFILTER_CL #define BLOOMFILTER_CL -bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong *mask) +bool isInBloomFilter(const unsigned int hash[5], __global unsigned int *targetList, const ulong *mask) { unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 0618de3c..ca598580 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -10,8 +10,15 @@ typedef struct { unsigned int digest[5]; }CLDeviceResult; -void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults) -{ +void setResultFound( + const int idx, + const bool compressed, + const uint256_t x, + const uint256_t y, + const unsigned int digest[5], + __global CLDeviceResult* results, + __global unsigned int* numResults +) { CLDeviceResult r; r.idx = idx; @@ -41,8 +48,8 @@ void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned } __kernel void _initKeysKernel( - int totalPoints, - int step, + const unsigned int totalPoints, + const unsigned int step, __global uint256_t* privateKeys, __global uint256_t* chain, __global uint256_t* gxPtr, @@ -97,14 +104,14 @@ __kernel void _initKeysKernel( } __kernel void _stepKernel( - unsigned int totalPoints, + const unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, __global uint256_t* 
incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong mask, + const ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) { @@ -177,14 +184,14 @@ __kernel void _stepKernel( } __kernel void _stepKernelWithDouble( - unsigned int totalPoints, + const unsigned int totalPoints, __global uint256_t* chain, __global uint256_t* xPtr, __global uint256_t* yPtr, __global uint256_t* incXPtr, __global uint256_t* incYPtr, __global unsigned int* targetList, - ulong mask, + const ulong mask, __global CLDeviceResult *results, __global unsigned int *numResults) { diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl index 3799a957..97bd6249 100644 --- a/clMath/ripemd160.cl +++ b/clMath/ripemd160.cl @@ -77,11 +77,11 @@ __constant unsigned int K[8] = { void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV[0]; - unsigned int b = RIPEMD160_IV[1]; - unsigned int c = RIPEMD160_IV[2]; - unsigned int d = RIPEMD160_IV[3]; - unsigned int e = RIPEMD160_IV[4]; + __private unsigned int a = RIPEMD160_IV[0]; + __private unsigned int b = RIPEMD160_IV[1]; + __private unsigned int c = RIPEMD160_IV[2]; + __private unsigned int d = RIPEMD160_IV[3]; + __private unsigned int e = RIPEMD160_IV[4]; /* round 1 */ FF(a, b, c, d, e, x[0], 11); @@ -182,11 +182,11 @@ void ripemd160p1(const unsigned int x[8], unsigned int digest[5]) void ripemd160p2(const unsigned int x[8], unsigned int digest[5]) { - unsigned int a = RIPEMD160_IV[0]; - unsigned int b = RIPEMD160_IV[1]; - unsigned int c = RIPEMD160_IV[2]; - unsigned int d = RIPEMD160_IV[3]; - unsigned int e = RIPEMD160_IV[4]; + __private unsigned int a = RIPEMD160_IV[0]; + __private unsigned int b = RIPEMD160_IV[1]; + __private unsigned int c = RIPEMD160_IV[2]; + __private unsigned int d = RIPEMD160_IV[3]; + __private unsigned int e = RIPEMD160_IV[4]; /* parallel round 1 */ JJJ(a, b, c, d, e, x[5], 8); @@ -287,8 +287,8 @@ void ripemd160p2(const unsigned int x[8], 
unsigned int digest[5]) void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) { - unsigned int digest1[5]; - unsigned int digest2[5]; + __private unsigned int digest1[5]; + __private unsigned int digest2[5]; ripemd160p1(x, digest1); ripemd160p2(x, digest2); diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index e91962db..578c4bb5 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -128,7 +128,7 @@ inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsi void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8]) { - unsigned long product; + __private unsigned long product; // First round, overwrite z product = (unsigned long)x[7] * y[7]; @@ -341,15 +341,15 @@ void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) { - unsigned int high[8]; - unsigned int low[8]; + __private unsigned int high[8]; + __private unsigned int low[8]; - unsigned int hWord = 0; - unsigned int carry = 0; - unsigned int t = 0; - unsigned int product6 = 0; - unsigned int product7 = 0; - unsigned int tmp; + __private unsigned int hWord = 0; + __private unsigned int carry = 0; + __private unsigned int t = 0; + __private unsigned int product6 = 0; + __private unsigned int product7 = 0; + __private unsigned int tmp; // 256 x 256 multiply multiply256(a, b, high, low); @@ -428,8 +428,8 @@ void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8]) */ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) { - unsigned int borrow = 0; - unsigned int tmp; + __private unsigned int borrow = 0; + __private unsigned int tmp; sub256k(a, b, c, borrow, tmp); @@ -444,7 +444,7 @@ void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) */ void invModP256k(unsigned int x[8]) { - unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + __private 
unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; mulModP(x, y, y); mulModP(x, x, x); @@ -475,11 +475,11 @@ void invModP256k(unsigned int x[8]) mulModP(x, y, x); } -void addModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8]) +void addModP256k(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) { - unsigned int borrow = 0; - unsigned int carry = 0; - unsigned int tmp = 0; + __private unsigned int borrow = 0; + __private unsigned int carry = 0; + __private unsigned int tmp = 0; add256k(a, b, c, carry, tmp); @@ -514,12 +514,18 @@ void doBatchInverse256k(unsigned int x[8]) invModP256k(x); } -void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) -{ - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); +void beginBatchAdd256k( + const uint256_t px, + const uint256_t x, + __global uint256_t* chain, + const int i, + const int batchIdx, + uint256_t* inverse +) { + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); - unsigned int t[8]; + __private unsigned int t[8]; // x = Gx - x subModP256k(px.v, x.v, t); @@ -532,11 +538,18 @@ void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int chain[batchIdx * dim + gid] = *inverse; } -void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse) -{ - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - uint256_t x = xPtr[i]; +void beginBatchAddWithDouble256k( + const uint256_t px, + const uint256_t py, + __global uint256_t* xPtr, + __global uint256_t* chain, + const int i, + const int batchIdx, + uint256_t* inverse +) { + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); + __private uint256_t x = xPtr[i]; 
if(equal256k(px.v, x.v)) { addModP256k(py.v,py.v, x.v); @@ -553,24 +566,24 @@ void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* } void completeBatchAdd256k( - uint256_t px, - uint256_t py, + const uint256_t px, + const uint256_t py, __global uint256_t* xPtr, __global uint256_t* yPtr, - int i, - int batchIdx, + const int i, + const int batchIdx, __global uint256_t* chain, uint256_t* inverse, uint256_t* newX, uint256_t* newY) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - uint256_t x = xPtr[i]; - uint256_t y = yPtr[i]; + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); + __private uint256_t x = xPtr[i]; + __private uint256_t y = yPtr[i]; uint256_t s; - unsigned int tmp[8]; + __private unsigned int tmp[8]; if(batchIdx != 0) { uint256_t c; @@ -602,29 +615,29 @@ void completeBatchAdd256k( void completeBatchAddWithDouble256k( - uint256_t px, - uint256_t py, + const uint256_t px, + const uint256_t py, __global const uint256_t* xPtr, __global const uint256_t* yPtr, - int i, - int batchIdx, + const int i, + const int batchIdx, __global uint256_t* chain, uint256_t* inverse, uint256_t* newX, uint256_t* newY) { - int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - uint256_t s; - uint256_t x; - uint256_t y; + __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); + __private int dim = get_global_size(0); + __private uint256_t s; + __private uint256_t x; + __private uint256_t y; x = xPtr[i]; y = yPtr[i]; if(batchIdx >= 1) { - uint256_t c; + __private uint256_t c; c = chain[(batchIdx - 1) * dim + gid]; mulModP(inverse->v, c.v, s.v); @@ -645,8 +658,8 @@ void completeBatchAddWithDouble256k( if(equal256k(px.v, x.v)) { // currently s = 1 / 2y - uint256_t x2; - uint256_t tx2; + __private uint256_t x2; + __private uint256_t tx2; // 3x^2 mulModP(x.v, x.v, x2.v); @@ 
-657,7 +670,7 @@ void completeBatchAddWithDouble256k( mulModP(tx2.v, s.v, s.v); // s^2 - uint256_t s2; + __private uint256_t s2; mulModP(s.v, s.v, s2.v); // Rx = s^2 - 2px @@ -665,38 +678,38 @@ void completeBatchAddWithDouble256k( subModP256k(newX->v, x.v, newX->v); // Ry = s(px - rx) - py - uint256_t k; + __private uint256_t k; subModP256k(px.v, newX->v, k.v); mulModP(s.v, k.v, newY->v); subModP256k(newY->v, py.v,newY->v); } else { - uint256_t rise; + __private uint256_t rise; subModP256k(py.v, y.v, rise.v); mulModP(rise.v, s.v, s.v); // Rx = s^2 - Gx - Qx - uint256_t s2; + __private uint256_t s2; mulModP(s.v, s.v, s2.v); subModP256k(s2.v, px.v, newX->v); subModP256k(newX->v, x.v,newX->v); // Ry = s(px - rx) - py - uint256_t k; + __private uint256_t k; subModP256k(px.v, newX->v, k.v); mulModP(s.v, k.v, newY->v); subModP256k(newY->v, py.v, newY->v); } } -unsigned int readLSW256k(__global const uint256_t* ara, int idx) +unsigned int readLSW256k(__global const uint256_t* ara, const int idx) { return ara[idx].v[7]; } -unsigned int readWord256k(__global const uint256_t* ara, int idx, int word) +unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word) { return ara[idx].v[word]; } diff --git a/clMath/sha256.cl b/clMath/sha256.cl index 0e4833f0..4c8ffd6f 100644 --- a/clMath/sha256.cl +++ b/clMath/sha256.cl @@ -40,9 +40,9 @@ __constant unsigned int _IV[8] = { void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8]) { - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - unsigned int t; + __private unsigned int a, b, c, d, e, f, g, h; + __private unsigned int w[16]; + __private unsigned int t; a = _IV[0]; b = _IV[1]; @@ -343,9 +343,9 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) { - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - unsigned int t; + 
__private unsigned int a, b, c, d, e, f, g, h; + __private unsigned int w[16]; + __private unsigned int t; // 0x03 || x or 0x02 || x w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8); From c19fba0b306f5d5cac5ad0be4a14dfe73a11db17 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 11:22:52 +0200 Subject: [PATCH 56/62] simplify --- CLKeySearchDevice/bitcrack.cl | 61 +++++----------------------------- CLKeySearchDevice/keysearch.cl | 61 +++++----------------------------- 2 files changed, 18 insertions(+), 104 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 81ced315..86792f83 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1722,46 +1722,24 @@ __kernel void _stepKernel( unsigned int digest[5]; -#ifdef COMPRESSION_UNCOMPRESSED for(; i < totalPoints; i += dim) { + +#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } - - beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); - batchIdx++; - } -#elif COMPRESSION_BOTH - for(; i < totalPoints; i += dim) { - hashPublicKey(xPtr[i], yPtr[i], digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); - } - - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); - } - - beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); - batchIdx++; - } -#else - for(; i < totalPoints; i += dim) { +#endif +#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, 
numResults); } - +#endif beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); batchIdx++; } -#endif doBatchInverse256k(inverse.v); @@ -1802,45 +1780,24 @@ __kernel void _stepKernelWithDouble( int batchIdx = 0; unsigned int digest[5]; -#ifdef COMPRESSION_UNCOMPRESSED for(; i < totalPoints; i += dim) { +#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } - - beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); - batchIdx++; - } -#elif COMPRESSION_BOTH - for(; i < totalPoints; i += dim) { - hashPublicKey(xPtr[i], yPtr[i], digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); - } - - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); - } - - beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); - batchIdx++; - } -#else - for(; i < totalPoints; i += dim) { +#endif +#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } +#endif beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } -#endif doBatchInverse256k(inverse.v); diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index ca598580..3853cf20 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -127,46 +127,25 @@ __kernel void _stepKernel( unsigned int digest[5]; -#ifdef COMPRESSION_UNCOMPRESSED for(; i < totalPoints; i += dim) { + +#if defined(COMPRESSION_UNCOMPRESSED) || 
defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } - - beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); - batchIdx++; - } -#elif COMPRESSION_BOTH - for(; i < totalPoints; i += dim) { - hashPublicKey(xPtr[i], yPtr[i], digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); - } - - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); - } - - beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); - batchIdx++; - } -#else - for(; i < totalPoints; i += dim) { +#endif +#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } - +#endif beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse); batchIdx++; } -#endif doBatchInverse256k(inverse.v); @@ -207,45 +186,23 @@ __kernel void _stepKernelWithDouble( int batchIdx = 0; unsigned int digest[5]; -#ifdef COMPRESSION_UNCOMPRESSED for(; i < totalPoints; i += dim) { +#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } - - beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); - batchIdx++; - } -#elif COMPRESSION_BOTH - for(; i < totalPoints; i += dim) { - hashPublicKey(xPtr[i], yPtr[i], digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); - } - - 
hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); - } - - beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); - batchIdx++; - } -#else - for(; i < totalPoints; i += dim) { +#endif +#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } +#endif beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } -#endif doBatchInverse256k(inverse.v); From 513e161a9010f7340cd70ade86c2ec2389a73e22 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 16:16:13 +0200 Subject: [PATCH 57/62] remove _stepKernelWithDouble --- CLKeySearchDevice/CLKeySearchDevice.cpp | 45 ++++++------------- CLKeySearchDevice/CLKeySearchDevice.h | 1 - CLKeySearchDevice/keysearch.cl | 57 ------------------------- 3 files changed, 12 insertions(+), 91 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index 6d45bf56..9b23b279 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -67,7 +67,6 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer // Load the kernels _initKeysKernel = new cl::CLKernel(*_clProgram, "_initKeysKernel"); _stepKernel = new cl::CLKernel(*_clProgram, "_stepKernel"); - _stepKernelWithDouble = new cl::CLKernel(*_clProgram, "_stepKernelWithDouble"); _globalMemSize = _clContext->getGlobalMemorySize(); @@ -91,7 +90,6 @@ CLKeySearchDevice::~CLKeySearchDevice() _clContext->free(_deviceResultsCount); delete _stepKernel; - delete _stepKernelWithDouble; delete _initKeysKernel; delete _clContext; } @@ -222,37 +220,18 @@ void CLKeySearchDevice::init(const 
secp256k1::uint256 &start, int compression, c void CLKeySearchDevice::doStep() { try { - uint64_t numKeys = (uint64_t)_points; - - if(_iterations < 2 && _start.cmp(numKeys) <= 0) { - - _stepKernelWithDouble->set_args( - _points, - _chain, - _x, - _y, - _xInc, - _yInc, - _deviceTargetList.ptr, - _deviceTargetList.mask, - _deviceResults, - _deviceResultsCount); - _stepKernelWithDouble->call(_blocks, _threads); - } else { - - _stepKernel->set_args( - _points, - _chain, - _x, - _y, - _xInc, - _yInc, - _deviceTargetList.ptr, - _deviceTargetList.mask, - _deviceResults, - _deviceResultsCount); - _stepKernel->call(_blocks, _threads); - } + _stepKernel->set_args( + _points, + _chain, + _x, + _y, + _xInc, + _yInc, + _deviceTargetList.ptr, + _deviceTargetList.mask, + _deviceResults, + _deviceResultsCount); + _stepKernel->call(_blocks, _threads); fflush(stdout); getResultsInternal(); diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h index 64db008e..396b7deb 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.h +++ b/CLKeySearchDevice/CLKeySearchDevice.h @@ -18,7 +18,6 @@ class CLKeySearchDevice : public KeySearchDevice { cl::CLProgram *_clProgram = NULL; cl::CLKernel *_initKeysKernel = NULL; cl::CLKernel *_stepKernel = NULL; - cl::CLKernel *_stepKernelWithDouble = NULL; uint64_t _globalMemSize = 0; uint64_t _pointsMemSize = 0; diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 3853cf20..b657b59a 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -161,60 +161,3 @@ __kernel void _stepKernel( yPtr[i] = newY; } } - -__kernel void _stepKernelWithDouble( - const unsigned int totalPoints, - __global uint256_t* chain, - __global uint256_t* xPtr, - __global uint256_t* yPtr, - __global uint256_t* incXPtr, - __global uint256_t* incYPtr, - __global unsigned int* targetList, - const ulong mask, - __global CLDeviceResult *results, - __global unsigned int *numResults) -{ - int i = 
get_local_size(0) * get_group_id(0) + get_local_id(0); - int dim = get_global_size(0); - - uint256_t incX = *incXPtr; - uint256_t incY = *incYPtr; - - // Multiply together all (_Gx - x) and then invert - uint256_t inverse = { {0,0,0,0,0,0,0,1} }; - - int batchIdx = 0; - unsigned int digest[5]; - - for(; i < totalPoints; i += dim) { -#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) - hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); - } -#endif -#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - if(isInBloomFilter(digest, targetList, &mask)) { - setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); - } -#endif - - beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse); - batchIdx++; - } - - doBatchInverse256k(inverse.v); - - i -= dim; - - uint256_t newX; - uint256_t newY; - for(; i >= 0; i -= dim) { - batchIdx--; - completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); - - xPtr[i] = newX; - yPtr[i] = newY; - } -} From 7d5644157a45bbd7dc4ea280bc2b49ce706a02a5 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 16:16:41 +0200 Subject: [PATCH 58/62] format --- util/util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/util.cpp b/util/util.cpp index 2034330f..c7bbdf2d 100644 --- a/util/util.cpp +++ b/util/util.cpp @@ -8,7 +8,7 @@ #include"util.h" #ifdef _WIN32 -#include +#include #else #include #include From 72f40f9c5ed7e52e1b2cb22c470dcd37e7c35337 Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 17:19:41 +0200 Subject: [PATCH 59/62] minor formatting --- CLKeySearchDevice/bitcrack.cl | 13 +++---------- CLKeySearchDevice/keysearch.cl | 7 +++---- clMath/secp256k1.cl | 6 ------ 3 files changed, 6 insertions(+), 20 deletions(-) diff 
--git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 86792f83..de5464fa 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -925,7 +925,6 @@ void completeBatchAdd256k( subModP256k(newY->v, py.v, newY->v); } - void completeBatchAddWithDouble256k( const uint256_t px, const uint256_t py, @@ -1016,11 +1015,6 @@ void completeBatchAddWithDouble256k( } } -unsigned int readLSW256k(__global const uint256_t* ara, const int idx) -{ - return ara[idx].v[7]; -} - unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word) { return ara[idx].v[word]; @@ -1726,13 +1720,12 @@ __kernel void _stepKernel( #if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } #endif #if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); + hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -1781,15 +1774,15 @@ __kernel void _stepKernelWithDouble( unsigned int digest[5]; for(; i < totalPoints; i += dim) { + #if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } #endif #if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); + hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index 
3853cf20..deccb549 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -131,14 +131,12 @@ __kernel void _stepKernel( #if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); - if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults); } #endif #if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); - + hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } @@ -187,6 +185,7 @@ __kernel void _stepKernelWithDouble( unsigned int digest[5]; for(; i < totalPoints; i += dim) { + #if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH) hashPublicKey(xPtr[i], yPtr[i], digest); if(isInBloomFilter(digest, targetList, &mask)) { @@ -194,7 +193,7 @@ __kernel void _stepKernelWithDouble( } #endif #if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH) - hashPublicKeyCompressed(xPtr[i], readLSW256k(yPtr, i), digest); + hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest); if(isInBloomFilter(digest, targetList, &mask)) { setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults); } diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl index 578c4bb5..95c5556f 100644 --- a/clMath/secp256k1.cl +++ b/clMath/secp256k1.cl @@ -613,7 +613,6 @@ void completeBatchAdd256k( subModP256k(newY->v, py.v, newY->v); } - void completeBatchAddWithDouble256k( const uint256_t px, const uint256_t py, @@ -704,11 +703,6 @@ void completeBatchAddWithDouble256k( } } -unsigned int readLSW256k(__global const uint256_t* ara, const int idx) -{ - return ara[idx].v[7]; -} - unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word) { return ara[idx].v[word]; From e923ee64a87095811be1bf34f759d1396f24a26b Mon Sep 17 
00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 17:25:23 +0200 Subject: [PATCH 60/62] remove unnecessary variable --- CLKeySearchDevice/bitcrack.cl | 8 ++------ CLKeySearchDevice/keysearch.cl | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index de5464fa..32ac980f 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1657,12 +1657,10 @@ __kernel void _initKeysKernel( uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; - uint256_t x; for(; i < totalPoints; i += dim) { if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { - x = xPtr[i]; - if(!isInfinity256k(x.v)) { + if(!isInfinity256k(xPtr[i].v)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } @@ -1675,10 +1673,8 @@ __kernel void _initKeysKernel( uint256_t newY; i -= dim; for(; i >= 0; i -= dim) { - x = xPtr[i]; - if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { - if(!isInfinity256k(x.v)) { + if(!isInfinity256k(xPtr[i].v)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); } else { diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl index deccb549..ae559b2b 100644 --- a/CLKeySearchDevice/keysearch.cl +++ b/CLKeySearchDevice/keysearch.cl @@ -68,12 +68,10 @@ __kernel void _initKeysKernel( uint256_t inverse = { {0,0,0,0,0,0,0,1} }; int batchIdx = 0; - uint256_t x; for(; i < totalPoints; i += dim) { if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) { - x = xPtr[i]; - if(!isInfinity256k(x.v)) { + if(!isInfinity256k(xPtr[i].v)) { beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse); batchIdx++; } @@ -86,10 +84,8 @@ __kernel void _initKeysKernel( uint256_t newY; i -= dim; for(; i >= 0; i -= dim) { - x = xPtr[i]; - if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 
<< (step % 32))) != 0) { - if(!isInfinity256k(x.v)) { + if(!isInfinity256k(xPtr[i].v)) { batchIdx--; completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY); } else { From 5c4b8640c2a77c2d16387f23a3dfd9d41b60e95a Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 18:38:22 +0200 Subject: [PATCH 61/62] use again 256 threads by default as it throws in a nvidia P620, have to dig deeper --- KeyFinder/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp index e069c938..8695f98c 100644 --- a/KeyFinder/main.cpp +++ b/KeyFinder/main.cpp @@ -202,7 +202,7 @@ typedef struct { DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device) { DeviceParameters parameters; - parameters.threads = device.maxWorkingGroupSize; + parameters.threads = 256; parameters.blocks = 32; parameters.pointsPerThread = 32; parameters.compressionMode = PointCompressionType::COMPRESSED; From 0f76bc2bdbc3f58c1bd7ba32e95f677623c43bcf Mon Sep 17 00:00:00 2001 From: Uzlopak Date: Sat, 5 Jun 2021 19:41:34 +0200 Subject: [PATCH 62/62] expose memory usage, ignore invalid Addresses --- CLKeySearchDevice/CLKeySearchDevice.cpp | 54 ++++++++++--------------- CLKeySearchDevice/CLKeySearchDevice.h | 1 + KeyFinderLib/KeyFinder.cpp | 12 +++--- 3 files changed, 30 insertions(+), 37 deletions(-) diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp index 6d45bf56..1fecd98a 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.cpp +++ b/CLKeySearchDevice/CLKeySearchDevice.cpp @@ -36,7 +36,6 @@ CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPer _points = pointsPerThread * threads * blocks; _device = (cl_device_id)device; - if(threads <= 0 || threads % 32 != 0) { throw KeySearchException("KEYSEARCH_THREAD_MULTIPLE_EXCEPTION", "The number of threads must be a multiple of 32"); } @@ -108,6 +107,9 @@ uint64_t 
CLKeySearchDevice::getOptimalBloomFilterMask(double p, size_t n) void CLKeySearchDevice::initializeBloomFilter(const std::vector &targets, uint64_t mask) { size_t sizeInWords = (mask + 1) / 32; + _targetMemSize = sizeInWords * sizeof(uint32_t); + + Logger::log(LogLevel::Info, "Initializing BloomFilter (" + util::format("%.1f", (double)_targetMemSize / (double)(1024 * 1024)) + "MB)"); uint32_t *buf = new uint32_t[sizeInWords]; @@ -140,13 +142,10 @@ void CLKeySearchDevice::initializeBloomFilter(const std::vector } } - - _targetMemSize = sizeInWords * sizeof(uint32_t); - _deviceTargetList.mask = mask; - _deviceTargetList.ptr = _clContext->malloc(sizeInWords * sizeof(uint32_t)); + _deviceTargetList.ptr = _clContext->malloc(_targetMemSize); _deviceTargetList.size = targets.size(); - _clContext->copyHostToDevice(buf, _deviceTargetList.ptr, sizeInWords * sizeof(uint32_t)); + _clContext->copyHostToDevice(buf, _deviceTargetList.ptr, _targetMemSize); delete[] buf; } @@ -156,6 +155,20 @@ void CLKeySearchDevice::allocateBuffers() size_t numKeys = (size_t)_points; size_t size = numKeys * 8 * sizeof(unsigned int); + _bufferMemSize = + size + // _x + size + // _y + size + // _chain + size + // _privateKeys + 256 * 8 * sizeof(unsigned int) + // _xTable + 256 * 8 * sizeof(unsigned int) + // _yTable + 8 * sizeof(unsigned int) + // _xInc + 8 * sizeof(unsigned int) + // _yInc + 128 * sizeof(CLDeviceResult) + // _deviceResults + sizeof(unsigned int); // _deviceResultsCount + + Logger::log(LogLevel::Info, "Allocating Memory for Buffers (" + util::format("%.1f", (double)_bufferMemSize / (double)(1024 * 1024)) + "MB)"); + // X values _x = _clContext->malloc(size); _clContext->memset(_x, -1, size); @@ -263,25 +276,6 @@ void CLKeySearchDevice::doStep() } } -void CLKeySearchDevice::setTargetsList() -{ - size_t count = _targetList.size(); - - _targets = _clContext->malloc(5 * sizeof(unsigned int) * count); - - for(size_t i = 0; i < count; i++) { - unsigned int h[5]; - - 
undoRMD160FinalRound(_targetList[i].h, h); - - _clContext->copyHostToDevice(h, _targets, i * 5 * sizeof(unsigned int), 5 * sizeof(unsigned int)); - } - - _targetMemSize = count * 5 * sizeof(unsigned int); - _deviceTargetList.ptr = _targets; - _deviceTargetList.size = count; -} - void CLKeySearchDevice::setBloomFilter() { uint64_t bloomFilterMask = getOptimalBloomFilterMask(1.0e-9, _targetList.size()); @@ -296,11 +290,7 @@ void CLKeySearchDevice::setTargetsInternal() _clContext->free(_deviceTargetList.ptr); } - if(_targetList.size() < 16) { - setTargetsList(); - } else { - setBloomFilter(); - } + setBloomFilter(); } void CLKeySearchDevice::setTargets(const std::set &targets) @@ -342,7 +332,7 @@ std::string CLKeySearchDevice::getDeviceName() void CLKeySearchDevice::getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem) { - freeMem = _globalMemSize - _targetMemSize - _pointsMemSize; + freeMem = _globalMemSize - _targetMemSize - _pointsMemSize - _bufferMemSize; totalMem = _globalMemSize; } @@ -383,7 +373,6 @@ void CLKeySearchDevice::removeTargetFromList(const unsigned int hash[5]) } } - void CLKeySearchDevice::getResultsInternal() { unsigned int numResults = 0; @@ -491,6 +480,7 @@ void CLKeySearchDevice::initializeBasePoints() void CLKeySearchDevice::generateStartingPoints() { uint64_t totalPoints = (uint64_t)_points; + // TODO: Magic Number 40? 
uint64_t totalMemory = totalPoints * 40; initializeBasePoints(); diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h index 64db008e..758b7443 100644 --- a/CLKeySearchDevice/CLKeySearchDevice.h +++ b/CLKeySearchDevice/CLKeySearchDevice.h @@ -22,6 +22,7 @@ class CLKeySearchDevice : public KeySearchDevice { uint64_t _globalMemSize = 0; uint64_t _pointsMemSize = 0; + uint64_t _bufferMemSize = 0; uint64_t _targetMemSize = 0; CLTargetList _deviceTargetList; diff --git a/KeyFinderLib/KeyFinder.cpp b/KeyFinderLib/KeyFinder.cpp index 0e41c076..0f142805 100644 --- a/KeyFinderLib/KeyFinder.cpp +++ b/KeyFinderLib/KeyFinder.cpp @@ -60,6 +60,7 @@ void KeyFinder::setTargets(std::vector &targets) void KeyFinder::setTargets(std::string targetsFile) { std::ifstream inFile(targetsFile.c_str()); + unsigned int invalidAddressCount = 0; if(!inFile.is_open()) { Logger::log(LogLevel::Error, "Unable to open '" + targetsFile + "'"); @@ -74,10 +75,10 @@ void KeyFinder::setTargets(std::string targetsFile) util::removeNewline(line); line = util::trim(line); - if(line.length() > 0) { + if(line.length() != 0) { if(!Address::verifyAddress(line)) { - Logger::log(LogLevel::Error, "Invalid address '" + line + "'"); - throw KeySearchException(); + invalidAddressCount++; + continue; } KeySearchTarget t; @@ -87,8 +88,9 @@ void KeyFinder::setTargets(std::string targetsFile) _targets.insert(t); } } - Logger::log(LogLevel::Info, util::formatThousands(_targets.size()) + " addresses loaded (" - + util::format("%.1f", (double)(sizeof(KeySearchTarget) * _targets.size()) / (double)(1024 * 1024)) + "MB)"); + Logger::log(LogLevel::Info, util::formatThousands(_targets.size()) + " address(es) loaded (" + + util::format("%.1f", (double)(sizeof(KeySearchTarget) * _targets.size()) / (double)(1024 * 1024)) + "MB)" + + "\n" + util::formatThousands(invalidAddressCount) + " address(es) ignored"); _device->setTargets(_targets); }