commit 0d7a0db3035397779fee0f624e423bb44d5bf7be Author: Aram Date: Tue Apr 12 20:55:22 2016 +0200 first commit diff --git a/0xcafec0de.bin b/0xcafec0de.bin new file mode 100644 index 0000000..8139d18 Binary files /dev/null and b/0xcafec0de.bin differ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..534bf8a --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +all: + gcc -O3 craptev1-v1.0/solve.c -fPIC -shared -o solve.so + gcc -O3 -mpopcnt -std=c99 solve_bs.c crypto1_bs.c crypto1_bs_crack.c -Icraptev1-v1.0 craptev1-v1.0/craptev1.c crapto1-v3.3/crapto1.c ./solve.so -o solve_bs -lpthread + gcc -O3 -mpopcnt -std=c99 solve_piwi_bs.c crypto1_bs.c crypto1_bs_crack.c -Icraptev1-v1.0 craptev1-v1.0/craptev1.c crapto1-v3.3/crapto1.c -o solve_piwi_bs -lpthread + gcc -O3 -mpopcnt solve_piwi.c -I craptev1-v1.0 craptev1-v1.0/craptev1.c -o solve_piwi -lpthread + +clean: + rm solve.so solve_bs solve_piwi_bs solve_piwi + +get_craptev1: + wget http://crapto1.netgarage.org/craptev1-v1.0.tar.xz + tar Jxvf craptev1-v1.0.tar.xz + +get_crapto1: + wget http://crapto1.netgarage.org/crapto1-v3.3.tar.xz + mkdir crapto1-v3.3 + tar Jxvf crapto1-v3.3.tar.xz -C crapto1-v3.3 + diff --git a/README.md b/README.md new file mode 100644 index 0000000..f03ab3b --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +Bitsliced Crypto-1 brute-forcer +=============================== + +A pure C(99) implementation of the [Crypto-1 cipher](https://en.wikipedia.org/wiki/Crypto-1) using the method of [bitslicing](https://en.wikipedia.org/wiki/Bit_slicing), which uses GNU vector extensions to be portable across SSE/AVX/AVX2 supporting architectures while offering the highest amount of possible parallelism. 
+ + +Background +---------- + +I wrote this as a patch for [piwi's implementation](https://github.com/pwpiwi/proxmark3/tree/hard_nested/) of the research documented in [Ciphertext-only cryptanalysis on Hardened Mifare Classic cards](http://www.cs.ru.nl/~rverdult/Ciphertext-only_Cryptanalysis_on_Hardened_Mifare_Classic_Cards-CCS_2015.pdf) after reading (most of) the paper, while it was still under [active development](http://www.proxmark.org/forum/viewtopic.php?id=2120). +The final patch is included as `pwpiwi_proxmark3_hard_nested.patch`. + +Later on, another implementation of the same attack surfaced, [CraptEV1](http://crapto1.netgarage.org/). +I managed to gather some great tricks from that code, which unfortunately is off-line now (and has a license forbidding redistribution). +This also allowed me to compare my Crypto-1 implementation to a finished brute-forcer, and eventually I managed to significantly beat CraptEV1's (great) performance. + +Tools +----- + +If you want to use the following stand-alone binaries, you will need the original CraptEV1 / Crapto1 source packages. +For convenience, and because redistribution of CraptEV1 is not allowed, I've added make targets `get_craptev1` and `get_crapto1` to fetch and extract these packages to the current working directory. +I have included a conversion of the test file `0xcafec0de.txt` included in the CraptEV1 package to the binary format used by the `proxmark3/hard_nested` branch. + +`solve_bs` is analogous to CraptEV1 `solve` and works on .txt files using the bitsliced Crypto-1 cracker + + $ ./solve_bs craptev1-v1.0/0xcafec0de.txt 0xcafec0de + +`solve_piwi` uses CraptEV1 on .bin files as gathered by piwi's PM3 code + + $ ./solve_piwi 0xcafec0de.bin + +`solve_piwi_bs` does the same but uses the bitsliced cracker + + $ ./solve_piwi_bs 0xcafec0de.bin + + +Special thanks to Carlo Meijer, Roel Verdult, piwi and bla. 
+ diff --git a/crypto1_bs.c b/crypto1_bs.c new file mode 100644 index 0000000..cc68246 --- /dev/null +++ b/crypto1_bs.c @@ -0,0 +1,94 @@ +// Bit-sliced Crypto-1 implementation (C) 2015 by Aram Verstegen +// The cipher states are stored with the least significant bit first, hence all bit indexes are reversed here + +#include "crypto1_bs.h" + +// The following functions use this global or thread-local state +// It is sized to fit exactly KEYSTREAM_SIZE more states next to the initial state +__thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE]; +__thread bitslice_t * restrict state_p; + +void crypto1_bs_init(){ + // initialize constant one and zero bit vectors + memset(bs_ones.bytes, 0xff, VECTOR_SIZE); + memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE); +} + +// The following functions have side effects on 48 bitslices at the state_p pointer +// use the crypto1_bs_rewind_* macros to (re-)initialize them as needed + +inline const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted){ + bitslice_value_t feedback = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ + state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ + state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ + state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); + const bitslice_value_t ks_bits = crypto1_bs_f20(state_p); + if(is_encrypted){ + feedback ^= ks_bits; + } + state_p--; + state_p[0].value = feedback ^ input; + return ks_bits; +} + +inline const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted){ + bitslice_value_t feedout = state_p[0].value; + state_p++; + const bitslice_value_t ks_bits = crypto1_bs_f20(state_p); + if(is_encrypted){ + feedout ^= ks_bits; + } + const bitslice_value_t feedback = (feedout ^ state_p[47- 
5].value ^ state_p[47- 9].value ^ + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ + state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ + state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ + state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); + state_p[47].value = feedback ^ input; + return ks_bits; +} + +// side-effect free from here on +// note that bytes are sliced and unsliced with reversed endianness +inline void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]){ + size_t bit_idx = 0, slice_idx = 0; + state_t values[MAX_BITSLICES]; + for(slice_idx = 0; slice_idx < MAX_BITSLICES; slice_idx++){ + for(bit_idx = 0; bit_idx < STATE_SIZE; bit_idx++){ + bool bit = get_vector_bit(slice_idx, bitsliced_states[bit_idx]); + values[slice_idx].value <<= 1; + values[slice_idx].value |= bit; + } + // swap endianness + values[slice_idx].value = rev_state_t(values[slice_idx].value); + // roll off unused bits + values[slice_idx].value >>= ((sizeof(state_t)*8)-STATE_SIZE); + } + memcpy(regular_states, values, sizeof(values)); +} + +// bitslice a value +void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len){ + // load nonce bytes with unswapped endianness + size_t bit_idx; + for(bit_idx = 0; bit_idx < bit_len; bit_idx++){ + bool bit = get_bit(bit_len-1-bit_idx, rev32(value)); + if(bit){ + bitsliced_value[bit_idx].value = bs_ones.value; + } else { + bitsliced_value[bit_idx].value = bs_zeroes.value; + } + } +} + +void crypto1_bs_print_states(bitslice_t bitsliced_states[]){ + size_t slice_idx = 0; + state_t values[MAX_BITSLICES]; + crypto1_bs_convert_states(bitsliced_states, values); + for(slice_idx = 0; slice_idx < MAX_BITSLICES; slice_idx++){ + printf("State %03lu: %012lx\n", slice_idx, values[slice_idx].value); + } +} + diff --git a/crypto1_bs.h b/crypto1_bs.h new 
file mode 100644 index 0000000..8f33274 --- /dev/null +++ b/crypto1_bs.h @@ -0,0 +1,99 @@ +#ifndef _CRYPTO1_BS_H +#define _CRYPTO1_BS_H +#include +#include +#include +#include +#include +#include + +// bitslice type +// while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic +// same for AVX2 and 512 bit vectors +// using larger vectors works but seems to generate more register pressure +#if defined(__AVX2__) +#define MAX_BITSLICES 256 +#elif defined(__AVX__) +#define MAX_BITSLICES 128 +#elif defined(__SSE2__) +#define MAX_BITSLICES 128 +#else +#define MAX_BITSLICES 64 +#endif + +#define VECTOR_SIZE (MAX_BITSLICES/8) +typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t; +typedef union { + bitslice_value_t value; + uint64_t bytes64[MAX_BITSLICES/64]; + uint8_t bytes[MAX_BITSLICES/8]; +} bitslice_t; + +// filter function (f20) +// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur +#define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d))) +#define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d))) +#define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e)))) + +#define crypto1_bs_f20(s) \ +f20c(f20a((s[47- 9].value), (s[47-11].value), (s[47-13].value), (s[47-15].value)), \ + f20b((s[47-17].value), (s[47-19].value), (s[47-21].value), (s[47-23].value)), \ + f20b((s[47-25].value), (s[47-27].value), (s[47-29].value), (s[47-31].value)), \ + f20a((s[47-33].value), (s[47-35].value), (s[47-37].value), (s[47-39].value)), \ + f20b((s[47-41].value), (s[47-43].value), (s[47-45].value), (s[47-47].value))) + +// bit indexing +#define get_bit(n, word) ((word >> (n)) & 1) +#define get_vector_bit(slice, value) get_bit(slice&0x3f, value.bytes64[slice>>6]) + +// constant ones/zeroes +bitslice_t bs_ones; +bitslice_t bs_zeroes; + +// size of crypto-1 state +#define STATE_SIZE 48 +// size of 
nonce to be decrypted +#define KEYSTREAM_SIZE 32 +// size of first uid^nonce byte to be rolled back to the initial key +#define ROLLBACK_SIZE 8 +// number of nonces required to test to cover entire 48-bit state +// I would have said it's 12... but bla goes with 100, so I do too +#define NONCE_TESTS 100 + +// state pointer management +extern __thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE]; +extern __thread bitslice_t * restrict state_p; + +// rewind to the point a0, at which KEYSTREAM_SIZE more bits can be generated +#define crypto1_bs_rewind_a0() (state_p = &states[KEYSTREAM_SIZE]) + +// bitsliced bytewise parity +#define bitsliced_byte_parity(n) (n[0].value ^ n[1].value ^ n[2].value ^ n[3].value ^ n[4].value ^ n[5].value ^ n[6].value ^ n[7].value) + +// 48-bit crypto-1 states are normally represented using 64-bit values +typedef union { + uint64_t value; + uint8_t bytes[8]; +} state_t; + +// endianness conversion +#define rev32(word) (((word & 0xff) << 24) | (((word >> 8) & 0xff) << 16) | (((word >> 16) & 0xff) << 8) | (((word >> 24) & 0xff))) +#define rev64(x) (rev32(x)<<32|(rev32((x>>32)))) +#define rev_state_t rev64 + +// crypto-1 functions +const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted); +const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted); + +// initialization functions +void crypto1_bs_init(); + +// conversion functions +void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len); +void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]); + +// debug print +void crypto1_bs_print_states(bitslice_t *bitsliced_states); + +#endif // _CRYPTO1_BS_H + diff --git a/crypto1_bs_crack.c b/crypto1_bs_crack.c new file mode 100644 index 0000000..50db40e --- /dev/null +++ b/crypto1_bs_crack.c @@ -0,0 +1,196 @@ +#include +#include "crypto1_bs_crack.h" + +inline uint64_t crack_states_bitsliced(uint32_t 
**task){ + // the idea to roll back the half-states before combining them was suggested/explained to me by bla + // first we pre-bitslice all the even state bits and roll them back, then bitslice the odd bits and combine the two in the inner loop + uint64_t key = -1; +#ifdef EXACT_COUNT + size_t bucket_states_tested = 0; + size_t bucket_size[(task[4]-task[3])/MAX_BITSLICES]; +#else + const size_t bucket_states_tested = (task[4]-task[3])*(task[2]-task[1]); +#endif + // bitslice all the even states + bitslice_t * restrict bitsliced_even_states[(task[4]-task[3])/MAX_BITSLICES]; + size_t bitsliced_blocks = 0; + for(uint32_t const * restrict p_even = task[3]; p_even < task[4]; p_even+=MAX_BITSLICES){ + bitslice_t * restrict lstate_p = memalign(sizeof(bitslice_t), (STATE_SIZE+ROLLBACK_SIZE)*sizeof(bitslice_t)); + memset(lstate_p, 0x0, (STATE_SIZE)*sizeof(bitslice_t)); + // bitslice even half-states + const size_t max_slices = (task[4]-p_even) < MAX_BITSLICES ? task[4]-p_even : MAX_BITSLICES; +#ifdef EXACT_COUNT + bucket_size[bitsliced_blocks] = max_slices; +#endif + for(size_t slice_idx = 0; slice_idx < max_slices; ++slice_idx){ + // set even bits + uint32_t e = *(p_even+slice_idx); + for(size_t bit_idx = 1; bit_idx < STATE_SIZE; bit_idx+=2, e >>= 1){ + if(e&1){ + lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx&63); + } + } + } + // compute the rollback bits + for(size_t rollback = 0; rollback < ROLLBACK_SIZE; ++rollback){ + // inlined crypto1_bs_lfsr_rollback + const bitslice_value_t feedout = lstate_p[0].value; + ++lstate_p; + const bitslice_value_t ks_bits = crypto1_bs_f20(lstate_p); + const bitslice_value_t feedback = (feedout ^ ks_bits ^ lstate_p[47- 5].value ^ lstate_p[47- 9].value ^ + lstate_p[47-10].value ^ lstate_p[47-12].value ^ lstate_p[47-14].value ^ + lstate_p[47-15].value ^ lstate_p[47-17].value ^ lstate_p[47-19].value ^ + lstate_p[47-24].value ^ lstate_p[47-25].value ^ lstate_p[47-27].value ^ + lstate_p[47-29].value ^ lstate_p[47-35].value ^ 
lstate_p[47-39].value ^ + lstate_p[47-41].value ^ lstate_p[47-42].value ^ lstate_p[47-43].value); + lstate_p[47].value = feedback ^ bitsliced_rollback_byte[rollback].value; + } + bitsliced_even_states[bitsliced_blocks++] = lstate_p; + } + // bitslice every odd state to every block of even half-states with half-finished rollback + for(uint32_t const * restrict p_odd = task[1]; p_odd < task[2]; ++p_odd){ + // early abort + if(keys_found){ + goto out; + } + + // set the odd bits and compute rollback + uint64_t o = (uint64_t) *p_odd; + lfsr_rollback_byte(&o, 0, 1); + // pre-compute part of the odd feedback bits (minus rollback) + bool odd_feedback_bit = parity(o&0x9ce5c); + + crypto1_bs_rewind_a0(); + // set odd bits + for(size_t state_idx = 0; state_idx < (STATE_SIZE-ROLLBACK_SIZE); o >>= 1, state_idx+=2){ + if(o & 1){ + state_p[state_idx] = bs_ones; + } else { + state_p[state_idx] = bs_zeroes; + } + } + const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value; + + // set even and rollback bits + for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ + const bitslice_t const * restrict bitsliced_even_state = bitsliced_even_states[block_idx]; + size_t state_idx; + // set even bits + for(state_idx = 0; state_idx < (STATE_SIZE-ROLLBACK_SIZE); state_idx+=2){ + state_p[1+state_idx] = bitsliced_even_state[1+state_idx]; + } + // set rollback bits + uint64_t lo = o; + for(; state_idx < STATE_SIZE; lo >>= 1, state_idx+=2){ + // set the odd bits and take in the odd rollback bits from the even states + if(lo & 1){ + state_p[state_idx].value = ~bitsliced_even_state[state_idx].value; + } else { + state_p[state_idx] = bitsliced_even_state[state_idx]; + } + + // set the even bits and take in the even rollback bits from the odd states + if((lo >> 32) & 1){ + state_p[1+state_idx].value = ~bitsliced_even_state[1+state_idx].value; + } else { + state_p[1+state_idx] = bitsliced_even_state[1+state_idx]; + } + } + +#ifdef EXACT_COUNT + 
bucket_states_tested += bucket_size[block_idx]; +#endif + // pre-compute first keystream and feedback bit vectors + const bitslice_value_t ksb = crypto1_bs_f20(state_p); + const bitslice_value_t fbb = (odd_feedback ^ state_p[47- 0].value ^ state_p[47- 5].value ^ // take in the even and rollback bits + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-24].value ^ state_p[47-42].value); + + // vector to contain test results (1 = passed, 0 = failed) + bitslice_t results = bs_ones; + + for(size_t tests = 0; tests < NONCE_TESTS; ++tests){ + size_t parity_bit_idx = 0; + bitslice_value_t fb_bits = fbb; + bitslice_value_t ks_bits = ksb; + state_p = &states[KEYSTREAM_SIZE-1]; + bitslice_value_t parity_bit_vector = bs_zeroes.value; + + // highest bit is transmitted/received first + for(int32_t ks_idx = KEYSTREAM_SIZE-1; ks_idx >= 0; --ks_idx, --state_p){ + // decrypt nonce bits + const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value; + const bitslice_value_t decrypted_nonce_bit_vector = (encrypted_nonce_bit_vector ^ ks_bits); + + // compute real parity bits on the fly + parity_bit_vector ^= decrypted_nonce_bit_vector; + + // update state + state_p[0].value = (fb_bits ^ decrypted_nonce_bit_vector); + + // compute next keystream bit + ks_bits = crypto1_bs_f20(state_p); + + // for each byte: + if((ks_idx&7) == 0){ + // get encrypted parity bits + const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value; + + // decrypt parity bits + const bitslice_value_t decrypted_parity_bit_vector = (encrypted_parity_bit_vector ^ ks_bits); + + // compare actual parity bits with decrypted parity bits and take count in results vector + results.value &= (parity_bit_vector ^ decrypted_parity_bit_vector); + + // make sure we still have a match in our set + // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){ + + // this is much faster on my gcc, because 
somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ??? + // the short-circuiting also helps + if(results.bytes64[0] == 0 +#if MAX_BITSLICES > 64 + && results.bytes64[1] == 0 +#endif +#if MAX_BITSLICES > 128 + && results.bytes64[2] == 0 + && results.bytes64[3] == 0 +#endif + ){ + goto stop_tests; + } + // this is about as fast but less portable (requires -std=gnu99) + // asm goto ("ptest %1, %0\n\t" + // "jz %l2" :: "xm" (results.value), "xm" (bs_ones.value) : "cc" : stop_tests); + parity_bit_vector = bs_zeroes.value; + } + // compute next feedback bit vector + fb_bits = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ + state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ + state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ + state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); + } + } + // all nonce tests were successful: we've found the key in this block! 
+ state_t keys[MAX_BITSLICES]; + crypto1_bs_convert_states(&states[KEYSTREAM_SIZE], keys); + for(size_t results_idx = 0; results_idx < MAX_BITSLICES; ++results_idx){ + if(get_vector_bit(results_idx, results)){ + key = keys[results_idx].value; + goto out; + } + } +stop_tests: + // prepare to set new states + crypto1_bs_rewind_a0(); + continue; + } + } +out: + for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ + free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE); + } + __sync_fetch_and_add(&total_states_tested, bucket_states_tested); + return key; +} diff --git a/crypto1_bs_crack.h b/crypto1_bs_crack.h new file mode 100644 index 0000000..f4867a7 --- /dev/null +++ b/crypto1_bs_crack.h @@ -0,0 +1,21 @@ +#ifndef _CRYPTO1_BS_CRACK_H +#define _CRYPTO1_BS_CRACK_H +#include +#include "crypto1_bs.h" +#include "craptev1.h" +uint64_t crack_states_bitsliced(uint32_t **task); +size_t keys_found; +size_t total_states_tested; +size_t total_states; + +// linked from crapto1.c file +extern uint8_t lfsr_rollback_byte(uint64_t* s, uint32_t in, int fb); + +#define EXACT_COUNT + +// arrays of bitsliced states with identical values in all slices +bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE]; +bitslice_t bitsliced_encrypted_parity_bits[NONCE_TESTS][STATE_SIZE]; +bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE]; + +#endif // _CRYPTO1_BS_CRACK_H diff --git a/pwpiwi_proxmark3_hard_nested.patch b/pwpiwi_proxmark3_hard_nested.patch new file mode 100644 index 0000000..2db24fc --- /dev/null +++ b/pwpiwi_proxmark3_hard_nested.patch @@ -0,0 +1,358 @@ +diff --git a/client/Makefile b/client/Makefile +index 91e595d..dc3557f 100644 +--- a/client/Makefile ++++ b/client/Makefile +@@ -107,6 +107,7 @@ CMDSRCS = nonce2key/crapto1.c\ + aes.c\ + protocols.c\ + sha1.c\ ++ crypto1_bs.c \ + + ZLIBSRCS = deflate.c adler32.c trees.c zutil.c inflate.c inffast.c inftrees.c + ZLIB_FLAGS = -DZ_SOLO -DZ_PREFIX -DNO_GZIP -DZLIB_PM3_TUNED +diff --git a/client/cmdhfmfhard.c 
b/client/cmdhfmfhard.c +index b3893ea..4a0bd38 100644 +--- a/client/cmdhfmfhard.c ++++ b/client/cmdhfmfhard.c +@@ -20,18 +20,21 @@ + #include + #include + #include ++#include ++#include + #include "proxmark3.h" + #include "cmdmain.h" + #include "ui.h" + #include "util.h" + #include "nonce2key/crapto1.h" + #include "parity.h" ++#include "crypto1_bs.h" + + // uint32_t test_state_odd = 0; + // uint32_t test_state_even = 0; + + #define CONFIDENCE_THRESHOLD 0.95 // Collect nonces until we are certain enough that the following brute force is successfull +-#define GOOD_BYTES_REQUIRED 30 ++#define GOOD_BYTES_REQUIRED 28 + + + static const float p_K[257] = { // the probability that a random nonce has a Sum Property == K +@@ -88,6 +91,8 @@ typedef struct noncelist { + } noncelist_t; + + ++static size_t nonces_to_bruteforce = 0; ++static noncelistentry_t *brute_force_nonces[256]; + static uint32_t cuid; + static noncelist_t nonces[256]; + static uint8_t best_first_bytes[256]; +@@ -169,6 +174,11 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc) + p2->nonce_enc = nonce_enc; + p2->par_enc = par_enc; + ++ if(nonces_to_bruteforce < 256){ ++ brute_force_nonces[nonces_to_bruteforce] = p2; ++ nonces_to_bruteforce++; ++ } ++ + nonces[first_byte].num++; + nonces[first_byte].Sum += evenparity32((nonce_enc & 0x00ff0000) | (par_enc & 0x04)); + nonces[first_byte].updated = true; // indicates that we need to recalculate the Sum(a8) probability for this first byte +@@ -1376,19 +1386,293 @@ static void free_statelist_cache(void) + } + } + ++size_t keys_found = 0; ++size_t bucket_count = 0; ++statelist_t* buckets[128]; ++size_t total_states_tested = 0; ++size_t thread_count = 4; ++ ++// these bitsliced states will hold identical states in all slices ++bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE]; ++ ++// arrays of bitsliced states with identical values in all slices ++bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE]; ++bitslice_t 
bitsliced_encrypted_parity_bits[NONCE_TESTS][ROLLBACK_SIZE]; ++ ++#define EXACT_COUNT ++ ++static const uint64_t crack_states_bitsliced(statelist_t *p){ ++ // the idea to roll back the half-states before combining them was suggested/explained to me by bla ++ // first we pre-bitslice all the even state bits and roll them back, then bitslice the odd bits and combine the two in the inner loop ++ uint64_t key = -1; ++#ifdef EXACT_COUNT ++ size_t bucket_states_tested = 0; ++ size_t bucket_size[p->len[EVEN_STATE]/MAX_BITSLICES]; ++#else ++ const size_t bucket_states_tested = (p->len[EVEN_STATE])*(p->len[ODD_STATE]); ++#endif ++ bitslice_t *bitsliced_even_states[p->len[EVEN_STATE]/MAX_BITSLICES]; ++ size_t bitsliced_blocks = 0; ++ uint32_t const * restrict even_end = p->states[EVEN_STATE]+p->len[EVEN_STATE]; ++ for(uint32_t * restrict p_even = p->states[EVEN_STATE]; p_even < even_end; p_even+=MAX_BITSLICES){ ++ bitslice_t * restrict lstate_p = memalign(sizeof(bitslice_t), (STATE_SIZE+ROLLBACK_SIZE)*sizeof(bitslice_t)); ++ memset(lstate_p+1, 0x0, (STATE_SIZE-1)*sizeof(bitslice_t)); // zero even bits ++ // bitslice even half-states ++ const size_t max_slices = (even_end-p_even) < MAX_BITSLICES ? 
even_end-p_even : MAX_BITSLICES; ++#ifdef EXACT_COUNT ++ bucket_size[bitsliced_blocks] = max_slices; ++#endif ++ for(size_t slice_idx = 0; slice_idx < max_slices; ++slice_idx){ ++ uint32_t e = *(p_even+slice_idx); ++ for(size_t bit_idx = 1; bit_idx < STATE_SIZE; bit_idx+=2, e >>= 1){ ++ // set even bits ++ if(e&1){ ++ lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx&63); ++ } ++ } ++ } ++ // compute the rollback bits ++ for(size_t rollback = 0; rollback < ROLLBACK_SIZE; ++rollback){ ++ // inlined crypto1_bs_lfsr_rollback ++ const bitslice_value_t feedout = lstate_p[0].value; ++ ++lstate_p; ++ const bitslice_value_t ks_bits = crypto1_bs_f20(lstate_p); ++ const bitslice_value_t feedback = (feedout ^ ks_bits ^ lstate_p[47- 5].value ^ lstate_p[47- 9].value ^ ++ lstate_p[47-10].value ^ lstate_p[47-12].value ^ lstate_p[47-14].value ^ ++ lstate_p[47-15].value ^ lstate_p[47-17].value ^ lstate_p[47-19].value ^ ++ lstate_p[47-24].value ^ lstate_p[47-25].value ^ lstate_p[47-27].value ^ ++ lstate_p[47-29].value ^ lstate_p[47-35].value ^ lstate_p[47-39].value ^ ++ lstate_p[47-41].value ^ lstate_p[47-42].value ^ lstate_p[47-43].value); ++ lstate_p[47].value = feedback ^ bitsliced_rollback_byte[rollback].value; ++ } ++ bitsliced_even_states[bitsliced_blocks++] = lstate_p; ++ } ++ for(uint32_t const * restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE]+p->len[ODD_STATE]; ++p_odd){ ++ // early abort ++ if(keys_found){ ++ goto out; ++ } ++ ++ // set the odd bits and compute rollback ++ uint64_t o = (uint64_t) *p_odd; ++ lfsr_rollback_byte((struct Crypto1State*) &o, 0, 1); ++ // pre-compute part of the odd feedback bits (minus rollback) ++ bool odd_feedback_bit = parity(o&0x9ce5c); ++ ++ crypto1_bs_rewind_a0(); ++ // set odd bits ++ for(size_t state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; o >>= 1, state_idx+=2){ ++ if(o & 1){ ++ state_p[state_idx] = bs_ones; ++ } else { ++ state_p[state_idx] = bs_zeroes; ++ } ++ } ++ const bitslice_value_t 
odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value; ++ ++ for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ ++ const bitslice_t const * restrict bitsliced_even_state = bitsliced_even_states[block_idx]; ++ size_t state_idx; ++ // set even bits ++ for(state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; state_idx+=2){ ++ state_p[1+state_idx] = bitsliced_even_state[1+state_idx]; ++ } ++ // set rollback bits ++ uint64_t lo = o; ++ for(; state_idx < STATE_SIZE; lo >>= 1, state_idx+=2){ ++ // set the odd bits and take in the odd rollback bits from the even states ++ if(lo & 1){ ++ state_p[state_idx].value = ~bitsliced_even_state[state_idx].value; ++ } else { ++ state_p[state_idx] = bitsliced_even_state[state_idx]; ++ } ++ ++ // set the even bits and take in the even rollback bits from the odd states ++ if((lo >> 32) & 1){ ++ state_p[1+state_idx].value = ~bitsliced_even_state[1+state_idx].value; ++ } else { ++ state_p[1+state_idx] = bitsliced_even_state[1+state_idx]; ++ } ++ } ++ ++#ifdef EXACT_COUNT ++ bucket_states_tested += bucket_size[block_idx]; ++#endif ++ // pre-compute first keystream and feedback bit vectors ++ const bitslice_value_t ksb = crypto1_bs_f20(state_p); ++ const bitslice_value_t fbb = (odd_feedback ^ state_p[47- 0].value ^ state_p[47- 5].value ^ // take in the even and rollback bits ++ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ ++ state_p[47-24].value ^ state_p[47-42].value); ++ ++ // test keys ++ bitslice_t results = bs_ones; ++ ++ for(size_t tests = 0; tests < NONCE_TESTS; ++tests){ ++ size_t parity_bit_idx = 0; ++ bitslice_value_t fb_bits = fbb; ++ bitslice_value_t ks_bits = ksb; ++ state_p = &states[KEYSTREAM_SIZE-1]; ++ bitslice_value_t parity_bit_vector = bs_zeroes.value; ++ ++ // highest bit is transmitted/received first ++ for(int32_t ks_idx = KEYSTREAM_SIZE-1; ks_idx >= 0; --ks_idx, --state_p){ ++ // decrypt nonce bits ++ const bitslice_value_t encrypted_nonce_bit_vector = 
bitsliced_encrypted_nonces[tests][ks_idx].value; ++ const bitslice_value_t decrypted_nonce_bit_vector = (encrypted_nonce_bit_vector ^ ks_bits); ++ ++ // compute real parity bits on the fly ++ parity_bit_vector ^= decrypted_nonce_bit_vector; ++ ++ // update state ++ state_p[0].value = (fb_bits ^ decrypted_nonce_bit_vector); ++ ++ // compute next keystream bit ++ ks_bits = crypto1_bs_f20(state_p); ++ ++ // for each byte: ++ if((ks_idx&7) == 0){ ++ // get encrypted parity bits ++ const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value; ++ ++ // decrypt parity bits ++ const bitslice_value_t decrypted_parity_bit_vector = (encrypted_parity_bit_vector ^ ks_bits); ++ ++ // compare actual parity bits with decrypted parity bits and take count in results vector ++ results.value &= (parity_bit_vector ^ decrypted_parity_bit_vector); ++ ++ // make sure we still have a match in our set ++ // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){ ++ ++ // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ??? 
++ // the short-circuiting also helps ++ if(results.bytes64[0] == 0 ++#if MAX_BITSLICES > 64 ++ && results.bytes64[1] == 0 ++#endif ++#if MAX_BITSLICES > 128 ++ && results.bytes64[2] == 0 ++ && results.bytes64[3] == 0 ++#endif ++ ){ ++ goto stop_tests; ++ } ++ // this is about as fast but less portable (requires -std=gnu99) ++ // asm goto ("ptest %1, %0\n\t" ++ // "jz %l2" :: "xm" (results.value), "xm" (bs_ones.value) : "cc" : stop_tests); ++ parity_bit_vector = bs_zeroes.value; ++ } ++ // compute next feedback bit vector ++ fb_bits = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ ++ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ ++ state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ ++ state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ ++ state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ ++ state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); ++ } ++ } ++ // all nonce tests were successful: we've found the key in this block! 
++ state_t keys[MAX_BITSLICES]; ++ crypto1_bs_convert_states(&states[KEYSTREAM_SIZE], keys); ++ for(size_t results_idx = 0; results_idx < MAX_BITSLICES; ++results_idx){ ++ if(get_vector_bit(results_idx, results)){ ++ key = keys[results_idx].value; ++ goto out; ++ } ++ } ++stop_tests: ++ // prepare to set new states ++ crypto1_bs_rewind_a0(); ++ continue; ++ } ++ } ++out: ++ for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ ++ free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE); ++ } ++ __sync_fetch_and_add(&total_states_tested, bucket_states_tested); ++ return key; ++} ++ ++static void* crack_states_thread(void* x){ ++ const size_t thread_id = (size_t)x; ++ size_t current_bucket = thread_id; ++ while(current_bucket < bucket_count){ ++ statelist_t * bucket = buckets[current_bucket]; ++ if(bucket){ ++ const uint64_t key = crack_states_bitsliced(bucket); ++ if(key != -1){ ++ printf("Found key: %012lx\n", key); ++ __sync_fetch_and_add(&keys_found, 1); ++ break; ++ } else if(keys_found){ ++ break; ++ } else { ++ printf("Cracking... 
%6.02f%%\n", (100.0*total_states_tested/(maximum_states)));
++            }
++        }
++        current_bucket += thread_count;
++    }
++    return NULL;
++}
+
+-static void brute_force(void)
++void brute_force(void)
+ {
+     if (known_target_key != -1) {
+         PrintAndLog("Looking for known target key in remaining key space...");
+         TestIfKeyExists(known_target_key);
+     } else {
+-        PrintAndLog("Brute Force phase is not implemented.");
++        PrintAndLog("Brute force phase starting.");
++        time_t start, end;
++        time(&start);
++        keys_found = 0;
++
++        crypto1_bs_init();
++
++        PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES);
++        PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02x...", best_first_bytes[0]^(cuid>>24));
++        // convert to 32 bit little-endian
++        crypto1_bs_bitslice_value32(rev32((best_first_bytes[0]^(cuid>>24))), bitsliced_rollback_byte, 8);
++
++        PrintAndLog("Bitslicing nonces...");
++        for(size_t tests = 0; tests < NONCE_TESTS; tests++){
++            uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc;
++            uint8_t test_parity = brute_force_nonces[tests]->par_enc;
++            // pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an extra xor is required in the decryption routine
++            crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32);
++            // convert to 32 bit little-endian
++            crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4);
++        }
++        total_states_tested = 0;
++
++        // count number of states to go
++        bucket_count = 0;
++        for (statelist_t *p = candidates; p != NULL; p = p->next) {
++            buckets[bucket_count] = p;
++            bucket_count++;
++        }
++
++        // enumerate states using all hardware threads, each thread handles one bucket
++        thread_count = sysconf(_SC_NPROCESSORS_CONF); // set first: it is logged and sizes threads[] below
++        PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %lu states...", thread_count, bucket_count, maximum_states);
++        pthread_t threads[thread_count];
++        for(size_t i = 0; i < thread_count; i++){
++            pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
++        }
++        for(size_t i = 0; i < thread_count; i++){
++            pthread_join(threads[i], 0);
++        }
++
++        time(&end);
++        unsigned long elapsed_time = difftime(end, start);
++        PrintAndLog("Tested %lu states, found %u keys after %lu seconds", total_states_tested, keys_found, elapsed_time);
++        if(!keys_found){
++            assert(total_states_tested == maximum_states);
++        }
++        // reset this counter for the next call
++        nonces_to_bruteforce = 0;
+     }
+-
+ }
+
+-
+ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests)
+ {
+     // initialize Random number generator
diff --git a/solve_bs.c b/solve_bs.c
new file mode 100644
index 0000000..bffaabc
--- /dev/null
+++ b/solve_bs.c
@@ -0,0 +1,87 @@
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/sysinfo.h>
+#include "craptev1.h"
+#include "crypto1_bs.h"
+#include "crypto1_bs_crack.h"
+
+// linked from .so / .c files by bla
+extern uint64_t *readnonces(char* fname);
+
+uint32_t **space;
+size_t thread_count;
+
+// Worker thread: x carries the thread index; thread t tests partitions t, t+thread_count, ...
+// of space (5 pointers per partition, list ends at a NULL entry) and stops once any key is found.
+void* crack_states_thread(void* x){
+    const size_t thread_id = (size_t)x;
+    for(size_t j = thread_id; space[j * 5]; j += thread_count) {
+        const uint64_t key = crack_states_bitsliced(space + j * 5);
+        if(key != (uint64_t)-1){
+            printf("Found key: %012" PRIx64 "\n", key);
+            __sync_fetch_and_add(&keys_found, 1);
+            break;
+        } else if(keys_found){
+            break;
+        } else {
+            printf("Cracking... %6.02f%%\n", (100.0*total_states_tested/(total_states)));
+        }
+    }
+    return NULL;
+}
+
+// Entry point: argv[1] is the nonce file, argv[2] the card uid in hex. Builds the candidate
+// space with craptev1 and searches it with one bitsliced cracking thread per configured CPU.
+int main(int argc, char* argv[]){
+    if(argc != 3){
+        printf("Usage: %s <nonces.txt> <uid>\n", argv[0]);
+        return -1;
+    }
+    uint64_t *nonces = readnonces(argv[1]);
+    uint32_t uid = strtoul(argv[2], NULL, 16);
+    space = craptev1_get_space(nonces, 95, uid);
+    total_states = craptev1_sizeof_space(space);
+
+    thread_count = get_nprocs_conf();
+    pthread_t threads[thread_count];
+    size_t i;
+
+    printf("Initializing BS crypto-1\n");
+    crypto1_bs_init();
+    printf("Using %u-bit bitslices\n", MAX_BITSLICES);
+
+    uint8_t rollback_byte = **space;
+    printf("Bitslicing rollback byte: %02x...\n", rollback_byte);
+    // convert to 32 bit little-endian
+    crypto1_bs_bitslice_value32(rev32((rollback_byte)), bitsliced_rollback_byte, 8);
+
+    printf("Bitslicing nonces...\n");
+    for(size_t tests = 0; tests < NONCE_TESTS; tests++){
+        // pre-xor the uid into the decrypted nonces, and also pre-xor the uid parity into the encrypted parity bits - otherwise an extra xor is required in the decryption routine
+        uint32_t test_nonce = uid^rev32(nonces[tests]);
+        uint32_t test_parity = (nonces[tests]>>32)^rev32(uid);
+        test_parity = ((parity(test_parity >> 24 & 0xff) & 1) | (parity(test_parity>>16 & 0xff) & 1)<<1 | (parity(test_parity>>8 & 0xff) & 1)<<2 | (parity(test_parity & 0xff) & 1) << 3);
+        crypto1_bs_bitslice_value32(test_nonce, bitsliced_encrypted_nonces[tests], 32);
+        // convert to 32 bit little-endian
+        crypto1_bs_bitslice_value32(~(test_parity)<<24, bitsliced_encrypted_parity_bits[tests], 4);
+    }
+
+    total_states_tested = 0;
+    keys_found = 0;
+
+    printf("Starting %zu threads to test %lu states\n", thread_count, total_states);
+    for(i = 0; i < thread_count; i++){
+        pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
+    }
+    for(i = 0; i < thread_count; i++){
+        pthread_join(threads[i], 0);
+    }
+    printf("Tested %lu states\n", total_states_tested);
+
+    craptev1_destroy_space(space);
+    return 0;
+}
+
diff --git 
a/solve_piwi.c b/solve_piwi.c
new file mode 100644
index 0000000..ea21c3b
--- /dev/null
+++ b/solve_piwi.c
@@ -0,0 +1,111 @@
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/sysinfo.h>
+#include "craptev1.h"
+
+#define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff)))
+
+// move bits 3, 2, 1, 0 of p to bit positions 0, 8, 16, 24 of the result
+uint64_t split(uint8_t p){
+    return (((p & 0x8) >>3 )| ((p & 0x4) >> 2) << 8 | ((p & 0x2) >> 1) << 16 | (p & 0x1) << 24 );
+}
+
+uint32_t uid;
+
+// Load a nonce file: a 4-byte uid (stored byte-swapped into the global uid), then from
+// offset 6 records of two 4-byte encrypted nonces plus one byte carrying both 4-bit parities
+// (high nibble: first nonce, low nibble: second nonce).
+// Returns a malloc'd list with the nonce in the low and split(~parity) in the high 32 bits of
+// each entry, terminated by an all-ones entry; NULL if the file cannot be opened or memory
+// is exhausted. The caller owns the list.
+uint64_t *readnonces(char* fname){
+    const size_t max_nonces = (size_t)1 << 24;
+    FILE *f = fopen(fname, "rb");
+    if(!f){
+        perror(fname);
+        return NULL;
+    }
+    uint64_t *nonces = malloc(max_nonces * sizeof *nonces);
+    if(!nonces){
+        perror("malloc");
+        fclose(f);
+        return NULL;
+    }
+    if(fread(&uid, 1, 4, f) == 4){
+        uid = rev32(uid);
+    }
+    fseek(f, 6, SEEK_SET);
+    size_t i = 0;
+    uint32_t nt_enc1, nt_enc2;
+    uint8_t par_enc;
+    // stop at the first incomplete record (EOF or read error) instead of spinning on !feof(),
+    // and never write past the buffer: two entries per record plus the terminator must fit
+    while(i + 2 < max_nonces
+          && fread(&nt_enc1, 1, 4, f) == 4
+          && fread(&nt_enc2, 1, 4, f) == 4
+          && fread(&par_enc, 1, 1, f) == 1){
+        nonces[i  ] = split(~(par_enc >>   4)) << 32 | nt_enc1;
+        nonces[i+1] = split(~(par_enc & 0xff)) << 32 | nt_enc2;
+        i += 2;
+    }
+    nonces[i] = -1;
+    fclose(f);
+    return nonces;
+}
+
+uint32_t **space;
+size_t thread_count;
+size_t states_tested = 0;
+size_t total_states;
+size_t keys_found = 0;
+
+// Worker thread: x carries the thread index; thread t searches partitions t, t+thread_count, ...
+// of space (5 pointers per partition, list ends at a NULL entry) and exits the process on the first key.
+void* crack_states_thread(void* x){
+    const size_t thread_id = (size_t)x;
+    for(size_t j = thread_id; space[j * 5]; j += thread_count) {
+        uint64_t key = craptev1_search_partition(space + j * 5);
+        states_tested = total_states - craptev1_sizeof_space(space+j*5);
+        printf("Cracking... %6.02f%%\n", (100.0*states_tested/(total_states)));
+        if(key != (uint64_t)-1){
+            printf("Found key: %012" PRIx64 "\n", key);
+            exit(0);
+        }
+    }
+    return NULL;
+}
+
+// Entry point: argv[1] is the nonce file. Builds the candidate space with craptev1 and
+// searches it with one thread per configured CPU.
+int main(int argc, char* argv[]){
+    if(argc != 2){
+        printf("Usage: %s <nonces.bin>\n", argv[0]);
+        return -1;
+    }
+    uint64_t *nonces = readnonces(argv[1]);
+    if(!nonces){
+        return -1;
+    }
+    space = craptev1_get_space(nonces, 95, uid);
+    total_states = craptev1_sizeof_space(space);
+
+    thread_count = get_nprocs_conf();
+    pthread_t threads[thread_count];
+    printf("Starting %zu threads to test %zu states\n", thread_count, total_states);
+    size_t i;
+    states_tested = 0;
+    for(i = 0; i < thread_count; i++){
+        pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
+    }
+    for(i = 0; i < thread_count; i++){
+        pthread_join(threads[i], 0);
+    }
+    printf("Tested %zu states\n", states_tested);
+
+    craptev1_destroy_space(space);
+    return 0;
+}
+
diff --git a/solve_piwi_bs.c b/solve_piwi_bs.c
new file mode 100644
index 0000000..aea81b3
--- /dev/null
+++ b/solve_piwi_bs.c
@@ -0,0 +1,135 @@
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/sysinfo.h>
+#include "craptev1.h"
+#include "crypto1_bs.h"
+#include "crypto1_bs_crack.h"
+
+// move bits 3, 2, 1, 0 of p to bit positions 0, 8, 16, 24 of the result
+uint64_t split(uint8_t p){
+    return (((p & 0x8) >>3 )| ((p & 0x4) >> 2) << 8 | ((p & 0x2) >> 1) << 16 | (p & 0x1) << 24 );
+}
+
+uint32_t uid;
+
+// Load a nonce file: a 4-byte uid (stored byte-swapped into the global uid), then from
+// offset 6 records of two 4-byte encrypted nonces plus one byte carrying both 4-bit parities
+// (high nibble: first nonce, low nibble: second nonce).
+// Returns a malloc'd list with the nonce in the low and split(~parity) in the high 32 bits of
+// each entry, terminated by an all-ones entry; NULL if the file cannot be opened or memory
+// is exhausted. The caller owns the list.
+uint64_t *readnonces(char* fname){
+    const size_t max_nonces = (size_t)1 << 24;
+    FILE *f = fopen(fname, "rb");
+    if(!f){
+        perror(fname);
+        return NULL;
+    }
+    uint64_t *nonces = malloc(max_nonces * sizeof *nonces);
+    if(!nonces){
+        perror("malloc");
+        fclose(f);
+        return NULL;
+    }
+    if(fread(&uid, 1, 4, f) == 4){
+        uid = rev32(uid);
+    }
+    fseek(f, 6, SEEK_SET);
+    size_t i = 0;
+    uint32_t nt_enc1, nt_enc2;
+    uint8_t par_enc;
+    // stop at the first incomplete record (EOF or read error) instead of spinning on !feof(),
+    // and never write past the buffer: two entries per record plus the terminator must fit
+    while(i + 2 < max_nonces
+          && fread(&nt_enc1, 1, 4, f) == 4
+          && fread(&nt_enc2, 1, 4, f) == 4
+          && fread(&par_enc, 1, 1, f) == 1){
+        nonces[i  ] = split(~(par_enc >>   4)) << 32 | nt_enc1;
+        nonces[i+1] = split(~(par_enc & 0xff)) << 32 | nt_enc2;
+        i += 2;
+    }
+    nonces[i] = -1;
+    fclose(f);
+    return nonces;
+}
+
+uint32_t **space;
+size_t thread_count;
+
+// Worker thread: x carries the thread index; thread t tests partitions t, t+thread_count, ...
+// of space (5 pointers per partition, list ends at a NULL entry) and stops once any key is found.
+void* crack_states_thread(void* x){
+    const size_t thread_id = (size_t)x;
+    for(size_t j = thread_id; space[j * 5]; j += thread_count) {
+        const uint64_t key = crack_states_bitsliced(space + j * 5);
+        if(key != (uint64_t)-1){
+            printf("Found key: %012" PRIx64 "\n", key);
+            __sync_fetch_and_add(&keys_found, 1);
+            break;
+        } else if(keys_found){
+            break;
+        } else {
+            printf("Cracking... %6.02f%%\n", (100.0*total_states_tested/(total_states)));
+        }
+    }
+    return NULL;
+}
+
+// Entry point: argv[1] is the nonce file. Builds the candidate space with craptev1 and
+// searches it with one bitsliced cracking thread per configured CPU.
+int main(int argc, char* argv[]){
+    if(argc != 2){
+        printf("Usage: %s <nonces.bin>\n", argv[0]);
+        return -1;
+    }
+    uint64_t *nonces = readnonces(argv[1]);
+    if(!nonces){
+        return -1;
+    }
+    space = craptev1_get_space(nonces, 95, uid);
+    total_states = craptev1_sizeof_space(space);
+
+    thread_count = get_nprocs_conf();
+    pthread_t threads[thread_count];
+    size_t i;
+
+    printf("Initializing BS crypto-1\n");
+    crypto1_bs_init();
+    printf("Using %u-bit bitslices\n", MAX_BITSLICES);
+
+    uint8_t rollback_byte = **space;
+    printf("Bitslicing rollback byte: %02x...\n", rollback_byte);
+    // convert to 32 bit little-endian
+    crypto1_bs_bitslice_value32(rev32((rollback_byte)), bitsliced_rollback_byte, 8);
+
+    printf("Bitslicing nonces...\n");
+    for(size_t tests = 0; tests < NONCE_TESTS; tests++){
+        // pre-xor the uid into the decrypted nonces, and also pre-xor the uid parity into the encrypted parity bits - otherwise an extra xor is required in the decryption routine
+        uint32_t test_nonce = uid^rev32(nonces[tests]);
+        uint32_t test_parity = (nonces[tests]>>32)^rev32(uid);
+        test_parity = ((parity(test_parity >> 24 & 0xff) & 1) | (parity(test_parity>>16 & 0xff) & 1)<<1 | (parity(test_parity>>8 & 0xff) & 1)<<2 | (parity(test_parity &0xff) & 1) << 3);
+        crypto1_bs_bitslice_value32(test_nonce, bitsliced_encrypted_nonces[tests], 32);
+        // convert to 32 bit little-endian
+        crypto1_bs_bitslice_value32(~(test_parity)<<24, bitsliced_encrypted_parity_bits[tests], 4);
+    }
+
+    total_states_tested = 0;
+    keys_found = 0;
+
+    printf("Starting %zu threads to test %lu states\n", thread_count, total_states);
+    for(i = 0; i < thread_count; i++){
+        pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
+    }
+    for(i = 0; i < thread_count; i++){
+        pthread_join(threads[i], 0);
+    }
+    printf("Tested %lu states\n", total_states_tested);
+
+    craptev1_destroy_space(space);
+    return 0;
+}
+
+