From: Neil Smith Date: Tue, 15 Jul 2014 07:29:35 +0000 (+0100) Subject: Breaking keyword ciphers X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=9a837e35f87615d98f6fbfaa0bbeb2729822c23a;p=cipher-training.git Breaking keyword ciphers --- diff --git a/cipher.py b/cipher.py index d2438b7..f71d1c1 100644 --- a/cipher.py +++ b/cipher.py @@ -16,52 +16,6 @@ for a in range(26): modular_division_table[b][c] = a -def every_nth(text, n, fillvalue=''): - """Returns n strings, each of which consists of every nth character, - starting with the 0th, 1st, 2nd, ... (n-1)th character - - >>> every_nth(string.ascii_lowercase, 5) - ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] - >>> every_nth(string.ascii_lowercase, 1) - ['abcdefghijklmnopqrstuvwxyz'] - >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE - ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] - >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') - ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] - """ - split_text = chunks(text, n, fillvalue) - return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)] - -def combine_every_nth(split_text): - """Reforms a text split into every_nth strings - - >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) - 'abcdefghijklmnopqrstuvwxyz' - >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) - 'abcdefghijklmnopqrstuvwxyz' - >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) - 'abcdefghijklmnopqrstuvwxyz' - """ - return ''.join([''.join(l) - for l in zip_longest(*split_text, fillvalue='')]) - -def chunks(text, n, fillvalue=None): - """Split a text into chunks of n characters - - >>> chunks('abcdefghi', 3) - ['abc', 'def', 'ghi'] - >>> chunks('abcdefghi', 4) - ['abcd', 'efgh', 'i'] - >>> chunks('abcdefghi', 4, fillvalue='!') - ['abcd', 'efgh', 'i!!!'] - """ - if fillvalue: - padding = fillvalue[0] * (n - len(text) % n) - else: - padding = '' - return [(text+padding)[i:i+n] for i in range(0, len(text), n)] - def deduplicate(text): """If a string contains duplicate letters, remove all but the first. Retain the order of the letters. @@ -308,31 +262,6 @@ def keyword_decipher(message, keyword, cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) return message.lower().translate(cipher_translation) - -def vigenere_encipher(message, keyword): - """Vigenere encipher - - >>> vigenere_encipher('hello', 'abc') - 'hfnlp' - """ - shifts = [ord(l) - ord('a') for l in sanitise(keyword)] - pairs = zip(message, cycle(shifts)) - return ''.join([caesar_encipher_letter(l, k) for l, k in pairs]) - -def vigenere_decipher(message, keyword): - """Vigenere decipher - - >>> vigenere_decipher('hfnlp', 'abc') - 'hello' - """ - shifts = [ord(l) - ord('a') for l in sanitise(keyword)] - pairs = zip(message, cycle(shifts)) - return ''.join([caesar_decipher_letter(l, k) for l, k in pairs]) - -beaufort_encipher = vigenere_decipher -beaufort_decipher = vigenere_encipher - - if __name__ == "__main__": import doctest doctest.testmod() diff --git a/cipherbreak.py b/cipherbreak.py index 9ce8000..f4c9c03 100644 --- a/cipherbreak.py +++ b/cipherbreak.py @@ -187,81 +187,6 @@ def keyword_break_worker(message, keyword, wrap_alphabet, fitness): return (keyword, wrap_alphabet), fit -def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters, - chunksize=500): - """Breaks a vigenere cipher using a dictionary and frequency analysis. - - >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \ - 'message for the vigenere decipherment'), 'cat'), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - ('cat', -52.947271216...) - """ - with Pool() as pool: - helper_args = [(message, word, fitness) - for word in wordlist] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(vigenere_keyword_break_worker, helper_args, - chunksize) - return max(breaks, key=lambda k: k[1]) -vigenere_keyword_break = vigenere_keyword_break_mp - -def vigenere_keyword_break_worker(message, keyword, fitness): - plaintext = vigenere_decipher(message, keyword) - fit = fitness(plaintext) - logger.debug('Vigenere keyword break attempt using key {0} gives fit of ' - '{1} and decrypt starting: {2}'.format(keyword, - fit, sanitise(plaintext)[:50])) - return keyword, fit - - -def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters): - """Breaks a Vigenere cipher with frequency analysis - - >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \ - "run. She is ready and so am I. I stole Daniel's pocketbook this " \ - "afternoon when he left his jacket hanging on the easel in the " \ - "attic. I jump every time I hear a footstep on the stairs, " \ - "certain that the theft has been discovered and that I will " \ - "be caught. The SS officer visits less often now that he is " \ - "sure"), 'florence')) # doctest: +ELLIPSIS - ('florence', -307.5473096791...) - """ - def worker(message, key_length, fitness): - splits = every_nth(sanitised_message, key_length) - key = ''.join([chr(caesar_break(s)[0] + ord('a')) for s in splits]) - plaintext = vigenere_decipher(message, key) - fit = fitness(plaintext) - return key, fit - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(1, max_key_length+1)]) - return max(results, key=lambda k: k[1]) - - -def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters): - """Breaks a Beaufort cipher with frequency analysis - - >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \ - "run. She is ready and so am I. I stole Daniel's pocketbook this " \ - "afternoon when he left his jacket hanging on the easel in the " \ - "attic. I jump every time I hear a footstep on the stairs, " \ - "certain that the theft has been discovered and that I will " \ - "be caught. The SS officer visits less often now " \ - "that he is sure"), 'florence')) # doctest: +ELLIPSIS - ('florence', -307.5473096791...) - """ - def worker(message, key_length, fitness): - splits = every_nth(sanitised_message, key_length) - key = ''.join([chr(-caesar_break(s)[0] % 26 + ord('a')) - for s in splits]) - plaintext = beaufort_decipher(message, key) - fit = fitness(plaintext) - return key, fit - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(1, max_key_length+1)]) - return max(results, key=lambda k: k[1]) def plot_frequency_histogram(freqs, sort_key=None): x = range(len(freqs.keys()))