X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=ba4a73f7f39ab8d712b9ba9e52d49e269e1999ed;hb=2a497efd4b256df235ff52606d510d987f8ae325;hp=55d99b6773bef3aac5b752ee8e0395856e3c75a8;hpb=5141b041bd618e0339ce04f6abef56665d829b95;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index 55d99b6..ba4a73f 100644 --- a/cipher.py +++ b/cipher.py @@ -1,14 +1,64 @@ import string import collections import norms +import logging +import math +from itertools import zip_longest, repeat +from segment import segment +from multiprocessing import Pool + +# To time a run: +# +# import timeit +# c5a = open('2012/5a.ciphertext', 'r').read() +# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1) +# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1 + +logger = logging.getLogger(__name__) +logger.addHandler(logging.FileHandler('cipher.log')) +logger.setLevel(logging.WARNING) +#logger.setLevel(logging.INFO) +#logger.setLevel(logging.DEBUG) english_counts = collections.defaultdict(int) with open('count_1l.txt', 'r') as f: for line in f: (letter, count) = line.split("\t") english_counts[letter] = int(count) -normalised_english_counts = norms.normalise(english_counts) +normalised_english_counts = norms.normalise(english_counts) + +english_bigram_counts = collections.defaultdict(int) +with open('count_2l.txt', 'r') as f: + for line in f: + (bigram, count) = line.split("\t") + english_bigram_counts[bigram] = int(count) +normalised_english_bigram_counts = norms.normalise(english_bigram_counts) + +english_trigram_counts = collections.defaultdict(int) +with open('count_3l.txt', 'r') as f: + for line in f: + (trigram, count) = line.split("\t") + english_trigram_counts[trigram] = int(count) +normalised_english_trigram_counts = norms.normalise(english_trigram_counts) + + +with open('words.txt', 'r') as f: + keywords = [line.rstrip() for line in f] +modular_division_table = [[0]*26 for x in range(26)] +for a in range(26): + for b in range(26): + c = (a * b) % 26 + modular_division_table[b][c] = a + +def letters(text): + """Remove all non-alphabetic characters from a text + >>> letters('The Quick') + 'TheQuick' + >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG') + 'TheQuickBROWNfoxjumpedoverthelazyDOG' + """ + return ''.join([c for c in text if c in string.ascii_letters]) def sanitise(text): """Remove all non-alphabetic characters and convert the text to lowercase @@ -18,28 +68,121 @@ def sanitise(text): >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG') 'thequickbrownfoxjumpedoverthelazydog' """ - sanitised = [c.lower() for c in text if c in string.ascii_letters] - return ''.join(sanitised) + # sanitised = [c.lower() for c in text if c in string.ascii_letters] + # return ''.join(sanitised) + return letters(text).lower() def ngrams(text, n): - return [tuple(text[i:i+n]) for i in range(len(text)-n+1)] + """Returns all n-grams of a text + + >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE + ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', + 'nf', 'fo', 'ox'] + >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE + ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', + 'rown', 'ownf', 'wnfo', 'nfox'] + """ + return [text[i:i+n] for i in range(len(text)-n+1)] + +def every_nth(text, n, fillvalue=''): + """Returns n strings, each of which consists of every nth character, + starting with the 0th, 1st, 2nd, ... (n-1)th character + + >>> every_nth(string.ascii_lowercase, 5) + ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] + >>> every_nth(string.ascii_lowercase, 1) + ['abcdefghijklmnopqrstuvwxyz'] + >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] + >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') + ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] + """ + split_text = [text[i:i+n] for i in range(0, len(text), n)] + return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)] -def letter_frequencies(text): +def combine_every_nth(split_text): + """Reforms a text split into every_nth strings + + >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) + 'abcdefghijklmnopqrstuvwxyz' + """ + return ''.join([''.join(l) + for l in zip_longest(*split_text, fillvalue='')]) + +def transpose(items, transposition): + """Moves items around according to the given transposition + + >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3]) + ['a', 'b', 'c', 'd'] + >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0]) + ['d', 'b', 'c', 'a'] + >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0]) + [13, 12, 14, 11, 15, 10] + """ + transposed = list(repeat('', len(transposition))) + for p, t in enumerate(transposition): + transposed[p] = items[t] + return transposed + +def untranspose(items, transposition): + """Undoes a transpose + + >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) + ['a', 'b', 'c', 'd'] + >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) + ['a', 'b', 'c', 'd'] + >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0]) + [10, 11, 12, 13, 14, 15] + """ + transposed = list(repeat('', len(transposition))) + for p, t in enumerate(transposition): + transposed[t] = items[p] + return transposed + + +def frequencies(text): """Count the number of occurrences of each character in text - >>> sorted(letter_frequencies('abcdefabc').items()) + >>> sorted(frequencies('abcdefabc').items()) [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)] - >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items()) - [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items()) - [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items()) - [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] - """ - counts = collections.defaultdict(int) - for c in text: - counts[c] += 1 - return counts + >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \ + 'dog').items()) # doctest: +NORMALIZE_WHITESPACE + [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), + ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), + ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), + ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \ + '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE + [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), + ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), + ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), + ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), + ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \ + 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE + [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), + ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), + ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), + ('w', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> frequencies('abcdefabcdef')['x'] + 0 + """ + #counts = collections.defaultdict(int) + #for c in text: + # counts[c] += 1 + #return counts + return collections.Counter(c for c in text) +letter_frequencies = frequencies + +def deduplicate(text): + return list(collections.OrderedDict.fromkeys(text)) + + def caesar_encipher_letter(letter, shift): """Encipher a letter, given a shift amount @@ -66,7 +209,8 @@ def caesar_encipher_letter(letter, shift): alphabet_start = ord('A') else: alphabet_start = ord('a') - return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start) + return chr(((ord(letter) - alphabet_start + shift) % 26) + + alphabet_start) else: return letter @@ -107,30 +251,519 @@ def caesar_decipher(message, shift): """ return caesar_encipher(message, -shift) -def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise): +def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True): + """Encipher a letter, given a multiplier and adder + + >>> ''.join([affine_encipher_letter(l, 3, 5, True) \ + for l in string.ascii_uppercase]) + 'HKNQTWZCFILORUXADGJMPSVYBE' + >>> ''.join([affine_encipher_letter(l, 3, 5, False) \ + for l in string.ascii_uppercase]) + 'FILORUXADGJMPSVYBEHKNQTWZC' + """ + if letter in string.ascii_letters: + if letter in string.ascii_uppercase: + alphabet_start = ord('A') + else: + alphabet_start = ord('a') + letter_number = ord(letter) - alphabet_start + if one_based: letter_number += 1 + cipher_number = (letter_number * multiplier + adder) % 26 + if one_based: cipher_number -= 1 + return chr(cipher_number % 26 + alphabet_start) + else: + return letter + +def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): + """Encipher a letter, given a multiplier and adder + + >>> ''.join([affine_decipher_letter(l, 3, 5, True) \ + for l in 'HKNQTWZCFILORUXADGJMPSVYBE']) + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + >>> ''.join([affine_decipher_letter(l, 3, 5, False) \ + for l in 'FILORUXADGJMPSVYBEHKNQTWZC']) + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + """ + if letter in string.ascii_letters: + if letter in string.ascii_uppercase: + alphabet_start = ord('A') + else: + alphabet_start = ord('a') + cipher_number = ord(letter) - alphabet_start + if one_based: cipher_number += 1 + plaintext_number = ( modular_division_table[multiplier] + [(cipher_number - adder) % 26] ) + if one_based: plaintext_number -= 1 + return chr(plaintext_number % 26 + alphabet_start) + else: + return letter + +def affine_encipher(message, multiplier=1, adder=0, one_based=True): + """Encipher a message + + >>> affine_encipher('hours passed during which jerico tried every ' \ + 'trick he could think of', 15, 22, True) + 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' + """ + enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) + for l in message] + return ''.join(enciphered) + +def affine_decipher(message, multiplier=1, adder=0, one_based=True): + """Decipher a message + + >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ + 'jfaoe ls omytd jlaxe mh', 15, 22, True) + 'hours passed during which jerico tried every trick he could think of' + """ + enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) + for l in message] + return ''.join(enciphered) + + +def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0): + """Find the cipher alphabet given a keyword. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_cipher_alphabet_of('bayes') + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', 0) + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', 1) + 'bayestuvwxzcdfghijklmnopqr' + >>> keyword_cipher_alphabet_of('bayes', 2) + 'bayeszcdfghijklmnopqrtuvwx' + """ + if wrap_alphabet == 0: + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + + string.ascii_lowercase)) + else: + if wrap_alphabet == 1: + last_keyword_letter = deduplicate(sanitise(keyword))[-1] + else: + last_keyword_letter = sorted(sanitise(keyword))[-1] + last_keyword_position = string.ascii_lowercase.find( + last_keyword_letter) + 1 + cipher_alphabet = ''.join( + deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + + string.ascii_lowercase)) + return cipher_alphabet + + +def keyword_encipher(message, keyword, wrap_alphabet=0): + """Enciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_encipher('test message', 'bayes') + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', 0) + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', 1) + 'lskl dskkbus' + >>> keyword_encipher('test message', 'bayes', 2) + 'qspq jsppbcs' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) + return message.lower().translate(cipher_translation) + +def keyword_decipher(message, keyword, wrap_alphabet=0): + """Deciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_decipher('rsqr ksqqbds', 'bayes') + 'test message' + >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0) + 'test message' + >>> keyword_decipher('lskl dskkbus', 'bayes', 1) + 'test message' + >>> keyword_decipher('qspq jsppbcs', 'bayes', 2) + 'test message' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) + return message.lower().translate(cipher_translation) + +def scytale_encipher(message, rows): + """Enciphers using the scytale transposition cipher. + Message is padded with spaces to allow all rows to be the same length. + + >>> scytale_encipher('thequickbrownfox', 3) + 'tcnhkfeboqrxuo iw ' + >>> scytale_encipher('thequickbrownfox', 4) + 'tubnhirfecooqkwx' + >>> scytale_encipher('thequickbrownfox', 5) + 'tubn hirf ecoo qkwx ' + >>> scytale_encipher('thequickbrownfox', 6) + 'tqcrnxhukof eibwo ' + >>> scytale_encipher('thequickbrownfox', 7) + 'tqcrnx hukof eibwo ' + """ + if len(message) % rows != 0: + message += ' '*(rows - len(message) % rows) + row_length = round(len(message) / rows) + slices = [message[i:i+row_length] + for i in range(0, len(message), row_length)] + return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')]) + +def scytale_decipher(message, rows): + """Deciphers using the scytale transposition cipher. + Assumes the message is padded so that all rows are the same length. + + >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) + 'thequickbrownfox ' + >>> scytale_decipher('tubnhirfecooqkwx', 4) + 'thequickbrownfox' + >>> scytale_decipher('tubn hirf ecoo qkwx ', 5) + 'thequickbrownfox ' + >>> scytale_decipher('tqcrnxhukof eibwo ', 6) + 'thequickbrownfox ' + >>> scytale_decipher('tqcrnx hukof eibwo ', 7) + 'thequickbrownfox ' + """ + cols = round(len(message) / rows) + columns = [message[i:i+rows] for i in range(0, cols * rows, rows)] + return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')]) + + +def transpositions_of(keyword): + """Finds the transpostions given by a keyword. For instance, the keyword + 'clever' rearranges to 'celrv', so the first column (0) stays first, the + second column (1) moves to third, the third column (2) moves to second, + and so on. + + >>> transpositions_of('clever') + [0, 2, 1, 4, 3] + """ + key = deduplicate(keyword) + transpositions = [key.index(l) for l in sorted(key)] + return transpositions + +def column_transposition_encipher(message, keyword, fillvalue=' '): + """Enciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + + >>> column_transposition_encipher('hellothere', 'clever') + 'hleolteher' + >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') + 'hleolthre!e!' + """ + return column_transposition_worker(message, keyword, encipher=True, + fillvalue=fillvalue) + +def column_transposition_decipher(message, keyword, fillvalue=' '): + """Deciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + + >>> column_transposition_decipher('hleolteher', 'clever') + 'hellothere' + >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?') + 'hellothere!!' + """ + return column_transposition_worker(message, keyword, encipher=False, + fillvalue=fillvalue) + +def column_transposition_worker(message, keyword, + encipher=True, fillvalue=' '): + """Does the actual work of the column transposition cipher. + Message is padded with spaces to allow all rows to be the same length. + + >>> column_transposition_worker('hellothere', 'clever') + 'hleolteher' + >>> column_transposition_worker('hellothere', 'clever', encipher=True) + 'hleolteher' + >>> column_transposition_worker('hleolteher', 'clever', encipher=False) + 'hellothere' + """ + transpositions = transpositions_of(keyword) + columns = every_nth(message, len(transpositions), fillvalue=fillvalue) + if encipher: + transposed_columns = transpose(columns, transpositions) + else: + transposed_columns = untranspose(columns, transpositions) + return combine_every_nth(transposed_columns) + + + +def caesar_break(message, + metric=norms.euclidean_distance, + target_counts=normalised_english_counts, + message_frequency_scaling=norms.normalise): """Breaks a Caesar cipher using frequency analysis - >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') - (4, 0.3186395289018361) - >>> caesar_break('jhzhuhfrqilqhgwrdevwudfwuhdvrqlqjwkhqkdylqjvxemhfwhgwrfulwlflvpwkhhasodqdwlrqrisrzhuwkdwmxulglfdovfl') - (3, 0.3290204286173084) - >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') - (19, 0.4215290123583277) - >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') - (13, 0.31602920807545154) + >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \ + 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS + (4, 0.31863952890183...) + >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \ + 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS + (19, 0.42152901235832...) + >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \ + 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS + (13, 0.316029208075451...) """ sanitised_message = sanitise(message) best_shift = 0 best_fit = float("inf") for shift in range(26): plaintext = caesar_decipher(sanitised_message, shift) - frequencies = message_frequency_scaling(letter_frequencies(plaintext)) - fit = metric(target_frequencies, frequencies) + counts = message_frequency_scaling(letter_frequencies(plaintext)) + fit = metric(target_counts, counts) + logger.debug('Caesar break attempt using key {0} gives fit of {1} ' + 'and decrypt starting: {2}'.format(shift, fit, plaintext[:50])) if fit < best_fit: best_fit = fit best_shift = shift + logger.info('Caesar break best fit: key {0} gives fit of {1} and ' + 'decrypt starting: {2}'.format(best_shift, best_fit, + caesar_decipher(sanitised_message, best_shift)[:50])) return best_shift, best_fit +def affine_break(message, + metric=norms.euclidean_distance, + target_counts=normalised_english_counts, + message_frequency_scaling=norms.normalise): + """Breaks an affine cipher using frequency analysis + + >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \ + 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \ + 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \ + 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \ + 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \ + 'clm ckuxj.') # doctest: +ELLIPSIS + ((15, 22, True), 0.23570361818655...) + """ + sanitised_message = sanitise(message) + best_multiplier = 0 + best_adder = 0 + best_one_based = True + best_fit = float("inf") + for one_based in [True, False]: + for multiplier in range(1, 26, 2): + for adder in range(26): + plaintext = affine_decipher(sanitised_message, + multiplier, adder, one_based) + counts = message_frequency_scaling(letter_frequencies(plaintext)) + fit = metric(target_counts, counts) + logger.debug('Affine break attempt using key {0}x+{1} ({2}) ' + 'gives fit of {3} and decrypt starting: {4}'. + format(multiplier, adder, one_based, fit, + plaintext[:50])) + if fit < best_fit: + best_fit = fit + best_multiplier = multiplier + best_adder = adder + best_one_based = one_based + logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} ' + 'and decrypt starting: {4}'.format( + best_multiplier, best_adder, best_one_based, best_fit, + affine_decipher(sanitised_message, best_multiplier, + best_adder, best_one_based)[:50])) + return (best_multiplier, best_adder, best_one_based), best_fit + +def keyword_break(message, + wordlist=keywords, + metric=norms.euclidean_distance, + target_counts=normalised_english_counts, + message_frequency_scaling=norms.normalise): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis + + >>> keyword_break(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', 1), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', 1), 0.41643991598441...) + """ + best_keyword = '' + best_wrap_alphabet = True + best_fit = float("inf") + for wrap_alphabet in range(3): + for keyword in wordlist: + plaintext = keyword_decipher(message, keyword, wrap_alphabet) + counts = message_frequency_scaling(letter_frequencies(plaintext)) + fit = metric(target_counts, counts) + logger.debug('Keyword break attempt using key {0} (wrap={1}) ' + 'gives fit of {2} and decrypt starting: {3}'.format( + keyword, wrap_alphabet, fit, + sanitise(plaintext)[:50])) + if fit < best_fit: + best_fit = fit + best_keyword = keyword + best_wrap_alphabet = wrap_alphabet + logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(best_keyword, + best_wrap_alphabet, best_fit, sanitise( + keyword_decipher(message, best_keyword, + best_wrap_alphabet))[:50])) + return (best_keyword, best_wrap_alphabet), best_fit + +def keyword_break_mp(message, + wordlist=keywords, + metric=norms.euclidean_distance, + target_counts=normalised_english_counts, + message_frequency_scaling=norms.normalise, + chunksize=500): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis + + >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', 1), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', 1), 0.41643991598441...) + """ + with Pool() as pool: + helper_args = [(message, word, wrap, metric, target_counts, + message_frequency_scaling) + for word in wordlist for wrap in range(3)] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(keyword_break_one, helper_args, chunksize) + return min(breaks, key=lambda k: k[1]) + +def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts, + message_frequency_scaling): + plaintext = keyword_decipher(message, keyword, wrap_alphabet) + counts = message_frequency_scaling(letter_frequencies(plaintext)) + fit = metric(target_counts, counts) + logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(keyword, + wrap_alphabet, fit, sanitise(plaintext)[:50])) + return (keyword, wrap_alphabet), fit + +def scytale_break(message, + metric=norms.euclidean_distance, + target_counts=normalised_english_bigram_counts, + message_frequency_scaling=norms.normalise): + """Breaks a Scytale cipher + + >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \ + 'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \ + 'aek') # doctest: +ELLIPSIS + (6, 0.83453041115025...) + """ + best_key = 0 + best_fit = float("inf") + for key in range(1, 20): + if len(message) % key == 0: + plaintext = scytale_decipher(message, key) + counts = message_frequency_scaling(frequencies( + ngrams(sanitise(plaintext), 2))) + fit = metric(target_counts, counts) + logger.debug('Scytale break attempt using key {0} gives fit of ' + '{1} and decrypt starting: {2}'.format(key, + fit, sanitise(plaintext)[:50])) + if fit < best_fit: + best_fit = fit + best_key = key + logger.info('Scytale break best fit with key {0} gives fit of {1} and ' + 'decrypt starting: {2}'.format(best_key, best_fit, + sanitise(scytale_decipher(message, best_key))[:50])) + return best_key, best_fit + +def column_transposition_break(message, + wordlist=keywords, + metric=norms.euclidean_distance, + target_counts=normalised_english_bigram_counts, + message_frequency_scaling=norms.normalise): + """Breaks a column transposition cipher using a dictionary and + n-gram frequency analysis + + >>> column_transposition_break(column_transposition_encipher(sanitise( \ + "Turing's homosexuality resulted in a criminal prosecution in 1952, \ + when homosexual acts were still illegal in the United Kingdom. "), \ + 'encipher'), \ + wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS + ('encipher', 0.898128626285...) + >>> column_transposition_break(column_transposition_encipher(sanitise( \ + "Turing's homosexuality resulted in a criminal prosecution in 1952, " \ + "when homosexual acts were still illegal in the United Kingdom."), \ + 'encipher'), \ + wordlist=['encipher', 'keyword', 'fourteen'], \ + target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS + ('encipher', 1.1958792913127...) + """ + best_keyword = '' + best_fit = float("inf") + ngram_length = len(next(iter(target_counts.keys()))) + for keyword in wordlist: + if len(message) % len(deduplicate(keyword)) == 0: + plaintext = column_transposition_decipher(message, keyword) + counts = message_frequency_scaling(frequencies( + ngrams(sanitise(plaintext), ngram_length))) + fit = metric(target_counts, counts) + logger.debug('Column transposition break attempt using key {0} ' + 'gives fit of {1} and decrypt starting: {2}'.format( + keyword, fit, + sanitise(plaintext)[:50])) + if fit < best_fit: + best_fit = fit + best_keyword = keyword + logger.info('Column transposition break best fit with key {0} gives fit ' + 'of {1} and decrypt starting: {2}'.format(best_keyword, + best_fit, sanitise( + column_transposition_decipher(message, + best_keyword))[:50])) + return best_keyword, best_fit + + +def column_transposition_break_mp(message, + wordlist=keywords, + metric=norms.euclidean_distance, + target_counts=normalised_english_bigram_counts, + message_frequency_scaling=norms.normalise, + chunksize=500): + """Breaks a column transposition cipher using a dictionary and + n-gram frequency analysis + + >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ + "Turing's homosexuality resulted in a criminal prosecution in 1952, \ + when homosexual acts were still illegal in the United Kingdom. "), \ + 'encipher'), \ + wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS + ('encipher', 0.898128626285...) + >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ + "Turing's homosexuality resulted in a criminal prosecution in 1952, " \ + "when homosexual acts were still illegal in the United Kingdom."), \ + 'encipher'), \ + wordlist=['encipher', 'keyword', 'fourteen'], \ + target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS + ('encipher', 1.1958792913127...) + """ + ngram_length = len(next(iter(target_counts.keys()))) + with Pool() as pool: + helper_args = [(message, word, metric, target_counts, ngram_length, + message_frequency_scaling) + for word in wordlist] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize) + return min(breaks, key=lambda k: k[1]) + +def column_transposition_break_worker(message, keyword, metric, target_counts, + ngram_length, message_frequency_scaling): + plaintext = column_transposition_decipher(message, keyword) + counts = message_frequency_scaling(frequencies( + ngrams(sanitise(plaintext), ngram_length))) + fit = metric(target_counts, counts) + logger.debug('Column transposition break attempt using key {0} ' + 'gives fit of {1} and decrypt starting: {2}'.format( + keyword, fit, + sanitise(plaintext)[:50])) + return keyword, fit + + if __name__ == "__main__": import doctest