From: Neil Smith Date: Tue, 6 Mar 2018 14:16:22 +0000 (+0000) Subject: Split each cipher into its own file X-Git-Url: https://git.njae.me.uk/?p=cipher-tools.git;a=commitdiff_plain;h=3d8f7067b9c3a48ef140d7cff834d18ee91f58b3 Split each cipher into its own file --- diff --git a/amsco.py b/amsco.py new file mode 100644 index 0000000..4eeeb59 --- /dev/null +++ b/amsco.py @@ -0,0 +1,204 @@ +from enum import Enum +import multiprocessing +import itertools + +from utilities import * +from language_models import * +from column_transposition import transpositions + +from logger import logger + +# Where each piece of text ends up in the AMSCO transpositon cipher. +# 'index' shows where the slice appears in the plaintext, with the slice +# from 'start' to 'end' +AmscoSlice = collections.namedtuple('AmscoSlice', ['index', 'start', 'end']) + +class AmscoFillStyle(Enum): + continuous = 1 + same_each_row = 2 + reverse_each_row = 3 + +def amsco_transposition_positions(message, keyword, + fillpattern=(1, 2), + fillstyle=AmscoFillStyle.continuous, + fillcolumnwise=False, + emptycolumnwise=True): + """Creates the grid for the AMSCO transposition cipher. Each element in the + grid shows the index of that slice and the start and end positions of the + plaintext that go to make it up. + + >>> amsco_transposition_positions(string.ascii_lowercase, 'freddy', \ + fillpattern=(1, 2)) # doctest: +NORMALIZE_WHITESPACE + [[AmscoSlice(index=3, start=4, end=6), + AmscoSlice(index=2, start=3, end=4), + AmscoSlice(index=0, start=0, end=1), + AmscoSlice(index=1, start=1, end=3), + AmscoSlice(index=4, start=6, end=7)], + [AmscoSlice(index=8, start=12, end=13), + AmscoSlice(index=7, start=10, end=12), + AmscoSlice(index=5, start=7, end=9), + AmscoSlice(index=6, start=9, end=10), + AmscoSlice(index=9, start=13, end=15)], + [AmscoSlice(index=13, start=19, end=21), + AmscoSlice(index=12, start=18, end=19), + AmscoSlice(index=10, start=15, end=16), + AmscoSlice(index=11, start=16, end=18), + AmscoSlice(index=14, start=21, end=22)], + [AmscoSlice(index=18, start=27, end=28), + AmscoSlice(index=17, start=25, end=27), + AmscoSlice(index=15, start=22, end=24), + AmscoSlice(index=16, start=24, end=25), + AmscoSlice(index=19, start=28, end=30)]] + """ + transpositions = transpositions_of(keyword) + fill_iterator = itertools.cycle(fillpattern) + indices = itertools.count() + message_length = len(message) + + current_position = 0 + grid = [] + current_fillpattern = fillpattern + while current_position < message_length: + row = [] + if fillstyle == AmscoFillStyle.same_each_row: + fill_iterator = itertools.cycle(fillpattern) + if fillstyle == AmscoFillStyle.reverse_each_row: + fill_iterator = itertools.cycle(current_fillpattern) + for _ in range(len(transpositions)): + index = next(indices) + gap = next(fill_iterator) + row += [AmscoSlice(index, current_position, current_position + gap)] + current_position += gap + grid += [row] + if fillstyle == AmscoFillStyle.reverse_each_row: + current_fillpattern = list(reversed(current_fillpattern)) + return [transpose(r, transpositions) for r in grid] + +def amsco_transposition_encipher(message, keyword, + fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row): + """AMSCO transposition encipher. + + >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(1, 2)) + 'hoteelhler' + >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(2, 1)) + 'hetelhelor' + >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(1, 2)) + 'hotelerelh' + >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(2, 1)) + 'hetelorlhe' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'encode') + 'etecstthhomoerereenisxip' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2)) + 'hetcsoeisterereipexthomn' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous) + 'hecsoisttererteipexhomen' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(2, 1)) + 'heecisoosttrrtepeixhemen' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2)) + 'hxtomephescieretoeisnter' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous) + 'hxomeiphscerettoisenteer' + """ + grid = amsco_transposition_positions(message, keyword, + fillpattern=fillpattern, fillstyle=fillstyle) + ct_as_grid = [[message[s.start:s.end] for s in r] for r in grid] + return combine_every_nth(ct_as_grid) + + +def amsco_transposition_decipher(message, keyword, + fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row): + """AMSCO transposition decipher + + >>> amsco_transposition_decipher('hoteelhler', 'abc', fillpattern=(1, 2)) + 'hellothere' + >>> amsco_transposition_decipher('hetelhelor', 'abc', fillpattern=(2, 1)) + 'hellothere' + >>> amsco_transposition_decipher('hotelerelh', 'acb', fillpattern=(1, 2)) + 'hellothere' + >>> amsco_transposition_decipher('hetelorlhe', 'acb', fillpattern=(2, 1)) + 'hellothere' + >>> amsco_transposition_decipher('etecstthhomoerereenisxip', 'encode') + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hetcsoeisterereipexthomn', 'cipher', fillpattern=(1, 2)) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hecsoisttererteipexhomen', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('heecisoosttrrtepeixhemen', 'cipher', fillpattern=(2, 1)) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hxtomephescieretoeisnter', 'cipher', fillpattern=(1, 3, 2)) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hxomeiphscerettoisenteer', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous) + 'hereissometexttoencipher' + """ + + grid = amsco_transposition_positions(message, keyword, + fillpattern=fillpattern, fillstyle=fillstyle) + transposed_sections = [s for c in [l for l in zip(*grid)] for s in c] + plaintext_list = [''] * len(transposed_sections) + current_pos = 0 + for slice in transposed_sections: + plaintext_list[slice.index] = message[current_pos:current_pos-slice.start+slice.end][:len(message[slice.start:slice.end])] + current_pos += len(message[slice.start:slice.end]) + return cat(plaintext_list) + + +def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)], + fillstyles = [AmscoFillStyle.continuous, + AmscoFillStyle.same_each_row, + AmscoFillStyle.reverse_each_row], + fitness=Pbigrams, + chunksize=500): + """Breaks an AMSCO transposition cipher using a dictionary and + n-gram frequency analysis + + >>> amsco_break(amsco_transposition_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 'encipher'), \ + translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ + (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ + (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \ + patterns=[(1, 2)]) # doctest: +ELLIPSIS + (((2, 0, 5, 3, 1, 4, 6), (1, 2), ), -709.4646722...) + >>> amsco_break(amsco_transposition_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 'encipher', fillpattern=(2, 1)), \ + translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ + (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ + (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \ + patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS + (((2, 0, 5, 3, 1, 4, 6), (2, 1), ), -997.0129085...) + """ + with multiprocessing.Pool() as pool: + helper_args = [(message, trans, pattern, fillstyle, fitness) + for trans in translist + for pattern in patterns + for fillstyle in fillstyles] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(amsco_break_worker, helper_args, chunksize) + return max(breaks, key=lambda k: k[1]) + +def amsco_break_worker(message, transposition, + pattern, fillstyle, fitness): + plaintext = amsco_transposition_decipher(message, transposition, + fillpattern=pattern, fillstyle=fillstyle) + fit = fitness(sanitise(plaintext)) + logger.debug('AMSCO transposition break attempt using key {0} and pattern' + '{1} ({2}) gives fit of {3} and decrypt starting: ' + '{4}'.format( + transposition, pattern, fillstyle, fit, + sanitise(plaintext)[:50])) + return (transposition, pattern, fillstyle), fit + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/autokey.py b/autokey.py new file mode 100644 index 0000000..b84f0a9 --- /dev/null +++ b/autokey.py @@ -0,0 +1,118 @@ +from utilities import * +from language_models import * +import multiprocessing + +from logger import logger + + +def autokey_encipher(message, keyword): + """Encipher with the autokey cipher + + >>> autokey_encipher('meetatthefountain', 'kilt') + 'wmpmmxxaeyhbryoca' + """ + shifts = [pos(l) for l in keyword + message] + pairs = zip(message, shifts) + return cat([caesar_encipher_letter(l, k) for l, k in pairs]) + +def autokey_decipher(ciphertext, keyword): + """Decipher with the autokey cipher + + >>> autokey_decipher('wmpmmxxaeyhbryoca', 'kilt') + 'meetatthefountain' + """ + plaintext = [] + keys = list(keyword) + for c in ciphertext: + plaintext_letter = caesar_decipher_letter(c, pos(keys[0])) + plaintext += [plaintext_letter] + keys = keys[1:] + [plaintext_letter] + return cat(plaintext) + + + +def autokey_sa_break( message + , min_keylength=2 + , max_keylength=20 + , workers=10 + , initial_temperature=200 + , max_iterations=20000 + , fitness=Pletters + , chunksize=1 + , result_count=1 + ): + """Break an autokey cipher by simulated annealing + """ + worker_args = [] + ciphertext = sanitise(message) + for keylength in range(min_keylength, max_keylength+1): + for i in range(workers): + key = cat(random.choice(string.ascii_lowercase) for _ in range(keylength)) + worker_args.append((ciphertext, key, + initial_temperature, max_iterations, fitness)) + + with multiprocessing.Pool() as pool: + breaks = pool.starmap(autokey_sa_break_worker, + worker_args, chunksize) + if result_count <= 1: + return max(breaks, key=lambda k: k[1]) + else: + return sorted(set(breaks), key=lambda k: k[1], reverse=True)[:result_count] + + +def autokey_sa_break_worker(message, key, + t0, max_iterations, fitness): + + temperature = t0 + + dt = t0 / (0.9 * max_iterations) + + plaintext = autokey_decipher(message, key) + current_fitness = fitness(plaintext) + current_key = key + + best_key = current_key + best_fitness = current_fitness + best_plaintext = plaintext + + # print('starting for', max_iterations) + for i in range(max_iterations): + swap_pos = random.randrange(len(current_key)) + swap_char = random.choice(string.ascii_lowercase) + + new_key = current_key[:swap_pos] + swap_char + current_key[swap_pos+1:] + + plaintext = autokey_decipher(message, new_key) + new_fitness = fitness(plaintext) + try: + sa_chance = math.exp((new_fitness - current_fitness) / temperature) + except (OverflowError, ZeroDivisionError): + # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature)) + sa_chance = 0 + if (new_fitness > current_fitness or random.random() < sa_chance): + # logger.debug('Simulated annealing: iteration {}, temperature {}, ' + # 'current alphabet {}, current_fitness {}, ' + # 'best_plaintext {}'.format(i, temperature, current_alphabet, + # current_fitness, best_plaintext[:50])) + + # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance)) +# print(new_fitness, new_key, plaintext[:100]) + current_fitness = new_fitness + current_key = new_key + + if current_fitness > best_fitness: + best_key = current_key + best_fitness = current_fitness + best_plaintext = plaintext + if i % 500 == 0: + logger.debug('Simulated annealing: iteration {}, temperature {}, ' + 'current key {}, current_fitness {}, ' + 'best_plaintext {}'.format(i, temperature, current_key, + current_fitness, plaintext[:50])) + temperature = max(temperature - dt, 0.001) + +# print(best_key, best_fitness, best_plaintext[:70]) + return best_key, best_fitness # current_alphabet, current_fitness + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/bifid.py b/bifid.py new file mode 100644 index 0000000..683297b --- /dev/null +++ b/bifid.py @@ -0,0 +1,125 @@ +import multiprocessing +from utilities import * +from language_models import * +from keyword_cipher import KeywordWrapAlphabet + +from logger import logger + +def bifid_grid(keyword, wrap_alphabet, letter_mapping): + """Create the grids for a Bifid cipher + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + if letter_mapping is None: + letter_mapping = {'j': 'i'} + translation = ''.maketrans(letter_mapping) + cipher_alphabet = cat(collections.OrderedDict.fromkeys(cipher_alphabet.translate(translation))) + f_grid = {k: ((i // 5) + 1, (i % 5) + 1) + for i, k in enumerate(cipher_alphabet)} + r_grid = {((i // 5) + 1, (i % 5) + 1): k + for i, k in enumerate(cipher_alphabet)} + return translation, f_grid, r_grid + +def bifid_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a, + letter_mapping=None, period=None, fillvalue=None): + """Bifid cipher + + >>> bifid_encipher("indiajelly", 'iguana') + 'ibidonhprm' + >>> bifid_encipher("indiacurry", 'iguana', period=4) + 'ibnhgaqltm' + >>> bifid_encipher("indiacurry", 'iguana', period=4, fillvalue='x') + 'ibnhgaqltzml' + """ + translation, f_grid, r_grid = bifid_grid(keyword, wrap_alphabet, letter_mapping) + + t_message = message.translate(translation) + pairs0 = [f_grid[l] for l in sanitise(t_message)] + if period: + chunked_pairs = [pairs0[i:i+period] for i in range(0, len(pairs0), period)] + if len(chunked_pairs[-1]) < period and fillvalue: + chunked_pairs[-1] += [f_grid[fillvalue]] * (period - len(chunked_pairs[-1])) + else: + chunked_pairs = [pairs0] + + pairs1 = [] + for c in chunked_pairs: + items = sum(list(list(i) for i in zip(*c)), []) + p = [(items[i], items[i+1]) for i in range(0, len(items), 2)] + pairs1 += p + + return cat(r_grid[p] for p in pairs1) + + +def bifid_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a, + letter_mapping=None, period=None, fillvalue=None): + """Decipher with bifid cipher + + >>> bifid_decipher('ibidonhprm', 'iguana') + 'indiaielly' + >>> bifid_decipher("ibnhgaqltm", 'iguana', period=4) + 'indiacurry' + >>> bifid_decipher("ibnhgaqltzml", 'iguana', period=4) + 'indiacurryxx' + """ + translation, f_grid, r_grid = bifid_grid(keyword, wrap_alphabet, letter_mapping) + + t_message = message.translate(translation) + pairs0 = [f_grid[l] for l in sanitise(t_message)] + if period: + chunked_pairs = [pairs0[i:i+period] for i in range(0, len(pairs0), period)] + if len(chunked_pairs[-1]) < period and fillvalue: + chunked_pairs[-1] += [f_grid[fillvalue]] * (period - len(chunked_pairs[-1])) + else: + chunked_pairs = [pairs0] + + pairs1 = [] + for c in chunked_pairs: + items = [j for i in c for j in i] + gap = len(c) + p = [(items[i], items[i+gap]) for i in range(gap)] + pairs1 += p + + return cat(r_grid[p] for p in pairs1) + + + + +def bifid_break_mp(message, wordlist=keywords, fitness=Pletters, max_period=10, + number_of_solutions=1, chunksize=500): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis + + >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', , 0), -52.834575011...) + >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo'], \ + number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + [(('elephant', , 0), -52.834575011...), + (('elephant', , 0), -52.834575011...)] + """ + with multiprocessing.Pool() as pool: + helper_args = [(message, word, wrap, period, fitness) + for word in wordlist + for wrap in KeywordWrapAlphabet + for period in range(max_period+1)] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(bifid_break_worker, helper_args, chunksize) + if number_of_solutions == 1: + return max(breaks, key=lambda k: k[1]) + else: + return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] + +def bifid_break_worker(message, keyword, wrap_alphabet, period, fitness): + plaintext = bifid_decipher(message, keyword, wrap_alphabet, period=period) + fit = fitness(plaintext) + logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(keyword, + wrap_alphabet, fit, sanitise(plaintext)[:50])) + return (keyword, wrap_alphabet, period), fit + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/cadenus.py b/cadenus.py new file mode 100644 index 0000000..0d3b415 --- /dev/null +++ b/cadenus.py @@ -0,0 +1,95 @@ +from utilities import * +from language_models import * +from itertools import chain + +from logger import logger + +def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False): + """Makes the key column for a Cadenus cipher (the column down between the + rows of letters) + + >>> make_cadenus_keycolumn()['a'] + 0 + >>> make_cadenus_keycolumn()['b'] + 1 + >>> make_cadenus_keycolumn()['c'] + 2 + >>> make_cadenus_keycolumn()['v'] + 21 + >>> make_cadenus_keycolumn()['w'] + 21 + >>> make_cadenus_keycolumn()['z'] + 24 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['a'] + 1 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['b'] + 0 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['c'] + 24 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['i'] + 18 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['j'] + 18 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['v'] + 6 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['z'] + 2 + """ + index_to_remove = string.ascii_lowercase.find(doubled_letters[0]) + short_alphabet = string.ascii_lowercase[:index_to_remove] + string.ascii_lowercase[index_to_remove+1:] + if reverse: + short_alphabet = cat(reversed(short_alphabet)) + start_pos = short_alphabet.find(start) + rotated_alphabet = short_alphabet[start_pos:] + short_alphabet[:start_pos] + keycolumn = {l: i for i, l in enumerate(rotated_alphabet)} + keycolumn[doubled_letters[0]] = keycolumn[doubled_letters[1]] + return keycolumn + +def cadenus_encipher(message, keyword, keycolumn, fillvalue='a'): + """Encipher with the Cadenus cipher + + >>> cadenus_encipher(sanitise('Whoever has made a voyage up the Hudson ' \ + 'must remember the Kaatskill mountains. ' \ + 'They are a dismembered branch of the great'), \ + 'wink', \ + make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True)) + 'antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned' + >>> cadenus_encipher(sanitise('a severe limitation on the usefulness of ' \ + 'the cadenus is that every message must be ' \ + 'a multiple of twenty-five letters long'), \ + 'easy', \ + make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True)) + 'systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtofarenuseieeieltarlmentieetogevesitfaisltngeeuvowul' + """ + rows = chunks(message, len(message) // 25, fillvalue=fillvalue) + columns = zip(*rows) + rotated_columns = [col[start:] + col[:start] for start, col in zip([keycolumn[l] for l in keyword], columns)] + rotated_rows = zip(*rotated_columns) + transpositions = transpositions_of(keyword) + transposed = [transpose(r, transpositions) for r in rotated_rows] + return cat(chain(*transposed)) + +def cadenus_decipher(message, keyword, keycolumn, fillvalue='a'): + """ + >>> cadenus_decipher('antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaa' \ + 'suvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned', \ + 'wink', \ + make_cadenus_keycolumn(reverse=True)) + 'whoeverhasmadeavoyageupthehudsonmustrememberthekaatskillmountainstheyareadismemberedbranchofthegreat' + >>> cadenus_decipher('systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtof' \ + 'arenuseieeieltarlmentieetogevesitfaisltngeeuvowul', \ + 'easy', \ + make_cadenus_keycolumn(reverse=True)) + 'aseverelimitationontheusefulnessofthecadenusisthateverymessagemustbeamultipleoftwentyfiveletterslong' + """ + rows = chunks(message, len(message) // 25, fillvalue=fillvalue) + transpositions = transpositions_of(keyword) + untransposed_rows = [untranspose(r, transpositions) for r in rows] + columns = zip(*untransposed_rows) + rotated_columns = [col[-start:] + col[:-start] for start, col in zip([keycolumn[l] for l in keyword], columns)] + rotated_rows = zip(*rotated_columns) + # return rotated_columns + return cat(chain(*rotated_rows)) + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/cipher.py b/cipher.py index 2eb89f7..248155e 100644 --- a/cipher.py +++ b/cipher.py @@ -1,18 +1,20 @@ -import string -import collections -import math -from enum import Enum -from itertools import zip_longest, cycle, chain, count -import numpy as np -from numpy import matrix -from numpy import linalg -from language_models import * -import pprint +# import string +# import collections +# import math +# from enum import Enum +# from itertools import zip_longest, cycle, chain, count +# import numpy as np +# from numpy import matrix +# from numpy import linalg +# from language_models import * +# import pprint from utilities import * from segment import * +from text_prettify import * +from plot_frequency_histogram import * from caesar import * from affine import * @@ -20,528 +22,10 @@ from keyword import * from polybius import * from column_transposition import * from railfence import * +from cadenus import * +from hill import * +from amsco import * +from bifid import * +from autokey import * +from pocket_enigma import * - -def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False): - """Makes the key column for a Cadenus cipher (the column down between the - rows of letters) - - >>> make_cadenus_keycolumn()['a'] - 0 - >>> make_cadenus_keycolumn()['b'] - 1 - >>> make_cadenus_keycolumn()['c'] - 2 - >>> make_cadenus_keycolumn()['v'] - 21 - >>> make_cadenus_keycolumn()['w'] - 21 - >>> make_cadenus_keycolumn()['z'] - 24 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['a'] - 1 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['b'] - 0 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['c'] - 24 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['i'] - 18 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['j'] - 18 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['v'] - 6 - >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['z'] - 2 - """ - index_to_remove = string.ascii_lowercase.find(doubled_letters[0]) - short_alphabet = string.ascii_lowercase[:index_to_remove] + string.ascii_lowercase[index_to_remove+1:] - if reverse: - short_alphabet = cat(reversed(short_alphabet)) - start_pos = short_alphabet.find(start) - rotated_alphabet = short_alphabet[start_pos:] + short_alphabet[:start_pos] - keycolumn = {l: i for i, l in enumerate(rotated_alphabet)} - keycolumn[doubled_letters[0]] = keycolumn[doubled_letters[1]] - return keycolumn - -def cadenus_encipher(message, keyword, keycolumn, fillvalue='a'): - """Encipher with the Cadenus cipher - - >>> cadenus_encipher(sanitise('Whoever has made a voyage up the Hudson ' \ - 'must remember the Kaatskill mountains. ' \ - 'They are a dismembered branch of the great'), \ - 'wink', \ - make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True)) - 'antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned' - >>> cadenus_encipher(sanitise('a severe limitation on the usefulness of ' \ - 'the cadenus is that every message must be ' \ - 'a multiple of twenty-five letters long'), \ - 'easy', \ - make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True)) - 'systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtofarenuseieeieltarlmentieetogevesitfaisltngeeuvowul' - """ - rows = chunks(message, len(message) // 25, fillvalue=fillvalue) - columns = zip(*rows) - rotated_columns = [col[start:] + col[:start] for start, col in zip([keycolumn[l] for l in keyword], columns)] - rotated_rows = zip(*rotated_columns) - transpositions = transpositions_of(keyword) - transposed = [transpose(r, transpositions) for r in rotated_rows] - return cat(chain(*transposed)) - -def cadenus_decipher(message, keyword, keycolumn, fillvalue='a'): - """ - >>> cadenus_decipher('antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaa' \ - 'suvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned', \ - 'wink', \ - make_cadenus_keycolumn(reverse=True)) - 'whoeverhasmadeavoyageupthehudsonmustrememberthekaatskillmountainstheyareadismemberedbranchofthegreat' - >>> cadenus_decipher('systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtof' \ - 'arenuseieeieltarlmentieetogevesitfaisltngeeuvowul', \ - 'easy', \ - make_cadenus_keycolumn(reverse=True)) - 'aseverelimitationontheusefulnessofthecadenusisthateverymessagemustbeamultipleoftwentyfiveletterslong' - """ - rows = chunks(message, len(message) // 25, fillvalue=fillvalue) - transpositions = transpositions_of(keyword) - untransposed_rows = [untranspose(r, transpositions) for r in rows] - columns = zip(*untransposed_rows) - rotated_columns = [col[-start:] + col[:-start] for start, col in zip([keycolumn[l] for l in keyword], columns)] - rotated_rows = zip(*rotated_columns) - # return rotated_columns - return cat(chain(*rotated_rows)) - - -def hill_encipher(matrix, message_letters, fillvalue='a'): - """Hill cipher - - >>> hill_encipher(np.matrix([[7,8], [11,11]]), 'hellothere') - 'drjiqzdrvx' - >>> hill_encipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \ - 'hello there') - 'tfjflpznvyac' - """ - n = len(matrix) - sanitised_message = sanitise(message_letters) - if len(sanitised_message) % n != 0: - padding = fillvalue[0] * (n - len(sanitised_message) % n) - else: - padding = '' - message = [pos(c) for c in sanitised_message + padding] - message_chunks = [message[i:i+n] for i in range(0, len(message), n)] - # message_chunks = chunks(message, len(matrix), fillvalue=None) - enciphered_chunks = [((matrix * np.matrix(c).T).T).tolist()[0] - for c in message_chunks] - return cat([unpos(round(l)) - for l in sum(enciphered_chunks, [])]) - -def hill_decipher(matrix, message, fillvalue='a'): - """Hill cipher - - >>> hill_decipher(np.matrix([[7,8], [11,11]]), 'drjiqzdrvx') - 'hellothere' - >>> hill_decipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \ - 'tfjflpznvyac') - 'hellothereaa' - """ - adjoint = linalg.det(matrix)*linalg.inv(matrix) - inverse_determinant = modular_division_table[int(round(linalg.det(matrix))) % 26][1] - inverse_matrix = (inverse_determinant * adjoint) % 26 - return hill_encipher(inverse_matrix, message, fillvalue) - - -# Where each piece of text ends up in the AMSCO transpositon cipher. -# 'index' shows where the slice appears in the plaintext, with the slice -# from 'start' to 'end' -AmscoSlice = collections.namedtuple('AmscoSlice', ['index', 'start', 'end']) - -class AmscoFillStyle(Enum): - continuous = 1 - same_each_row = 2 - reverse_each_row = 3 - -def amsco_transposition_positions(message, keyword, - fillpattern=(1, 2), - fillstyle=AmscoFillStyle.continuous, - fillcolumnwise=False, - emptycolumnwise=True): - """Creates the grid for the AMSCO transposition cipher. Each element in the - grid shows the index of that slice and the start and end positions of the - plaintext that go to make it up. - - >>> amsco_transposition_positions(string.ascii_lowercase, 'freddy', \ - fillpattern=(1, 2)) # doctest: +NORMALIZE_WHITESPACE - [[AmscoSlice(index=3, start=4, end=6), - AmscoSlice(index=2, start=3, end=4), - AmscoSlice(index=0, start=0, end=1), - AmscoSlice(index=1, start=1, end=3), - AmscoSlice(index=4, start=6, end=7)], - [AmscoSlice(index=8, start=12, end=13), - AmscoSlice(index=7, start=10, end=12), - AmscoSlice(index=5, start=7, end=9), - AmscoSlice(index=6, start=9, end=10), - AmscoSlice(index=9, start=13, end=15)], - [AmscoSlice(index=13, start=19, end=21), - AmscoSlice(index=12, start=18, end=19), - AmscoSlice(index=10, start=15, end=16), - AmscoSlice(index=11, start=16, end=18), - AmscoSlice(index=14, start=21, end=22)], - [AmscoSlice(index=18, start=27, end=28), - AmscoSlice(index=17, start=25, end=27), - AmscoSlice(index=15, start=22, end=24), - AmscoSlice(index=16, start=24, end=25), - AmscoSlice(index=19, start=28, end=30)]] - """ - transpositions = transpositions_of(keyword) - fill_iterator = cycle(fillpattern) - indices = count() - message_length = len(message) - - current_position = 0 - grid = [] - current_fillpattern = fillpattern - while current_position < message_length: - row = [] - if fillstyle == AmscoFillStyle.same_each_row: - fill_iterator = cycle(fillpattern) - if fillstyle == AmscoFillStyle.reverse_each_row: - fill_iterator = cycle(current_fillpattern) - for _ in range(len(transpositions)): - index = next(indices) - gap = next(fill_iterator) - row += [AmscoSlice(index, current_position, current_position + gap)] - current_position += gap - grid += [row] - if fillstyle == AmscoFillStyle.reverse_each_row: - current_fillpattern = list(reversed(current_fillpattern)) - return [transpose(r, transpositions) for r in grid] - -def amsco_transposition_encipher(message, keyword, - fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row): - """AMSCO transposition encipher. - - >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(1, 2)) - 'hoteelhler' - >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(2, 1)) - 'hetelhelor' - >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(1, 2)) - 'hotelerelh' - >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(2, 1)) - 'hetelorlhe' - >>> amsco_transposition_encipher('hereissometexttoencipher', 'encode') - 'etecstthhomoerereenisxip' - >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2)) - 'hetcsoeisterereipexthomn' - >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous) - 'hecsoisttererteipexhomen' - >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(2, 1)) - 'heecisoosttrrtepeixhemen' - >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2)) - 'hxtomephescieretoeisnter' - >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous) - 'hxomeiphscerettoisenteer' - """ - grid = amsco_transposition_positions(message, keyword, - fillpattern=fillpattern, fillstyle=fillstyle) - ct_as_grid = [[message[s.start:s.end] for s in r] for r in grid] - return combine_every_nth(ct_as_grid) - - -def amsco_transposition_decipher(message, keyword, - fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row): - """AMSCO transposition decipher - - >>> amsco_transposition_decipher('hoteelhler', 'abc', fillpattern=(1, 2)) - 'hellothere' - >>> amsco_transposition_decipher('hetelhelor', 'abc', fillpattern=(2, 1)) - 'hellothere' - >>> amsco_transposition_decipher('hotelerelh', 'acb', fillpattern=(1, 2)) - 'hellothere' - >>> amsco_transposition_decipher('hetelorlhe', 'acb', fillpattern=(2, 1)) - 'hellothere' - >>> amsco_transposition_decipher('etecstthhomoerereenisxip', 'encode') - 'hereissometexttoencipher' - >>> amsco_transposition_decipher('hetcsoeisterereipexthomn', 'cipher', fillpattern=(1, 2)) - 'hereissometexttoencipher' - >>> amsco_transposition_decipher('hecsoisttererteipexhomen', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous) - 'hereissometexttoencipher' - >>> amsco_transposition_decipher('heecisoosttrrtepeixhemen', 'cipher', fillpattern=(2, 1)) - 'hereissometexttoencipher' - >>> amsco_transposition_decipher('hxtomephescieretoeisnter', 'cipher', fillpattern=(1, 3, 2)) - 'hereissometexttoencipher' - >>> amsco_transposition_decipher('hxomeiphscerettoisenteer', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous) - 'hereissometexttoencipher' - """ - - grid = amsco_transposition_positions(message, keyword, - fillpattern=fillpattern, fillstyle=fillstyle) - transposed_sections = [s for c in [l for l in zip(*grid)] for s in c] - plaintext_list = [''] * len(transposed_sections) - current_pos = 0 - for slice in transposed_sections: - plaintext_list[slice.index] = message[current_pos:current_pos-slice.start+slice.end][:len(message[slice.start:slice.end])] - current_pos += len(message[slice.start:slice.end]) - return cat(plaintext_list) - - -def bifid_grid(keyword, wrap_alphabet, letter_mapping): - """Create the grids for a Bifid cipher - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - if letter_mapping is None: - letter_mapping = {'j': 'i'} - translation = ''.maketrans(letter_mapping) - cipher_alphabet = cat(collections.OrderedDict.fromkeys(cipher_alphabet.translate(translation))) - f_grid = {k: ((i // 5) + 1, (i % 5) + 1) - for i, k in enumerate(cipher_alphabet)} - r_grid = {((i // 5) + 1, (i % 5) + 1): k - for i, k in enumerate(cipher_alphabet)} - return translation, f_grid, r_grid - -def bifid_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a, - letter_mapping=None, period=None, fillvalue=None): - """Bifid cipher - - >>> bifid_encipher("indiajelly", 'iguana') - 'ibidonhprm' - >>> bifid_encipher("indiacurry", 'iguana', period=4) - 'ibnhgaqltm' - >>> bifid_encipher("indiacurry", 'iguana', period=4, fillvalue='x') - 'ibnhgaqltzml' - """ - translation, f_grid, r_grid = bifid_grid(keyword, wrap_alphabet, letter_mapping) - - t_message = message.translate(translation) - pairs0 = [f_grid[l] for l in sanitise(t_message)] - if period: - chunked_pairs = [pairs0[i:i+period] for i in range(0, len(pairs0), period)] - if len(chunked_pairs[-1]) < period and fillvalue: - chunked_pairs[-1] += [f_grid[fillvalue]] * (period - len(chunked_pairs[-1])) - else: - chunked_pairs = [pairs0] - - pairs1 = [] - for c in chunked_pairs: - items = sum(list(list(i) for i in zip(*c)), []) - p = [(items[i], items[i+1]) for i in range(0, len(items), 2)] - pairs1 += p - - return cat(r_grid[p] for p in pairs1) - - -def bifid_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a, - letter_mapping=None, period=None, fillvalue=None): - """Decipher with bifid cipher - - >>> bifid_decipher('ibidonhprm', 'iguana') - 'indiaielly' - >>> bifid_decipher("ibnhgaqltm", 'iguana', period=4) - 'indiacurry' - >>> bifid_decipher("ibnhgaqltzml", 'iguana', period=4) - 'indiacurryxx' - """ - translation, f_grid, r_grid = bifid_grid(keyword, wrap_alphabet, letter_mapping) - - t_message = message.translate(translation) - pairs0 = [f_grid[l] for l in sanitise(t_message)] - if period: - chunked_pairs = [pairs0[i:i+period] for i in range(0, len(pairs0), period)] - if len(chunked_pairs[-1]) < period and fillvalue: - chunked_pairs[-1] += [f_grid[fillvalue]] * (period - len(chunked_pairs[-1])) - else: - chunked_pairs = [pairs0] - - pairs1 = [] - for c in chunked_pairs: - items = [j for i in c for j in i] - gap = len(c) - p = [(items[i], items[i+gap]) for i in range(gap)] - pairs1 += p - - return cat(r_grid[p] for p in pairs1) - - -def autokey_encipher(message, keyword): - """Encipher with the autokey cipher - - >>> autokey_encipher('meetatthefountain', 'kilt') - 'wmpmmxxaeyhbryoca' - """ - shifts = [pos(l) for l in keyword + message] - pairs = zip(message, shifts) - return cat([caesar_encipher_letter(l, k) for l, k in pairs]) - -def autokey_decipher(ciphertext, keyword): - """Decipher with the autokey cipher - - >>> autokey_decipher('wmpmmxxaeyhbryoca', 'kilt') - 'meetatthefountain' - """ - plaintext = [] - keys = list(keyword) - for c in ciphertext: - plaintext_letter = caesar_decipher_letter(c, pos(keys[0])) - plaintext += [plaintext_letter] - keys = keys[1:] + [plaintext_letter] - return cat(plaintext) - - -class PocketEnigma(object): - """A pocket enigma machine - The wheel is internally represented as a 26-element list self.wheel_map, - where wheel_map[i] == j shows that the position i places on from the arrow - maps to the position j places on. - """ - def __init__(self, wheel=1, position='a'): - """initialise the pocket enigma, including which wheel to use and the - starting position of the wheel. - - The wheel is either 1 or 2 (the predefined wheels) or a list of letter - pairs. - - The position is the letter pointed to by the arrow on the wheel. - - >>> pe.wheel_map - [25, 4, 23, 10, 1, 7, 9, 5, 12, 6, 3, 17, 8, 14, 13, 21, 19, 11, 20, 16, 18, 15, 24, 2, 22, 0] - >>> pe.position - 0 - """ - self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), - ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), - ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')] - self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), - ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), - ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')] - if wheel == 1: - self.make_wheel_map(self.wheel1) - elif wheel == 2: - self.make_wheel_map(self.wheel2) - else: - self.validate_wheel_spec(wheel) - self.make_wheel_map(wheel) - if position in string.ascii_lowercase: - self.position = pos(position) - else: - self.position = position - - def make_wheel_map(self, wheel_spec): - """Expands a wheel specification from a list of letter-letter pairs - into a full wheel_map. - - >>> pe.make_wheel_map(pe.wheel2) - [2, 3, 0, 1, 22, 8, 15, 12, 5, 10, 9, 13, 7, 11, 16, 6, 14, 25, 20, 21, 18, 19, 4, 24, 23, 17] - """ - self.validate_wheel_spec(wheel_spec) - self.wheel_map = [0] * 26 - for p in wheel_spec: - self.wheel_map[pos(p[0])] = pos(p[1]) - self.wheel_map[pos(p[1])] = pos(p[0]) - return self.wheel_map - - def validate_wheel_spec(self, wheel_spec): - """Validates that a wheel specificaiton will turn into a valid wheel - map. - - >>> pe.validate_wheel_spec([]) - Traceback (most recent call last): - ... - ValueError: Wheel specification has 0 pairs, requires 13 - >>> pe.validate_wheel_spec([('a', 'b', 'c')]*13) - Traceback (most recent call last): - ... - ValueError: Not all mappings in wheel specificationhave two elements - >>> pe.validate_wheel_spec([('a', 'b')]*13) - Traceback (most recent call last): - ... - ValueError: Wheel specification does not contain 26 letters - """ - if len(wheel_spec) != 13: - raise ValueError("Wheel specification has {} pairs, requires 13". - format(len(wheel_spec))) - for p in wheel_spec: - if len(p) != 2: - raise ValueError("Not all mappings in wheel specification" - "have two elements") - if len(set([p[0] for p in wheel_spec] + - [p[1] for p in wheel_spec])) != 26: - raise ValueError("Wheel specification does not contain 26 letters") - - def encipher_letter(self, letter): - """Enciphers a single letter, by advancing the wheel before looking up - the letter on the wheel. - - >>> pe.set_position('f') - 5 - >>> pe.encipher_letter('k') - 'h' - """ - self.advance() - return self.lookup(letter) - decipher_letter = encipher_letter - - def lookup(self, letter): - """Look up what a letter enciphers to, without turning the wheel. - - >>> pe.set_position('f') - 5 - >>> cat([pe.lookup(l) for l in string.ascii_lowercase]) - 'udhbfejcpgmokrliwntsayqzvx' - >>> pe.lookup('A') - '' - """ - if letter in string.ascii_lowercase: - return unpos( - (self.wheel_map[(pos(letter) - self.position) % 26] + - self.position)) - else: - return '' - - def advance(self): - """Advances the wheel one position. - - >>> pe.set_position('f') - 5 - >>> pe.advance() - 6 - """ - self.position = (self.position + 1) % 26 - return self.position - - def encipher(self, message, starting_position=None): - """Enciphers a whole message. - - >>> pe.set_position('f') - 5 - >>> pe.encipher('helloworld') - 'kjsglcjoqc' - >>> pe.set_position('f') - 5 - >>> pe.encipher('kjsglcjoqc') - 'helloworld' - >>> pe.encipher('helloworld', starting_position = 'x') - 'egrekthnnf' - """ - if starting_position: - self.set_position(starting_position) - transformed = '' - for l in message: - transformed += self.encipher_letter(l) - return transformed - decipher = encipher - - def set_position(self, position): - """Sets the position of the wheel, by specifying the letter the arrow - points to. - - >>> pe.set_position('a') - 0 - >>> pe.set_position('m') - 12 - >>> pe.set_position('z') - 25 - """ - self.position = pos(position) - return self.position - - -if __name__ == "__main__": - import doctest - doctest.testmod(extraglobs={'pe': PocketEnigma(1, 'a')}) diff --git a/column_transposition.py b/column_transposition.py index ecef331..a141ff2 100644 --- a/column_transposition.py +++ b/column_transposition.py @@ -1,6 +1,7 @@ from utilities import * from language_models import * -from multiprocessing import Pool +import multiprocessing +from itertools import chain from logger import logger @@ -26,6 +27,12 @@ def transpositions_of(keyword): transpositions = tuple(key.index(l) for l in sorted(key)) return transpositions + +transpositions = collections.defaultdict(list) +for word in keywords: + transpositions[transpositions_of(word)] += [word] + + def pad(message_len, group_len, fillvalue): padding_length = group_len - message_len % group_len if padding_length == group_len: padding_length = 0 @@ -197,7 +204,7 @@ def column_transposition_break_mp(message, translist=transpositions, fitness=Ptrigrams) # doctest: +ELLIPSIS (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...) """ - with Pool() as pool: + with multiprocessing.Pool() as pool: helper_args = [(message, trans, fillcolumnwise, emptycolumnwise, fitness) for trans in translist @@ -260,3 +267,5 @@ def scytale_break_mp(message, max_key_length=20, return math.trunc(len(message) / len(best[0][0])), best[1] scytale_break = scytale_break_mp +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/hill.py b/hill.py new file mode 100644 index 0000000..75048f9 --- /dev/null +++ b/hill.py @@ -0,0 +1,80 @@ +from utilities import * +from language_models import * +import multiprocessing +import numpy as np +from numpy import matrix +from numpy import linalg + +from logger import logger + + +def hill_encipher(matrix, message_letters, fillvalue='a'): + """Hill cipher + + >>> hill_encipher(np.matrix([[7,8], [11,11]]), 'hellothere') + 'drjiqzdrvx' + >>> hill_encipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \ + 'hello there') + 'tfjflpznvyac' + """ + n = len(matrix) + sanitised_message = sanitise(message_letters) + if len(sanitised_message) % n != 0: + padding = fillvalue[0] * (n - len(sanitised_message) % n) + else: + padding = '' + message = [pos(c) for c in sanitised_message + padding] + message_chunks = [message[i:i+n] for i in range(0, len(message), n)] + # message_chunks = chunks(message, len(matrix), fillvalue=None) + enciphered_chunks = [((matrix * np.matrix(c).T).T).tolist()[0] + for c in message_chunks] + return cat([unpos(round(l)) + for l in sum(enciphered_chunks, [])]) + +def hill_decipher(matrix, message, fillvalue='a'): + """Hill cipher + + >>> hill_decipher(np.matrix([[7,8], [11,11]]), 'drjiqzdrvx') + 'hellothere' + >>> hill_decipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \ + 'tfjflpznvyac') + 'hellothereaa' + """ + adjoint = linalg.det(matrix)*linalg.inv(matrix) + inverse_determinant = modular_division_table[int(round(linalg.det(matrix))) % 26][1] + inverse_matrix = (inverse_determinant * adjoint) % 26 + return hill_encipher(inverse_matrix, message, fillvalue) + +def hill_break(message, matrix_size=2, fitness=Pletters, + number_of_solutions=1, chunksize=500): + + all_matrices = [np.matrix(list(m)) + for m in itertools.product([list(r) + for r in itertools.product(range(26), repeat=matrix_size)], + repeat=matrix_size)] + valid_matrices = [m for m, d in + zip(all_matrices, (int(round(linalg.det(m))) for m in all_matrices)) + if d != 0 + if d % 2 != 0 + if d % 13 != 0 ] + with multiprocessing.Pool() as pool: + helper_args = [(message, matrix, fitness) + for matrix in valid_matrices] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(hill_break_worker, helper_args, chunksize) + if number_of_solutions == 1: + return max(breaks, key=lambda k: k[1]) + else: + return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] + +def hill_break_worker(message, matrix, fitness): + plaintext = hill_decipher(matrix, message) + fit = fitness(plaintext) + logger.debug('Hill cipher break attempt using key {0} gives fit of ' + '{1} and decrypt starting: {2}'.format(matrix, + fit, sanitise(plaintext)[:50])) + return matrix, fit + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/keyword.py b/keyword.py deleted file mode 100644 index 91491ba..0000000 --- a/keyword.py +++ /dev/null @@ -1,269 +0,0 @@ -from utilities import * -from language_models import * -from enum import Enum -# from itertools import starmap -from multiprocessing import Pool - -from logger import logger - - -class KeywordWrapAlphabet(Enum): - from_a = 1 - from_last = 2 - from_largest = 3 - - -def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Find the cipher alphabet given a keyword. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - - >>> keyword_cipher_alphabet_of('bayes') - 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a) - 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last) - 'bayestuvwxzcdfghijklmnopqr' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest) - 'bayeszcdfghijklmnopqrtuvwx' - """ - if wrap_alphabet == KeywordWrapAlphabet.from_a: - cipher_alphabet = cat(deduplicate(sanitise(keyword) + - string.ascii_lowercase)) - else: - if wrap_alphabet == KeywordWrapAlphabet.from_last: - last_keyword_letter = deduplicate(sanitise(keyword))[-1] - else: - last_keyword_letter = sorted(sanitise(keyword))[-1] - last_keyword_position = string.ascii_lowercase.find( - last_keyword_letter) + 1 - cipher_alphabet = cat( - deduplicate(sanitise(keyword) + - string.ascii_lowercase[last_keyword_position:] + - string.ascii_lowercase)) - return cipher_alphabet - - -def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Enciphers a message with a keyword substitution cipher. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_encipher('test message', 'bayes') - 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a) - 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last) - 'lskl dskkbus' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest) - 'qspq jsppbcs' - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) - return unaccent(message).lower().translate(cipher_translation) - -def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Deciphers a message with a keyword substitution cipher. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_decipher('rsqr ksqqbds', 'bayes') - 'test message' - >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a) - 'test message' - >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last) - 'test message' - >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest) - 'test message' - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) - return message.lower().translate(cipher_translation) - - -def keyword_break(message, wordlist=keywords, fitness=Pletters): - """Breaks a keyword substitution cipher using a dictionary and - frequency analysis. - - >>> keyword_break(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', ), -52.834575011...) - """ - best_keyword = '' - best_wrap_alphabet = True - best_fit = float("-inf") - for wrap_alphabet in KeywordWrapAlphabet: - for keyword in wordlist: - plaintext = keyword_decipher(message, keyword, wrap_alphabet) - fit = fitness(plaintext) - logger.debug('Keyword break attempt using key {0} (wrap={1}) ' - 'gives fit of {2} and decrypt starting: {3}'.format( - keyword, wrap_alphabet, fit, - sanitise(plaintext)[:50])) - if fit > best_fit: - best_fit = fit - best_keyword = keyword - best_wrap_alphabet = wrap_alphabet - logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(best_keyword, - best_wrap_alphabet, best_fit, sanitise( - keyword_decipher(message, best_keyword, - best_wrap_alphabet))[:50])) - return (best_keyword, best_wrap_alphabet), best_fit - -def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, - number_of_solutions=1, chunksize=500): - """Breaks a keyword substitution cipher using a dictionary and - frequency analysis - - >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', ), -52.834575011...) - >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ - wordlist=['cat', 'elephant', 'kangaroo'], \ - number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - [(('elephant', ), -52.834575011...), - (('elephant', ), -52.834575011...)] - """ - with Pool() as pool: - helper_args = [(message, word, wrap, fitness) - for word in wordlist - for wrap in KeywordWrapAlphabet] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) - if number_of_solutions == 1: - return max(breaks, key=lambda k: k[1]) - else: - return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] - -def keyword_break_worker(message, keyword, wrap_alphabet, fitness): - plaintext = keyword_decipher(message, keyword, wrap_alphabet) - fit = fitness(plaintext) - logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(keyword, - wrap_alphabet, fit, sanitise(plaintext)[:50])) - return (keyword, wrap_alphabet), fit - - -def monoalphabetic_break_hillclimbing(message, - max_iterations=20000, - plain_alphabet=None, - cipher_alphabet=None, - fitness=Pletters, chunksize=1): - return simulated_annealing_break(message, - workers=1, - initial_temperature=0, - max_iterations=max_iterations, - plain_alphabet=plain_alphabet, - cipher_alphabet=cipher_alphabet, - fitness=fitness, chunksize=chunksize) - - -def monoalphabetic_break_hillclimbing_mp(message, - workers=10, - max_iterations=20000, - plain_alphabet=None, - cipher_alphabet=None, - fitness=Pletters, chunksize=1): - return simulated_annealing_break(message, - workers=workers, - initial_temperature=0, - max_iterations=max_iterations, - plain_alphabet=plain_alphabet, - cipher_alphabet=cipher_alphabet, - fitness=fitness, chunksize=chunksize) - - -def simulated_annealing_break(message, workers=10, - initial_temperature=200, - max_iterations=20000, - plain_alphabet=None, - cipher_alphabet=None, - fitness=Pletters, chunksize=1): - worker_args = [] - ciphertext = sanitise(message) - for i in range(workers): - if not plain_alphabet: - plain_alphabet = string.ascii_lowercase - if not cipher_alphabet: - cipher_alphabet = list(string.ascii_lowercase) - random.shuffle(cipher_alphabet) - cipher_alphabet = cat(cipher_alphabet) - worker_args.append((ciphertext, plain_alphabet, cipher_alphabet, - initial_temperature, max_iterations, fitness)) - with Pool() as pool: - breaks = pool.starmap(simulated_annealing_break_worker, - worker_args, chunksize) - return max(breaks, key=lambda k: k[1]) - - -def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet, - t0, max_iterations, fitness): - def swap(letters, i, j): - if i > j: - i, j = j, i - if i == j: - return letters - else: - return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] + - letters[j+1:]) - - temperature = t0 - - dt = t0 / (0.9 * max_iterations) - - current_alphabet = cipher_alphabet - alphabet = current_alphabet - cipher_translation = ''.maketrans(current_alphabet, plain_alphabet) - plaintext = message.translate(cipher_translation) - current_fitness = fitness(plaintext) - - best_alphabet = current_alphabet - best_fitness = current_fitness - best_plaintext = plaintext - - # print('starting for', max_iterations) - for i in range(max_iterations): - swap_a = random.randrange(26) - swap_b = (swap_a + int(random.gauss(0, 4))) % 26 - alphabet = swap(current_alphabet, swap_a, swap_b) - cipher_translation = ''.maketrans(alphabet, plain_alphabet) - plaintext = message.translate(cipher_translation) - new_fitness = fitness(plaintext) - try: - sa_chance = math.exp((new_fitness - current_fitness) / temperature) - except (OverflowError, ZeroDivisionError): - # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature)) - sa_chance = 0 - if (new_fitness > current_fitness or random.random() < sa_chance): - # logger.debug('Simulated annealing: iteration {}, temperature {}, ' - # 'current alphabet {}, current_fitness {}, ' - # 'best_plaintext {}'.format(i, temperature, current_alphabet, - # current_fitness, best_plaintext[:50])) - - # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance)) - current_fitness = new_fitness - current_alphabet = alphabet - - if current_fitness > best_fitness: - best_alphabet = current_alphabet - best_fitness = current_fitness - best_plaintext = plaintext - if i % 500 == 0: - logger.debug('Simulated annealing: iteration {}, temperature {}, ' - 'current alphabet {}, current_fitness {}, ' - 'best_plaintext {}'.format(i, temperature, current_alphabet, - current_fitness, plaintext[:50])) - temperature = max(temperature - dt, 0.001) - - return best_alphabet, best_fitness # current_alphabet, current_fitness diff --git a/keyword_cipher.py b/keyword_cipher.py new file mode 100644 index 0000000..68c8904 --- /dev/null +++ b/keyword_cipher.py @@ -0,0 +1,272 @@ +from utilities import * +from language_models import * +from enum import Enum +# from itertools import starmap +import multiprocessing + +from logger import logger + + +class KeywordWrapAlphabet(Enum): + from_a = 1 + from_last = 2 + from_largest = 3 + + +def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Find the cipher alphabet given a keyword. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + + >>> keyword_cipher_alphabet_of('bayes') + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a) + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last) + 'bayestuvwxzcdfghijklmnopqr' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest) + 'bayeszcdfghijklmnopqrtuvwx' + """ + if wrap_alphabet == KeywordWrapAlphabet.from_a: + cipher_alphabet = cat(deduplicate(sanitise(keyword) + + string.ascii_lowercase)) + else: + if wrap_alphabet == KeywordWrapAlphabet.from_last: + last_keyword_letter = deduplicate(sanitise(keyword))[-1] + else: + last_keyword_letter = sorted(sanitise(keyword))[-1] + last_keyword_position = string.ascii_lowercase.find( + last_keyword_letter) + 1 + cipher_alphabet = cat( + deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + + string.ascii_lowercase)) + return cipher_alphabet + + +def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Enciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_encipher('test message', 'bayes') + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a) + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last) + 'lskl dskkbus' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest) + 'qspq jsppbcs' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) + return unaccent(message).lower().translate(cipher_translation) + +def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Deciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_decipher('rsqr ksqqbds', 'bayes') + 'test message' + >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a) + 'test message' + >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last) + 'test message' + >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest) + 'test message' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) + return message.lower().translate(cipher_translation) + + +def keyword_break(message, wordlist=keywords, fitness=Pletters): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis. + + >>> keyword_break(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', ), -52.834575011...) + """ + best_keyword = '' + best_wrap_alphabet = True + best_fit = float("-inf") + for wrap_alphabet in KeywordWrapAlphabet: + for keyword in wordlist: + plaintext = keyword_decipher(message, keyword, wrap_alphabet) + fit = fitness(plaintext) + logger.debug('Keyword break attempt using key {0} (wrap={1}) ' + 'gives fit of {2} and decrypt starting: {3}'.format( + keyword, wrap_alphabet, fit, + sanitise(plaintext)[:50])) + if fit > best_fit: + best_fit = fit + best_keyword = keyword + best_wrap_alphabet = wrap_alphabet + logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(best_keyword, + best_wrap_alphabet, best_fit, sanitise( + keyword_decipher(message, best_keyword, + best_wrap_alphabet))[:50])) + return (best_keyword, best_wrap_alphabet), best_fit + +def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, + number_of_solutions=1, chunksize=500): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis + + >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', ), -52.834575011...) + >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo'], \ + number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + [(('elephant', ), -52.834575011...), + (('elephant', ), -52.834575011...)] + """ + with multiprocessing.Pool() as pool: + helper_args = [(message, word, wrap, fitness) + for word in wordlist + for wrap in KeywordWrapAlphabet] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) + if number_of_solutions == 1: + return max(breaks, key=lambda k: k[1]) + else: + return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] + +def keyword_break_worker(message, keyword, wrap_alphabet, fitness): + plaintext = keyword_decipher(message, keyword, wrap_alphabet) + fit = fitness(plaintext) + logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(keyword, + wrap_alphabet, fit, sanitise(plaintext)[:50])) + return (keyword, wrap_alphabet), fit + + +def monoalphabetic_break_hillclimbing(message, + max_iterations=20000, + plain_alphabet=None, + cipher_alphabet=None, + fitness=Pletters, chunksize=1): + return simulated_annealing_break(message, + workers=1, + initial_temperature=0, + max_iterations=max_iterations, + plain_alphabet=plain_alphabet, + cipher_alphabet=cipher_alphabet, + fitness=fitness, chunksize=chunksize) + + +def monoalphabetic_break_hillclimbing_mp(message, + workers=10, + max_iterations=20000, + plain_alphabet=None, + cipher_alphabet=None, + fitness=Pletters, chunksize=1): + return simulated_annealing_break(message, + workers=workers, + initial_temperature=0, + max_iterations=max_iterations, + plain_alphabet=plain_alphabet, + cipher_alphabet=cipher_alphabet, + fitness=fitness, chunksize=chunksize) + + +def simulated_annealing_break(message, workers=10, + initial_temperature=200, + max_iterations=20000, + plain_alphabet=None, + cipher_alphabet=None, + fitness=Pletters, chunksize=1): + worker_args = [] + ciphertext = sanitise(message) + for i in range(workers): + if not plain_alphabet: + plain_alphabet = string.ascii_lowercase + if not cipher_alphabet: + cipher_alphabet = list(string.ascii_lowercase) + random.shuffle(cipher_alphabet) + cipher_alphabet = cat(cipher_alphabet) + worker_args.append((ciphertext, plain_alphabet, cipher_alphabet, + initial_temperature, max_iterations, fitness)) + with multiprocessing.Pool() as pool: + breaks = pool.starmap(simulated_annealing_break_worker, + worker_args, chunksize) + return max(breaks, key=lambda k: k[1]) + + +def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet, + t0, max_iterations, fitness): + def swap(letters, i, j): + if i > j: + i, j = j, i + if i == j: + return letters + else: + return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] + + letters[j+1:]) + + temperature = t0 + + dt = t0 / (0.9 * max_iterations) + + current_alphabet = cipher_alphabet + alphabet = current_alphabet + cipher_translation = ''.maketrans(current_alphabet, plain_alphabet) + plaintext = message.translate(cipher_translation) + current_fitness = fitness(plaintext) + + best_alphabet = current_alphabet + best_fitness = current_fitness + best_plaintext = plaintext + + # print('starting for', max_iterations) + for i in range(max_iterations): + swap_a = random.randrange(26) + swap_b = (swap_a + int(random.gauss(0, 4))) % 26 + alphabet = swap(current_alphabet, swap_a, swap_b) + cipher_translation = ''.maketrans(alphabet, plain_alphabet) + plaintext = message.translate(cipher_translation) + new_fitness = fitness(plaintext) + try: + sa_chance = math.exp((new_fitness - current_fitness) / temperature) + except (OverflowError, ZeroDivisionError): + # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature)) + sa_chance = 0 + if (new_fitness > current_fitness or random.random() < sa_chance): + # logger.debug('Simulated annealing: iteration {}, temperature {}, ' + # 'current alphabet {}, current_fitness {}, ' + # 'best_plaintext {}'.format(i, temperature, current_alphabet, + # current_fitness, best_plaintext[:50])) + + # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance)) + current_fitness = new_fitness + current_alphabet = alphabet + + if current_fitness > best_fitness: + best_alphabet = current_alphabet + best_fitness = current_fitness + best_plaintext = plaintext + if i % 500 == 0: + logger.debug('Simulated annealing: iteration {}, temperature {}, ' + 'current alphabet {}, current_fitness {}, ' + 'best_plaintext {}'.format(i, temperature, current_alphabet, + current_fitness, plaintext[:50])) + temperature = max(temperature - dt, 0.001) + + return best_alphabet, best_fitness # current_alphabet, current_fitness + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/plot_frequency_histogram.py b/plot_frequency_histogram.py new file mode 100644 index 0000000..d4a9297 --- /dev/null +++ b/plot_frequency_histogram.py @@ -0,0 +1,15 @@ +import matplotlib.pyplot as plt + +def plot_frequency_histogram(freqs, sort_key=None): + x = range(len(freqs)) + y = [freqs[l] for l in sorted(freqs, key=sort_key)] + f = plt.figure() + ax = f.add_axes([0.1, 0.1, 0.9, 0.9]) + ax.bar(x, y, align='center') + ax.set_xticks(x) + ax.set_xticklabels(sorted(freqs, key=sort_key)) + f.show() + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/pocket_enigma.py b/pocket_enigma.py new file mode 100644 index 0000000..557bda3 --- /dev/null +++ b/pocket_enigma.py @@ -0,0 +1,194 @@ +from utilities import * +from language_models import * + +from logger import logger + + +class PocketEnigma(object): + """A pocket enigma machine + The wheel is internally represented as a 26-element list self.wheel_map, + where wheel_map[i] == j shows that the position i places on from the arrow + maps to the position j places on. + """ + def __init__(self, wheel=1, position='a'): + """initialise the pocket enigma, including which wheel to use and the + starting position of the wheel. + + The wheel is either 1 or 2 (the predefined wheels) or a list of letter + pairs. + + The position is the letter pointed to by the arrow on the wheel. + + >>> pe.wheel_map + [25, 4, 23, 10, 1, 7, 9, 5, 12, 6, 3, 17, 8, 14, 13, 21, 19, 11, 20, 16, 18, 15, 24, 2, 22, 0] + >>> pe.position + 0 + """ + self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), + ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), + ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')] + self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), + ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), + ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')] + if wheel == 1: + self.make_wheel_map(self.wheel1) + elif wheel == 2: + self.make_wheel_map(self.wheel2) + else: + self.validate_wheel_spec(wheel) + self.make_wheel_map(wheel) + if position in string.ascii_lowercase: + self.position = pos(position) + else: + self.position = position + + def make_wheel_map(self, wheel_spec): + """Expands a wheel specification from a list of letter-letter pairs + into a full wheel_map. + + >>> pe.make_wheel_map(pe.wheel2) + [2, 3, 0, 1, 22, 8, 15, 12, 5, 10, 9, 13, 7, 11, 16, 6, 14, 25, 20, 21, 18, 19, 4, 24, 23, 17] + """ + self.validate_wheel_spec(wheel_spec) + self.wheel_map = [0] * 26 + for p in wheel_spec: + self.wheel_map[pos(p[0])] = pos(p[1]) + self.wheel_map[pos(p[1])] = pos(p[0]) + return self.wheel_map + + def validate_wheel_spec(self, wheel_spec): + """Validates that a wheel specificaiton will turn into a valid wheel + map. + + >>> pe.validate_wheel_spec([]) + Traceback (most recent call last): + ... + ValueError: Wheel specification has 0 pairs, requires 13 + >>> pe.validate_wheel_spec([('a', 'b', 'c')]*13) + Traceback (most recent call last): + ... + ValueError: Not all mappings in wheel specificationhave two elements + >>> pe.validate_wheel_spec([('a', 'b')]*13) + Traceback (most recent call last): + ... + ValueError: Wheel specification does not contain 26 letters + """ + if len(wheel_spec) != 13: + raise ValueError("Wheel specification has {} pairs, requires 13". + format(len(wheel_spec))) + for p in wheel_spec: + if len(p) != 2: + raise ValueError("Not all mappings in wheel specification" + "have two elements") + if len(set([p[0] for p in wheel_spec] + + [p[1] for p in wheel_spec])) != 26: + raise ValueError("Wheel specification does not contain 26 letters") + + def encipher_letter(self, letter): + """Enciphers a single letter, by advancing the wheel before looking up + the letter on the wheel. + + >>> pe.set_position('f') + 5 + >>> pe.encipher_letter('k') + 'h' + """ + self.advance() + return self.lookup(letter) + decipher_letter = encipher_letter + + def lookup(self, letter): + """Look up what a letter enciphers to, without turning the wheel. + + >>> pe.set_position('f') + 5 + >>> cat([pe.lookup(l) for l in string.ascii_lowercase]) + 'udhbfejcpgmokrliwntsayqzvx' + >>> pe.lookup('A') + '' + """ + if letter in string.ascii_lowercase: + return unpos( + (self.wheel_map[(pos(letter) - self.position) % 26] + + self.position)) + else: + return '' + + def advance(self): + """Advances the wheel one position. + + >>> pe.set_position('f') + 5 + >>> pe.advance() + 6 + """ + self.position = (self.position + 1) % 26 + return self.position + + def encipher(self, message, starting_position=None): + """Enciphers a whole message. + + >>> pe.set_position('f') + 5 + >>> pe.encipher('helloworld') + 'kjsglcjoqc' + >>> pe.set_position('f') + 5 + >>> pe.encipher('kjsglcjoqc') + 'helloworld' + >>> pe.encipher('helloworld', starting_position = 'x') + 'egrekthnnf' + """ + if starting_position: + self.set_position(starting_position) + transformed = '' + for l in message: + transformed += self.encipher_letter(l) + return transformed + decipher = encipher + + def set_position(self, position): + """Sets the position of the wheel, by specifying the letter the arrow + points to. + + >>> pe.set_position('a') + 0 + >>> pe.set_position('m') + 12 + >>> pe.set_position('z') + 25 + """ + self.position = pos(position) + return self.position + + +def pocket_enigma_break_by_crib(message, wheel_spec, crib, crib_position): + """Break a pocket enigma using a crib (some plaintext that's expected to + be in a certain position). Returns a list of possible starting wheel + positions that could produce the crib. + + >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0) + ['a', 'f', 'q'] + >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0) + ['a'] + >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2) + ['a'] + >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2) + ['a'] + >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3) + ['a', 'j', 'n'] + >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3) + [] + """ + pe = PocketEnigma(wheel=wheel_spec) + possible_positions = [] + for p in string.ascii_lowercase: + pe.set_position(p) + plaintext = pe.decipher(message) + if plaintext[crib_position:crib_position+len(crib)] == crib: + possible_positions += [p] + return possible_positions + +if __name__ == "__main__": + import doctest + doctest.testmod(extraglobs={'pe': PocketEnigma(1, 'a')}) \ No newline at end of file diff --git a/polybius.py b/polybius.py index 48509b1..79aeb38 100644 --- a/polybius.py +++ b/polybius.py @@ -1,6 +1,8 @@ from utilities import * from language_models import * -from multiprocessing import Pool +import multiprocessing + +from keyword_cipher import KeywordWrapAlphabet from logger import logger @@ -142,7 +144,7 @@ def polybius_break_mp(message, column_labels, row_labels, """ if letters_to_merge is None: letters_to_merge = {'j': 'i'} - with Pool() as pool: + with multiprocessing.Pool() as pool: helper_args = [(message, word, wrap, column_labels, row_labels, column_first, letters_to_merge, @@ -179,3 +181,5 @@ def polybius_break_worker(message, keyword, wrap_alphabet, fit, sanitise(plaintext)[:50])) return (keyword, wrap_alphabet, column_order, row_order, column_first), fit +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/text_prettify.py b/text_prettify.py index ea7da4e..d3a6ffa 100644 --- a/text_prettify.py +++ b/text_prettify.py @@ -1,6 +1,5 @@ from segment import segment -from cipher import cat -from language_models import sanitise +from utilities import cat, sanitise import string diff --git a/utilities.py b/utilities.py index d1961a8..ca984a3 100644 --- a/utilities.py +++ b/utilities.py @@ -1,5 +1,6 @@ import string import collections +from itertools import zip_longest # join a a list of letters into a string cat = ''.join @@ -11,17 +12,17 @@ wcat = ' '.join lcat = '\n'.join def pos(letter): - """Return the position of a letter in the alphabet (0-25)""" + """Return the position of a letter in the alphabet (0-25)""" if letter in string.ascii_lowercase: return ord(letter) - ord('a') elif letter in string.ascii_uppercase: return ord(letter) - ord('A') else: - return 0 + raise ValueError('pos requires input of {} to be an ascii letter'.format(letter)) def unpos(number): - """Return the letter in the given position in the alphabet (mod 26)""" - return chr(number % 26 + ord('a')) + """Return the letter in the given position in the alphabet (mod 26)""" + return chr(number % 26 + ord('a')) def every_nth(text, n, fillvalue=''): """Returns n strings, each of which consists of every nth character, @@ -160,10 +161,6 @@ def index_of_coincidence(text): ) -transpositions = collections.defaultdict(list) -for word in keywords: - transpositions[transpositions_of(word)] += [word] - def frequencies(text): """Count the number of occurrences of each character in text @@ -192,3 +189,6 @@ def frequencies(text): 0 """ return collections.Counter(c for c in text) + +if __name__ == "__main__": + import doctest \ No newline at end of file diff --git a/vigenere.py b/vigenere.py index f0181dd..dcf4a2b 100644 --- a/vigenere.py +++ b/vigenere.py @@ -1,8 +1,8 @@ from utilities import * from language_models import * from enum import Enum -from itertools import starmap -from multiprocessing import Pool +from itertools import starmap, cycle +import multiprocessing from logger import logger @@ -52,7 +52,7 @@ def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters, wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS ('cat', -52.9472712...) """ - with Pool() as pool: + with multiprocessing.Pool() as pool: helper_args = [(message, word, fitness) for word in wordlist] # Gotcha: the helper function here needs to be defined at the top level @@ -168,3 +168,6 @@ def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletter results = starmap(worker, [(sanitised_message, i, fitness) for i in range(1, max_key_length+1)]) return max(results, key=lambda k: k[1]) + +if __name__ == "__main__": + import doctest \ No newline at end of file