From: Neil Smith Date: Mon, 14 Jul 2014 18:51:17 +0000 (+0100) Subject: Hillclimbing to solve monosubstitution ciphers X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=e67dfbd9159645bc3621ef9f40c4d026cad7e747;p=cipher-training.git Hillclimbing to solve monosubstitution ciphers --- diff --git a/cipher.py b/cipher.py index 5a7781b..d284afe 100644 --- a/cipher.py +++ b/cipher.py @@ -62,35 +62,6 @@ def chunks(text, n, fillvalue=None): padding = '' return [(text+padding)[i:i+n] for i in range(0, len(text), n)] -def transpose(items, transposition): - """Moves items around according to the given transposition - - >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3)) - ['a', 'b', 'c', 'd'] - >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0)) - ['d', 'b', 'c', 'a'] - >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0)) - [13, 12, 14, 11, 15, 10] - """ - transposed = [''] * len(transposition) - for p, t in enumerate(transposition): - transposed[p] = items[t] - return transposed - -def untranspose(items, transposition): - """Undoes a transpose - - >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) - ['a', 'b', 'c', 'd'] - >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) - ['a', 'b', 'c', 'd'] - >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0]) - [10, 11, 12, 13, 14, 15] - """ - transposed = [''] * len(transposition) - for p, t in enumerate(transposition): - transposed[t] = items[p] - return transposed def deduplicate(text): """If a string contains duplicate letters, remove all but the first. Retain @@ -363,175 +334,6 @@ beaufort_encipher = vigenere_decipher beaufort_decipher = vigenere_encipher -def transpositions_of(keyword): - """Finds the transpostions given by a keyword. For instance, the keyword - 'clever' rearranges to 'celrv', so the first column (0) stays first, the - second column (1) moves to third, the third column (2) moves to second, - and so on. - - If passed a tuple, assume it's already a transposition and just return it. - - >>> transpositions_of('clever') - (0, 2, 1, 4, 3) - >>> transpositions_of('fred') - (3, 2, 0, 1) - >>> transpositions_of((3, 2, 0, 1)) - (3, 2, 0, 1) - """ - if isinstance(keyword, tuple): - return keyword - else: - key = deduplicate(keyword) - transpositions = tuple(key.index(l) for l in sorted(key)) - return transpositions - -def pad(message_len, group_len, fillvalue): - """Returns the padding required to extend a message of message_len to an - even multiple of group_len, by adding repreated copies of fillvalue. - fillvalue can either be a character or a function that returns a character. - - >>> pad(10, 4, '!') - '!!' - >>> pad(8, 4, '!') - '' - >>> pad(16, 4, '!') - '' - >>> pad(10, 4, lambda: '*') - '**' - """ - padding_length = group_len - message_len % group_len - if padding_length == group_len: padding_length = 0 - padding = '' - for _ in range(padding_length): - if callable(fillvalue): - padding += fillvalue() - else: - padding += fillvalue - return padding - -def column_transposition_encipher(message, keyword, fillvalue=' ', - fillcolumnwise=False, - emptycolumnwise=False): - """Enciphers using the column transposition cipher. - Message is padded to allow all rows to be the same length. - - >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True) - 'hlohr eltee ' - >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere ' - >>> column_transposition_encipher('hellothere', 'abcdef') - 'hellothere ' - >>> column_transposition_encipher('hellothere', 'abcde') - 'hellothere' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False) - 'hlohreltee' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True) - 'htehlelroe' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True) - 'heotllrehe' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False) - 'holrhetlee' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True) - 'htleehoelr' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False) - 'hleolteher' - >>> column_transposition_encipher('hellothere', 'cleverly') - 'hleolthre e ' - >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') - 'hleolthre!e!' - >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*') - 'hleolthre*e*' - """ - transpositions = transpositions_of(keyword) - message += pad(len(message), len(transpositions), fillvalue) - if fillcolumnwise: - rows = every_nth(message, len(message) // len(transpositions)) - else: - rows = chunks(message, len(transpositions)) - transposed = [transpose(r, transpositions) for r in rows] - if emptycolumnwise: - return combine_every_nth(transposed) - else: - return ''.join(chain(*transposed)) - -def column_transposition_decipher(message, keyword, fillvalue=' ', - fillcolumnwise=False, - emptycolumnwise=False): - """Deciphers using the column transposition cipher. - Message is padded to allow all rows to be the same length. - - >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False) - 'hellothere' - """ - transpositions = transpositions_of(keyword) - message += pad(len(message), len(transpositions), '*') - if emptycolumnwise: - rows = every_nth(message, len(message) // len(transpositions)) - else: - rows = chunks(message, len(transpositions)) - untransposed = [untranspose(r, transpositions) for r in rows] - if fillcolumnwise: - return combine_every_nth(untransposed) - else: - return ''.join(chain(*untransposed)) - -def scytale_encipher(message, rows, fillvalue=' '): - """Enciphers using the scytale transposition cipher. - Message is padded with spaces to allow all rows to be the same length. - - >>> scytale_encipher('thequickbrownfox', 3) - 'tcnhkfeboqrxuo iw ' - >>> scytale_encipher('thequickbrownfox', 4) - 'tubnhirfecooqkwx' - >>> scytale_encipher('thequickbrownfox', 5) - 'tubn hirf ecoo qkwx ' - >>> scytale_encipher('thequickbrownfox', 6) - 'tqcrnxhukof eibwo ' - >>> scytale_encipher('thequickbrownfox', 7) - 'tqcrnx hukof eibwo ' - """ - transpositions = [i for i in range(rows)] - return column_transposition_encipher(message, transpositions, - fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) - -def scytale_decipher(message, rows): - """Deciphers using the scytale transposition cipher. - Assumes the message is padded so that all rows are the same length. - - >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) - 'thequickbrownfox ' - >>> scytale_decipher('tubnhirfecooqkwx', 4) - 'thequickbrownfox' - >>> scytale_decipher('tubn hirf ecoo qkwx ', 5) - 'thequickbrownfox ' - >>> scytale_decipher('tqcrnxhukof eibwo ', 6) - 'thequickbrownfox ' - >>> scytale_decipher('tqcrnx hukof eibwo ', 7) - 'thequickbrownfox ' - """ - transpositions = [i for i in range(rows)] - return column_transposition_decipher(message, transpositions, - fillcolumnwise=True, emptycolumnwise=False) - - if __name__ == "__main__": import doctest doctest.testmod() diff --git a/cipherbreak.py b/cipherbreak.py index 5614389..1adebe6 100644 --- a/cipherbreak.py +++ b/cipherbreak.py @@ -29,9 +29,6 @@ from language_models import * # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1) # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1) -transpositions = collections.defaultdict(list) -for word in keywords: - transpositions[transpositions_of(word)] += [word] def frequencies(text): """Count the number of occurrences of each character in text diff --git a/language_models.py b/language_models.py index 02d48bd..8f4bd9c 100644 --- a/language_models.py +++ b/language_models.py @@ -69,23 +69,6 @@ with open('words.txt', 'r') as f: keywords = [line.rstrip() for line in f] -def weighted_choice(d): - """Generate random item from a dictionary of item counts - """ - target = random.uniform(0, sum(d.values())) - cuml = 0.0 - for (l, p) in d.items(): - cuml += p - if cuml > target: - return l - return None - -def random_english_letter(): - """Generate a random letter based on English letter counts - """ - return weighted_choice(normalised_english_counts) - - class Pdist(dict): """A probability distribution estimated from counts in datafile. Values are stored and returned as log probabilities.