From: Neil Smith Date: Wed, 20 Nov 2013 11:46:26 +0000 (+0000) Subject: Test for column transposition break now done X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=a30c5ec7f93201f5d0225727b46828b3e23f3a72;p=cipher-tools.git Test for column transposition break now done --- diff --git a/cipher.py b/cipher.py index 4fa44cf..db2caff 100644 --- a/cipher.py +++ b/cipher.py @@ -34,6 +34,14 @@ with open('count_2l.txt', 'r') as f: english_bigram_counts[bigram] = int(count) normalised_english_bigram_counts = norms.normalise(english_bigram_counts) +english_trigram_counts = collections.defaultdict(int) +with open('count_3l.txt', 'r') as f: + for line in f: + (trigram, count) = line.split("\t") + english_trigram_counts[trigram] = int(count) +normalised_english_trigram_counts = norms.normalise(english_trigram_counts) + + with open('words.txt', 'r') as f: keywords = [line.rstrip() for line in f] @@ -420,7 +428,11 @@ def scytale_decipher(message, rows): def transpositions_of(keyword): - """ + """Finds the transpositions given by a keyword. For instance, the keyword + 'clever' rearranges to 'celrv', so the first column (0) stays first, the + second column (1) moves to third, the third column (2) moves to second, + and so on. + >>> transpositions_of('clever') [0, 2, 1, 4, 3] """ @@ -428,22 +440,35 @@ def transpositions_of(keyword): transpositions = [key.index(l) for l in sorted(key)] return transpositions -def column_transposition_encipher(message, keyword): - """ +def column_transposition_encipher(message, keyword, fillvalue=' '): + """Enciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + >>> column_transposition_encipher('hellothere', 'clever') 'hleolteher' + >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') + 'hleolthre!e!' 
""" - return column_transposition_worker(message, keyword, encipher=True) + return column_transposition_worker(message, keyword, encipher=True, + fillvalue=fillvalue) + +def column_transposition_decipher(message, keyword, fillvalue=' '): + """Deciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. -def column_transposition_decipher(message, keyword): - """ >>> column_transposition_decipher('hleolteher', 'clever') 'hellothere' + >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?') + 'hellothere!!' """ - return column_transposition_worker(message, keyword, encipher=False) + return column_transposition_worker(message, keyword, encipher=False, + fillvalue=fillvalue) + +def column_transposition_worker(message, keyword, + encipher=True, fillvalue=' '): + """Does the actual work of the column transposition cipher. + Message is padded with fillvalue to allow all rows to be the same length. -def column_transposition_worker(message, keyword, encipher=True): - """ >>> column_transposition_worker('hellothere', 'clever') 'hleolteher' >>> column_transposition_worker('hellothere', 'clever', encipher=True) @@ -452,7 +477,7 @@ def column_transposition_worker(message, keyword, encipher=True): 'hellothere' """ transpositions = transpositions_of(keyword) - columns = every_nth(message, len(transpositions), fillvalue=' ') + columns = every_nth(message, len(transpositions), fillvalue=fillvalue) if encipher: transposed_columns = transpose(columns, transpositions) else: @@ -635,6 +660,53 @@ def scytale_break(message, sanitise(scytale_decipher(message, best_key))[:50])) return best_key, best_fit +def column_transposition_break(message, + wordlist=keywords, + metric=norms.euclidean_distance, + #test_ngram_length=2, + target_counts=normalised_english_bigram_counts, + message_frequency_scaling=norms.normalise): + """Breaks a column transposition cipher using a dictionary and + n-gram frequency analysis + + >>> 
column_transposition_break(column_transposition_encipher(sanitise( \ + "Turing's homosexuality resulted in a criminal prosecution in 1952, \ + when homosexual acts were still illegal in the United Kingdom. "), \ + 'encipher'), \ + wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS + ('encipher', 0.898128626285...) + >>> column_transposition_break(column_transposition_encipher(sanitise( \ + "Turing's homosexuality resulted in a criminal prosecution in 1952, " \ + "when homosexual acts were still illegal in the United Kingdom."), \ + 'encipher'), \ + wordlist=['encipher', 'keyword', 'fourteen'], \ + target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS + ('encipher', 1.1958792913127...) + """ + best_keyword = '' + best_fit = float("inf") + ngram_length = len(next(iter(target_counts.keys()))) + for keyword in wordlist: + if len(message) % len(deduplicate(keyword)) == 0: + plaintext = column_transposition_decipher(message, keyword) + counts = message_frequency_scaling(frequencies( + ngrams(sanitise(plaintext), ngram_length))) + fit = metric(target_counts, counts) + logger.debug('Column transposition break attempt using key {0} ' + 'gives fit of {1} and decrypt starting: {2}'.format( + keyword, fit, + sanitise(plaintext)[:50])) + if fit < best_fit: + best_fit = fit + best_keyword = keyword + logger.info('Column transposition break best fit with key {0} gives fit ' + 'of {1} and decrypt starting: {2}'.format(best_keyword, + best_fit, sanitise( + column_transposition_decipher(message, + best_keyword))[:50])) + return best_keyword, best_fit + + if __name__ == "__main__": import doctest