# Extracted from the cipher-tools commit 61d82c1ce520c37cf00d829e8f856c5fb672266a
# ("Tidying", Neil Smith, 2013-11-20), a patch against cipher.py.  The same
# commit also dropped the commented-out ``test_ngram_length=2`` parameter
# from ``column_transposition_break``.


def column_transposition_break_mp(message,
                                  wordlist=keywords,
                                  metric=norms.euclidean_distance,
                                  target_counts=normalised_english_bigram_counts,
                                  message_frequency_scaling=norms.normalise,
                                  chunksize=500):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis, scoring the candidate keywords in a
    pool of worker processes.

    Each word in `wordlist` is tried as the key.  The trial decrypt's
    n-gram frequencies (scaled by `message_frequency_scaling`) are compared
    with `target_counts` using `metric`, and the keyword with the smallest
    metric value wins.

    Parameters:
    message -- the ciphertext to break
    wordlist -- iterable of candidate keywords
    metric -- callable taking (target_counts, counts) and returning a
        value where smaller means a better fit
    target_counts -- mapping of n-gram to expected frequency; the length
        of its keys fixes the n-gram size used on the trial decrypts
    message_frequency_scaling -- callable applied to the raw n-gram
        frequencies of each trial decrypt before comparison
    chunksize -- passed straight through to Pool.starmap

    Returns a (keyword, fit) tuple for the best-scoring keyword.
    Raises ValueError if `wordlist` is empty (there is nothing to pick
    a minimum from).

    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, \
            when homosexual acts were still illegal in the United Kingdom. "), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
    ('encipher', 0.898128626285...)
    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
            "when homosexual acts were still illegal in the United Kingdom."), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen'], \
            target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
    ('encipher', 1.1958792913127...)
    """
    # Infer the n-gram size from any one key of the target table; iterating
    # the mapping directly yields its keys.
    ngram_length = len(next(iter(target_counts)))
    # One argument tuple per candidate keyword, built before the pool is
    # opened so the worker processes exist only while they have work to do.
    helper_args = [(message, word, metric, target_counts, ngram_length,
                    message_frequency_scaling)
                   for word in wordlist]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        breaks = pool.starmap(column_transposition_break_worker,
                              helper_args, chunksize)
    # Each result is (keyword, fit); the lowest fit is the best match.
    return min(breaks, key=lambda k: k[1])


def column_transposition_break_worker(message, keyword, metric, target_counts,
                                      ngram_length, message_frequency_scaling):
    """Scores a single candidate keyword for column_transposition_break_mp.

    Deciphers `message` with `keyword`, counts the n-grams of length
    `ngram_length` in the sanitised result, scales those counts with
    `message_frequency_scaling` and compares them against `target_counts`
    using `metric`.

    Returns a (keyword, fit) tuple, where fit is the value of `metric`.
    """
    plaintext = column_transposition_decipher(message, keyword)
    # Sanitise once and reuse the result for both the scoring and the log
    # preview; this function runs once per dictionary word.
    sanitised = sanitise(plaintext)
    counts = message_frequency_scaling(frequencies(
        ngrams(sanitised, ngram_length)))
    fit = metric(target_counts, counts)
    logger.debug('Column transposition break attempt using key {0} '
                 'gives fit of {1} and decrypt starting: {2}'.format(
                     keyword, fit,
                     sanitised[:50]))
    return keyword, fit