From: Neil Smith
Date: Sun, 26 Jan 2014 21:22:13 +0000 (+0000)
Subject: Done for the day
X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=eaecd10e334e6d63d2fd222bc280b02febca5a1b;p=cipher-tools.git

Done for the day
---

diff --git a/cipher.py b/cipher.py
index 6ef01c3..3e42da1 100644
--- a/cipher.py
+++ b/cipher.py
@@ -2,8 +2,7 @@ import string
 import collections
 import logging
 from itertools import zip_longest, cycle, chain
-from counts import *
-
+from language_models import *
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.FileHandler('cipher.log'))
@@ -12,19 +11,6 @@ logger.setLevel(logging.WARNING)
 #logger.setLevel(logging.DEBUG)
 
 
-english_counts = collections.defaultdict(int)
-with open('count_1l.txt', 'r') as f:
-    for line in f:
-        (letter, count) = line.split("\t")
-        english_counts[letter] = int(count)
-normalised_english_counts = norms.normalise(english_counts)
-
-choices, weights = zip(*weighted_choices)
-cumdist = list(itertools.accumulate(weights))
-x = random.random() * cumdist[-1]
-choices[bisect.bisect(cumdist, x)]
-
-
 modular_division_table = [[0]*26 for x in range(26)]
 for a in range(26):
     for b in range(26):
@@ -438,14 +424,14 @@ def column_transposition_encipher(message, keyword, fillvalue=' ',
                                   emptycolumnwise=False):
     """Enciphers using the column transposition cipher.
     Message is padded to allow all rows to be the same length.
-
-    >>> column_transposition_encipher('hellothere', 'clever')
-    'hleolteher'
-    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
-    'hleolthre!e!'
-    >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
-    'htleehoelr'
     """
+    # >>> column_transposition_encipher('hellothere', 'clever')
+    # 'hleolteher'
+    # >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+    # 'hleolthre!e!'
+    # >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
+    # 'htleehoelr'
+    # """
     transpositions = transpositions_of(keyword)
     message += pad(len(message), len(transpositions), fillvalue)
     if fillcolumnwise:
@@ -463,14 +449,14 @@ def column_transposition_decipher(message, keyword, fillvalue=' ',
                                   columnwise=False):
     """Deciphers using the column transposition cipher.
     Message is padded to allow all rows to be the same length.
-
-    >>> column_transposition_decipher('hleolteher', 'clever')
-    'hellothere'
-    >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
-    'hellothere!!'
-    >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
-    'hellothere'
     """
+    # >>> column_transposition_decipher('hleolteher', 'clever')
+    # 'hellothere'
+    # >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
+    # 'hellothere!!'
+    # >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
+    # 'hellothere'
+    # """
     transpositions = transpositions_of(keyword)
     if columnwise:
         columns = chunks(message, int(len(message) / len(transpositions)))
diff --git a/cipherbreak.py b/cipherbreak.py
index 315278b..728cb81 100644
--- a/cipherbreak.py
+++ b/cipherbreak.py
@@ -9,8 +9,8 @@ from math import log10
 
 import matplotlib.pyplot as plt
 
-from counts import *
 from cipher import *
+from language_models import *
 
 # To time a run:
 #
@@ -19,9 +19,6 @@ from cipher import *
 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
 
-with open('words.txt', 'r') as f:
-    keywords = [line.rstrip() for line in f]
-
 transpositions = collections.defaultdict(list)
 for word in keywords:
     transpositions[transpositions_of(word)] += [word]
@@ -249,33 +246,33 @@ def column_transposition_break_mp(message,
                                   chunksize=500):
     """Breaks a column transposition cipher using a dictionary and
     n-gram frequency analysis
-
-    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
-            "It is a truth universally acknowledged, that a single man in \
-             possession of a good fortune, must be in want of a wife. However \
-             little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
-             rightful property of some one or other of their daughters."), \
-        'encipher'), \
-        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
-                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
-                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
-    (((2, 0, 5, 3, 1, 4, 6), False), 0.0628106372...)
-    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
-            "It is a truth universally acknowledged, that a single man in \
-             possession of a good fortune, must be in want of a wife. However \
-             little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
-             rightful property of some one or other of their daughters."), \
-        'encipher'), \
-        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
-                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
-                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
-        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
-    (((2, 0, 5, 3, 1, 4, 6), False), 0.0592259560...)
     """
+    # >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+    #         "It is a truth universally acknowledged, that a single man in \
+    #          possession of a good fortune, must be in want of a wife. However \
+    #          little known the feelings or views of such a man may be on his \
+    #          first entering a neighbourhood, this truth is so well fixed in the \
+    #          minds of the surrounding families, that he is considered the \
+    #          rightful property of some one or other of their daughters."), \
+    #     'encipher'), \
+    #     translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+    #                (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+    #                (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
+    # (((2, 0, 5, 3, 1, 4, 6), False), 0.0628106372...)
+    # >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+    #         "It is a truth universally acknowledged, that a single man in \
+    #          possession of a good fortune, must be in want of a wife. However \
+    #          little known the feelings or views of such a man may be on his \
+    #          first entering a neighbourhood, this truth is so well fixed in the \
+    #          minds of the surrounding families, that he is considered the \
+    #          rightful property of some one or other of their daughters."), \
+    #     'encipher'), \
+    #     translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+    #                (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+    #                (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
+    #     target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
+    # (((2, 0, 5, 3, 1, 4, 6), False), 0.0592259560...)
+    # """
     ngram_length = len(next(iter(target_counts.keys())))
     with Pool() as pool:
         helper_args = [(message, trans, columnwise, metric, target_counts, ngram_length,
diff --git a/counts.py b/counts.py
deleted file mode 100644
index 2714db2..0000000
--- a/counts.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import norms
-import itertools
-import random
-import bisect
-import collections
-
-english_counts = collections.defaultdict(int)
-with open('count_1l.txt', 'r') as f:
-    for line in f:
-        (letter, count) = line.split("\t")
-        english_counts[letter] = int(count)
-normalised_english_counts = norms.normalise(english_counts)
-
-english_bigram_counts = collections.defaultdict(int)
-with open('count_2l.txt', 'r') as f:
-    for line in f:
-        (bigram, count) = line.split("\t")
-        english_bigram_counts[bigram] = int(count)
-normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
-
-english_trigram_counts = collections.defaultdict(int)
-with open('count_3l.txt', 'r') as f:
-    for line in f:
-        (trigram, count) = line.split("\t")
-        english_trigram_counts[trigram] = int(count)
-normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
-
-
-# choices, weights = zip(*weighted_choices)
-# cumdist = list(itertools.accumulate(weights))
-# x = random.random() * cumdist[-1]
-# choices[bisect.bisect(cumdist, x)]
diff --git a/language_models.py b/language_models.py
new file mode 100644
index 0000000..e4db178
--- /dev/null
+++ b/language_models.py
@@ -0,0 +1,45 @@
+import norms
+import itertools
+import random
+import bisect
+import collections
+
+english_counts = collections.defaultdict(int)
+with open('count_1l.txt', 'r') as f:
+    for line in f:
+        (letter, count) = line.split("\t")
+        english_counts[letter] = int(count)
+normalised_english_counts = norms.normalise(english_counts)
+
+english_bigram_counts = collections.defaultdict(int)
+with open('count_2l.txt', 'r') as f:
+    for line in f:
+        (bigram, count) = line.split("\t")
+        english_bigram_counts[bigram] = int(count)
+normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
+
+english_trigram_counts = collections.defaultdict(int)
+with open('count_3l.txt', 'r') as f:
+    for line in f:
+        (trigram, count) = line.split("\t")
+        english_trigram_counts[trigram] = int(count)
+normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
+
+with open('words.txt', 'r') as f:
+    keywords = [line.rstrip() for line in f]
+
+def weighted_choice(d):
+    """Generate a set of random items from a dictionary of item counts
+    """
+    target = random.uniform(0, sum(d.values()))
+    cuml = 0.0
+    for (l, p) in d.items():
+        cuml += p
+        if cuml > target:
+            return l
+    return None
+
+def random_english_letter():
+    """Generate a random letter based on English letter counts
+    """
+    return weighted_choice(normalised_english_counts)
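
A minimal usage sketch for the new language_models.py helpers, illustrative only and not part of the diff above. It assumes the count_1l.txt, count_2l.txt, count_3l.txt and words.txt data files are present in the working directory, since the module reads them at import time.

# Sketch: exercising the helpers added in language_models.py.
from language_models import (weighted_choice, random_english_letter,
                             normalised_english_counts)

# Draw one letter with probability proportional to its English frequency.
print(random_english_letter())

# weighted_choice accepts any dict of item -> weight (letter, bigram or
# trigram counts alike): it walks the cumulative weights until a uniform
# random target is passed, then returns that item.
sample = ''.join(weighted_choice(normalised_english_counts) for _ in range(20))
print(sample)

weighted_choice is a simpler, linear-scan alternative to the bisect/itertools.accumulate snippet that had been left commented out at the end of counts.py.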