X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipherbreak.py;h=3993a42b5b8f786e9352160032389ac5ae58d476;hb=49dc272d2fc91e7340e56e9e7b96da6ab63514bb;hp=f5e1f45d2d9daac301960e562811fa48bc01ede7;hpb=d86a492d92a9dfa8f27db824ac67acd641b24714;p=cipher-tools.git diff --git a/cipherbreak.py b/cipherbreak.py index f5e1f45..3993a42 100644 --- a/cipherbreak.py +++ b/cipherbreak.py @@ -3,13 +3,14 @@ import collections import norms import logging from itertools import zip_longest, cycle, permutations -from segment import segment, Pwords +from segment import segment from multiprocessing import Pool from math import log10 import matplotlib.pyplot as plt from cipher import * +from language_models import * # To time a run: # @@ -18,32 +19,6 @@ from cipher import * # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1) # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1) - -english_counts = collections.defaultdict(int) -with open('count_1l.txt', 'r') as f: - for line in f: - (letter, count) = line.split("\t") - english_counts[letter] = int(count) -normalised_english_counts = norms.normalise(english_counts) - -english_bigram_counts = collections.defaultdict(int) -with open('count_2l.txt', 'r') as f: - for line in f: - (bigram, count) = line.split("\t") - english_bigram_counts[bigram] = int(count) -normalised_english_bigram_counts = norms.normalise(english_bigram_counts) - -english_trigram_counts = collections.defaultdict(int) -with open('count_3l.txt', 'r') as f: - for line in f: - (trigram, count) = line.split("\t") - english_trigram_counts[trigram] = int(count) -normalised_english_trigram_counts = norms.normalise(english_trigram_counts) - - -with open('words.txt', 'r') as f: - keywords = [line.rstrip() for line in f] - transpositions = collections.defaultdict(list) for word in keywords: transpositions[transpositions_of(word)] += [word] @@ -80,39 +55,39 @@ def frequencies(text): # counts[c] += 1 #return counts return collections.Counter(c for c in text) -letter_frequencies = frequencies -def bigram_likelihood(bigram, bf, lf): - return bf[bigram] / (lf[bigram[0]] * lf[bigram[1]]) +def frequency_compare(text, target_frequency, frequency_scaling, metric): + counts = frequency_scaling(frequencies(text)) + return -1 * metric(target_frequency, counts) +def euclidean_compare(text): + return frequency_compare(text, norms.euclidean_scale(english_counts), + norms.euclidean_scale, norms.euclidean_distance) -def caesar_break(message, - metric=norms.euclidean_distance, - target_counts=normalised_english_counts, - message_frequency_scaling=norms.normalise): + +def caesar_break(message, fitness=Pletters): """Breaks a Caesar cipher using frequency analysis >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \ 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS - (4, 0.080345432737...) + (4, -130.849890899...) >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \ 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS - (19, 0.11189290326...) + (19, -128.82516920...) >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \ 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS - (13, 0.08293968842...) + (13, -126.25233502...) """ sanitised_message = sanitise(message) best_shift = 0 - best_fit = float("inf") + best_fit = float('-inf') for shift in range(26): plaintext = caesar_decipher(sanitised_message, shift) - counts = message_frequency_scaling(letter_frequencies(plaintext)) - fit = metric(target_counts, counts) + fit = fitness(plaintext) logger.debug('Caesar break attempt using key {0} gives fit of {1} ' 'and decrypt starting: {2}'.format(shift, fit, plaintext[:50])) - if fit < best_fit: + if fit > best_fit: best_fit = fit best_shift = shift logger.info('Caesar break best fit: key {0} gives fit of {1} and ' @@ -130,8 +105,8 @@ def affine_break(message, 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \ 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \ 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \ - 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \ - 'clm ckuxj.') # doctest: +ELLIPSIS + 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \ + 'kxd clm ckuxj.') # doctest: +ELLIPSIS ((15, 22, True), 0.0598745365924...) """ sanitised_message = sanitise(message) @@ -144,7 +119,7 @@ def affine_break(message, for adder in range(26): plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based) - counts = message_frequency_scaling(letter_frequencies(plaintext)) + counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts) logger.debug('Affine break attempt using key {0}x+{1} ({2}) ' 'gives fit of {3} and decrypt starting: {4}'. @@ -181,7 +156,7 @@ def keyword_break(message, for wrap_alphabet in range(3): for keyword in wordlist: plaintext = keyword_decipher(message, keyword, wrap_alphabet) - counts = message_frequency_scaling(letter_frequencies(plaintext)) + counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts) logger.debug('Keyword break attempt using key {0} (wrap={1}) ' 'gives fit of {2} and decrypt starting: {3}'.format( @@ -224,7 +199,7 @@ def keyword_break_mp(message, def keyword_break_worker(message, keyword, wrap_alphabet, metric, target_counts, message_frequency_scaling): plaintext = keyword_decipher(message, keyword, wrap_alphabet) - counts = message_frequency_scaling(letter_frequencies(plaintext)) + counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts) logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' '{2} and decrypt starting: {3}'.format(keyword, @@ -271,33 +246,33 @@ def column_transposition_break_mp(message, chunksize=500): """Breaks a column transposition cipher using a dictionary and n-gram frequency analysis - - >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in the \ - minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 'encipher'), \ - translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ - (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ - (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS - (((2, 0, 5, 3, 1, 4, 6), False), 0.0628106372...) - >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in the \ - minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 'encipher'), \ - translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ - (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ - (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \ - target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS - (((2, 0, 5, 3, 1, 4, 6), False), 0.0592259560...) """ + # >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ + # "It is a truth universally acknowledged, that a single man in \ + # possession of a good fortune, must be in want of a wife. However \ + # little known the feelings or views of such a man may be on his \ + # first entering a neighbourhood, this truth is so well fixed in the \ + # minds of the surrounding families, that he is considered the \ + # rightful property of some one or other of their daughters."), \ + # 'encipher'), \ + # translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ + # (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ + # (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS + # (((2, 0, 5, 3, 1, 4, 6), False), 0.0628106372...) + # >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ + # "It is a truth universally acknowledged, that a single man in \ + # possession of a good fortune, must be in want of a wife. However \ + # little known the feelings or views of such a man may be on his \ + # first entering a neighbourhood, this truth is so well fixed in the \ + # minds of the surrounding families, that he is considered the \ + # rightful property of some one or other of their daughters."), \ + # 'encipher'), \ + # translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ + # (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ + # (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \ + # target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS + # (((2, 0, 5, 3, 1, 4, 6), False), 0.0592259560...) + # """ ngram_length = len(next(iter(target_counts.keys()))) with Pool() as pool: helper_args = [(message, trans, columnwise, metric, target_counts, ngram_length, @@ -364,7 +339,7 @@ def vigenere_keyword_break(message, best_fit = float("inf") for keyword in wordlist: plaintext = vigenere_decipher(message, keyword) - counts = message_frequency_scaling(letter_frequencies(plaintext)) + counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts) logger.debug('Vigenere break attempt using key {0} ' 'gives fit of {1} and decrypt starting: {2}'.format( @@ -379,11 +354,11 @@ def vigenere_keyword_break(message, vigenere_decipher(message, best_keyword))[:50])) return best_keyword, best_fit -def vigenere_keyword_break_mp(message, - wordlist=keywords, - metric=norms.euclidean_distance, - target_counts=normalised_english_counts, - message_frequency_scaling=norms.normalise, +def vigenere_keyword_break_mp(message, + wordlist=keywords, + metric=norms.euclidean_distance, + target_counts=normalised_english_counts, + message_frequency_scaling=norms.normalise, chunksize=500): """Breaks a vigenere cipher using a dictionary and frequency analysis @@ -405,7 +380,7 @@ def vigenere_keyword_break_mp(message, def vigenere_keyword_break_worker(message, keyword, metric, target_counts, message_frequency_scaling): plaintext = vigenere_decipher(message, keyword) - counts = message_frequency_scaling(letter_frequencies(plaintext)) + counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts) logger.debug('Vigenere keyword break attempt using key {0} gives fit of ' '{1} and decrypt starting: {2}'.format(keyword, @@ -431,7 +406,7 @@ def vigenere_frequency_break(message, sanitised_message = sanitise(message) for trial_length in range(1, 20): splits = every_nth(sanitised_message, trial_length) - key = ''.join([chr(caesar_break(s, target_counts=target_counts)[0] + ord('a')) for s in splits]) + key = ''.join([chr(caesar_break(s)[0] + ord('a')) for s in splits]) plaintext = vigenere_decipher(sanitised_message, key) counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts) @@ -452,7 +427,7 @@ def beaufort_frequency_break(message, message_frequency_scaling=norms.normalise): """Breaks a Beaufort cipher with frequency analysis - >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \ + >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \ "run. She is ready and so am I. I stole Daniel's pocketbook this " \ "afternoon when he left his jacket hanging on the easel in the " \ "attic."), 'florence')) # doctest: +ELLIPSIS @@ -463,7 +438,7 @@ def beaufort_frequency_break(message, sanitised_message = sanitise(message) for trial_length in range(1, 20): splits = every_nth(sanitised_message, trial_length) - key = ''.join([chr(caesar_break(s, target_counts=target_counts)[0] + ord('a')) for s in splits]) + key = ''.join([chr(caesar_break(s)[0] + ord('a')) for s in splits]) plaintext = beaufort_decipher(sanitised_message, key) counts = message_frequency_scaling(frequencies(plaintext)) fit = metric(target_counts, counts)