X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=248155edb8a54956df51348a64fc26c9532db339;hb=3d8f7067b9c3a48ef140d7cff834d18ee91f58b3;hp=3251edd47ddc62190d2b7cb31895971506a139c4;hpb=6a55783b704379a61ac1e2eaecba03ad714a463a;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index 3251edd..248155e 100644 --- a/cipher.py +++ b/cipher.py @@ -1,774 +1,31 @@ -import string -import collections -import norms -import logging -import math -from itertools import zip_longest, repeat -from segment import segment -from multiprocessing import Pool +# import string +# import collections +# import math +# from enum import Enum +# from itertools import zip_longest, cycle, chain, count +# import numpy as np +# from numpy import matrix +# from numpy import linalg +# from language_models import * +# import pprint + + + +from utilities import * +from segment import * +from text_prettify import * +from plot_frequency_histogram import * + +from caesar import * +from affine import * +from keyword import * +from polybius import * +from column_transposition import * +from railfence import * +from cadenus import * +from hill import * +from amsco import * +from bifid import * +from autokey import * +from pocket_enigma import * -# To time a run: -# -# import timeit -# c5a = open('2012/5a.ciphertext', 'r').read() -# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1) -# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1 - -logger = logging.getLogger(__name__) -logger.addHandler(logging.FileHandler('cipher.log')) -logger.setLevel(logging.WARNING) -#logger.setLevel(logging.INFO) -#logger.setLevel(logging.DEBUG) - -english_counts = collections.defaultdict(int) -with open('count_1l.txt', 'r') as f: - for line in f: - (letter, count) = line.split("\t") - english_counts[letter] = int(count) -normalised_english_counts = norms.normalise(english_counts) - -english_bigram_counts = collections.defaultdict(int) -with open('count_2l.txt', 'r') as f: - for line in f: - (bigram, count) = line.split("\t") - english_bigram_counts[bigram] = int(count) -normalised_english_bigram_counts = norms.normalise(english_bigram_counts) - -english_trigram_counts = collections.defaultdict(int) -with open('count_3l.txt', 'r') as f: - for line in f: - (trigram, count) = line.split("\t") - english_trigram_counts[trigram] = int(count) -normalised_english_trigram_counts = norms.normalise(english_trigram_counts) - - -with open('words.txt', 'r') as f: - keywords = [line.rstrip() for line in f] - -modular_division_table = [[0]*26 for x in range(26)] -for a in range(26): - for b in range(26): - c = (a * b) % 26 - modular_division_table[b][c] = a - -def letters(text): - """Remove all non-alphabetic characters from a text - >>> letters('The Quick') - 'TheQuick' - >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG') - 'TheQuickBROWNfoxjumpedoverthelazyDOG' - """ - return ''.join([c for c in text if c in string.ascii_letters]) - -def sanitise(text): - """Remove all non-alphabetic characters and convert the text to lowercase - - >>> sanitise('The Quick') - 'thequick' - >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG') - 'thequickbrownfoxjumpedoverthelazydog' - """ - # sanitised = [c.lower() for c in text if c in string.ascii_letters] - # return ''.join(sanitised) - return letters(text).lower() - -def ngrams(text, n): - """Returns all n-grams of a text - - >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE - ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', - 'nf', 'fo', 'ox'] - >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE - ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', - 'rown', 'ownf', 'wnfo', 'nfox'] - """ - return [text[i:i+n] for i in range(len(text)-n+1)] - -def every_nth(text, n, fillvalue=''): - """Returns n strings, each of which consists of every nth character, - starting with the 0th, 1st, 2nd, ... (n-1)th character - - >>> every_nth(string.ascii_lowercase, 5) - ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] - >>> every_nth(string.ascii_lowercase, 1) - ['abcdefghijklmnopqrstuvwxyz'] - >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE - ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] - >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') - ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] - """ - split_text = [text[i:i+n] for i in range(0, len(text), n)] - return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)] - -def combine_every_nth(split_text): - """Reforms a text split into every_nth strings - - >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) - 'abcdefghijklmnopqrstuvwxyz' - >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) - 'abcdefghijklmnopqrstuvwxyz' - >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) - 'abcdefghijklmnopqrstuvwxyz' - """ - return ''.join([''.join(l) - for l in zip_longest(*split_text, fillvalue='')]) - -def transpose(items, transposition): - """Moves items around according to the given transposition - - >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3]) - ['a', 'b', 'c', 'd'] - >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0]) - ['d', 'b', 'c', 'a'] - >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0]) - [13, 12, 14, 11, 15, 10] - """ - transposed = list(repeat('', len(transposition))) - for p, t in enumerate(transposition): - transposed[p] = items[t] - return transposed - -def untranspose(items, transposition): - """Undoes a transpose - - >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) - ['a', 'b', 'c', 'd'] - >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) - ['a', 'b', 'c', 'd'] - >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0]) - [10, 11, 12, 13, 14, 15] - """ - transposed = list(repeat('', len(transposition))) - for p, t in enumerate(transposition): - transposed[t] = items[p] - return transposed - - -def frequencies(text): - """Count the number of occurrences of each character in text - - >>> sorted(frequencies('abcdefabc').items()) - [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)] - >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \ - 'dog').items()) # doctest: +NORMALIZE_WHITESPACE - [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), - ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), - ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), - ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \ - '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE - [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), - ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), - ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), - ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), - ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \ - 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE - [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), - ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), - ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), - ('w', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> frequencies('abcdefabcdef')['x'] - 0 - """ - #counts = collections.defaultdict(int) - #for c in text: - # counts[c] += 1 - #return counts - return collections.Counter(c for c in text) -letter_frequencies = frequencies - -def deduplicate(text): - return list(collections.OrderedDict.fromkeys(text)) - - - -def caesar_encipher_letter(letter, shift): - """Encipher a letter, given a shift amount - - >>> caesar_encipher_letter('a', 1) - 'b' - >>> caesar_encipher_letter('a', 2) - 'c' - >>> caesar_encipher_letter('b', 2) - 'd' - >>> caesar_encipher_letter('x', 2) - 'z' - >>> caesar_encipher_letter('y', 2) - 'a' - >>> caesar_encipher_letter('z', 2) - 'b' - >>> caesar_encipher_letter('z', -1) - 'y' - >>> caesar_encipher_letter('a', -1) - 'z' - """ - if letter in string.ascii_letters: - if letter in string.ascii_uppercase: - alphabet_start = ord('A') - else: - alphabet_start = ord('a') - return chr(((ord(letter) - alphabet_start + shift) % 26) + - alphabet_start) - else: - return letter - -def caesar_decipher_letter(letter, shift): - """Decipher a letter, given a shift amount - - >>> caesar_decipher_letter('b', 1) - 'a' - >>> caesar_decipher_letter('b', 2) - 'z' - """ - return caesar_encipher_letter(letter, -shift) - -def caesar_encipher(message, shift): - """Encipher a message with the Caesar cipher of given shift - - >>> caesar_encipher('abc', 1) - 'bcd' - >>> caesar_encipher('abc', 2) - 'cde' - >>> caesar_encipher('abcxyz', 2) - 'cdezab' - >>> caesar_encipher('ab cx yz', 2) - 'cd ez ab' - """ - enciphered = [caesar_encipher_letter(l, shift) for l in message] - return ''.join(enciphered) - -def caesar_decipher(message, shift): - """Encipher a message with the Caesar cipher of given shift - - >>> caesar_decipher('bcd', 1) - 'abc' - >>> caesar_decipher('cde', 2) - 'abc' - >>> caesar_decipher('cd ez ab', 2) - 'ab cx yz' - """ - return caesar_encipher(message, -shift) - -def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True): - """Encipher a letter, given a multiplier and adder - - >>> ''.join([affine_encipher_letter(l, 3, 5, True) \ - for l in string.ascii_uppercase]) - 'HKNQTWZCFILORUXADGJMPSVYBE' - >>> ''.join([affine_encipher_letter(l, 3, 5, False) \ - for l in string.ascii_uppercase]) - 'FILORUXADGJMPSVYBEHKNQTWZC' - """ - if letter in string.ascii_letters: - if letter in string.ascii_uppercase: - alphabet_start = ord('A') - else: - alphabet_start = ord('a') - letter_number = ord(letter) - alphabet_start - if one_based: letter_number += 1 - cipher_number = (letter_number * multiplier + adder) % 26 - if one_based: cipher_number -= 1 - return chr(cipher_number % 26 + alphabet_start) - else: - return letter - -def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): - """Encipher a letter, given a multiplier and adder - - >>> ''.join([affine_decipher_letter(l, 3, 5, True) \ - for l in 'HKNQTWZCFILORUXADGJMPSVYBE']) - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - >>> ''.join([affine_decipher_letter(l, 3, 5, False) \ - for l in 'FILORUXADGJMPSVYBEHKNQTWZC']) - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - """ - if letter in string.ascii_letters: - if letter in string.ascii_uppercase: - alphabet_start = ord('A') - else: - alphabet_start = ord('a') - cipher_number = ord(letter) - alphabet_start - if one_based: cipher_number += 1 - plaintext_number = ( modular_division_table[multiplier] - [(cipher_number - adder) % 26] ) - if one_based: plaintext_number -= 1 - return chr(plaintext_number % 26 + alphabet_start) - else: - return letter - -def affine_encipher(message, multiplier=1, adder=0, one_based=True): - """Encipher a message - - >>> affine_encipher('hours passed during which jerico tried every ' \ - 'trick he could think of', 15, 22, True) - 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' - """ - enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) - for l in message] - return ''.join(enciphered) - -def affine_decipher(message, multiplier=1, adder=0, one_based=True): - """Decipher a message - - >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ - 'jfaoe ls omytd jlaxe mh', 15, 22, True) - 'hours passed during which jerico tried every trick he could think of' - """ - enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) - for l in message] - return ''.join(enciphered) - - -def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0): - """Find the cipher alphabet given a keyword. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_cipher_alphabet_of('bayes') - 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', 0) - 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', 1) - 'bayestuvwxzcdfghijklmnopqr' - >>> keyword_cipher_alphabet_of('bayes', 2) - 'bayeszcdfghijklmnopqrtuvwx' - """ - if wrap_alphabet == 0: - cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + - string.ascii_lowercase)) - else: - if wrap_alphabet == 1: - last_keyword_letter = deduplicate(sanitise(keyword))[-1] - else: - last_keyword_letter = sorted(sanitise(keyword))[-1] - last_keyword_position = string.ascii_lowercase.find( - last_keyword_letter) + 1 - cipher_alphabet = ''.join( - deduplicate(sanitise(keyword) + - string.ascii_lowercase[last_keyword_position:] + - string.ascii_lowercase)) - return cipher_alphabet - - -def keyword_encipher(message, keyword, wrap_alphabet=0): - """Enciphers a message with a keyword substitution cipher. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_encipher('test message', 'bayes') - 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', 0) - 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', 1) - 'lskl dskkbus' - >>> keyword_encipher('test message', 'bayes', 2) - 'qspq jsppbcs' - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) - return message.lower().translate(cipher_translation) - -def keyword_decipher(message, keyword, wrap_alphabet=0): - """Deciphers a message with a keyword substitution cipher. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_decipher('rsqr ksqqbds', 'bayes') - 'test message' - >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0) - 'test message' - >>> keyword_decipher('lskl dskkbus', 'bayes', 1) - 'test message' - >>> keyword_decipher('qspq jsppbcs', 'bayes', 2) - 'test message' - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) - return message.lower().translate(cipher_translation) - -def scytale_encipher(message, rows): - """Enciphers using the scytale transposition cipher. - Message is padded with spaces to allow all rows to be the same length. - - >>> scytale_encipher('thequickbrownfox', 3) - 'tcnhkfeboqrxuo iw ' - >>> scytale_encipher('thequickbrownfox', 4) - 'tubnhirfecooqkwx' - >>> scytale_encipher('thequickbrownfox', 5) - 'tubn hirf ecoo qkwx ' - >>> scytale_encipher('thequickbrownfox', 6) - 'tqcrnxhukof eibwo ' - >>> scytale_encipher('thequickbrownfox', 7) - 'tqcrnx hukof eibwo ' - """ - if len(message) % rows != 0: - message += ' '*(rows - len(message) % rows) - row_length = round(len(message) / rows) - slices = [message[i:i+row_length] - for i in range(0, len(message), row_length)] - return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')]) - -def scytale_decipher(message, rows): - """Deciphers using the scytale transposition cipher. - Assumes the message is padded so that all rows are the same length. - - >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) - 'thequickbrownfox ' - >>> scytale_decipher('tubnhirfecooqkwx', 4) - 'thequickbrownfox' - >>> scytale_decipher('tubn hirf ecoo qkwx ', 5) - 'thequickbrownfox ' - >>> scytale_decipher('tqcrnxhukof eibwo ', 6) - 'thequickbrownfox ' - >>> scytale_decipher('tqcrnx hukof eibwo ', 7) - 'thequickbrownfox ' - """ - cols = round(len(message) / rows) - columns = [message[i:i+rows] for i in range(0, cols * rows, rows)] - return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')]) - - -def transpositions_of(keyword): - """Finds the transpostions given by a keyword. For instance, the keyword - 'clever' rearranges to 'celrv', so the first column (0) stays first, the - second column (1) moves to third, the third column (2) moves to second, - and so on. - - >>> transpositions_of('clever') - [0, 2, 1, 4, 3] - """ - key = deduplicate(keyword) - transpositions = [key.index(l) for l in sorted(key)] - return transpositions - -def column_transposition_encipher(message, keyword, fillvalue=' '): - """Enciphers using the column transposition cipher. - Message is padded to allow all rows to be the same length. - - >>> column_transposition_encipher('hellothere', 'clever') - 'hleolteher' - >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') - 'hleolthre!e!' - """ - return column_transposition_worker(message, keyword, encipher=True, - fillvalue=fillvalue) - -def column_transposition_decipher(message, keyword, fillvalue=' '): - """Deciphers using the column transposition cipher. - Message is padded to allow all rows to be the same length. - - >>> column_transposition_decipher('hleolteher', 'clever') - 'hellothere' - >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?') - 'hellothere!!' - """ - return column_transposition_worker(message, keyword, encipher=False, - fillvalue=fillvalue) - -def column_transposition_worker(message, keyword, - encipher=True, fillvalue=' '): - """Does the actual work of the column transposition cipher. - Message is padded with spaces to allow all rows to be the same length. - - >>> column_transposition_worker('hellothere', 'clever') - 'hleolteher' - >>> column_transposition_worker('hellothere', 'clever', encipher=True) - 'hleolteher' - >>> column_transposition_worker('hleolteher', 'clever', encipher=False) - 'hellothere' - """ - transpositions = transpositions_of(keyword) - columns = every_nth(message, len(transpositions), fillvalue=fillvalue) - if encipher: - transposed_columns = transpose(columns, transpositions) - else: - transposed_columns = untranspose(columns, transpositions) - return combine_every_nth(transposed_columns) - -def vigenere_encipher(message, keyword): - - - - -def caesar_break(message, - metric=norms.euclidean_distance, - target_counts=normalised_english_counts, - message_frequency_scaling=norms.normalise): - """Breaks a Caesar cipher using frequency analysis - - >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \ - 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS - (4, 0.31863952890183...) - >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \ - 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS - (19, 0.42152901235832...) - >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \ - 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS - (13, 0.316029208075451...) - """ - sanitised_message = sanitise(message) - best_shift = 0 - best_fit = float("inf") - for shift in range(26): - plaintext = caesar_decipher(sanitised_message, shift) - counts = message_frequency_scaling(letter_frequencies(plaintext)) - fit = metric(target_counts, counts) - logger.debug('Caesar break attempt using key {0} gives fit of {1} ' - 'and decrypt starting: {2}'.format(shift, fit, plaintext[:50])) - if fit < best_fit: - best_fit = fit - best_shift = shift - logger.info('Caesar break best fit: key {0} gives fit of {1} and ' - 'decrypt starting: {2}'.format(best_shift, best_fit, - caesar_decipher(sanitised_message, best_shift)[:50])) - return best_shift, best_fit - -def affine_break(message, - metric=norms.euclidean_distance, - target_counts=normalised_english_counts, - message_frequency_scaling=norms.normalise): - """Breaks an affine cipher using frequency analysis - - >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \ - 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \ - 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \ - 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \ - 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \ - 'clm ckuxj.') # doctest: +ELLIPSIS - ((15, 22, True), 0.23570361818655...) - """ - sanitised_message = sanitise(message) - best_multiplier = 0 - best_adder = 0 - best_one_based = True - best_fit = float("inf") - for one_based in [True, False]: - for multiplier in range(1, 26, 2): - for adder in range(26): - plaintext = affine_decipher(sanitised_message, - multiplier, adder, one_based) - counts = message_frequency_scaling(letter_frequencies(plaintext)) - fit = metric(target_counts, counts) - logger.debug('Affine break attempt using key {0}x+{1} ({2}) ' - 'gives fit of {3} and decrypt starting: {4}'. - format(multiplier, adder, one_based, fit, - plaintext[:50])) - if fit < best_fit: - best_fit = fit - best_multiplier = multiplier - best_adder = adder - best_one_based = one_based - logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} ' - 'and decrypt starting: {4}'.format( - best_multiplier, best_adder, best_one_based, best_fit, - affine_decipher(sanitised_message, best_multiplier, - best_adder, best_one_based)[:50])) - return (best_multiplier, best_adder, best_one_based), best_fit - -def keyword_break(message, - wordlist=keywords, - metric=norms.euclidean_distance, - target_counts=normalised_english_counts, - message_frequency_scaling=norms.normalise): - """Breaks a keyword substitution cipher using a dictionary and - frequency analysis - - >>> keyword_break(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', 1), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', 1), 0.41643991598441...) - """ - best_keyword = '' - best_wrap_alphabet = True - best_fit = float("inf") - for wrap_alphabet in range(3): - for keyword in wordlist: - plaintext = keyword_decipher(message, keyword, wrap_alphabet) - counts = message_frequency_scaling(letter_frequencies(plaintext)) - fit = metric(target_counts, counts) - logger.debug('Keyword break attempt using key {0} (wrap={1}) ' - 'gives fit of {2} and decrypt starting: {3}'.format( - keyword, wrap_alphabet, fit, - sanitise(plaintext)[:50])) - if fit < best_fit: - best_fit = fit - best_keyword = keyword - best_wrap_alphabet = wrap_alphabet - logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(best_keyword, - best_wrap_alphabet, best_fit, sanitise( - keyword_decipher(message, best_keyword, - best_wrap_alphabet))[:50])) - return (best_keyword, best_wrap_alphabet), best_fit - -def keyword_break_mp(message, - wordlist=keywords, - metric=norms.euclidean_distance, - target_counts=normalised_english_counts, - message_frequency_scaling=norms.normalise, - chunksize=500): - """Breaks a keyword substitution cipher using a dictionary and - frequency analysis - - >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', 1), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', 1), 0.41643991598441...) - """ - with Pool() as pool: - helper_args = [(message, word, wrap, metric, target_counts, - message_frequency_scaling) - for word in wordlist for wrap in range(3)] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(keyword_break_one, helper_args, chunksize) - return min(breaks, key=lambda k: k[1]) - -def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts, - message_frequency_scaling): - plaintext = keyword_decipher(message, keyword, wrap_alphabet) - counts = message_frequency_scaling(letter_frequencies(plaintext)) - fit = metric(target_counts, counts) - logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(keyword, - wrap_alphabet, fit, sanitise(plaintext)[:50])) - return (keyword, wrap_alphabet), fit - -def scytale_break(message, - metric=norms.euclidean_distance, - target_counts=normalised_english_bigram_counts, - message_frequency_scaling=norms.normalise): - """Breaks a Scytale cipher - - >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \ - 'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \ - 'aek') # doctest: +ELLIPSIS - (6, 0.83453041115025...) - """ - best_key = 0 - best_fit = float("inf") - ngram_length = len(next(iter(target_counts.keys()))) - for key in range(1, 20): - if len(message) % key == 0: - plaintext = scytale_decipher(message, key) - counts = message_frequency_scaling(frequencies( - ngrams(sanitise(plaintext), ngram_length))) - fit = metric(target_counts, counts) - logger.debug('Scytale break attempt using key {0} gives fit of ' - '{1} and decrypt starting: {2}'.format(key, - fit, sanitise(plaintext)[:50])) - if fit < best_fit: - best_fit = fit - best_key = key - logger.info('Scytale break best fit with key {0} gives fit of {1} and ' - 'decrypt starting: {2}'.format(best_key, best_fit, - sanitise(scytale_decipher(message, best_key))[:50])) - return best_key, best_fit - -def column_transposition_break(message, - wordlist=keywords, - metric=norms.euclidean_distance, - target_counts=normalised_english_bigram_counts, - message_frequency_scaling=norms.normalise): - """Breaks a column transposition cipher using a dictionary and - n-gram frequency analysis - - >>> column_transposition_break(column_transposition_encipher(sanitise( \ - "Turing's homosexuality resulted in a criminal prosecution in 1952, \ - when homosexual acts were still illegal in the United Kingdom. "), \ - 'encipher'), \ - wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS - ('encipher', 0.898128626285...) - >>> column_transposition_break(column_transposition_encipher(sanitise( \ - "Turing's homosexuality resulted in a criminal prosecution in 1952, " \ - "when homosexual acts were still illegal in the United Kingdom."), \ - 'encipher'), \ - wordlist=['encipher', 'keyword', 'fourteen'], \ - target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS - ('encipher', 1.1958792913127...) - """ - best_keyword = '' - best_fit = float("inf") - ngram_length = len(next(iter(target_counts.keys()))) - for keyword in wordlist: - if len(message) % len(deduplicate(keyword)) == 0: - plaintext = column_transposition_decipher(message, keyword) - counts = message_frequency_scaling(frequencies( - ngrams(sanitise(plaintext), ngram_length))) - fit = metric(target_counts, counts) - logger.debug('Column transposition break attempt using key {0} ' - 'gives fit of {1} and decrypt starting: {2}'.format( - keyword, fit, - sanitise(plaintext)[:50])) - if fit < best_fit: - best_fit = fit - best_keyword = keyword - logger.info('Column transposition break best fit with key {0} gives fit ' - 'of {1} and decrypt starting: {2}'.format(best_keyword, - best_fit, sanitise( - column_transposition_decipher(message, - best_keyword))[:50])) - return best_keyword, best_fit - - -def column_transposition_break_mp(message, - wordlist=keywords, - metric=norms.euclidean_distance, - target_counts=normalised_english_bigram_counts, - message_frequency_scaling=norms.normalise, - chunksize=500): - """Breaks a column transposition cipher using a dictionary and - n-gram frequency analysis - - >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ - "Turing's homosexuality resulted in a criminal prosecution in 1952, \ - when homosexual acts were still illegal in the United Kingdom. "), \ - 'encipher'), \ - wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS - ('encipher', 0.898128626285...) - >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ - "Turing's homosexuality resulted in a criminal prosecution in 1952, " \ - "when homosexual acts were still illegal in the United Kingdom."), \ - 'encipher'), \ - wordlist=['encipher', 'keyword', 'fourteen'], \ - target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS - ('encipher', 1.1958792913127...) - """ - ngram_length = len(next(iter(target_counts.keys()))) - with Pool() as pool: - helper_args = [(message, word, metric, target_counts, ngram_length, - message_frequency_scaling) - for word in wordlist] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize) - return min(breaks, key=lambda k: k[1]) - -def column_transposition_break_worker(message, keyword, metric, target_counts, - ngram_length, message_frequency_scaling): - plaintext = column_transposition_decipher(message, keyword) - counts = message_frequency_scaling(frequencies( - ngrams(sanitise(plaintext), ngram_length))) - fit = metric(target_counts, counts) - logger.debug('Column transposition break attempt using key {0} ' - 'gives fit of {1} and decrypt starting: {2}'.format( - keyword, fit, - sanitise(plaintext)[:50])) - return keyword, fit - - - -if __name__ == "__main__": - import doctest - doctest.testmod()