X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=273da4681040feff53f28d7fcee21ff22ad1a923;hb=38f3707a2337652adf616b47b2f35f3f20d433bd;hp=6de268281d41233efd5498ba3d391ace544dcd74;hpb=5010cde507d2b6b25ee549efd3dec8d663937e15;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index 6de2682..273da46 100644 --- a/cipher.py +++ b/cipher.py @@ -2,6 +2,15 @@ import string import collections import norms import logging +from itertools import zip_longest +from segment import segment + +# To time a run: +# +# import timeit +# c5a = open('2012/5a.ciphertext', 'r').read() +# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1) + logger = logging.getLogger(__name__) logger.addHandler(logging.FileHandler('cipher.log')) @@ -15,11 +24,9 @@ with open('count_1l.txt', 'r') as f: english_counts[letter] = int(count) normalised_english_counts = norms.normalise(english_counts) -keywords = [] with open('words.txt', 'r') as f: keywords = [line.rstrip() for line in f] - modular_division_table = [[0]*26 for x in range(26)] for a in range(26): for b in range(26): @@ -55,6 +62,33 @@ def ngrams(text, n): """ return [tuple(text[i:i+n]) for i in range(len(text)-n+1)] +def every_nth(text, n): + """Returns n strings, each of which consists of every nth character, + starting with the 0th, 1st, 2nd, ... (n-1)th character + + >>> every_nth(string.ascii_lowercase, 5) + ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] + >>> every_nth(string.ascii_lowercase, 1) + ['abcdefghijklmnopqrstuvwxyz'] + >>> every_nth(string.ascii_lowercase, 26) + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] + """ + split_text = [text[i:i+n] for i in range(0, len(text), n)] + return [''.join(l) for l in zip_longest(*split_text, fillvalue='')] + +def combine_every_nth(split_text): + """Reforms a text split into every_nth strings + + >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) + 'abcdefghijklmnopqrstuvwxyz' + """ + return ''.join([''.join(l) for l in zip_longest(*split_text, fillvalue='')]) + + def letter_frequencies(text): """Count the number of occurrences of each character in text @@ -157,9 +191,9 @@ def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True): else: alphabet_start = ord('a') letter_number = ord(letter) - alphabet_start - if one_based: letter_number += 1 + if one_based: + letter_number += 1 raw_cipher_number = (letter_number * multiplier + adder) - cipher_number = 0 if one_based: cipher_number = (raw_cipher_number - 1) % 26 else: @@ -182,12 +216,9 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): else: alphabet_start = ord('a') cipher_number = ord(letter) - alphabet_start - if one_based: cipher_number += 1 - plaintext_number = 0 if one_based: - plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26 + plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number + 1 - adder + 26) % 26] - 1) % 26 else: - #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26 plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26] return chr(plaintext_number + alphabet_start) else: @@ -199,7 +230,6 @@ def affine_encipher(message, multiplier=1, adder=0, one_based=True): >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True) 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' """ - enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message] return ''.join(enciphered) @@ -213,48 +243,73 @@ def affine_decipher(message, multiplier=1, adder=0, one_based=True): return ''.join(enciphered) -def keyword_cipher_alphabet_of(keyword, wrap_alphabet=False): - """Find the cipher alphabet given a keyword - - >>> keyword_cipher_alphabet_of('harry') - 'harybcdefgijklmnopqstuvwxz' - >>> keyword_cipher_alphabet_of('harry', True) - 'haryzbcdefgijklmnopqstuvwx' - >>> keyword_cipher_alphabet_of('harry', False) - 'harybcdefgijklmnopqstuvwxz' +def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0): + """Find the cipher alphabet given a keyword. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_cipher_alphabet_of('bayes') + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', 0) + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', 1) + 'bayestuvwxzcdfghijklmnopqr' + >>> keyword_cipher_alphabet_of('bayes', 2) + 'bayeszcdfghijklmnopqrtuvwx' """ - cipher_alphabet = '' - if wrap_alphabet: - last_keyword_letter = deduplicate(sanitise(keyword))[-1] - last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1 - cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase[last_keyword_position:] + string.ascii_lowercase)) - else: + if wrap_alphabet == 0: cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase)) + else: + if wrap_alphabet == 1: + last_keyword_letter = deduplicate(sanitise(keyword))[-1] + else: + last_keyword_letter = sorted(sanitise(keyword))[-1] + last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1 + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + + string.ascii_lowercase)) return cipher_alphabet -def keyword_encipher(message, keyword, wrap_alphabet=False): - """Enciphers a message with a keyword substitution cipher - - >>> keyword_encipher('test message', 'harry') - 'sbqs kbqqhdb' - >>> keyword_encipher('test message', 'harry', True) - 'qzpq jzpphcz' - >>> keyword_encipher('test message', 'harry', False) - 'sbqs kbqqhdb' +def keyword_encipher(message, keyword, wrap_alphabet=0): + """Enciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_encipher('test message', 'bayes') + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', 0) + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', 1) + 'lskl dskkbus' + >>> keyword_encipher('test message', 'bayes', 2) + 'qspq jsppbcs' """ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) return message.lower().translate(cipher_translation) -def keyword_decipher(message, keyword, wrap_alphabet=False): - """Deciphers a message with a keyword substitution cipher - - >>> keyword_decipher('sbqs kbqqhdb', 'harry') +def keyword_decipher(message, keyword, wrap_alphabet=0): + """Deciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_decipher('rsqr ksqqbds', 'bayes') + 'test message' + >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0) 'test message' - >>> keyword_decipher('qzpq jzpphcz', 'harry', True) + >>> keyword_decipher('lskl dskkbus', 'bayes', 1) 'test message' - >>> keyword_decipher('sbqs kbqqhdb', 'harry', False) + >>> keyword_decipher('qspq jsppbcs', 'bayes', 2) 'test message' """ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) @@ -316,23 +371,23 @@ def affine_break(message, metric=norms.euclidean_distance, target_frequencies=no def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise): """Breaks a keyword substitution cipher using a dictionary and frequency analysis - >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', True), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', True), 0.41643991598441...) + >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', 1), 0.41643991598441...) """ best_keyword = '' best_wrap_alphabet = True best_fit = float("inf") - for wrap_alphabet in [True, False]: + for wrap_alphabet in range(3): for keyword in wordlist: plaintext = keyword_decipher(message, keyword, wrap_alphabet) frequencies = message_frequency_scaling(letter_frequencies(plaintext)) fit = metric(target_frequencies, frequencies) - logger.debug('Keyword break attempt using key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50])) + logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50])) if fit < best_fit: best_fit = fit best_keyword = keyword best_wrap_alphabet = wrap_alphabet - logger.info('Keyword break best fit with key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50])) + logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50])) return (best_keyword, best_wrap_alphabet), best_fit