import collections
import norms
import logging
+import math
+from itertools import zip_longest
+from segment import segment
+
+# To time a run:
+#
+# import timeit
+# c5a = open('2012/5a.ciphertext', 'r').read()
+# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
+
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
-keywords = []
with open('words.txt', 'r') as f:
keywords = [line.rstrip() for line in f]
-
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
for b in range(26):
c = (a * b) % 26
modular_division_table[b][c] = a
-modular_division_table_one_based = [[0]*27 for x in range(27)]
-for a in range(27):
- for b in range(27):
- c = ((a * b)-1) % 26 + 1
- modular_division_table_one_based[b][c] = a
-
-
def sanitise(text):
"""Remove all non-alphabetic characters and convert the text to lowercase
"""
return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
+def every_nth(text, n):
+ """Returns n strings, each of which consists of every nth character,
+ starting with the 0th, 1st, 2nd, ... (n-1)th character
+
+ >>> every_nth(string.ascii_lowercase, 5)
+ ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
+ >>> every_nth(string.ascii_lowercase, 1)
+ ['abcdefghijklmnopqrstuvwxyz']
+ >>> every_nth(string.ascii_lowercase, 26)
+ ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+ """
+ split_text = [text[i:i+n] for i in range(0, len(text), n)]
+ return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
+
+def combine_every_nth(split_text):
+ """Reforms a text split into every_nth strings
+
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
+ 'abcdefghijklmnopqrstuvwxyz'
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
+ 'abcdefghijklmnopqrstuvwxyz'
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
+ 'abcdefghijklmnopqrstuvwxyz'
+ """
+ return ''.join([''.join(l) for l in zip_longest(*split_text, fillvalue='')])
+
+
def letter_frequencies(text):
"""Count the number of occurrences of each character in text
alphabet_start = ord('a')
letter_number = ord(letter) - alphabet_start
if one_based: letter_number += 1
- raw_cipher_number = (letter_number * multiplier + adder)
- cipher_number = 0
- if one_based:
- cipher_number = (raw_cipher_number - 1) % 26
- else:
- cipher_number = raw_cipher_number % 26
- return chr(cipher_number + alphabet_start)
+ cipher_number = (letter_number * multiplier + adder) % 26
+ if one_based: cipher_number -= 1
+ return chr(cipher_number % 26 + alphabet_start)
else:
return letter
alphabet_start = ord('a')
cipher_number = ord(letter) - alphabet_start
if one_based: cipher_number += 1
- plaintext_number = 0
- if one_based:
- plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26
- else:
- #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
- plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
- return chr(plaintext_number + alphabet_start)
+ plaintext_number = modular_division_table[multiplier][(cipher_number - adder) % 26]
+ if one_based: plaintext_number -= 1
+ return chr(plaintext_number % 26 + alphabet_start)
else:
return letter
>>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
"""
-
enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message]
return ''.join(enciphered)
return ''.join(enciphered)
-def keyword_encipher(message, keyword, wrap_alphabet=False):
- cipher_alphabet = ''
- if wrap_alphabet:
- last_keyword_letter = deduplicate(sanitise(keyword))[-1]
- last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
- cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase[last_keyword_position:] + string.ascii_lowercase[:last_keyword_position]))
- else:
+def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
+ """Find the cipher alphabet given a keyword.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_cipher_alphabet_of('bayes')
+ 'bayescdfghijklmnopqrtuvwxz'
+ >>> keyword_cipher_alphabet_of('bayes', 0)
+ 'bayescdfghijklmnopqrtuvwxz'
+ >>> keyword_cipher_alphabet_of('bayes', 1)
+ 'bayestuvwxzcdfghijklmnopqr'
+ >>> keyword_cipher_alphabet_of('bayes', 2)
+ 'bayeszcdfghijklmnopqrtuvwx'
+ """
+ if wrap_alphabet == 0:
cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
+ else:
+ if wrap_alphabet == 1:
+ last_keyword_letter = deduplicate(sanitise(keyword))[-1]
+ else:
+ last_keyword_letter = sorted(sanitise(keyword))[-1]
+ last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
+ cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
+ string.ascii_lowercase[last_keyword_position:] +
+ string.ascii_lowercase))
+ return cipher_alphabet
+
+
+def keyword_encipher(message, keyword, wrap_alphabet=0):
+ """Enciphers a message with a keyword substitution cipher.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_encipher('test message', 'bayes')
+ 'rsqr ksqqbds'
+ >>> keyword_encipher('test message', 'bayes', 0)
+ 'rsqr ksqqbds'
+ >>> keyword_encipher('test message', 'bayes', 1)
+ 'lskl dskkbus'
+ >>> keyword_encipher('test message', 'bayes', 2)
+ 'qspq jsppbcs'
+ """
+ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
return message.lower().translate(cipher_translation)
-def keyword_decipher(message, keyword, wrap_alphabet=False):
- cipher_alphabet = ''
- if wrap_alphabet:
- last_keyword_letter = deduplicate(sanitise(keyword))[-1]
- last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
- cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase[last_keyword_position:] + string.ascii_lowercase[:last_keyword_position]))
- else:
- cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
- #cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
+def keyword_decipher(message, keyword, wrap_alphabet=0):
+ """Deciphers a message with a keyword substitution cipher.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_decipher('rsqr ksqqbds', 'bayes')
+ 'test message'
+ >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
+ 'test message'
+ >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
+ 'test message'
+ >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
+ 'test message'
+ """
+ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
return message.lower().translate(cipher_translation)
+def scytale_encipher(message, rows):
+ """Enciphers using the scytale transposition cipher.
+ Message is padded with spaces to allow all rows to be the same length.
+
+ >>> scytale_encipher('thequickbrownfox', 3)
+ 'tcnhkfeboqrxuo iw '
+ >>> scytale_encipher('thequickbrownfox', 4)
+ 'tubnhirfecooqkwx'
+ >>> scytale_encipher('thequickbrownfox', 5)
+ 'tubn hirf ecoo qkwx '
+ >>> scytale_encipher('thequickbrownfox', 6)
+ 'tqcrnxhukof eibwo '
+ >>> scytale_encipher('thequickbrownfox', 7)
+ 'tqcrnx hukof eibwo '
+ """
+ if len(message) % rows != 0:
+ message += ' '*(rows - len(message) % rows)
+ row_length = round(len(message) / rows)
+ slices = [message[i:i+row_length] for i in range(0, len(message), row_length)]
+ return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
+
+def scytale_decipher(message, rows):
+ """Deciphers using the scytale transposition cipher.
+ Assumes the message is padded so that all rows are the same length.
+
+ >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
+ 'thequickbrownfox '
+ >>> scytale_decipher('tubnhirfecooqkwx', 4)
+ 'thequickbrownfox'
+ >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
+ 'thequickbrownfox '
+ >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
+ 'thequickbrownfox '
+ >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
+ 'thequickbrownfox '
+ """
+ cols = round(len(message) / rows)
+ columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
+ return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
+
def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
"""Breaks a Caesar cipher using frequency analysis
plaintext = caesar_decipher(sanitised_message, shift)
frequencies = message_frequency_scaling(letter_frequencies(plaintext))
fit = metric(target_frequencies, frequencies)
- logger.info('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
+ logger.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
if fit < best_fit:
best_fit = fit
best_shift = shift
plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
frequencies = message_frequency_scaling(letter_frequencies(plaintext))
fit = metric(target_frequencies, frequencies)
- logger.info('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
+ logger.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
if fit < best_fit:
best_fit = fit
best_multiplier = multiplier
return (best_multiplier, best_adder, best_one_based), best_fit
-def keyword_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
+def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
+ """Breaks a keyword substitution cipher using a dictionary and frequency analysis
+
+ >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ (('elephant', 1), 0.41643991598441...)
+ """
best_keyword = ''
best_wrap_alphabet = True
best_fit = float("inf")
- for wrap_alphabet in [True, False]:
- for keyword in keywords:
+ for wrap_alphabet in range(3):
+ for keyword in wordlist:
plaintext = keyword_decipher(message, keyword, wrap_alphabet)
frequencies = message_frequency_scaling(letter_frequencies(plaintext))
fit = metric(target_frequencies, frequencies)
- logger.info('Keyword break attempt using key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
+ logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
if fit < best_fit:
best_fit = fit
best_keyword = keyword
best_wrap_alphabet = wrap_alphabet
- logger.info('Keyword break best fit with key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50]))
+ logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50]))
return (best_keyword, best_wrap_alphabet), best_fit