import collections
import norms
import logging
+from itertools import zip_longest
+from segment import segment
+
+# To time a run:
+#
+# import timeit
+# c5a = open('2012/5a.ciphertext', 'r').read()
+# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
+
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
+with open('words.txt', 'r') as f:
+ keywords = [line.rstrip() for line in f]
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
c = (a * b) % 26
modular_division_table[b][c] = a
-modular_division_table_one_based = [[0]*27 for x in range(27)]
-for a in range(27):
- for b in range(27):
- c = ((a * b)-1) % 26 + 1
- modular_division_table_one_based[b][c] = a
-
-
def sanitise(text):
"""Remove all non-alphabetic characters and convert the text to lowercase
"""
return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
+def every_nth(text, n):
+ """Returns n strings, each of which consists of every nth character,
+ starting with the 0th, 1st, 2nd, ... (n-1)th character
+
+ >>> every_nth(string.ascii_lowercase, 5)
+ ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
+ >>> every_nth(string.ascii_lowercase, 1)
+ ['abcdefghijklmnopqrstuvwxyz']
+ >>> every_nth(string.ascii_lowercase, 26)
+ ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+ """
+ split_text = [text[i:i+n] for i in range(0, len(text), n)]
+ return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
+
+def combine_every_nth(split_text):
+ """Reforms a text split into every_nth strings
+
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
+ 'abcdefghijklmnopqrstuvwxyz'
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
+ 'abcdefghijklmnopqrstuvwxyz'
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
+ 'abcdefghijklmnopqrstuvwxyz'
+ """
+ return ''.join([''.join(l) for l in zip_longest(*split_text, fillvalue='')])
+
+
def letter_frequencies(text):
"""Count the number of occurrences of each character in text
counts[c] += 1
return counts
+def deduplicate(text):
+ return list(collections.OrderedDict.fromkeys(text))
+
+
+
def caesar_encipher_letter(letter, shift):
"""Encipher a letter, given a shift amount
alphabet_start = ord('a')
letter_number = ord(letter) - alphabet_start
if one_based: letter_number += 1
- raw_cipher_number = (letter_number * multiplier + adder)
- cipher_number = 0
- if one_based:
- cipher_number = (raw_cipher_number - 1) % 26
- else:
- cipher_number = raw_cipher_number % 26
- return chr(cipher_number + alphabet_start)
+ cipher_number = (letter_number * multiplier + adder) % 26
+ if one_based: cipher_number -= 1
+ return chr(cipher_number % 26 + alphabet_start)
else:
return letter
alphabet_start = ord('a')
cipher_number = ord(letter) - alphabet_start
if one_based: cipher_number += 1
- plaintext_number = 0
- if one_based:
- plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26
- else:
- #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
- plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
- return chr(plaintext_number + alphabet_start)
+ plaintext_number = modular_division_table[multiplier][(cipher_number - adder) % 26]
+ if one_based: plaintext_number -= 1
+ return chr(plaintext_number % 26 + alphabet_start)
else:
return letter
>>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
"""
-
enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message]
return ''.join(enciphered)
return ''.join(enciphered)
+def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
+ """Find the cipher alphabet given a keyword.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_cipher_alphabet_of('bayes')
+ 'bayescdfghijklmnopqrtuvwxz'
+ >>> keyword_cipher_alphabet_of('bayes', 0)
+ 'bayescdfghijklmnopqrtuvwxz'
+ >>> keyword_cipher_alphabet_of('bayes', 1)
+ 'bayestuvwxzcdfghijklmnopqr'
+ >>> keyword_cipher_alphabet_of('bayes', 2)
+ 'bayeszcdfghijklmnopqrtuvwx'
+ """
+ if wrap_alphabet == 0:
+ cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
+ else:
+ if wrap_alphabet == 1:
+ last_keyword_letter = deduplicate(sanitise(keyword))[-1]
+ else:
+ last_keyword_letter = sorted(sanitise(keyword))[-1]
+ last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
+ cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
+ string.ascii_lowercase[last_keyword_position:] +
+ string.ascii_lowercase))
+ return cipher_alphabet
+
+
+def keyword_encipher(message, keyword, wrap_alphabet=0):
+ """Enciphers a message with a keyword substitution cipher.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_encipher('test message', 'bayes')
+ 'rsqr ksqqbds'
+ >>> keyword_encipher('test message', 'bayes', 0)
+ 'rsqr ksqqbds'
+ >>> keyword_encipher('test message', 'bayes', 1)
+ 'lskl dskkbus'
+ >>> keyword_encipher('test message', 'bayes', 2)
+ 'qspq jsppbcs'
+ """
+ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+ cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
+ return message.lower().translate(cipher_translation)
+
+def keyword_decipher(message, keyword, wrap_alphabet=0):
+ """Deciphers a message with a keyword substitution cipher.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_decipher('rsqr ksqqbds', 'bayes')
+ 'test message'
+ >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
+ 'test message'
+ >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
+ 'test message'
+ >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
+ 'test message'
+ """
+ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+ cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
+ return message.lower().translate(cipher_translation)
+
+
def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
"""Breaks a Caesar cipher using frequency analysis
- >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
- (4, 0.3186395289018361)
- >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
- (19, 0.4215290123583277)
- >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
- (13, 0.31602920807545154)
+ >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
+ (4, 0.31863952890183...)
+ >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
+ (19, 0.42152901235832...)
+ >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
+ (13, 0.316029208075451...)
"""
sanitised_message = sanitise(message)
best_shift = 0
plaintext = caesar_decipher(sanitised_message, shift)
frequencies = message_frequency_scaling(letter_frequencies(plaintext))
fit = metric(target_frequencies, frequencies)
- logger.info('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
+ logger.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
if fit < best_fit:
best_fit = fit
best_shift = shift
def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
"""Breaks an affine cipher using frequency analysis
- >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.')
- ((15, 22, True), 0.2357036181865554)
+ >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
+ ((15, 22, True), 0.23570361818655...)
"""
sanitised_message = sanitise(message)
best_multiplier = 0
plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
frequencies = message_frequency_scaling(letter_frequencies(plaintext))
fit = metric(target_frequencies, frequencies)
- logger.info('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
+ logger.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
if fit < best_fit:
best_fit = fit
best_multiplier = multiplier
return (best_multiplier, best_adder, best_one_based), best_fit
+def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
+ """Breaks a keyword substitution cipher using a dictionary and frequency analysis
+
+ >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ (('elephant', 1), 0.41643991598441...)
+ """
+ best_keyword = ''
+ best_wrap_alphabet = True
+ best_fit = float("inf")
+ for wrap_alphabet in range(3):
+ for keyword in wordlist:
+ plaintext = keyword_decipher(message, keyword, wrap_alphabet)
+ frequencies = message_frequency_scaling(letter_frequencies(plaintext))
+ fit = metric(target_frequencies, frequencies)
+ logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
+ if fit < best_fit:
+ best_fit = fit
+ best_keyword = keyword
+ best_wrap_alphabet = wrap_alphabet
+ logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50]))
+ return (best_keyword, best_wrap_alphabet), best_fit
+
+
if __name__ == "__main__":
import doctest
doctest.testmod()