--- /dev/null
+from utilities import *
+from language_models import *
+from logger import logger
+
+
+modular_division_table = [[0]*26 for _ in range(26)]
+for a in range(26):
+ for b in range(26):
+ c = (a * b) % 26
+ modular_division_table[b][c] = a
+
+
+
+
+def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
+ """Encipher a letter, given a multiplier and adder
+
+ >>> cat(affine_encipher_letter(l, 3, 5, True) \
+ for l in string.ascii_letters)
+ 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE'
+ >>> cat(affine_encipher_letter(l, 3, 5, False) \
+ for l in string.ascii_letters)
+ 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC'
+ """
+ # letter = unaccent(accented_letter)
+ # if letter in string.ascii_letters:
+ # if letter in string.ascii_uppercase:
+ # alphabet_start = ord('A')
+ # else:
+ # alphabet_start = ord('a')
+ # letter_number = ord(letter) - alphabet_start
+ # if one_based: letter_number += 1
+ # cipher_number = (letter_number * multiplier + adder) % 26
+ # if one_based: cipher_number -= 1
+ # return chr(cipher_number % 26 + alphabet_start)
+ # else:
+ # return letter
+ letter = unaccent(accented_letter)
+ if letter in string.ascii_letters:
+ letter_number = pos(letter)
+ if one_based: letter_number += 1
+ cipher_number = (letter_number * multiplier + adder) % 26
+ if one_based: cipher_number -= 1
+ if letter in string.ascii_uppercase:
+ return unpos(cipher_number).upper()
+ else:
+ return unpos(cipher_number)
+ else:
+ return letter
+
+def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
+ """Encipher a letter, given a multiplier and adder
+
+ >>> cat(affine_decipher_letter(l, 3, 5, True) \
+ for l in 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE')
+ 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ >>> cat(affine_decipher_letter(l, 3, 5, False) \
+ for l in 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC')
+ 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ """
+ # if letter in string.ascii_letters:
+ # if letter in string.ascii_uppercase:
+ # alphabet_start = ord('A')
+ # else:
+ # alphabet_start = ord('a')
+ # cipher_number = ord(letter) - alphabet_start
+ # if one_based: cipher_number += 1
+ # plaintext_number = (
+ # modular_division_table[multiplier]
+ # [(cipher_number - adder) % 26])
+ # if one_based: plaintext_number -= 1
+ # return chr(plaintext_number % 26 + alphabet_start)
+ # else:
+ # return letter
+ if letter in string.ascii_letters:
+ cipher_number = pos(letter)
+ if one_based: cipher_number += 1
+ plaintext_number = (
+ modular_division_table[multiplier]
+ [(cipher_number - adder) % 26])
+ if one_based: plaintext_number -= 1
+ if letter in string.ascii_uppercase:
+ return unpos(plaintext_number).upper()
+ else:
+ return unpos(plaintext_number)
+ else:
+ return letter
+
+def affine_encipher(message, multiplier=1, adder=0, one_based=True):
+ """Encipher a message
+
+ >>> affine_encipher('hours passed during which jerico tried every ' \
+ 'trick he could think of', 15, 22, True)
+ 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
+ """
+ enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
+ for l in message]
+ return cat(enciphered)
+
+def affine_decipher(message, multiplier=1, adder=0, one_based=True):
+ """Decipher a message
+
+ >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
+ 'jfaoe ls omytd jlaxe mh', 15, 22, True)
+ 'hours passed during which jerico tried every trick he could think of'
+ """
+ enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
+ for l in message]
+ return cat(enciphered)
+
+
+
+def affine_break(message, fitness=Pletters):
+ """Breaks an affine cipher using frequency analysis
+
+ >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
+ 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
+ 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
+ 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
+ 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
+ 'kxd clm ckuxj.') # doctest: +ELLIPSIS
+ ((15, 22, True), -340.601181913...)
+ """
+ sanitised_message = sanitise(message)
+ best_multiplier = 0
+ best_adder = 0
+ best_one_based = True
+ best_fit = float("-inf")
+ for one_based in [True, False]:
+ for multiplier in [x for x in range(1, 26, 2) if x != 13]:
+ for adder in range(26):
+ plaintext = affine_decipher(sanitised_message,
+ multiplier, adder, one_based)
+ fit = fitness(plaintext)
+ logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
+ 'gives fit of {3} and decrypt starting: {4}'.
+ format(multiplier, adder, one_based, fit,
+ plaintext[:50]))
+ if fit > best_fit:
+ best_fit = fit
+ best_multiplier = multiplier
+ best_adder = adder
+ best_one_based = one_based
+ logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
+ '{3} and decrypt starting: {4}'.format(
+ best_multiplier, best_adder, best_one_based, best_fit,
+ affine_decipher(sanitised_message, best_multiplier,
+ best_adder, best_one_based)[:50]))
+ return (best_multiplier, best_adder, best_one_based), best_fit
--- /dev/null
+from utilities import *
+from language_models import *
+from logger import logger
+
+def caesar_encipher_letter(accented_letter, shift):
+ """Encipher a letter, given a shift amount
+
+ >>> caesar_encipher_letter('a', 1)
+ 'b'
+ >>> caesar_encipher_letter('a', 2)
+ 'c'
+ >>> caesar_encipher_letter('b', 2)
+ 'd'
+ >>> caesar_encipher_letter('x', 2)
+ 'z'
+ >>> caesar_encipher_letter('y', 2)
+ 'a'
+ >>> caesar_encipher_letter('z', 2)
+ 'b'
+ >>> caesar_encipher_letter('z', -1)
+ 'y'
+ >>> caesar_encipher_letter('a', -1)
+ 'z'
+ >>> caesar_encipher_letter('A', 1)
+ 'B'
+ >>> caesar_encipher_letter('é', 1)
+ 'f'
+ """
+ # letter = unaccent(accented_letter)
+ # if letter in string.ascii_letters:
+ # if letter in string.ascii_uppercase:
+ # alphabet_start = ord('A')
+ # else:
+ # alphabet_start = ord('a')
+ # return chr(((ord(letter) - alphabet_start + shift) % 26) +
+ # alphabet_start)
+ # else:
+ # return letter
+
+ letter = unaccent(accented_letter)
+ if letter in string.ascii_letters:
+ cipherletter = unpos(pos(letter) + shift)
+ if letter in string.ascii_uppercase:
+ return cipherletter.upper()
+ else:
+ return cipherletter
+ else:
+ return letter
+
+def caesar_decipher_letter(letter, shift):
+ """Decipher a letter, given a shift amount
+
+ >>> caesar_decipher_letter('b', 1)
+ 'a'
+ >>> caesar_decipher_letter('b', 2)
+ 'z'
+ """
+ return caesar_encipher_letter(letter, -shift)
+
+def caesar_encipher(message, shift):
+ """Encipher a message with the Caesar cipher of given shift
+
+ >>> caesar_encipher('abc', 1)
+ 'bcd'
+ >>> caesar_encipher('abc', 2)
+ 'cde'
+ >>> caesar_encipher('abcxyz', 2)
+ 'cdezab'
+ >>> caesar_encipher('ab cx yz', 2)
+ 'cd ez ab'
+ >>> caesar_encipher('Héllo World!', 2)
+ 'Jgnnq Yqtnf!'
+ """
+ enciphered = [caesar_encipher_letter(l, shift) for l in message]
+ return cat(enciphered)
+
+def caesar_decipher(message, shift):
+ """Decipher a message with the Caesar cipher of given shift
+
+ >>> caesar_decipher('bcd', 1)
+ 'abc'
+ >>> caesar_decipher('cde', 2)
+ 'abc'
+ >>> caesar_decipher('cd ez ab', 2)
+ 'ab cx yz'
+ >>> caesar_decipher('Jgnnq Yqtnf!', 2)
+ 'Hello World!'
+ """
+ return caesar_encipher(message, -shift)
+
+
+def caesar_break(message, fitness=Pletters):
+ """Breaks a Caesar cipher using frequency analysis
+
+ >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
+ 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
+ (4, -130.849989015...)
+ >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
+ 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
+ (19, -128.82410410...)
+ >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
+ 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
+ (13, -126.25403935...)
+ """
+ sanitised_message = sanitise(message)
+ best_shift = 0
+ best_fit = float('-inf')
+ for shift in range(26):
+ plaintext = caesar_decipher(sanitised_message, shift)
+ fit = fitness(plaintext)
+ logger.debug('Caesar break attempt using key {0} gives fit of {1} '
+ 'and decrypt starting: {2}'.format(shift, fit,
+ plaintext[:50]))
+ if fit > best_fit:
+ best_fit = fit
+ best_shift = shift
+ logger.info('Caesar break best fit: key {0} gives fit of {1} and '
+ 'decrypt starting: {2}'.format(best_shift, best_fit,
+ caesar_decipher(sanitised_message, best_shift)[:50]))
+ return best_shift, best_fit
import pprint
-## Utility functions
-cat = ''.join
-wcat = ' '.join
-lcat = '\n'.join
-
-def pos(letter):
- if letter in string.ascii_lowercase:
- return ord(letter) - ord('a')
- elif letter in string.ascii_uppercase:
- return ord(letter) - ord('A')
- else:
- return ''
-
-def unpos(number): return chr(number % 26 + ord('a'))
-
-
-modular_division_table = [[0]*26 for _ in range(26)]
-for a in range(26):
- for b in range(26):
- c = (a * b) % 26
- modular_division_table[b][c] = a
-
-
-def every_nth(text, n, fillvalue=''):
- """Returns n strings, each of which consists of every nth character,
- starting with the 0th, 1st, 2nd, ... (n-1)th character
-
- >>> every_nth(string.ascii_lowercase, 5)
- ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
- >>> every_nth(string.ascii_lowercase, 1)
- ['abcdefghijklmnopqrstuvwxyz']
- >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
- ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
- 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
- >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
- ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
- """
- split_text = chunks(text, n, fillvalue)
- return [cat(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
-
-def combine_every_nth(split_text):
- """Reforms a text split into every_nth strings
-
- >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
- 'abcdefghijklmnopqrstuvwxyz'
- >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
- 'abcdefghijklmnopqrstuvwxyz'
- >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
- 'abcdefghijklmnopqrstuvwxyz'
- """
- return cat([cat(l)
- for l in zip_longest(*split_text, fillvalue='')])
-
-def chunks(text, n, fillvalue=None):
- """Split a text into chunks of n characters
-
- >>> chunks('abcdefghi', 3)
- ['abc', 'def', 'ghi']
- >>> chunks('abcdefghi', 4)
- ['abcd', 'efgh', 'i']
- >>> chunks('abcdefghi', 4, fillvalue='!')
- ['abcd', 'efgh', 'i!!!']
- """
- if fillvalue:
- padding = fillvalue[0] * (n - len(text) % n)
- else:
- padding = ''
- return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
-
-def transpose(items, transposition):
- """Moves items around according to the given transposition
-
- >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
- ['a', 'b', 'c', 'd']
- >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
- ['d', 'b', 'c', 'a']
- >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
- [13, 12, 14, 11, 15, 10]
- """
- transposed = [''] * len(transposition)
- for p, t in enumerate(transposition):
- transposed[p] = items[t]
- return transposed
-
-def untranspose(items, transposition):
- """Undoes a transpose
-
- >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
- ['a', 'b', 'c', 'd']
- >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
- ['a', 'b', 'c', 'd']
- >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
- [10, 11, 12, 13, 14, 15]
- """
- transposed = [''] * len(transposition)
- for p, t in enumerate(transposition):
- transposed[t] = items[p]
- return transposed
-
-def deduplicate(text):
- return list(collections.OrderedDict.fromkeys(text))
-
-
-def caesar_encipher_letter(accented_letter, shift):
- """Encipher a letter, given a shift amount
-
- >>> caesar_encipher_letter('a', 1)
- 'b'
- >>> caesar_encipher_letter('a', 2)
- 'c'
- >>> caesar_encipher_letter('b', 2)
- 'd'
- >>> caesar_encipher_letter('x', 2)
- 'z'
- >>> caesar_encipher_letter('y', 2)
- 'a'
- >>> caesar_encipher_letter('z', 2)
- 'b'
- >>> caesar_encipher_letter('z', -1)
- 'y'
- >>> caesar_encipher_letter('a', -1)
- 'z'
- >>> caesar_encipher_letter('A', 1)
- 'B'
- >>> caesar_encipher_letter('é', 1)
- 'f'
- """
- # letter = unaccent(accented_letter)
- # if letter in string.ascii_letters:
- # if letter in string.ascii_uppercase:
- # alphabet_start = ord('A')
- # else:
- # alphabet_start = ord('a')
- # return chr(((ord(letter) - alphabet_start + shift) % 26) +
- # alphabet_start)
- # else:
- # return letter
-
- letter = unaccent(accented_letter)
- if letter in string.ascii_letters:
- cipherletter = unpos(pos(letter) + shift)
- if letter in string.ascii_uppercase:
- return cipherletter.upper()
- else:
- return cipherletter
- else:
- return letter
-
-def caesar_decipher_letter(letter, shift):
- """Decipher a letter, given a shift amount
-
- >>> caesar_decipher_letter('b', 1)
- 'a'
- >>> caesar_decipher_letter('b', 2)
- 'z'
- """
- return caesar_encipher_letter(letter, -shift)
-
-def caesar_encipher(message, shift):
- """Encipher a message with the Caesar cipher of given shift
-
- >>> caesar_encipher('abc', 1)
- 'bcd'
- >>> caesar_encipher('abc', 2)
- 'cde'
- >>> caesar_encipher('abcxyz', 2)
- 'cdezab'
- >>> caesar_encipher('ab cx yz', 2)
- 'cd ez ab'
- >>> caesar_encipher('Héllo World!', 2)
- 'Jgnnq Yqtnf!'
- """
- enciphered = [caesar_encipher_letter(l, shift) for l in message]
- return cat(enciphered)
-
-def caesar_decipher(message, shift):
- """Decipher a message with the Caesar cipher of given shift
-
- >>> caesar_decipher('bcd', 1)
- 'abc'
- >>> caesar_decipher('cde', 2)
- 'abc'
- >>> caesar_decipher('cd ez ab', 2)
- 'ab cx yz'
- >>> caesar_decipher('Jgnnq Yqtnf!', 2)
- 'Hello World!'
- """
- return caesar_encipher(message, -shift)
-
-def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
- """Encipher a letter, given a multiplier and adder
-
- >>> cat(affine_encipher_letter(l, 3, 5, True) \
- for l in string.ascii_letters)
- 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE'
- >>> cat(affine_encipher_letter(l, 3, 5, False) \
- for l in string.ascii_letters)
- 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC'
- """
- # letter = unaccent(accented_letter)
- # if letter in string.ascii_letters:
- # if letter in string.ascii_uppercase:
- # alphabet_start = ord('A')
- # else:
- # alphabet_start = ord('a')
- # letter_number = ord(letter) - alphabet_start
- # if one_based: letter_number += 1
- # cipher_number = (letter_number * multiplier + adder) % 26
- # if one_based: cipher_number -= 1
- # return chr(cipher_number % 26 + alphabet_start)
- # else:
- # return letter
- letter = unaccent(accented_letter)
- if letter in string.ascii_letters:
- letter_number = pos(letter)
- if one_based: letter_number += 1
- cipher_number = (letter_number * multiplier + adder) % 26
- if one_based: cipher_number -= 1
- if letter in string.ascii_uppercase:
- return unpos(cipher_number).upper()
- else:
- return unpos(cipher_number)
- else:
- return letter
-
-def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
- """Encipher a letter, given a multiplier and adder
-
- >>> cat(affine_decipher_letter(l, 3, 5, True) \
- for l in 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE')
- 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- >>> cat(affine_decipher_letter(l, 3, 5, False) \
- for l in 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC')
- 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- """
- # if letter in string.ascii_letters:
- # if letter in string.ascii_uppercase:
- # alphabet_start = ord('A')
- # else:
- # alphabet_start = ord('a')
- # cipher_number = ord(letter) - alphabet_start
- # if one_based: cipher_number += 1
- # plaintext_number = (
- # modular_division_table[multiplier]
- # [(cipher_number - adder) % 26])
- # if one_based: plaintext_number -= 1
- # return chr(plaintext_number % 26 + alphabet_start)
- # else:
- # return letter
- if letter in string.ascii_letters:
- cipher_number = pos(letter)
- if one_based: cipher_number += 1
- plaintext_number = (
- modular_division_table[multiplier]
- [(cipher_number - adder) % 26])
- if one_based: plaintext_number -= 1
- if letter in string.ascii_uppercase:
- return unpos(plaintext_number).upper()
- else:
- return unpos(plaintext_number)
- else:
- return letter
-
-def affine_encipher(message, multiplier=1, adder=0, one_based=True):
- """Encipher a message
-
- >>> affine_encipher('hours passed during which jerico tried every ' \
- 'trick he could think of', 15, 22, True)
- 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
- """
- enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
- for l in message]
- return cat(enciphered)
-
-def affine_decipher(message, multiplier=1, adder=0, one_based=True):
- """Decipher a message
-
- >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
- 'jfaoe ls omytd jlaxe mh', 15, 22, True)
- 'hours passed during which jerico tried every trick he could think of'
- """
- enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
- for l in message]
- return cat(enciphered)
-
-
-class KeywordWrapAlphabet(Enum):
- from_a = 1
- from_last = 2
- from_largest = 3
-
-
-def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Find the cipher alphabet given a keyword.
- wrap_alphabet controls how the rest of the alphabet is added
- after the keyword.
-
- >>> keyword_cipher_alphabet_of('bayes')
- 'bayescdfghijklmnopqrtuvwxz'
- >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
- 'bayescdfghijklmnopqrtuvwxz'
- >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
- 'bayestuvwxzcdfghijklmnopqr'
- >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
- 'bayeszcdfghijklmnopqrtuvwx'
- """
- if wrap_alphabet == KeywordWrapAlphabet.from_a:
- cipher_alphabet = cat(deduplicate(sanitise(keyword) +
- string.ascii_lowercase))
- else:
- if wrap_alphabet == KeywordWrapAlphabet.from_last:
- last_keyword_letter = deduplicate(sanitise(keyword))[-1]
- else:
- last_keyword_letter = sorted(sanitise(keyword))[-1]
- last_keyword_position = string.ascii_lowercase.find(
- last_keyword_letter) + 1
- cipher_alphabet = cat(
- deduplicate(sanitise(keyword) +
- string.ascii_lowercase[last_keyword_position:] +
- string.ascii_lowercase))
- return cipher_alphabet
-
-
-def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Enciphers a message with a keyword substitution cipher.
- wrap_alphabet controls how the rest of the alphabet is added
- after the keyword.
- 0 : from 'a'
- 1 : from the last letter in the sanitised keyword
- 2 : from the largest letter in the sanitised keyword
-
- >>> keyword_encipher('test message', 'bayes')
- 'rsqr ksqqbds'
- >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
- 'rsqr ksqqbds'
- >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
- 'lskl dskkbus'
- >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
- 'qspq jsppbcs'
- """
- cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
- cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
- return unaccent(message).lower().translate(cipher_translation)
-
-def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Deciphers a message with a keyword substitution cipher.
- wrap_alphabet controls how the rest of the alphabet is added
- after the keyword.
- 0 : from 'a'
- 1 : from the last letter in the sanitised keyword
- 2 : from the largest letter in the sanitised keyword
-
- >>> keyword_decipher('rsqr ksqqbds', 'bayes')
- 'test message'
- >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
- 'test message'
- >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
- 'test message'
- >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
- 'test message'
- """
- cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
- cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
- return message.lower().translate(cipher_translation)
-
-
-def vigenere_encipher(message, keyword):
- """Vigenere encipher
-
- >>> vigenere_encipher('hello', 'abc')
- 'hfnlp'
- """
- shifts = [pos(l) for l in sanitise(keyword)]
- pairs = zip(message, cycle(shifts))
- return cat([caesar_encipher_letter(l, k) for l, k in pairs])
-
-def vigenere_decipher(message, keyword):
- """Vigenere decipher
-
- >>> vigenere_decipher('hfnlp', 'abc')
- 'hello'
- """
- shifts = [pos(l) for l in sanitise(keyword)]
- pairs = zip(message, cycle(shifts))
- return cat([caesar_decipher_letter(l, k) for l, k in pairs])
-
-
-def beaufort_encipher(message, keyword):
- """Beaufort encipher
-
- >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii')
- 'sevsvrusyrrxfayyxuteemazudmpjmmwr'
- """
- shifts = [pos(l) for l in sanitise(keyword)]
- pairs = zip(message, cycle(shifts))
- return cat([unpos(k - pos(l)) for l, k in pairs])
-
-beaufort_decipher = beaufort_encipher
-
-beaufort_variant_encipher=vigenere_decipher
-beaufort_variant_decipher=vigenere_encipher
-
-
-def polybius_grid(keyword, column_order, row_order, letters_to_merge=None,
- wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Grid for a Polybius cipher, using a keyword to rearrange the
- alphabet.
-
-
- >>> polybius_grid('a', 'abcde', 'abcde')['x'] == ('e', 'c')
- True
- >>> polybius_grid('elephant', 'abcde', 'abcde')['e'] == ('a', 'a')
- True
- >>> polybius_grid('elephant', 'abcde', 'abcde')['b'] == ('b', 'c')
- True
- """
- alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet)
- if letters_to_merge is None:
- letters_to_merge = {'j': 'i'}
- grid = {l: k
- for k, l in zip([(c, r) for c in column_order for r in row_order],
- [l for l in alphabet if l not in letters_to_merge])}
- for l in letters_to_merge:
- grid[l] = grid[letters_to_merge[l]]
- return grid
-
-def polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge=None,
- wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Grid for decrypting using a Polybius cipher, using a keyword to
- rearrange the alphabet.
-
- >>> polybius_reverse_grid('a', 'abcde', 'abcde')['e', 'c'] == 'x'
- True
- >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['a', 'a'] == 'e'
- True
- >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['b', 'c'] == 'b'
- True
- """
- alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet)
- if letters_to_merge is None:
- letters_to_merge = {'j': 'i'}
- grid = {k: l
- for k, l in zip([(c, r) for c in column_order for r in row_order],
- [l for l in alphabet if l not in letters_to_merge])}
- return grid
-
-
-def polybius_flatten(pair, column_first):
- """Convert a series of pairs into a single list of characters"""
- if column_first:
- return str(pair[1]) + str(pair[0])
- else:
- return str(pair[0]) + str(pair[1])
-
-def polybius_encipher(message, keyword, column_order, row_order,
- column_first=False,
- letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Encipher a message with Polybius cipher, using a keyword to rearrange
- the alphabet
+from utilities import *
+from segment import *
- >>> polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', \
- [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], \
- wrap_alphabet=KeywordWrapAlphabet.from_last)
- '2214445544551522115522511155551543114252542214111352123234442355411135441314115451112122'
- >>> polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', 'abcde', 'abcde', \
- column_first=False)
- 'bbadccddccddaebbaaddbbceaaddddaecbaacadadcbbadaaacdaabedbcccdeddbeaabdccacadaadcceaababb'
- >>> polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', 'abcde', 'abcde', \
- column_first=True)
- 'bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaacaadbadecbccedddebaadbcccadaaacdecaaabbb'
- """
- grid = polybius_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet)
- return cat(polybius_flatten(grid[l], column_first)
- for l in message
- if l in grid)
-
-
-def polybius_decipher(message, keyword, column_order, row_order,
- column_first=False,
- letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a):
- """Decipher a message with a Polybius cipher, using a keyword to rearrange
- the alphabet
-
- >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\
- 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \
- column_first=False)
- 'toisisvtestxessvbephktoefhnugiysweqifoekxelt'
-
- >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\
- 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \
- column_first=True)
- 'thisisatestmessageforthepolybiusdecipherment'
- """
- grid = polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet)
- column_index_type = type(column_order[0])
- row_index_type = type(row_order[0])
- if column_first:
- pairs = [(column_index_type(p[1]), row_index_type(p[0])) for p in chunks(message, 2)]
- else:
- pairs = [(row_index_type(p[0]), column_index_type(p[1])) for p in chunks(message, 2)]
- return cat(grid[p] for p in pairs if p in grid)
-
-
-def transpositions_of(keyword):
- """Finds the transpostions given by a keyword. For instance, the keyword
- 'clever' rearranges to 'celrv', so the first column (0) stays first, the
- second column (1) moves to third, the third column (2) moves to second,
- and so on.
-
- If passed a tuple, assume it's already a transposition and just return it.
+from caesar import *
+from affine import *
+from keyword import *
+from polybius import *
+from column_transposition import *
+from railfence import *
- >>> transpositions_of('clever')
- (0, 2, 1, 4, 3)
- >>> transpositions_of('fred')
- (3, 2, 0, 1)
- >>> transpositions_of((3, 2, 0, 1))
- (3, 2, 0, 1)
- """
- if isinstance(keyword, tuple):
- return keyword
- else:
- key = deduplicate(keyword)
- transpositions = tuple(key.index(l) for l in sorted(key))
- return transpositions
-
-def pad(message_len, group_len, fillvalue):
- padding_length = group_len - message_len % group_len
- if padding_length == group_len: padding_length = 0
- padding = ''
- for i in range(padding_length):
- if callable(fillvalue):
- padding += fillvalue()
- else:
- padding += fillvalue
- return padding
-
-def column_transposition_encipher(message, keyword, fillvalue=' ',
- fillcolumnwise=False,
- emptycolumnwise=False):
- """Enciphers using the column transposition cipher.
- Message is padded to allow all rows to be the same length.
-
- >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
- 'hlohr eltee '
- >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
- 'hellothere '
- >>> column_transposition_encipher('hellothere', 'abcdef')
- 'hellothere '
- >>> column_transposition_encipher('hellothere', 'abcde')
- 'hellothere'
- >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
- 'hellothere'
- >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
- 'hlohreltee'
- >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
- 'htehlelroe'
- >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
- 'hellothere'
- >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
- 'heotllrehe'
- >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
- 'holrhetlee'
- >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
- 'htleehoelr'
- >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
- 'hleolteher'
- >>> column_transposition_encipher('hellothere', 'cleverly')
- 'hleolthre e '
- >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
- 'hleolthre!e!'
- >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
- 'hleolthre*e*'
- """
- transpositions = transpositions_of(keyword)
- message += pad(len(message), len(transpositions), fillvalue)
- if fillcolumnwise:
- rows = every_nth(message, len(message) // len(transpositions))
- else:
- rows = chunks(message, len(transpositions))
- transposed = [transpose(r, transpositions) for r in rows]
- if emptycolumnwise:
- return combine_every_nth(transposed)
- else:
- return cat(chain(*transposed))
-
-def column_transposition_decipher(message, keyword, fillvalue=' ',
- fillcolumnwise=False,
- emptycolumnwise=False):
- """Deciphers using the column transposition cipher.
- Message is padded to allow all rows to be the same length.
-
- >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
- 'hellothere'
- >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
- 'hellothere'
- >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
- 'hellothere'
- >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
- 'hellothere'
- >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
- 'hellothere'
- >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
- 'hellothere'
- >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
- 'hellothere'
- >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
- 'hellothere'
- """
- transpositions = transpositions_of(keyword)
- message += pad(len(message), len(transpositions), fillvalue)
- if emptycolumnwise:
- rows = every_nth(message, len(message) // len(transpositions))
- else:
- rows = chunks(message, len(transpositions))
- untransposed = [untranspose(r, transpositions) for r in rows]
- if fillcolumnwise:
- return combine_every_nth(untransposed)
- else:
- return cat(chain(*untransposed))
-
-def scytale_encipher(message, rows, fillvalue=' '):
- """Enciphers using the scytale transposition cipher.
- Message is padded with spaces to allow all rows to be the same length.
-
- >>> scytale_encipher('thequickbrownfox', 3)
- 'tcnhkfeboqrxuo iw '
- >>> scytale_encipher('thequickbrownfox', 4)
- 'tubnhirfecooqkwx'
- >>> scytale_encipher('thequickbrownfox', 5)
- 'tubn hirf ecoo qkwx '
- >>> scytale_encipher('thequickbrownfox', 6)
- 'tqcrnxhukof eibwo '
- >>> scytale_encipher('thequickbrownfox', 7)
- 'tqcrnx hukof eibwo '
- """
- # transpositions = [i for i in range(math.ceil(len(message) / rows))]
- # return column_transposition_encipher(message, transpositions,
- # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True)
- transpositions = [i for i in range(rows)]
- return column_transposition_encipher(message, transpositions,
- fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
-
-def scytale_decipher(message, rows):
- """Deciphers using the scytale transposition cipher.
- Assumes the message is padded so that all rows are the same length.
-
- >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
- 'thequickbrownfox '
- >>> scytale_decipher('tubnhirfecooqkwx', 4)
- 'thequickbrownfox'
- >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
- 'thequickbrownfox '
- >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
- 'thequickbrownfox '
- >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
- 'thequickbrownfox '
- """
- # transpositions = [i for i in range(math.ceil(len(message) / rows))]
- # return column_transposition_decipher(message, transpositions,
- # fillcolumnwise=False, emptycolumnwise=True)
- transpositions = [i for i in range(rows)]
- return column_transposition_decipher(message, transpositions,
- fillcolumnwise=True, emptycolumnwise=False)
-
-
-def railfence_encipher(message, height, fillvalue=''):
- """Railfence cipher.
- Works by splitting the text into sections, then reading across them to
- generate the rows in the cipher. The rows are then combined to form the
- ciphertext.
-
- Example: the plaintext "hellotherefriends", with a height of four, written
- out in the railfence as
- h h i
- etere*
- lorfns
- l e d
- (with the * showing the one character to finish the last section).
- Each 'section' is two columns, but unfolded. In the example, the first
- section is 'hellot'.
-
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!')
- 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!'
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!')
- 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!'
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!')
- 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!'
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!')
- 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!'
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3)
- 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece'
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5)
- 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp'
- >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7)
- 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic'
- """
- sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue)
- n_sections = len(sections)
- # Add the top row
- rows = [cat([s[0] for s in sections])]
- # process the middle rows of the grid
- for r in range(1, height-1):
- rows += [cat([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])]
- # process the bottom row
- rows += [cat([s[height - 1:height] for s in sections])]
- # rows += [wcat([s[height - 1] for s in sections])]
- return cat(rows)
-
-def railfence_decipher(message, height, fillvalue=''):
- """Railfence decipher.
- Works by reconstructing the grid used to generate the ciphertext, then
- unfolding the sections so the text can be concatenated together.
-
- Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first
- work out that the second row has a character missing, find the rows of the
- grid, then split the section into its two columns.
-
- 'hhieterelorfnsled' is split into
- h h i
- etere
- lorfns
- l e d
- (spaces added for clarity), which is stored in 'rows'. This is then split
- into 'down_rows' and 'up_rows':
-
- down_rows:
- hhi
- eee
- lrn
- led
-
- up_rows:
- tr
- ofs
-
- These are then zipped together (after the up_rows are reversed) to recover
- the plaintext.
-
- Most of the procedure is about finding the correct lengths for each row then
- splitting the ciphertext into those rows.
-
- >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!')
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!')
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!')
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!')
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3)
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5)
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7)
- 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
- """
- # find the number and size of the sections, including how many characters
- # are missing for a full grid
- n_sections = math.ceil(len(message) / ((height - 1) * 2))
- padding_to_add = n_sections * (height - 1) * 2 - len(message)
- # row_lengths are for the both up rows and down rows
- row_lengths = [n_sections] * (height - 1) * 2
- for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1):
- row_lengths[i] -= 1
- # folded_rows are the combined row lengths in the middle of the railfence
- folded_row_lengths = [row_lengths[0]]
- for i in range(1, height-1):
- folded_row_lengths += [row_lengths[i] + row_lengths[-i]]
- folded_row_lengths += [row_lengths[height - 1]]
- # find the rows that form the railfence grid
- rows = []
- row_start = 0
- for i in folded_row_lengths:
- rows += [message[row_start:row_start + i]]
- row_start += i
- # split the rows into the 'down_rows' (those that form the first column of
- # a section) and the 'up_rows' (those that ofrm the second column of a
- # section).
- down_rows = [rows[0]]
- up_rows = []
- for i in range(1, height-1):
- down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])]
- up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])]
- down_rows += [rows[-1]]
- up_rows.reverse()
- return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r)
def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False):
"""Makes the key column for a Cadenus cipher (the column down between the
import matplotlib.pyplot as plt
-# logging.basicConfig(filename="cipher.log", level=logging.INFO)
-# logger = logging.getLogger(__name__)
-
-logger = logging.getLogger('cipherbreak')
-logger.setLevel(logging.WARNING)
-# logger.setLevel(logging.INFO)
-# logger.setLevel(logging.DEBUG)
-
-# create the logging file handler
-fh = logging.FileHandler("cipher.log")
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-fh.setFormatter(formatter)
-
-# add handler to logger object
-logger.addHandler(fh)
-
from cipher import *
from language_models import *
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
-def index_of_coincidence(text):
- stext = sanitise(text)
- counts = collections.Counter(stext)
- denom = len(stext) * (len(text) - 1) / 26
- return (
- sum(max(counts[l] * counts[l] - 1, 0) for l in string.ascii_lowercase)
- /
- denom
- )
-
-
-transpositions = collections.defaultdict(list)
-for word in keywords:
- transpositions[transpositions_of(word)] += [word]
-
-def frequencies(text):
- """Count the number of occurrences of each character in text
-
- >>> sorted(frequencies('abcdefabc').items())
- [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
- >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
- 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
- [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
- ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
- ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
- ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
- >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
- '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
- [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
- ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
- ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
- ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
- ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
- >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
- 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
- [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
- ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
- ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
- ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
- >>> frequencies('abcdefabcdef')['x']
- 0
- """
- return collections.Counter(c for c in text)
-
-
-def caesar_break(message, fitness=Pletters):
- """Breaks a Caesar cipher using frequency analysis
-
- >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
- 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
- (4, -130.849989015...)
- >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
- 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
- (19, -128.82410410...)
- >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
- 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
- (13, -126.25403935...)
- """
- sanitised_message = sanitise(message)
- best_shift = 0
- best_fit = float('-inf')
- for shift in range(26):
- plaintext = caesar_decipher(sanitised_message, shift)
- fit = fitness(plaintext)
- logger.debug('Caesar break attempt using key {0} gives fit of {1} '
- 'and decrypt starting: {2}'.format(shift, fit,
- plaintext[:50]))
- if fit > best_fit:
- best_fit = fit
- best_shift = shift
- logger.info('Caesar break best fit: key {0} gives fit of {1} and '
- 'decrypt starting: {2}'.format(best_shift, best_fit,
- caesar_decipher(sanitised_message, best_shift)[:50]))
- return best_shift, best_fit
-
-def affine_break(message, fitness=Pletters):
- """Breaks an affine cipher using frequency analysis
-
- >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
- 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
- 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
- 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
- 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
- 'kxd clm ckuxj.') # doctest: +ELLIPSIS
- ((15, 22, True), -340.601181913...)
- """
- sanitised_message = sanitise(message)
- best_multiplier = 0
- best_adder = 0
- best_one_based = True
- best_fit = float("-inf")
- for one_based in [True, False]:
- for multiplier in [x for x in range(1, 26, 2) if x != 13]:
- for adder in range(26):
- plaintext = affine_decipher(sanitised_message,
- multiplier, adder, one_based)
- fit = fitness(plaintext)
- logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
- 'gives fit of {3} and decrypt starting: {4}'.
- format(multiplier, adder, one_based, fit,
- plaintext[:50]))
- if fit > best_fit:
- best_fit = fit
- best_multiplier = multiplier
- best_adder = adder
- best_one_based = one_based
- logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
- '{3} and decrypt starting: {4}'.format(
- best_multiplier, best_adder, best_one_based, best_fit,
- affine_decipher(sanitised_message, best_multiplier,
- best_adder, best_one_based)[:50]))
- return (best_multiplier, best_adder, best_one_based), best_fit
-
-def keyword_break(message, wordlist=keywords, fitness=Pletters):
- """Breaks a keyword substitution cipher using a dictionary and
- frequency analysis.
-
- >>> keyword_break(keyword_encipher('this is a test message for the ' \
- 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
- (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
- """
- best_keyword = ''
- best_wrap_alphabet = True
- best_fit = float("-inf")
- for wrap_alphabet in KeywordWrapAlphabet:
- for keyword in wordlist:
- plaintext = keyword_decipher(message, keyword, wrap_alphabet)
- fit = fitness(plaintext)
- logger.debug('Keyword break attempt using key {0} (wrap={1}) '
- 'gives fit of {2} and decrypt starting: {3}'.format(
- keyword, wrap_alphabet, fit,
- sanitise(plaintext)[:50]))
- if fit > best_fit:
- best_fit = fit
- best_keyword = keyword
- best_wrap_alphabet = wrap_alphabet
- logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
- '{2} and decrypt starting: {3}'.format(best_keyword,
- best_wrap_alphabet, best_fit, sanitise(
- keyword_decipher(message, best_keyword,
- best_wrap_alphabet))[:50]))
- return (best_keyword, best_wrap_alphabet), best_fit
-
-def keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
- number_of_solutions=1, chunksize=500):
- """Breaks a keyword substitution cipher using a dictionary and
- frequency analysis
-
- >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
- 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
- (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
- >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
- 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
- wordlist=['cat', 'elephant', 'kangaroo'], \
- number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
- [(('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...),
- (('elephant', <KeywordWrapAlphabet.from_largest: 3>), -52.834575011...)]
- """
- with Pool() as pool:
- helper_args = [(message, word, wrap, fitness)
- for word in wordlist
- for wrap in KeywordWrapAlphabet]
- # Gotcha: the helper function here needs to be defined at the top level
- # (limitation of Pool.starmap)
- breaks = pool.starmap(keyword_break_worker, helper_args, chunksize)
- if number_of_solutions == 1:
- return max(breaks, key=lambda k: k[1])
- else:
- return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
-
-def keyword_break_worker(message, keyword, wrap_alphabet, fitness):
- plaintext = keyword_decipher(message, keyword, wrap_alphabet)
- fit = fitness(plaintext)
- logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
- '{2} and decrypt starting: {3}'.format(keyword,
- wrap_alphabet, fit, sanitise(plaintext)[:50]))
- return (keyword, wrap_alphabet), fit
-
-# def monoalphabetic_break_hillclimbing(message, max_iterations=10000000,
-# alphabet=None, fitness=Pletters):
-# ciphertext = unaccent(message).lower()
-# if not alphabet:
-# alphabet = list(string.ascii_lowercase)
-# random.shuffle(alphabet)
-# alphabet = cat(alphabet)
-# return monoalphabetic_break_hillclimbing_worker(ciphertext, alphabet,
-# max_iterations, fitness)
-
-# def monoalphabetic_break_hillclimbing_mp(message, workers=10,
-# max_iterations = 10000000, alphabet=None, fitness=Pletters, chunksize=1):
-# worker_args = []
-# ciphertext = unaccent(message).lower()
-# for i in range(workers):
-# if alphabet:
-# this_alphabet = alphabet
-# else:
-# this_alphabet = list(string.ascii_lowercase)
-# random.shuffle(this_alphabet)
-# this_alphabet = cat(this_alphabet)
-# worker_args.append((ciphertext, this_alphabet, max_iterations, fitness))
-# with Pool() as pool:
-# breaks = pool.starmap(monoalphabetic_break_hillclimbing_worker,
-# worker_args, chunksize)
-# return max(breaks, key=lambda k: k[1])
-
-# def monoalphabetic_break_hillclimbing_worker(message, alphabet,
-# max_iterations, fitness):
-# def swap(letters, i, j):
-# if i > j:
-# i, j = j, i
-# if i == j:
-# return letters
-# else:
-# return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
-# letters[j+1:])
-# best_alphabet = alphabet
-# best_fitness = float('-inf')
-# for i in range(max_iterations):
-# alphabet = swap(best_alphabet, random.randrange(26), random.randrange(26))
-# cipher_translation = ''.maketrans(string.ascii_lowercase, alphabet)
-# plaintext = message.translate(cipher_translation)
-# if fitness(plaintext) > best_fitness:
-# best_fitness = fitness(plaintext)
-# best_alphabet = alphabet
-# print(i, best_alphabet, best_fitness, plaintext[:50])
-# return best_alphabet, best_fitness
-
-
-def monoalphabetic_break_hillclimbing(message,
- max_iterations=20000,
- plain_alphabet=None,
- cipher_alphabet=None,
- fitness=Pletters, chunksize=1):
- return simulated_annealing_break(message,
- workers=1,
- initial_temperature=0,
- max_iterations=max_iterations,
- plain_alphabet=plain_alphabet,
- cipher_alphabet=cipher_alphabet,
- fitness=fitness, chunksize=chunksize)
-
-
-def monoalphabetic_break_hillclimbing_mp(message,
- workers=10,
- max_iterations=20000,
- plain_alphabet=None,
- cipher_alphabet=None,
- fitness=Pletters, chunksize=1):
- return simulated_annealing_break(message,
- workers=workers,
- initial_temperature=0,
- max_iterations=max_iterations,
- plain_alphabet=plain_alphabet,
- cipher_alphabet=cipher_alphabet,
- fitness=fitness, chunksize=chunksize)
-
-
-def simulated_annealing_break(message, workers=10,
- initial_temperature=200,
- max_iterations=20000,
- plain_alphabet=None,
- cipher_alphabet=None,
- fitness=Pletters, chunksize=1):
- worker_args = []
- ciphertext = sanitise(message)
- for i in range(workers):
- if not plain_alphabet:
- plain_alphabet = string.ascii_lowercase
- if not cipher_alphabet:
- cipher_alphabet = list(string.ascii_lowercase)
- random.shuffle(cipher_alphabet)
- cipher_alphabet = cat(cipher_alphabet)
- worker_args.append((ciphertext, plain_alphabet, cipher_alphabet,
- initial_temperature, max_iterations, fitness))
- with Pool() as pool:
- breaks = pool.starmap(simulated_annealing_break_worker,
- worker_args, chunksize)
- return max(breaks, key=lambda k: k[1])
-
-
-def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet,
- t0, max_iterations, fitness):
- def swap(letters, i, j):
- if i > j:
- i, j = j, i
- if i == j:
- return letters
- else:
- return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
- letters[j+1:])
-
- temperature = t0
-
- dt = t0 / (0.9 * max_iterations)
-
- current_alphabet = cipher_alphabet
- alphabet = current_alphabet
- cipher_translation = ''.maketrans(current_alphabet, plain_alphabet)
- plaintext = message.translate(cipher_translation)
- current_fitness = fitness(plaintext)
-
- best_alphabet = current_alphabet
- best_fitness = current_fitness
- best_plaintext = plaintext
-
- # print('starting for', max_iterations)
- for i in range(max_iterations):
- swap_a = random.randrange(26)
- swap_b = (swap_a + int(random.gauss(0, 4))) % 26
- alphabet = swap(current_alphabet, swap_a, swap_b)
- cipher_translation = ''.maketrans(alphabet, plain_alphabet)
- plaintext = message.translate(cipher_translation)
- new_fitness = fitness(plaintext)
- try:
- sa_chance = math.exp((new_fitness - current_fitness) / temperature)
- except (OverflowError, ZeroDivisionError):
- # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
- sa_chance = 0
- if (new_fitness > current_fitness or random.random() < sa_chance):
- # logger.debug('Simulated annealing: iteration {}, temperature {}, '
- # 'current alphabet {}, current_fitness {}, '
- # 'best_plaintext {}'.format(i, temperature, current_alphabet,
- # current_fitness, best_plaintext[:50]))
-
- # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
- current_fitness = new_fitness
- current_alphabet = alphabet
-
- if current_fitness > best_fitness:
- best_alphabet = current_alphabet
- best_fitness = current_fitness
- best_plaintext = plaintext
- if i % 500 == 0:
- logger.debug('Simulated annealing: iteration {}, temperature {}, '
- 'current alphabet {}, current_fitness {}, '
- 'best_plaintext {}'.format(i, temperature, current_alphabet,
- current_fitness, plaintext[:50]))
- temperature = max(temperature - dt, 0.001)
-
- return best_alphabet, best_fitness # current_alphabet, current_fitness
-
-
-def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
- chunksize=500):
- """Breaks a vigenere cipher using a dictionary and frequency analysis.
-
- >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
- 'message for the vigenere decipherment'), 'cat'), \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
- ('cat', -52.9472712...)
- """
- with Pool() as pool:
- helper_args = [(message, word, fitness)
- for word in wordlist]
- # Gotcha: the helper function here needs to be defined at the top level
- # (limitation of Pool.starmap)
- breaks = pool.starmap(vigenere_keyword_break_worker, helper_args,
- chunksize)
- return max(breaks, key=lambda k: k[1])
-vigenere_keyword_break = vigenere_keyword_break_mp
-
-def vigenere_keyword_break_worker(message, keyword, fitness):
- plaintext = vigenere_decipher(message, keyword)
- fit = fitness(plaintext)
- logger.debug('Vigenere keyword break attempt using key {0} gives fit of '
- '{1} and decrypt starting: {2}'.format(keyword,
- fit, sanitise(plaintext)[:50]))
- return keyword, fit
-
-def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters):
- """Breaks a Vigenere cipher with frequency analysis
- >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
- "run. She is ready and so am I. I stole Daniel's pocketbook this " \
- "afternoon when he left his jacket hanging on the easel in the " \
- "attic. I jump every time I hear a footstep on the stairs, " \
- "certain that the theft has been discovered and that I will " \
- "be caught. The SS officer visits less often now that he is " \
- "sure"), 'florence')) # doctest: +ELLIPSIS
- ('florence', -307.5473096...)
- """
- def worker(message, key_length, fitness):
- splits = every_nth(sanitised_message, key_length)
- key = cat([unpos(caesar_break(s)[0]) for s in splits])
- plaintext = vigenere_decipher(message, key)
- fit = fitness(plaintext)
- return key, fit
- sanitised_message = sanitise(message)
- results = starmap(worker, [(sanitised_message, i, fitness)
- for i in range(1, max_key_length+1)])
- return max(results, key=lambda k: k[1])
-
-
-def beaufort_sub_break(message, fitness=Pletters):
- """Breaks one chunk of a Beaufort cipher with frequency analysis
-
- >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
- 'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
- (0, -117.4492...)
- >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
- 'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
- (17, -114.9598...)
- """
- best_shift = 0
- best_fit = float('-inf')
- for key in range(26):
- plaintext = [unpos(key - pos(l)) for l in message]
- fit = fitness(plaintext)
- logger.debug('Beaufort sub break attempt using key {0} gives fit of {1} '
- 'and decrypt starting: {2}'.format(key, fit,
- plaintext[:50]))
- if fit > best_fit:
- best_fit = fit
- best_key = key
- logger.info('Beaufort sub break best fit: key {0} gives fit of {1} and '
- 'decrypt starting: {2}'.format(best_key, best_fit,
- cat([unpos(best_key - pos(l)) for l in message[:50]])))
- return best_key, best_fit
-
-
-def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
- """Breaks a Beaufort cipher with frequency analysis
-
- >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
- "run. She is ready and so am I. I stole Daniel's pocketbook this " \
- "afternoon when he left his jacket hanging on the easel in the " \
- "attic. I jump every time I hear a footstep on the stairs, " \
- "certain that the theft has been discovered and that I will " \
- "be caught. The SS officer visits less often now " \
- "that he is sure"), 'florence')) # doctest: +ELLIPSIS
- ('florence', -307.5473096791...)
- """
- def worker(message, key_length, fitness):
- splits = every_nth(message, key_length)
- key = cat([unpos(beaufort_sub_break(s)[0]) for s in splits])
- plaintext = beaufort_decipher(message, key)
- fit = fitness(plaintext)
- return key, fit
- sanitised_message = sanitise(message)
- results = starmap(worker, [(sanitised_message, i, fitness)
- for i in range(1, max_key_length+1)])
- return max(results, key=lambda k: k[1])
-
-
-def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters):
- """Breaks a Beaufort cipher with frequency analysis
-
- >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
- "run. She is ready and so am I. I stole Daniel's pocketbook this " \
- "afternoon when he left his jacket hanging on the easel in the " \
- "attic. I jump every time I hear a footstep on the stairs, " \
- "certain that the theft has been discovered and that I will " \
- "be caught. The SS officer visits less often now " \
- "that he is sure"), 'florence')) # doctest: +ELLIPSIS
- ('florence', -307.5473096791...)
- """
- def worker(message, key_length, fitness):
- splits = every_nth(sanitised_message, key_length)
- key = cat([unpos(-caesar_break(s)[0]) for s in splits])
- plaintext = beaufort_variant_decipher(message, key)
- fit = fitness(plaintext)
- return key, fit
- sanitised_message = sanitise(message)
- results = starmap(worker, [(sanitised_message, i, fitness)
- for i in range(1, max_key_length+1)])
- return max(results, key=lambda k: k[1])
-
-def polybius_break_mp(message, column_labels, row_labels,
- letters_to_merge=None,
- wordlist=keywords, fitness=Pletters,
- number_of_solutions=1, chunksize=500):
- """Breaks a Polybius substitution cipher using a dictionary and
- frequency analysis
-
- >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', 'abcde', 'abcde'), \
- 'abcde', 'abcde', \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
- (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
- -54.53880...)
- >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \
- 'abcde', 'abcde', \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
- (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', True), \
- -54.53880...)
- >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \
- 'abcde', 'abcde', \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
- (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
- -54.53880...)
- >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
- 'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \
- 'abcde', 'pqrst', \
- wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
- (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'pqrst', True), \
- -54.53880...)
- """
- if letters_to_merge is None:
- letters_to_merge = {'j': 'i'}
- with Pool() as pool:
- helper_args = [(message, word, wrap,
- column_labels, row_labels, column_first,
- letters_to_merge,
- fitness)
- for word in wordlist
- for wrap in KeywordWrapAlphabet
- for column_first in [False, True]]
- # Gotcha: the helper function here needs to be defined at the top level
- # (limitation of Pool.starmap)
- breaks = pool.starmap(polybius_break_worker, helper_args, chunksize)
- if number_of_solutions == 1:
- return max(breaks, key=lambda k: k[1])
- else:
- return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
-
-def polybius_break_worker(message, keyword, wrap_alphabet,
- column_order, row_order, column_first,
- letters_to_merge,
- fitness):
- plaintext = polybius_decipher(message, keyword,
- column_order, row_order,
- column_first=column_first,
- letters_to_merge=letters_to_merge,
- wrap_alphabet=wrap_alphabet)
- if plaintext:
- fit = fitness(plaintext)
- else:
- fit = float('-inf')
- logger.debug('Polybius break attempt using key {0} (wrap={1}, merging {2}), '
- 'columns as {3}, rows as {4} (column_first={5}) '
- 'gives fit of {6} and decrypt starting: '
- '{7}'.format(keyword, wrap_alphabet, letters_to_merge,
- column_order, row_order, column_first,
- fit, sanitise(plaintext)[:50]))
- return (keyword, wrap_alphabet, column_order, row_order, column_first), fit
-
-
-def column_transposition_break_mp(message, translist=transpositions,
- fitness=Pbigrams, chunksize=500):
- """Breaks a column transposition cipher using a dictionary and
- n-gram frequency analysis
-
- >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
- "It is a truth universally acknowledged, that a single man in \
- possession of a good fortune, must be in want of a wife. However \
- little known the feelings or views of such a man may be on his \
- first entering a neighbourhood, this truth is so well fixed in \
- the minds of the surrounding families, that he is considered the \
- rightful property of some one or other of their daughters."), \
- 'encipher'), \
- translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
- (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
- (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
- (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
- >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
- "It is a truth universally acknowledged, that a single man in \
- possession of a good fortune, must be in want of a wife. However \
- little known the feelings or views of such a man may be on his \
- first entering a neighbourhood, this truth is so well fixed in \
- the minds of the surrounding families, that he is considered the \
- rightful property of some one or other of their daughters."), \
- 'encipher'), \
- translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
- (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
- (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
- fitness=Ptrigrams) # doctest: +ELLIPSIS
- (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
- """
- with Pool() as pool:
- helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
- fitness)
- for trans in translist
- for fillcolumnwise in [True, False]
- for emptycolumnwise in [True, False]]
- # Gotcha: the helper function here needs to be defined at the top level
- # (limitation of Pool.starmap)
- breaks = pool.starmap(column_transposition_break_worker,
- helper_args, chunksize)
- return max(breaks, key=lambda k: k[1])
-column_transposition_break = column_transposition_break_mp
-
-def column_transposition_break_worker(message, transposition,
- fillcolumnwise, emptycolumnwise, fitness):
- plaintext = column_transposition_decipher(message, transposition,
- fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise)
- fit = fitness(sanitise(plaintext))
- logger.debug('Column transposition break attempt using key {0} '
- 'gives fit of {1} and decrypt starting: {2}'.format(
- transposition, fit,
- sanitise(plaintext)[:50]))
- return (transposition, fillcolumnwise, emptycolumnwise), fit
-
-
-def scytale_break_mp(message, max_key_length=20,
- fitness=Pbigrams, chunksize=500):
- """Breaks a scytale cipher using a range of lengths and
- n-gram frequency analysis
-
- >>> scytale_break_mp(scytale_encipher(sanitise( \
- "It is a truth universally acknowledged, that a single man in \
- possession of a good fortune, must be in want of a wife. However \
- little known the feelings or views of such a man may be on his \
- first entering a neighbourhood, this truth is so well fixed in \
- the minds of the surrounding families, that he is considered the \
- rightful property of some one or other of their daughters."), \
- 5)) # doctest: +ELLIPSIS
- (5, -709.4646722...)
- >>> scytale_break_mp(scytale_encipher(sanitise( \
- "It is a truth universally acknowledged, that a single man in \
- possession of a good fortune, must be in want of a wife. However \
- little known the feelings or views of such a man may be on his \
- first entering a neighbourhood, this truth is so well fixed in \
- the minds of the surrounding families, that he is considered the \
- rightful property of some one or other of their daughters."), \
- 5), \
- fitness=Ptrigrams) # doctest: +ELLIPSIS
- (5, -997.0129085...)
- """
- with Pool() as pool:
- helper_args = [(message, trans, False, True, fitness)
- for trans in
- [[col for col in range(math.ceil(len(message)/rows))]
- for rows in range(1,max_key_length+1)]]
- # Gotcha: the helper function here needs to be defined at the top level
- # (limitation of Pool.starmap)
- breaks = pool.starmap(column_transposition_break_worker,
- helper_args, chunksize)
- best = max(breaks, key=lambda k: k[1])
- return math.trunc(len(message) / len(best[0][0])), best[1]
-scytale_break = scytale_break_mp
-
-
-def railfence_break(message, max_key_length=20,
- fitness=Pletters, chunksize=500):
- """Breaks a railfence cipher using a matrix of given rank and letter frequencies
-
-
- """
-
- sanitised_message = sanitise(message)
- results = starmap(worker, [(sanitised_message, i, fitness)
- for i in range(2, max_key_length+1)])
- return max(results, key=lambda k: k[1])
-
-
-def railfence_break(message, max_key_length=20,
- fitness=Pbigrams, chunksize=500):
- """Breaks a railfence cipher using a range of lengths and
- n-gram frequency analysis
-
- >>> railfence_break(railfence_encipher(sanitise( \
- "It is a truth universally acknowledged, that a single man in \
- possession of a good fortune, must be in want of a wife. However \
- little known the feelings or views of such a man may be on his \
- first entering a neighbourhood, this truth is so well fixed in \
- the minds of the surrounding families, that he is considered the \
- rightful property of some one or other of their daughters."), \
- 7)) # doctest: +ELLIPSIS
- (7, -709.46467226...)
- >>> railfence_break(railfence_encipher(sanitise( \
- "It is a truth universally acknowledged, that a single man in \
- possession of a good fortune, must be in want of a wife. However \
- little known the feelings or views of such a man may be on his \
- first entering a neighbourhood, this truth is so well fixed in \
- the minds of the surrounding families, that he is considered the \
- rightful property of some one or other of their daughters."), \
- 7), \
- fitness=Ptrigrams) # doctest: +ELLIPSIS
- (7, -997.0129085...)
- """
- def worker(message, height, fitness):
- plaintext = railfence_decipher(message, height)
- fit = fitness(plaintext)
- return height, fit
-
- sanitised_message = sanitise(message)
- results = starmap(worker, [(sanitised_message, i, fitness)
- for i in range(2, max_key_length+1)])
- return max(results, key=lambda k: k[1])
def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)],
fillstyles = [AmscoFillStyle.continuous,
--- /dev/null
+from utilities import *
+from language_models import *
+from multiprocessing import Pool
+
+from logger import logger
+
+def transpositions_of(keyword):
+ """Finds the transpostions given by a keyword. For instance, the keyword
+ 'clever' rearranges to 'celrv', so the first column (0) stays first, the
+ second column (1) moves to third, the third column (2) moves to second,
+ and so on.
+
+ If passed a tuple, assume it's already a transposition and just return it.
+
+ >>> transpositions_of('clever')
+ (0, 2, 1, 4, 3)
+ >>> transpositions_of('fred')
+ (3, 2, 0, 1)
+ >>> transpositions_of((3, 2, 0, 1))
+ (3, 2, 0, 1)
+ """
+ if isinstance(keyword, tuple):
+ return keyword
+ else:
+ key = deduplicate(keyword)
+ transpositions = tuple(key.index(l) for l in sorted(key))
+ return transpositions
+
+def pad(message_len, group_len, fillvalue):
+ padding_length = group_len - message_len % group_len
+ if padding_length == group_len: padding_length = 0
+ padding = ''
+ for i in range(padding_length):
+ if callable(fillvalue):
+ padding += fillvalue()
+ else:
+ padding += fillvalue
+ return padding
+
+def column_transposition_encipher(message, keyword, fillvalue=' ',
+ fillcolumnwise=False,
+ emptycolumnwise=False):
+ """Enciphers using the column transposition cipher.
+ Message is padded to allow all rows to be the same length.
+
+ >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
+ 'hlohr eltee '
+ >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
+ 'hellothere '
+ >>> column_transposition_encipher('hellothere', 'abcdef')
+ 'hellothere '
+ >>> column_transposition_encipher('hellothere', 'abcde')
+ 'hellothere'
+ >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
+ 'hellothere'
+ >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
+ 'hlohreltee'
+ >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
+ 'htehlelroe'
+ >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
+ 'hellothere'
+ >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
+ 'heotllrehe'
+ >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
+ 'holrhetlee'
+ >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
+ 'htleehoelr'
+ >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
+ 'hleolteher'
+ >>> column_transposition_encipher('hellothere', 'cleverly')
+ 'hleolthre e '
+ >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+ 'hleolthre!e!'
+ >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
+ 'hleolthre*e*'
+ """
+ transpositions = transpositions_of(keyword)
+ message += pad(len(message), len(transpositions), fillvalue)
+ if fillcolumnwise:
+ rows = every_nth(message, len(message) // len(transpositions))
+ else:
+ rows = chunks(message, len(transpositions))
+ transposed = [transpose(r, transpositions) for r in rows]
+ if emptycolumnwise:
+ return combine_every_nth(transposed)
+ else:
+ return cat(chain(*transposed))
+
+def column_transposition_decipher(message, keyword, fillvalue=' ',
+ fillcolumnwise=False,
+ emptycolumnwise=False):
+ """Deciphers using the column transposition cipher.
+ Message is padded to allow all rows to be the same length.
+
+ >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
+ 'hellothere'
+ >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
+ 'hellothere'
+ >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
+ 'hellothere'
+ >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
+ 'hellothere'
+ >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
+ 'hellothere'
+ >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
+ 'hellothere'
+ >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
+ 'hellothere'
+ >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
+ 'hellothere'
+ """
+ transpositions = transpositions_of(keyword)
+ message += pad(len(message), len(transpositions), fillvalue)
+ if emptycolumnwise:
+ rows = every_nth(message, len(message) // len(transpositions))
+ else:
+ rows = chunks(message, len(transpositions))
+ untransposed = [untranspose(r, transpositions) for r in rows]
+ if fillcolumnwise:
+ return combine_every_nth(untransposed)
+ else:
+ return cat(chain(*untransposed))
+
+def scytale_encipher(message, rows, fillvalue=' '):
+ """Enciphers using the scytale transposition cipher.
+ Message is padded with spaces to allow all rows to be the same length.
+
+ >>> scytale_encipher('thequickbrownfox', 3)
+ 'tcnhkfeboqrxuo iw '
+ >>> scytale_encipher('thequickbrownfox', 4)
+ 'tubnhirfecooqkwx'
+ >>> scytale_encipher('thequickbrownfox', 5)
+ 'tubn hirf ecoo qkwx '
+ >>> scytale_encipher('thequickbrownfox', 6)
+ 'tqcrnxhukof eibwo '
+ >>> scytale_encipher('thequickbrownfox', 7)
+ 'tqcrnx hukof eibwo '
+ """
+ # transpositions = [i for i in range(math.ceil(len(message) / rows))]
+ # return column_transposition_encipher(message, transpositions,
+ # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True)
+ transpositions = [i for i in range(rows)]
+ return column_transposition_encipher(message, transpositions,
+ fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
+
+def scytale_decipher(message, rows):
+ """Deciphers using the scytale transposition cipher.
+ Assumes the message is padded so that all rows are the same length.
+
+ >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
+ 'thequickbrownfox '
+ >>> scytale_decipher('tubnhirfecooqkwx', 4)
+ 'thequickbrownfox'
+ >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
+ 'thequickbrownfox '
+ >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
+ 'thequickbrownfox '
+ >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
+ 'thequickbrownfox '
+ """
+ # transpositions = [i for i in range(math.ceil(len(message) / rows))]
+ # return column_transposition_decipher(message, transpositions,
+ # fillcolumnwise=False, emptycolumnwise=True)
+ transpositions = [i for i in range(rows)]
+ return column_transposition_decipher(message, transpositions,
+ fillcolumnwise=True, emptycolumnwise=False)
+
+
+def column_transposition_break_mp(message, translist=transpositions,
+ fitness=Pbigrams, chunksize=500):
+ """Breaks a column transposition cipher using a dictionary and
+ n-gram frequency analysis
+
+ >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+ "It is a truth universally acknowledged, that a single man in \
+ possession of a good fortune, must be in want of a wife. However \
+ little known the feelings or views of such a man may be on his \
+ first entering a neighbourhood, this truth is so well fixed in \
+ the minds of the surrounding families, that he is considered the \
+ rightful property of some one or other of their daughters."), \
+ 'encipher'), \
+ translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+ (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+ (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
+ (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
+ >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+ "It is a truth universally acknowledged, that a single man in \
+ possession of a good fortune, must be in want of a wife. However \
+ little known the feelings or views of such a man may be on his \
+ first entering a neighbourhood, this truth is so well fixed in \
+ the minds of the surrounding families, that he is considered the \
+ rightful property of some one or other of their daughters."), \
+ 'encipher'), \
+ translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+ (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+ (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
+ fitness=Ptrigrams) # doctest: +ELLIPSIS
+ (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
+ """
+ with Pool() as pool:
+ helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
+ fitness)
+ for trans in translist
+ for fillcolumnwise in [True, False]
+ for emptycolumnwise in [True, False]]
+ # Gotcha: the helper function here needs to be defined at the top level
+ # (limitation of Pool.starmap)
+ breaks = pool.starmap(column_transposition_break_worker,
+ helper_args, chunksize)
+ return max(breaks, key=lambda k: k[1])
+column_transposition_break = column_transposition_break_mp
+
+def column_transposition_break_worker(message, transposition,
+ fillcolumnwise, emptycolumnwise, fitness):
+ plaintext = column_transposition_decipher(message, transposition,
+ fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise)
+ fit = fitness(sanitise(plaintext))
+ logger.debug('Column transposition break attempt using key {0} '
+ 'gives fit of {1} and decrypt starting: {2}'.format(
+ transposition, fit,
+ sanitise(plaintext)[:50]))
+ return (transposition, fillcolumnwise, emptycolumnwise), fit
+
+
+def scytale_break_mp(message, max_key_length=20,
+ fitness=Pbigrams, chunksize=500):
+ """Breaks a scytale cipher using a range of lengths and
+ n-gram frequency analysis
+
+ >>> scytale_break_mp(scytale_encipher(sanitise( \
+ "It is a truth universally acknowledged, that a single man in \
+ possession of a good fortune, must be in want of a wife. However \
+ little known the feelings or views of such a man may be on his \
+ first entering a neighbourhood, this truth is so well fixed in \
+ the minds of the surrounding families, that he is considered the \
+ rightful property of some one or other of their daughters."), \
+ 5)) # doctest: +ELLIPSIS
+ (5, -709.4646722...)
+ >>> scytale_break_mp(scytale_encipher(sanitise( \
+ "It is a truth universally acknowledged, that a single man in \
+ possession of a good fortune, must be in want of a wife. However \
+ little known the feelings or views of such a man may be on his \
+ first entering a neighbourhood, this truth is so well fixed in \
+ the minds of the surrounding families, that he is considered the \
+ rightful property of some one or other of their daughters."), \
+ 5), \
+ fitness=Ptrigrams) # doctest: +ELLIPSIS
+ (5, -997.0129085...)
+ """
+ with Pool() as pool:
+ helper_args = [(message, trans, False, True, fitness)
+ for trans in
+ [[col for col in range(math.ceil(len(message)/rows))]
+ for rows in range(1,max_key_length+1)]]
+ # Gotcha: the helper function here needs to be defined at the top level
+ # (limitation of Pool.starmap)
+ breaks = pool.starmap(column_transposition_break_worker,
+ helper_args, chunksize)
+ best = max(breaks, key=lambda k: k[1])
+ return math.trunc(len(message) / len(best[0][0])), best[1]
+scytale_break = scytale_break_mp
+
--- /dev/null
+from utilities import *
+from language_models import *
+from enum import Enum
+# from itertools import starmap
+from multiprocessing import Pool
+
+from logger import logger
+
+
+class KeywordWrapAlphabet(Enum):
+ from_a = 1
+ from_last = 2
+ from_largest = 3
+
+
+def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Find the cipher alphabet given a keyword.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+
+ >>> keyword_cipher_alphabet_of('bayes')
+ 'bayescdfghijklmnopqrtuvwxz'
+ >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
+ 'bayescdfghijklmnopqrtuvwxz'
+ >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
+ 'bayestuvwxzcdfghijklmnopqr'
+ >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
+ 'bayeszcdfghijklmnopqrtuvwx'
+ """
+ if wrap_alphabet == KeywordWrapAlphabet.from_a:
+ cipher_alphabet = cat(deduplicate(sanitise(keyword) +
+ string.ascii_lowercase))
+ else:
+ if wrap_alphabet == KeywordWrapAlphabet.from_last:
+ last_keyword_letter = deduplicate(sanitise(keyword))[-1]
+ else:
+ last_keyword_letter = sorted(sanitise(keyword))[-1]
+ last_keyword_position = string.ascii_lowercase.find(
+ last_keyword_letter) + 1
+ cipher_alphabet = cat(
+ deduplicate(sanitise(keyword) +
+ string.ascii_lowercase[last_keyword_position:] +
+ string.ascii_lowercase))
+ return cipher_alphabet
+
+
+def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Enciphers a message with a keyword substitution cipher.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_encipher('test message', 'bayes')
+ 'rsqr ksqqbds'
+ >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
+ 'rsqr ksqqbds'
+ >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
+ 'lskl dskkbus'
+ >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
+ 'qspq jsppbcs'
+ """
+ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+ cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
+ return unaccent(message).lower().translate(cipher_translation)
+
+def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Deciphers a message with a keyword substitution cipher.
+ wrap_alphabet controls how the rest of the alphabet is added
+ after the keyword.
+ 0 : from 'a'
+ 1 : from the last letter in the sanitised keyword
+ 2 : from the largest letter in the sanitised keyword
+
+ >>> keyword_decipher('rsqr ksqqbds', 'bayes')
+ 'test message'
+ >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
+ 'test message'
+ >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
+ 'test message'
+ >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
+ 'test message'
+ """
+ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+ cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
+ return message.lower().translate(cipher_translation)
+
+
+def keyword_break(message, wordlist=keywords, fitness=Pletters):
+ """Breaks a keyword substitution cipher using a dictionary and
+ frequency analysis.
+
+ >>> keyword_break(keyword_encipher('this is a test message for the ' \
+ 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
+ """
+ best_keyword = ''
+ best_wrap_alphabet = True
+ best_fit = float("-inf")
+ for wrap_alphabet in KeywordWrapAlphabet:
+ for keyword in wordlist:
+ plaintext = keyword_decipher(message, keyword, wrap_alphabet)
+ fit = fitness(plaintext)
+ logger.debug('Keyword break attempt using key {0} (wrap={1}) '
+ 'gives fit of {2} and decrypt starting: {3}'.format(
+ keyword, wrap_alphabet, fit,
+ sanitise(plaintext)[:50]))
+ if fit > best_fit:
+ best_fit = fit
+ best_keyword = keyword
+ best_wrap_alphabet = wrap_alphabet
+ logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
+ '{2} and decrypt starting: {3}'.format(best_keyword,
+ best_wrap_alphabet, best_fit, sanitise(
+ keyword_decipher(message, best_keyword,
+ best_wrap_alphabet))[:50]))
+ return (best_keyword, best_wrap_alphabet), best_fit
+
+def keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
+ number_of_solutions=1, chunksize=500):
+ """Breaks a keyword substitution cipher using a dictionary and
+ frequency analysis
+
+ >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+ 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
+ >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+ 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
+ wordlist=['cat', 'elephant', 'kangaroo'], \
+ number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ [(('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...),
+ (('elephant', <KeywordWrapAlphabet.from_largest: 3>), -52.834575011...)]
+ """
+ with Pool() as pool:
+ helper_args = [(message, word, wrap, fitness)
+ for word in wordlist
+ for wrap in KeywordWrapAlphabet]
+ # Gotcha: the helper function here needs to be defined at the top level
+ # (limitation of Pool.starmap)
+ breaks = pool.starmap(keyword_break_worker, helper_args, chunksize)
+ if number_of_solutions == 1:
+ return max(breaks, key=lambda k: k[1])
+ else:
+ return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
+
+def keyword_break_worker(message, keyword, wrap_alphabet, fitness):
+ plaintext = keyword_decipher(message, keyword, wrap_alphabet)
+ fit = fitness(plaintext)
+ logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
+ '{2} and decrypt starting: {3}'.format(keyword,
+ wrap_alphabet, fit, sanitise(plaintext)[:50]))
+ return (keyword, wrap_alphabet), fit
+
+
+def monoalphabetic_break_hillclimbing(message,
+ max_iterations=20000,
+ plain_alphabet=None,
+ cipher_alphabet=None,
+ fitness=Pletters, chunksize=1):
+ return simulated_annealing_break(message,
+ workers=1,
+ initial_temperature=0,
+ max_iterations=max_iterations,
+ plain_alphabet=plain_alphabet,
+ cipher_alphabet=cipher_alphabet,
+ fitness=fitness, chunksize=chunksize)
+
+
+def monoalphabetic_break_hillclimbing_mp(message,
+ workers=10,
+ max_iterations=20000,
+ plain_alphabet=None,
+ cipher_alphabet=None,
+ fitness=Pletters, chunksize=1):
+ return simulated_annealing_break(message,
+ workers=workers,
+ initial_temperature=0,
+ max_iterations=max_iterations,
+ plain_alphabet=plain_alphabet,
+ cipher_alphabet=cipher_alphabet,
+ fitness=fitness, chunksize=chunksize)
+
+
+def simulated_annealing_break(message, workers=10,
+ initial_temperature=200,
+ max_iterations=20000,
+ plain_alphabet=None,
+ cipher_alphabet=None,
+ fitness=Pletters, chunksize=1):
+ worker_args = []
+ ciphertext = sanitise(message)
+ for i in range(workers):
+ if not plain_alphabet:
+ plain_alphabet = string.ascii_lowercase
+ if not cipher_alphabet:
+ cipher_alphabet = list(string.ascii_lowercase)
+ random.shuffle(cipher_alphabet)
+ cipher_alphabet = cat(cipher_alphabet)
+ worker_args.append((ciphertext, plain_alphabet, cipher_alphabet,
+ initial_temperature, max_iterations, fitness))
+ with Pool() as pool:
+ breaks = pool.starmap(simulated_annealing_break_worker,
+ worker_args, chunksize)
+ return max(breaks, key=lambda k: k[1])
+
+
+def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet,
+ t0, max_iterations, fitness):
+ def swap(letters, i, j):
+ if i > j:
+ i, j = j, i
+ if i == j:
+ return letters
+ else:
+ return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
+ letters[j+1:])
+
+ temperature = t0
+
+ dt = t0 / (0.9 * max_iterations)
+
+ current_alphabet = cipher_alphabet
+ alphabet = current_alphabet
+ cipher_translation = ''.maketrans(current_alphabet, plain_alphabet)
+ plaintext = message.translate(cipher_translation)
+ current_fitness = fitness(plaintext)
+
+ best_alphabet = current_alphabet
+ best_fitness = current_fitness
+ best_plaintext = plaintext
+
+ # print('starting for', max_iterations)
+ for i in range(max_iterations):
+ swap_a = random.randrange(26)
+ swap_b = (swap_a + int(random.gauss(0, 4))) % 26
+ alphabet = swap(current_alphabet, swap_a, swap_b)
+ cipher_translation = ''.maketrans(alphabet, plain_alphabet)
+ plaintext = message.translate(cipher_translation)
+ new_fitness = fitness(plaintext)
+ try:
+ sa_chance = math.exp((new_fitness - current_fitness) / temperature)
+ except (OverflowError, ZeroDivisionError):
+ # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
+ sa_chance = 0
+ if (new_fitness > current_fitness or random.random() < sa_chance):
+ # logger.debug('Simulated annealing: iteration {}, temperature {}, '
+ # 'current alphabet {}, current_fitness {}, '
+ # 'best_plaintext {}'.format(i, temperature, current_alphabet,
+ # current_fitness, best_plaintext[:50]))
+
+ # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
+ current_fitness = new_fitness
+ current_alphabet = alphabet
+
+ if current_fitness > best_fitness:
+ best_alphabet = current_alphabet
+ best_fitness = current_fitness
+ best_plaintext = plaintext
+ if i % 500 == 0:
+ logger.debug('Simulated annealing: iteration {}, temperature {}, '
+ 'current alphabet {}, current_fitness {}, '
+ 'best_plaintext {}'.format(i, temperature, current_alphabet,
+ current_fitness, plaintext[:50]))
+ temperature = max(temperature - dt, 0.001)
+
+ return best_alphabet, best_fitness # current_alphabet, current_fitness
from math import log10
import os
-unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'})
-
-def letters(text):
- """Remove all non-alphabetic characters from a text
- >>> letters('The Quick')
- 'TheQuick'
- >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
- 'TheQuickBROWNfoxjumpedoverthelazyDOG'
- """
- return ''.join([c for c in text if c in string.ascii_letters])
-
-def unaccent(text):
- """Remove all accents from letters.
- It does this by converting the unicode string to decomposed compatability
- form, dropping all the combining accents, then re-encoding the bytes.
-
- >>> unaccent('hello')
- 'hello'
- >>> unaccent('HELLO')
- 'HELLO'
- >>> unaccent('héllo')
- 'hello'
- >>> unaccent('héllö')
- 'hello'
- >>> unaccent('HÉLLÖ')
- 'HELLO'
- """
- translated_text = text.translate(unaccent_specials)
- return unicodedata.normalize('NFKD', translated_text).\
- encode('ascii', 'ignore').\
- decode('utf-8')
-
-def sanitise(text):
- """Remove all non-alphabetic characters and convert the text to lowercase
-
- >>> sanitise('The Quick')
- 'thequick'
- >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
- 'thequickbrownfoxjumpedoverthelazydog'
- >>> sanitise('HÉLLÖ')
- 'hello'
- """
- # sanitised = [c.lower() for c in text if c in string.ascii_letters]
- # return ''.join(sanitised)
- return letters(unaccent(text)).lower()
def datafile(name, sep='\t'):
--- /dev/null
+import logging
+
+# logging.basicConfig(filename="cipher.log", level=logging.INFO)
+# logger = logging.getLogger(__name__)
+
+logger = logging.getLogger('cipherbreak')
+logger.setLevel(logging.WARNING)
+# logger.setLevel(logging.INFO)
+# logger.setLevel(logging.DEBUG)
+
+# create the logging file handler
+fh = logging.FileHandler("cipher.log")
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+fh.setFormatter(formatter)
+
+# add handler to logger object
+logger.addHandler(fh)
--- /dev/null
+from utilities import *
+from language_models import *
+from multiprocessing import Pool
+
+from logger import logger
+
+def polybius_grid(keyword, column_order, row_order, letters_to_merge=None,
+ wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Grid for a Polybius cipher, using a keyword to rearrange the
+ alphabet.
+
+
+ >>> polybius_grid('a', 'abcde', 'abcde')['x'] == ('e', 'c')
+ True
+ >>> polybius_grid('elephant', 'abcde', 'abcde')['e'] == ('a', 'a')
+ True
+ >>> polybius_grid('elephant', 'abcde', 'abcde')['b'] == ('b', 'c')
+ True
+ """
+ alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet)
+ if letters_to_merge is None:
+ letters_to_merge = {'j': 'i'}
+ grid = {l: k
+ for k, l in zip([(c, r) for c in column_order for r in row_order],
+ [l for l in alphabet if l not in letters_to_merge])}
+ for l in letters_to_merge:
+ grid[l] = grid[letters_to_merge[l]]
+ return grid
+
+def polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge=None,
+ wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Grid for decrypting using a Polybius cipher, using a keyword to
+ rearrange the alphabet.
+
+ >>> polybius_reverse_grid('a', 'abcde', 'abcde')['e', 'c'] == 'x'
+ True
+ >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['a', 'a'] == 'e'
+ True
+ >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['b', 'c'] == 'b'
+ True
+ """
+ alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet)
+ if letters_to_merge is None:
+ letters_to_merge = {'j': 'i'}
+ grid = {k: l
+ for k, l in zip([(c, r) for c in column_order for r in row_order],
+ [l for l in alphabet if l not in letters_to_merge])}
+ return grid
+
+
+def polybius_flatten(pair, column_first):
+ """Convert a series of pairs into a single list of characters"""
+ if column_first:
+ return str(pair[1]) + str(pair[0])
+ else:
+ return str(pair[0]) + str(pair[1])
+
+def polybius_encipher(message, keyword, column_order, row_order,
+ column_first=False,
+ letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Encipher a message with Polybius cipher, using a keyword to rearrange
+ the alphabet
+
+
+ >>> polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', \
+ [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], \
+ wrap_alphabet=KeywordWrapAlphabet.from_last)
+ '2214445544551522115522511155551543114252542214111352123234442355411135441314115451112122'
+ >>> polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', 'abcde', 'abcde', \
+ column_first=False)
+ 'bbadccddccddaebbaaddbbceaaddddaecbaacadadcbbadaaacdaabedbcccdeddbeaabdccacadaadcceaababb'
+ >>> polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', 'abcde', 'abcde', \
+ column_first=True)
+ 'bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaacaadbadecbccedddebaadbcccadaaacdecaaabbb'
+ """
+ grid = polybius_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet)
+ return cat(polybius_flatten(grid[l], column_first)
+ for l in message
+ if l in grid)
+
+
+def polybius_decipher(message, keyword, column_order, row_order,
+ column_first=False,
+ letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a):
+ """Decipher a message with a Polybius cipher, using a keyword to rearrange
+ the alphabet
+
+ >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\
+ 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \
+ column_first=False)
+ 'toisisvtestxessvbephktoefhnugiysweqifoekxelt'
+
+ >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\
+ 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \
+ column_first=True)
+ 'thisisatestmessageforthepolybiusdecipherment'
+ """
+ grid = polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet)
+ column_index_type = type(column_order[0])
+ row_index_type = type(row_order[0])
+ if column_first:
+ pairs = [(column_index_type(p[1]), row_index_type(p[0])) for p in chunks(message, 2)]
+ else:
+ pairs = [(row_index_type(p[0]), column_index_type(p[1])) for p in chunks(message, 2)]
+ return cat(grid[p] for p in pairs if p in grid)
+
+
+def polybius_break_mp(message, column_labels, row_labels,
+ letters_to_merge=None,
+ wordlist=keywords, fitness=Pletters,
+ number_of_solutions=1, chunksize=500):
+ """Breaks a Polybius substitution cipher using a dictionary and
+ frequency analysis
+
+ >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', 'abcde', 'abcde'), \
+ 'abcde', 'abcde', \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
+ -54.53880...)
+ >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \
+ 'abcde', 'abcde', \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', True), \
+ -54.53880...)
+ >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \
+ 'abcde', 'abcde', \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
+ -54.53880...)
+ >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+ 'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \
+ 'abcde', 'pqrst', \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'pqrst', True), \
+ -54.53880...)
+ """
+ if letters_to_merge is None:
+ letters_to_merge = {'j': 'i'}
+ with Pool() as pool:
+ helper_args = [(message, word, wrap,
+ column_labels, row_labels, column_first,
+ letters_to_merge,
+ fitness)
+ for word in wordlist
+ for wrap in KeywordWrapAlphabet
+ for column_first in [False, True]]
+ # Gotcha: the helper function here needs to be defined at the top level
+ # (limitation of Pool.starmap)
+ breaks = pool.starmap(polybius_break_worker, helper_args, chunksize)
+ if number_of_solutions == 1:
+ return max(breaks, key=lambda k: k[1])
+ else:
+ return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
+
+def polybius_break_worker(message, keyword, wrap_alphabet,
+ column_order, row_order, column_first,
+ letters_to_merge,
+ fitness):
+ plaintext = polybius_decipher(message, keyword,
+ column_order, row_order,
+ column_first=column_first,
+ letters_to_merge=letters_to_merge,
+ wrap_alphabet=wrap_alphabet)
+ if plaintext:
+ fit = fitness(plaintext)
+ else:
+ fit = float('-inf')
+ logger.debug('Polybius break attempt using key {0} (wrap={1}, merging {2}), '
+ 'columns as {3}, rows as {4} (column_first={5}) '
+ 'gives fit of {6} and decrypt starting: '
+ '{7}'.format(keyword, wrap_alphabet, letters_to_merge,
+ column_order, row_order, column_first,
+ fit, sanitise(plaintext)[:50]))
+ return (keyword, wrap_alphabet, column_order, row_order, column_first), fit
+
--- /dev/null
+from utilities import *
+from language_models import *
+from enum import Enum
+from itertools import starmap
+from itertools import zip_longest
+
+from logger import logger
+
+def railfence_encipher(message, height, fillvalue=''):
+ """Railfence cipher.
+ Works by splitting the text into sections, then reading across them to
+ generate the rows in the cipher. The rows are then combined to form the
+ ciphertext.
+
+ Example: the plaintext "hellotherefriends", with a height of four, written
+ out in the railfence as
+ h h i
+ etere*
+ lorfns
+ l e d
+ (with the * showing the one character to finish the last section).
+ Each 'section' is two columns, but unfolded. In the example, the first
+ section is 'hellot'.
+
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!')
+ 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!'
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!')
+ 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!'
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!')
+ 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!'
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!')
+ 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!'
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3)
+ 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece'
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5)
+ 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp'
+ >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7)
+ 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic'
+ """
+ sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue)
+ n_sections = len(sections)
+ # Add the top row
+ rows = [cat([s[0] for s in sections])]
+ # process the middle rows of the grid
+ for r in range(1, height-1):
+ rows += [cat([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])]
+ # process the bottom row
+ rows += [cat([s[height - 1:height] for s in sections])]
+ # rows += [wcat([s[height - 1] for s in sections])]
+ return cat(rows)
+
+def railfence_decipher(message, height, fillvalue=''):
+ """Railfence decipher.
+ Works by reconstructing the grid used to generate the ciphertext, then
+ unfolding the sections so the text can be concatenated together.
+
+ Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first
+ work out that the second row has a character missing, find the rows of the
+ grid, then split the section into its two columns.
+
+ 'hhieterelorfnsled' is split into
+ h h i
+ etere
+ lorfns
+ l e d
+ (spaces added for clarity), which is stored in 'rows'. This is then split
+ into 'down_rows' and 'up_rows':
+
+ down_rows:
+ hhi
+ eee
+ lrn
+ led
+
+ up_rows:
+ tr
+ ofs
+
+ These are then zipped together (after the up_rows are reversed) to recover
+ the plaintext.
+
+ Most of the procedure is about finding the correct lengths for each row then
+ splitting the ciphertext into those rows.
+
+ >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!')
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!')
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!')
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!')
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3)
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5)
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7)
+ 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+ """
+ # find the number and size of the sections, including how many characters
+ # are missing for a full grid
+ n_sections = math.ceil(len(message) / ((height - 1) * 2))
+ padding_to_add = n_sections * (height - 1) * 2 - len(message)
+ # row_lengths are for the both up rows and down rows
+ row_lengths = [n_sections] * (height - 1) * 2
+ for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1):
+ row_lengths[i] -= 1
+ # folded_rows are the combined row lengths in the middle of the railfence
+ folded_row_lengths = [row_lengths[0]]
+ for i in range(1, height-1):
+ folded_row_lengths += [row_lengths[i] + row_lengths[-i]]
+ folded_row_lengths += [row_lengths[height - 1]]
+ # find the rows that form the railfence grid
+ rows = []
+ row_start = 0
+ for i in folded_row_lengths:
+ rows += [message[row_start:row_start + i]]
+ row_start += i
+ # split the rows into the 'down_rows' (those that form the first column of
+ # a section) and the 'up_rows' (those that ofrm the second column of a
+ # section).
+ down_rows = [rows[0]]
+ up_rows = []
+ for i in range(1, height-1):
+ down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])]
+ up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])]
+ down_rows += [rows[-1]]
+ up_rows.reverse()
+ return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r)
+
+
+def railfence_break(message, max_key_length=20,
+ fitness=Pletters, chunksize=500):
+ """Breaks a railfence cipher using a matrix of given rank and letter frequencies
+
+
+ """
+
+ sanitised_message = sanitise(message)
+ results = starmap(worker, [(sanitised_message, i, fitness)
+ for i in range(2, max_key_length+1)])
+ return max(results, key=lambda k: k[1])
+
+
+def railfence_break(message, max_key_length=20,
+ fitness=Pbigrams, chunksize=500):
+ """Breaks a railfence cipher using a range of lengths and
+ n-gram frequency analysis
+
+ >>> railfence_break(railfence_encipher(sanitise( \
+ "It is a truth universally acknowledged, that a single man in \
+ possession of a good fortune, must be in want of a wife. However \
+ little known the feelings or views of such a man may be on his \
+ first entering a neighbourhood, this truth is so well fixed in \
+ the minds of the surrounding families, that he is considered the \
+ rightful property of some one or other of their daughters."), \
+ 7)) # doctest: +ELLIPSIS
+ (7, -709.46467226...)
+ >>> railfence_break(railfence_encipher(sanitise( \
+ "It is a truth universally acknowledged, that a single man in \
+ possession of a good fortune, must be in want of a wife. However \
+ little known the feelings or views of such a man may be on his \
+ first entering a neighbourhood, this truth is so well fixed in \
+ the minds of the surrounding families, that he is considered the \
+ rightful property of some one or other of their daughters."), \
+ 7), \
+ fitness=Ptrigrams) # doctest: +ELLIPSIS
+ (7, -997.0129085...)
+ """
+ def worker(message, height, fitness):
+ plaintext = railfence_decipher(message, height)
+ fit = fitness(plaintext)
+ return height, fit
+
+ sanitised_message = sanitise(message)
+ results = starmap(worker, [(sanitised_message, i, fitness)
+ for i in range(2, max_key_length+1)])
+ return max(results, key=lambda k: k[1])
--- /dev/null
+import string
+import collections
+
+# join a a list of letters into a string
+cat = ''.join
+
+# join a list of words into a string, separated by spaces
+wcat = ' '.join
+
+# join a list of lines, separated by newline
+lcat = '\n'.join
+
+def pos(letter):
+ """Return the position of a letter in the alphabet (0-25)"""
+ if letter in string.ascii_lowercase:
+ return ord(letter) - ord('a')
+ elif letter in string.ascii_uppercase:
+ return ord(letter) - ord('A')
+ else:
+ return 0
+
+def unpos(number):
+ """Return the letter in the given position in the alphabet (mod 26)"""
+ return chr(number % 26 + ord('a'))
+
+def every_nth(text, n, fillvalue=''):
+ """Returns n strings, each of which consists of every nth character,
+ starting with the 0th, 1st, 2nd, ... (n-1)th character
+
+ >>> every_nth(string.ascii_lowercase, 5)
+ ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
+ >>> every_nth(string.ascii_lowercase, 1)
+ ['abcdefghijklmnopqrstuvwxyz']
+ >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
+ ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+ 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+ >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
+ ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
+ """
+ split_text = chunks(text, n, fillvalue)
+ return [cat(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
+
+def combine_every_nth(split_text):
+ """Reforms a text split into every_nth strings
+
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
+ 'abcdefghijklmnopqrstuvwxyz'
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
+ 'abcdefghijklmnopqrstuvwxyz'
+ >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
+ 'abcdefghijklmnopqrstuvwxyz'
+ """
+ return cat([cat(l)
+ for l in zip_longest(*split_text, fillvalue='')])
+
+def chunks(text, n, fillvalue=None):
+ """Split a text into chunks of n characters
+
+ >>> chunks('abcdefghi', 3)
+ ['abc', 'def', 'ghi']
+ >>> chunks('abcdefghi', 4)
+ ['abcd', 'efgh', 'i']
+ >>> chunks('abcdefghi', 4, fillvalue='!')
+ ['abcd', 'efgh', 'i!!!']
+ """
+ if fillvalue:
+ padding = fillvalue[0] * (n - len(text) % n)
+ else:
+ padding = ''
+ return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
+
+def transpose(items, transposition):
+ """Moves items around according to the given transposition
+
+ >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
+ ['a', 'b', 'c', 'd']
+ >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
+ ['d', 'b', 'c', 'a']
+ >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
+ [13, 12, 14, 11, 15, 10]
+ """
+ transposed = [''] * len(transposition)
+ for p, t in enumerate(transposition):
+ transposed[p] = items[t]
+ return transposed
+
+def untranspose(items, transposition):
+ """Undoes a transpose
+
+ >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
+ ['a', 'b', 'c', 'd']
+ >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
+ ['a', 'b', 'c', 'd']
+ >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
+ [10, 11, 12, 13, 14, 15]
+ """
+ transposed = [''] * len(transposition)
+ for p, t in enumerate(transposition):
+ transposed[t] = items[p]
+ return transposed
+
+def deduplicate(text):
+ return list(collections.OrderedDict.fromkeys(text))
+
+
+def letters(text):
+ """Remove all non-alphabetic characters from a text
+ >>> letters('The Quick')
+ 'TheQuick'
+ >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
+ 'TheQuickBROWNfoxjumpedoverthelazyDOG'
+ """
+ return ''.join([c for c in text if c in string.ascii_letters])
+
+# Special characters for conversion, such as smart quotes.
+unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'})
+
+def unaccent(text):
+ """Remove all accents from letters.
+ It does this by converting the unicode string to decomposed compatability
+ form, dropping all the combining accents, then re-encoding the bytes.
+
+ >>> unaccent('hello')
+ 'hello'
+ >>> unaccent('HELLO')
+ 'HELLO'
+ >>> unaccent('héllo')
+ 'hello'
+ >>> unaccent('héllö')
+ 'hello'
+ >>> unaccent('HÉLLÖ')
+ 'HELLO'
+ """
+ translated_text = text.translate(unaccent_specials)
+ return unicodedata.normalize('NFKD', translated_text).\
+ encode('ascii', 'ignore').\
+ decode('utf-8')
+
+def sanitise(text):
+ """Remove all non-alphabetic characters and convert the text to lowercase
+
+ >>> sanitise('The Quick')
+ 'thequick'
+ >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
+ 'thequickbrownfoxjumpedoverthelazydog'
+ >>> sanitise('HÉLLÖ')
+ 'hello'
+ """
+ return letters(unaccent(text)).lower()
+
+
+def index_of_coincidence(text):
+ stext = sanitise(text)
+ counts = collections.Counter(stext)
+ denom = len(stext) * (len(text) - 1) / 26
+ return (
+ sum(max(counts[l] * counts[l] - 1, 0) for l in string.ascii_lowercase)
+ /
+ denom
+ )
+
+
+transpositions = collections.defaultdict(list)
+for word in keywords:
+ transpositions[transpositions_of(word)] += [word]
+
+def frequencies(text):
+ """Count the number of occurrences of each character in text
+
+ >>> sorted(frequencies('abcdefabc').items())
+ [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
+ >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
+ 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
+ [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
+ ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
+ ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
+ ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
+ >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
+ '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
+ [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
+ ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
+ ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
+ ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
+ ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
+ >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
+ 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
+ [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
+ ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
+ ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
+ ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
+ >>> frequencies('abcdefabcdef')['x']
+ 0
+ """
+ return collections.Counter(c for c in text)
--- /dev/null
+from utilities import *
+from language_models import *
+from enum import Enum
+from itertools import starmap
+from multiprocessing import Pool
+
+from logger import logger
+
+def vigenere_encipher(message, keyword):
+ """Vigenere encipher
+
+ >>> vigenere_encipher('hello', 'abc')
+ 'hfnlp'
+ """
+ shifts = [pos(l) for l in sanitise(keyword)]
+ pairs = zip(message, cycle(shifts))
+ return cat([caesar_encipher_letter(l, k) for l, k in pairs])
+
+def vigenere_decipher(message, keyword):
+ """Vigenere decipher
+
+ >>> vigenere_decipher('hfnlp', 'abc')
+ 'hello'
+ """
+ shifts = [pos(l) for l in sanitise(keyword)]
+ pairs = zip(message, cycle(shifts))
+ return cat([caesar_decipher_letter(l, k) for l, k in pairs])
+
+
+def beaufort_encipher(message, keyword):
+ """Beaufort encipher
+
+ >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii')
+ 'sevsvrusyrrxfayyxuteemazudmpjmmwr'
+ """
+ shifts = [pos(l) for l in sanitise(keyword)]
+ pairs = zip(message, cycle(shifts))
+ return cat([unpos(k - pos(l)) for l, k in pairs])
+
+beaufort_decipher = beaufort_encipher
+
+beaufort_variant_encipher=vigenere_decipher
+beaufort_variant_decipher=vigenere_encipher
+
+
+def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
+ chunksize=500):
+ """Breaks a vigenere cipher using a dictionary and frequency analysis.
+
+ >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
+ 'message for the vigenere decipherment'), 'cat'), \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ ('cat', -52.9472712...)
+ """
+ with Pool() as pool:
+ helper_args = [(message, word, fitness)
+ for word in wordlist]
+ # Gotcha: the helper function here needs to be defined at the top level
+ # (limitation of Pool.starmap)
+ breaks = pool.starmap(vigenere_keyword_break_worker, helper_args,
+ chunksize)
+ return max(breaks, key=lambda k: k[1])
+vigenere_keyword_break = vigenere_keyword_break_mp
+
+def vigenere_keyword_break_worker(message, keyword, fitness):
+ plaintext = vigenere_decipher(message, keyword)
+ fit = fitness(plaintext)
+ logger.debug('Vigenere keyword break attempt using key {0} gives fit of '
+ '{1} and decrypt starting: {2}'.format(keyword,
+ fit, sanitise(plaintext)[:50]))
+ return keyword, fit
+
+
+def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters):
+ """Breaks a Vigenere cipher with frequency analysis
+
+ >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
+ "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+ "afternoon when he left his jacket hanging on the easel in the " \
+ "attic. I jump every time I hear a footstep on the stairs, " \
+ "certain that the theft has been discovered and that I will " \
+ "be caught. The SS officer visits less often now that he is " \
+ "sure"), 'florence')) # doctest: +ELLIPSIS
+ ('florence', -307.5473096...)
+ """
+ def worker(message, key_length, fitness):
+ splits = every_nth(sanitised_message, key_length)
+ key = cat([unpos(caesar_break(s)[0]) for s in splits])
+ plaintext = vigenere_decipher(message, key)
+ fit = fitness(plaintext)
+ return key, fit
+ sanitised_message = sanitise(message)
+ results = starmap(worker, [(sanitised_message, i, fitness)
+ for i in range(1, max_key_length+1)])
+ return max(results, key=lambda k: k[1])
+
+
+def beaufort_sub_break(message, fitness=Pletters):
+ """Breaks one chunk of a Beaufort cipher with frequency analysis
+
+ >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
+ 'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
+ (0, -117.4492...)
+ >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
+ 'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
+ (17, -114.9598...)
+ """
+ best_shift = 0
+ best_fit = float('-inf')
+ for key in range(26):
+ plaintext = [unpos(key - pos(l)) for l in message]
+ fit = fitness(plaintext)
+ logger.debug('Beaufort sub break attempt using key {0} gives fit of {1} '
+ 'and decrypt starting: {2}'.format(key, fit,
+ plaintext[:50]))
+ if fit > best_fit:
+ best_fit = fit
+ best_key = key
+ logger.info('Beaufort sub break best fit: key {0} gives fit of {1} and '
+ 'decrypt starting: {2}'.format(best_key, best_fit,
+ cat([unpos(best_key - pos(l)) for l in message[:50]])))
+ return best_key, best_fit
+
+
+def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
+ """Breaks a Beaufort cipher with frequency analysis
+
+ >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
+ "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+ "afternoon when he left his jacket hanging on the easel in the " \
+ "attic. I jump every time I hear a footstep on the stairs, " \
+ "certain that the theft has been discovered and that I will " \
+ "be caught. The SS officer visits less often now " \
+ "that he is sure"), 'florence')) # doctest: +ELLIPSIS
+ ('florence', -307.5473096791...)
+ """
+ def worker(message, key_length, fitness):
+ splits = every_nth(message, key_length)
+ key = cat([unpos(beaufort_sub_break(s)[0]) for s in splits])
+ plaintext = beaufort_decipher(message, key)
+ fit = fitness(plaintext)
+ return key, fit
+ sanitised_message = sanitise(message)
+ results = starmap(worker, [(sanitised_message, i, fitness)
+ for i in range(1, max_key_length+1)])
+ return max(results, key=lambda k: k[1])
+
+
+def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters):
+ """Breaks a Beaufort cipher with frequency analysis
+
+ >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
+ "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+ "afternoon when he left his jacket hanging on the easel in the " \
+ "attic. I jump every time I hear a footstep on the stairs, " \
+ "certain that the theft has been discovered and that I will " \
+ "be caught. The SS officer visits less often now " \
+ "that he is sure"), 'florence')) # doctest: +ELLIPSIS
+ ('florence', -307.5473096791...)
+ """
+ def worker(message, key_length, fitness):
+ splits = every_nth(sanitised_message, key_length)
+ key = cat([unpos(-caesar_break(s)[0]) for s in splits])
+ plaintext = beaufort_variant_decipher(message, key)
+ fit = fitness(plaintext)
+ return key, fit
+ sanitised_message = sanitise(message)
+ results = starmap(worker, [(sanitised_message, i, fitness)
+ for i in range(1, max_key_length+1)])
+ return max(results, key=lambda k: k[1])