From: Neil Smith Date: Tue, 6 Mar 2018 13:11:29 +0000 (+0000) Subject: Partly refactored X-Git-Url: https://git.njae.me.uk/?p=cipher-tools.git;a=commitdiff_plain;h=21c390a77d42729afa23844ef2f1295106bed3de Partly refactored --- diff --git a/affine.py b/affine.py new file mode 100644 index 0000000..6ba90e6 --- /dev/null +++ b/affine.py @@ -0,0 +1,149 @@ +from utilities import * +from language_models import * +from logger import logger + + +modular_division_table = [[0]*26 for _ in range(26)] +for a in range(26): + for b in range(26): + c = (a * b) % 26 + modular_division_table[b][c] = a + + + + +def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True): + """Encipher a letter, given a multiplier and adder + + >>> cat(affine_encipher_letter(l, 3, 5, True) \ + for l in string.ascii_letters) + 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE' + >>> cat(affine_encipher_letter(l, 3, 5, False) \ + for l in string.ascii_letters) + 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC' + """ + # letter = unaccent(accented_letter) + # if letter in string.ascii_letters: + # if letter in string.ascii_uppercase: + # alphabet_start = ord('A') + # else: + # alphabet_start = ord('a') + # letter_number = ord(letter) - alphabet_start + # if one_based: letter_number += 1 + # cipher_number = (letter_number * multiplier + adder) % 26 + # if one_based: cipher_number -= 1 + # return chr(cipher_number % 26 + alphabet_start) + # else: + # return letter + letter = unaccent(accented_letter) + if letter in string.ascii_letters: + letter_number = pos(letter) + if one_based: letter_number += 1 + cipher_number = (letter_number * multiplier + adder) % 26 + if one_based: cipher_number -= 1 + if letter in string.ascii_uppercase: + return unpos(cipher_number).upper() + else: + return unpos(cipher_number) + else: + return letter + +def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): + """Encipher a letter, given a multiplier and adder + + >>> cat(affine_decipher_letter(l, 3, 5, True) \ + for l in 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE') + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + >>> cat(affine_decipher_letter(l, 3, 5, False) \ + for l in 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC') + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + """ + # if letter in string.ascii_letters: + # if letter in string.ascii_uppercase: + # alphabet_start = ord('A') + # else: + # alphabet_start = ord('a') + # cipher_number = ord(letter) - alphabet_start + # if one_based: cipher_number += 1 + # plaintext_number = ( + # modular_division_table[multiplier] + # [(cipher_number - adder) % 26]) + # if one_based: plaintext_number -= 1 + # return chr(plaintext_number % 26 + alphabet_start) + # else: + # return letter + if letter in string.ascii_letters: + cipher_number = pos(letter) + if one_based: cipher_number += 1 + plaintext_number = ( + modular_division_table[multiplier] + [(cipher_number - adder) % 26]) + if one_based: plaintext_number -= 1 + if letter in string.ascii_uppercase: + return unpos(plaintext_number).upper() + else: + return unpos(plaintext_number) + else: + return letter + +def affine_encipher(message, multiplier=1, adder=0, one_based=True): + """Encipher a message + + >>> affine_encipher('hours passed during which jerico tried every ' \ + 'trick he could think of', 15, 22, True) + 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' + """ + enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) + for l in message] + return cat(enciphered) + +def affine_decipher(message, multiplier=1, adder=0, one_based=True): + """Decipher a message + + >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ + 'jfaoe ls omytd jlaxe mh', 15, 22, True) + 'hours passed during which jerico tried every trick he could think of' + """ + enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) + for l in message] + return cat(enciphered) + + + +def affine_break(message, fitness=Pletters): + """Breaks an affine cipher using frequency analysis + + >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \ + 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \ + 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \ + 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \ + 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \ + 'kxd clm ckuxj.') # doctest: +ELLIPSIS + ((15, 22, True), -340.601181913...) + """ + sanitised_message = sanitise(message) + best_multiplier = 0 + best_adder = 0 + best_one_based = True + best_fit = float("-inf") + for one_based in [True, False]: + for multiplier in [x for x in range(1, 26, 2) if x != 13]: + for adder in range(26): + plaintext = affine_decipher(sanitised_message, + multiplier, adder, one_based) + fit = fitness(plaintext) + logger.debug('Affine break attempt using key {0}x+{1} ({2}) ' + 'gives fit of {3} and decrypt starting: {4}'. + format(multiplier, adder, one_based, fit, + plaintext[:50])) + if fit > best_fit: + best_fit = fit + best_multiplier = multiplier + best_adder = adder + best_one_based = one_based + logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of ' + '{3} and decrypt starting: {4}'.format( + best_multiplier, best_adder, best_one_based, best_fit, + affine_decipher(sanitised_message, best_multiplier, + best_adder, best_one_based)[:50])) + return (best_multiplier, best_adder, best_one_based), best_fit diff --git a/caesar.py b/caesar.py new file mode 100644 index 0000000..4d9a722 --- /dev/null +++ b/caesar.py @@ -0,0 +1,120 @@ +from utilities import * +from language_models import * +from logger import logger + +def caesar_encipher_letter(accented_letter, shift): + """Encipher a letter, given a shift amount + + >>> caesar_encipher_letter('a', 1) + 'b' + >>> caesar_encipher_letter('a', 2) + 'c' + >>> caesar_encipher_letter('b', 2) + 'd' + >>> caesar_encipher_letter('x', 2) + 'z' + >>> caesar_encipher_letter('y', 2) + 'a' + >>> caesar_encipher_letter('z', 2) + 'b' + >>> caesar_encipher_letter('z', -1) + 'y' + >>> caesar_encipher_letter('a', -1) + 'z' + >>> caesar_encipher_letter('A', 1) + 'B' + >>> caesar_encipher_letter('é', 1) + 'f' + """ + # letter = unaccent(accented_letter) + # if letter in string.ascii_letters: + # if letter in string.ascii_uppercase: + # alphabet_start = ord('A') + # else: + # alphabet_start = ord('a') + # return chr(((ord(letter) - alphabet_start + shift) % 26) + + # alphabet_start) + # else: + # return letter + + letter = unaccent(accented_letter) + if letter in string.ascii_letters: + cipherletter = unpos(pos(letter) + shift) + if letter in string.ascii_uppercase: + return cipherletter.upper() + else: + return cipherletter + else: + return letter + +def caesar_decipher_letter(letter, shift): + """Decipher a letter, given a shift amount + + >>> caesar_decipher_letter('b', 1) + 'a' + >>> caesar_decipher_letter('b', 2) + 'z' + """ + return caesar_encipher_letter(letter, -shift) + +def caesar_encipher(message, shift): + """Encipher a message with the Caesar cipher of given shift + + >>> caesar_encipher('abc', 1) + 'bcd' + >>> caesar_encipher('abc', 2) + 'cde' + >>> caesar_encipher('abcxyz', 2) + 'cdezab' + >>> caesar_encipher('ab cx yz', 2) + 'cd ez ab' + >>> caesar_encipher('Héllo World!', 2) + 'Jgnnq Yqtnf!' + """ + enciphered = [caesar_encipher_letter(l, shift) for l in message] + return cat(enciphered) + +def caesar_decipher(message, shift): + """Decipher a message with the Caesar cipher of given shift + + >>> caesar_decipher('bcd', 1) + 'abc' + >>> caesar_decipher('cde', 2) + 'abc' + >>> caesar_decipher('cd ez ab', 2) + 'ab cx yz' + >>> caesar_decipher('Jgnnq Yqtnf!', 2) + 'Hello World!' + """ + return caesar_encipher(message, -shift) + + +def caesar_break(message, fitness=Pletters): + """Breaks a Caesar cipher using frequency analysis + + >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \ + 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS + (4, -130.849989015...) + >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \ + 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS + (19, -128.82410410...) + >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \ + 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS + (13, -126.25403935...) + """ + sanitised_message = sanitise(message) + best_shift = 0 + best_fit = float('-inf') + for shift in range(26): + plaintext = caesar_decipher(sanitised_message, shift) + fit = fitness(plaintext) + logger.debug('Caesar break attempt using key {0} gives fit of {1} ' + 'and decrypt starting: {2}'.format(shift, fit, + plaintext[:50])) + if fit > best_fit: + best_fit = fit + best_shift = shift + logger.info('Caesar break best fit: key {0} gives fit of {1} and ' + 'decrypt starting: {2}'.format(best_shift, best_fit, + caesar_decipher(sanitised_message, best_shift)[:50])) + return best_shift, best_fit diff --git a/cipher.py b/cipher.py index e318aa4..2eb89f7 100644 --- a/cipher.py +++ b/cipher.py @@ -10,796 +10,17 @@ from language_models import * import pprint -## Utility functions -cat = ''.join -wcat = ' '.join -lcat = '\n'.join - -def pos(letter): - if letter in string.ascii_lowercase: - return ord(letter) - ord('a') - elif letter in string.ascii_uppercase: - return ord(letter) - ord('A') - else: - return '' - -def unpos(number): return chr(number % 26 + ord('a')) - - -modular_division_table = [[0]*26 for _ in range(26)] -for a in range(26): - for b in range(26): - c = (a * b) % 26 - modular_division_table[b][c] = a - - -def every_nth(text, n, fillvalue=''): - """Returns n strings, each of which consists of every nth character, - starting with the 0th, 1st, 2nd, ... (n-1)th character - - >>> every_nth(string.ascii_lowercase, 5) - ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] - >>> every_nth(string.ascii_lowercase, 1) - ['abcdefghijklmnopqrstuvwxyz'] - >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE - ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] - >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') - ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] - """ - split_text = chunks(text, n, fillvalue) - return [cat(l) for l in zip_longest(*split_text, fillvalue=fillvalue)] - -def combine_every_nth(split_text): - """Reforms a text split into every_nth strings - - >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) - 'abcdefghijklmnopqrstuvwxyz' - >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) - 'abcdefghijklmnopqrstuvwxyz' - >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) - 'abcdefghijklmnopqrstuvwxyz' - """ - return cat([cat(l) - for l in zip_longest(*split_text, fillvalue='')]) - -def chunks(text, n, fillvalue=None): - """Split a text into chunks of n characters - - >>> chunks('abcdefghi', 3) - ['abc', 'def', 'ghi'] - >>> chunks('abcdefghi', 4) - ['abcd', 'efgh', 'i'] - >>> chunks('abcdefghi', 4, fillvalue='!') - ['abcd', 'efgh', 'i!!!'] - """ - if fillvalue: - padding = fillvalue[0] * (n - len(text) % n) - else: - padding = '' - return [(text+padding)[i:i+n] for i in range(0, len(text), n)] - -def transpose(items, transposition): - """Moves items around according to the given transposition - - >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3)) - ['a', 'b', 'c', 'd'] - >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0)) - ['d', 'b', 'c', 'a'] - >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0)) - [13, 12, 14, 11, 15, 10] - """ - transposed = [''] * len(transposition) - for p, t in enumerate(transposition): - transposed[p] = items[t] - return transposed - -def untranspose(items, transposition): - """Undoes a transpose - - >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) - ['a', 'b', 'c', 'd'] - >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) - ['a', 'b', 'c', 'd'] - >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0]) - [10, 11, 12, 13, 14, 15] - """ - transposed = [''] * len(transposition) - for p, t in enumerate(transposition): - transposed[t] = items[p] - return transposed - -def deduplicate(text): - return list(collections.OrderedDict.fromkeys(text)) - - -def caesar_encipher_letter(accented_letter, shift): - """Encipher a letter, given a shift amount - - >>> caesar_encipher_letter('a', 1) - 'b' - >>> caesar_encipher_letter('a', 2) - 'c' - >>> caesar_encipher_letter('b', 2) - 'd' - >>> caesar_encipher_letter('x', 2) - 'z' - >>> caesar_encipher_letter('y', 2) - 'a' - >>> caesar_encipher_letter('z', 2) - 'b' - >>> caesar_encipher_letter('z', -1) - 'y' - >>> caesar_encipher_letter('a', -1) - 'z' - >>> caesar_encipher_letter('A', 1) - 'B' - >>> caesar_encipher_letter('é', 1) - 'f' - """ - # letter = unaccent(accented_letter) - # if letter in string.ascii_letters: - # if letter in string.ascii_uppercase: - # alphabet_start = ord('A') - # else: - # alphabet_start = ord('a') - # return chr(((ord(letter) - alphabet_start + shift) % 26) + - # alphabet_start) - # else: - # return letter - - letter = unaccent(accented_letter) - if letter in string.ascii_letters: - cipherletter = unpos(pos(letter) + shift) - if letter in string.ascii_uppercase: - return cipherletter.upper() - else: - return cipherletter - else: - return letter - -def caesar_decipher_letter(letter, shift): - """Decipher a letter, given a shift amount - - >>> caesar_decipher_letter('b', 1) - 'a' - >>> caesar_decipher_letter('b', 2) - 'z' - """ - return caesar_encipher_letter(letter, -shift) - -def caesar_encipher(message, shift): - """Encipher a message with the Caesar cipher of given shift - - >>> caesar_encipher('abc', 1) - 'bcd' - >>> caesar_encipher('abc', 2) - 'cde' - >>> caesar_encipher('abcxyz', 2) - 'cdezab' - >>> caesar_encipher('ab cx yz', 2) - 'cd ez ab' - >>> caesar_encipher('Héllo World!', 2) - 'Jgnnq Yqtnf!' - """ - enciphered = [caesar_encipher_letter(l, shift) for l in message] - return cat(enciphered) - -def caesar_decipher(message, shift): - """Decipher a message with the Caesar cipher of given shift - - >>> caesar_decipher('bcd', 1) - 'abc' - >>> caesar_decipher('cde', 2) - 'abc' - >>> caesar_decipher('cd ez ab', 2) - 'ab cx yz' - >>> caesar_decipher('Jgnnq Yqtnf!', 2) - 'Hello World!' - """ - return caesar_encipher(message, -shift) - -def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True): - """Encipher a letter, given a multiplier and adder - - >>> cat(affine_encipher_letter(l, 3, 5, True) \ - for l in string.ascii_letters) - 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE' - >>> cat(affine_encipher_letter(l, 3, 5, False) \ - for l in string.ascii_letters) - 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC' - """ - # letter = unaccent(accented_letter) - # if letter in string.ascii_letters: - # if letter in string.ascii_uppercase: - # alphabet_start = ord('A') - # else: - # alphabet_start = ord('a') - # letter_number = ord(letter) - alphabet_start - # if one_based: letter_number += 1 - # cipher_number = (letter_number * multiplier + adder) % 26 - # if one_based: cipher_number -= 1 - # return chr(cipher_number % 26 + alphabet_start) - # else: - # return letter - letter = unaccent(accented_letter) - if letter in string.ascii_letters: - letter_number = pos(letter) - if one_based: letter_number += 1 - cipher_number = (letter_number * multiplier + adder) % 26 - if one_based: cipher_number -= 1 - if letter in string.ascii_uppercase: - return unpos(cipher_number).upper() - else: - return unpos(cipher_number) - else: - return letter - -def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): - """Encipher a letter, given a multiplier and adder - - >>> cat(affine_decipher_letter(l, 3, 5, True) \ - for l in 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE') - 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' - >>> cat(affine_decipher_letter(l, 3, 5, False) \ - for l in 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC') - 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' - """ - # if letter in string.ascii_letters: - # if letter in string.ascii_uppercase: - # alphabet_start = ord('A') - # else: - # alphabet_start = ord('a') - # cipher_number = ord(letter) - alphabet_start - # if one_based: cipher_number += 1 - # plaintext_number = ( - # modular_division_table[multiplier] - # [(cipher_number - adder) % 26]) - # if one_based: plaintext_number -= 1 - # return chr(plaintext_number % 26 + alphabet_start) - # else: - # return letter - if letter in string.ascii_letters: - cipher_number = pos(letter) - if one_based: cipher_number += 1 - plaintext_number = ( - modular_division_table[multiplier] - [(cipher_number - adder) % 26]) - if one_based: plaintext_number -= 1 - if letter in string.ascii_uppercase: - return unpos(plaintext_number).upper() - else: - return unpos(plaintext_number) - else: - return letter - -def affine_encipher(message, multiplier=1, adder=0, one_based=True): - """Encipher a message - - >>> affine_encipher('hours passed during which jerico tried every ' \ - 'trick he could think of', 15, 22, True) - 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' - """ - enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) - for l in message] - return cat(enciphered) - -def affine_decipher(message, multiplier=1, adder=0, one_based=True): - """Decipher a message - - >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ - 'jfaoe ls omytd jlaxe mh', 15, 22, True) - 'hours passed during which jerico tried every trick he could think of' - """ - enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) - for l in message] - return cat(enciphered) - - -class KeywordWrapAlphabet(Enum): - from_a = 1 - from_last = 2 - from_largest = 3 - - -def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Find the cipher alphabet given a keyword. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - - >>> keyword_cipher_alphabet_of('bayes') - 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a) - 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last) - 'bayestuvwxzcdfghijklmnopqr' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest) - 'bayeszcdfghijklmnopqrtuvwx' - """ - if wrap_alphabet == KeywordWrapAlphabet.from_a: - cipher_alphabet = cat(deduplicate(sanitise(keyword) + - string.ascii_lowercase)) - else: - if wrap_alphabet == KeywordWrapAlphabet.from_last: - last_keyword_letter = deduplicate(sanitise(keyword))[-1] - else: - last_keyword_letter = sorted(sanitise(keyword))[-1] - last_keyword_position = string.ascii_lowercase.find( - last_keyword_letter) + 1 - cipher_alphabet = cat( - deduplicate(sanitise(keyword) + - string.ascii_lowercase[last_keyword_position:] + - string.ascii_lowercase)) - return cipher_alphabet - - -def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Enciphers a message with a keyword substitution cipher. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_encipher('test message', 'bayes') - 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a) - 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last) - 'lskl dskkbus' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest) - 'qspq jsppbcs' - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) - return unaccent(message).lower().translate(cipher_translation) - -def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Deciphers a message with a keyword substitution cipher. - wrap_alphabet controls how the rest of the alphabet is added - after the keyword. - 0 : from 'a' - 1 : from the last letter in the sanitised keyword - 2 : from the largest letter in the sanitised keyword - - >>> keyword_decipher('rsqr ksqqbds', 'bayes') - 'test message' - >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a) - 'test message' - >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last) - 'test message' - >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest) - 'test message' - """ - cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) - cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) - return message.lower().translate(cipher_translation) - - -def vigenere_encipher(message, keyword): - """Vigenere encipher - - >>> vigenere_encipher('hello', 'abc') - 'hfnlp' - """ - shifts = [pos(l) for l in sanitise(keyword)] - pairs = zip(message, cycle(shifts)) - return cat([caesar_encipher_letter(l, k) for l, k in pairs]) - -def vigenere_decipher(message, keyword): - """Vigenere decipher - - >>> vigenere_decipher('hfnlp', 'abc') - 'hello' - """ - shifts = [pos(l) for l in sanitise(keyword)] - pairs = zip(message, cycle(shifts)) - return cat([caesar_decipher_letter(l, k) for l, k in pairs]) - - -def beaufort_encipher(message, keyword): - """Beaufort encipher - - >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii') - 'sevsvrusyrrxfayyxuteemazudmpjmmwr' - """ - shifts = [pos(l) for l in sanitise(keyword)] - pairs = zip(message, cycle(shifts)) - return cat([unpos(k - pos(l)) for l, k in pairs]) - -beaufort_decipher = beaufort_encipher - -beaufort_variant_encipher=vigenere_decipher -beaufort_variant_decipher=vigenere_encipher - - -def polybius_grid(keyword, column_order, row_order, letters_to_merge=None, - wrap_alphabet=KeywordWrapAlphabet.from_a): - """Grid for a Polybius cipher, using a keyword to rearrange the - alphabet. - - - >>> polybius_grid('a', 'abcde', 'abcde')['x'] == ('e', 'c') - True - >>> polybius_grid('elephant', 'abcde', 'abcde')['e'] == ('a', 'a') - True - >>> polybius_grid('elephant', 'abcde', 'abcde')['b'] == ('b', 'c') - True - """ - alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet) - if letters_to_merge is None: - letters_to_merge = {'j': 'i'} - grid = {l: k - for k, l in zip([(c, r) for c in column_order for r in row_order], - [l for l in alphabet if l not in letters_to_merge])} - for l in letters_to_merge: - grid[l] = grid[letters_to_merge[l]] - return grid - -def polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge=None, - wrap_alphabet=KeywordWrapAlphabet.from_a): - """Grid for decrypting using a Polybius cipher, using a keyword to - rearrange the alphabet. - - >>> polybius_reverse_grid('a', 'abcde', 'abcde')['e', 'c'] == 'x' - True - >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['a', 'a'] == 'e' - True - >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['b', 'c'] == 'b' - True - """ - alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet) - if letters_to_merge is None: - letters_to_merge = {'j': 'i'} - grid = {k: l - for k, l in zip([(c, r) for c in column_order for r in row_order], - [l for l in alphabet if l not in letters_to_merge])} - return grid - - -def polybius_flatten(pair, column_first): - """Convert a series of pairs into a single list of characters""" - if column_first: - return str(pair[1]) + str(pair[0]) - else: - return str(pair[0]) + str(pair[1]) - -def polybius_encipher(message, keyword, column_order, row_order, - column_first=False, - letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Encipher a message with Polybius cipher, using a keyword to rearrange - the alphabet +from utilities import * +from segment import * - >>> polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', \ - [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], \ - wrap_alphabet=KeywordWrapAlphabet.from_last) - '2214445544551522115522511155551543114252542214111352123234442355411135441314115451112122' - >>> polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', 'abcde', 'abcde', \ - column_first=False) - 'bbadccddccddaebbaaddbbceaaddddaecbaacadadcbbadaaacdaabedbcccdeddbeaabdccacadaadcceaababb' - >>> polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', 'abcde', 'abcde', \ - column_first=True) - 'bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaacaadbadecbccedddebaadbcccadaaacdecaaabbb' - """ - grid = polybius_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet) - return cat(polybius_flatten(grid[l], column_first) - for l in message - if l in grid) - - -def polybius_decipher(message, keyword, column_order, row_order, - column_first=False, - letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a): - """Decipher a message with a Polybius cipher, using a keyword to rearrange - the alphabet - - >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\ - 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \ - column_first=False) - 'toisisvtestxessvbephktoefhnugiysweqifoekxelt' - - >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\ - 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \ - column_first=True) - 'thisisatestmessageforthepolybiusdecipherment' - """ - grid = polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet) - column_index_type = type(column_order[0]) - row_index_type = type(row_order[0]) - if column_first: - pairs = [(column_index_type(p[1]), row_index_type(p[0])) for p in chunks(message, 2)] - else: - pairs = [(row_index_type(p[0]), column_index_type(p[1])) for p in chunks(message, 2)] - return cat(grid[p] for p in pairs if p in grid) - - -def transpositions_of(keyword): - """Finds the transpostions given by a keyword. For instance, the keyword - 'clever' rearranges to 'celrv', so the first column (0) stays first, the - second column (1) moves to third, the third column (2) moves to second, - and so on. - - If passed a tuple, assume it's already a transposition and just return it. +from caesar import * +from affine import * +from keyword import * +from polybius import * +from column_transposition import * +from railfence import * - >>> transpositions_of('clever') - (0, 2, 1, 4, 3) - >>> transpositions_of('fred') - (3, 2, 0, 1) - >>> transpositions_of((3, 2, 0, 1)) - (3, 2, 0, 1) - """ - if isinstance(keyword, tuple): - return keyword - else: - key = deduplicate(keyword) - transpositions = tuple(key.index(l) for l in sorted(key)) - return transpositions - -def pad(message_len, group_len, fillvalue): - padding_length = group_len - message_len % group_len - if padding_length == group_len: padding_length = 0 - padding = '' - for i in range(padding_length): - if callable(fillvalue): - padding += fillvalue() - else: - padding += fillvalue - return padding - -def column_transposition_encipher(message, keyword, fillvalue=' ', - fillcolumnwise=False, - emptycolumnwise=False): - """Enciphers using the column transposition cipher. - Message is padded to allow all rows to be the same length. - - >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True) - 'hlohr eltee ' - >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere ' - >>> column_transposition_encipher('hellothere', 'abcdef') - 'hellothere ' - >>> column_transposition_encipher('hellothere', 'abcde') - 'hellothere' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False) - 'hlohreltee' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True) - 'htehlelroe' - >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True) - 'heotllrehe' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False) - 'holrhetlee' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True) - 'htleehoelr' - >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False) - 'hleolteher' - >>> column_transposition_encipher('hellothere', 'cleverly') - 'hleolthre e ' - >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') - 'hleolthre!e!' - >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*') - 'hleolthre*e*' - """ - transpositions = transpositions_of(keyword) - message += pad(len(message), len(transpositions), fillvalue) - if fillcolumnwise: - rows = every_nth(message, len(message) // len(transpositions)) - else: - rows = chunks(message, len(transpositions)) - transposed = [transpose(r, transpositions) for r in rows] - if emptycolumnwise: - return combine_every_nth(transposed) - else: - return cat(chain(*transposed)) - -def column_transposition_decipher(message, keyword, fillvalue=' ', - fillcolumnwise=False, - emptycolumnwise=False): - """Deciphers using the column transposition cipher. - Message is padded to allow all rows to be the same length. - - >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False) - 'hellothere' - >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True) - 'hellothere' - >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False) - 'hellothere' - """ - transpositions = transpositions_of(keyword) - message += pad(len(message), len(transpositions), fillvalue) - if emptycolumnwise: - rows = every_nth(message, len(message) // len(transpositions)) - else: - rows = chunks(message, len(transpositions)) - untransposed = [untranspose(r, transpositions) for r in rows] - if fillcolumnwise: - return combine_every_nth(untransposed) - else: - return cat(chain(*untransposed)) - -def scytale_encipher(message, rows, fillvalue=' '): - """Enciphers using the scytale transposition cipher. - Message is padded with spaces to allow all rows to be the same length. - - >>> scytale_encipher('thequickbrownfox', 3) - 'tcnhkfeboqrxuo iw ' - >>> scytale_encipher('thequickbrownfox', 4) - 'tubnhirfecooqkwx' - >>> scytale_encipher('thequickbrownfox', 5) - 'tubn hirf ecoo qkwx ' - >>> scytale_encipher('thequickbrownfox', 6) - 'tqcrnxhukof eibwo ' - >>> scytale_encipher('thequickbrownfox', 7) - 'tqcrnx hukof eibwo ' - """ - # transpositions = [i for i in range(math.ceil(len(message) / rows))] - # return column_transposition_encipher(message, transpositions, - # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True) - transpositions = [i for i in range(rows)] - return column_transposition_encipher(message, transpositions, - fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) - -def scytale_decipher(message, rows): - """Deciphers using the scytale transposition cipher. - Assumes the message is padded so that all rows are the same length. - - >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) - 'thequickbrownfox ' - >>> scytale_decipher('tubnhirfecooqkwx', 4) - 'thequickbrownfox' - >>> scytale_decipher('tubn hirf ecoo qkwx ', 5) - 'thequickbrownfox ' - >>> scytale_decipher('tqcrnxhukof eibwo ', 6) - 'thequickbrownfox ' - >>> scytale_decipher('tqcrnx hukof eibwo ', 7) - 'thequickbrownfox ' - """ - # transpositions = [i for i in range(math.ceil(len(message) / rows))] - # return column_transposition_decipher(message, transpositions, - # fillcolumnwise=False, emptycolumnwise=True) - transpositions = [i for i in range(rows)] - return column_transposition_decipher(message, transpositions, - fillcolumnwise=True, emptycolumnwise=False) - - -def railfence_encipher(message, height, fillvalue=''): - """Railfence cipher. - Works by splitting the text into sections, then reading across them to - generate the rows in the cipher. The rows are then combined to form the - ciphertext. - - Example: the plaintext "hellotherefriends", with a height of four, written - out in the railfence as - h h i - etere* - lorfns - l e d - (with the * showing the one character to finish the last section). - Each 'section' is two columns, but unfolded. In the example, the first - section is 'hellot'. - - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!') - 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!' - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!') - 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!' - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!') - 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!' - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!') - 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!' - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3) - 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece' - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5) - 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp' - >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7) - 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic' - """ - sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue) - n_sections = len(sections) - # Add the top row - rows = [cat([s[0] for s in sections])] - # process the middle rows of the grid - for r in range(1, height-1): - rows += [cat([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])] - # process the bottom row - rows += [cat([s[height - 1:height] for s in sections])] - # rows += [wcat([s[height - 1] for s in sections])] - return cat(rows) - -def railfence_decipher(message, height, fillvalue=''): - """Railfence decipher. - Works by reconstructing the grid used to generate the ciphertext, then - unfolding the sections so the text can be concatenated together. - - Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first - work out that the second row has a character missing, find the rows of the - grid, then split the section into its two columns. - - 'hhieterelorfnsled' is split into - h h i - etere - lorfns - l e d - (spaces added for clarity), which is stored in 'rows'. This is then split - into 'down_rows' and 'up_rows': - - down_rows: - hhi - eee - lrn - led - - up_rows: - tr - ofs - - These are then zipped together (after the up_rows are reversed) to recover - the plaintext. - - Most of the procedure is about finding the correct lengths for each row then - splitting the ciphertext into those rows. - - >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!') - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!') - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!') - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!') - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3) - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5) - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7) - 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' - """ - # find the number and size of the sections, including how many characters - # are missing for a full grid - n_sections = math.ceil(len(message) / ((height - 1) * 2)) - padding_to_add = n_sections * (height - 1) * 2 - len(message) - # row_lengths are for the both up rows and down rows - row_lengths = [n_sections] * (height - 1) * 2 - for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1): - row_lengths[i] -= 1 - # folded_rows are the combined row lengths in the middle of the railfence - folded_row_lengths = [row_lengths[0]] - for i in range(1, height-1): - folded_row_lengths += [row_lengths[i] + row_lengths[-i]] - folded_row_lengths += [row_lengths[height - 1]] - # find the rows that form the railfence grid - rows = [] - row_start = 0 - for i in folded_row_lengths: - rows += [message[row_start:row_start + i]] - row_start += i - # split the rows into the 'down_rows' (those that form the first column of - # a section) and the 'up_rows' (those that ofrm the second column of a - # section). - down_rows = [rows[0]] - up_rows = [] - for i in range(1, height-1): - down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])] - up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])] - down_rows += [rows[-1]] - up_rows.reverse() - return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r) def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False): """Makes the key column for a Cadenus cipher (the column down between the diff --git a/cipherbreak.py b/cipherbreak.py index 7c609ab..368d669 100644 --- a/cipherbreak.py +++ b/cipherbreak.py @@ -13,22 +13,6 @@ from multiprocessing import Pool import matplotlib.pyplot as plt -# logging.basicConfig(filename="cipher.log", level=logging.INFO) -# logger = logging.getLogger(__name__) - -logger = logging.getLogger('cipherbreak') -logger.setLevel(logging.WARNING) -# logger.setLevel(logging.INFO) -# logger.setLevel(logging.DEBUG) - -# create the logging file handler -fh = logging.FileHandler("cipher.log") -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -fh.setFormatter(formatter) - -# add handler to logger object -logger.addHandler(fh) - from cipher import * from language_models import * @@ -41,690 +25,8 @@ from language_models import * # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1) -def index_of_coincidence(text): - stext = sanitise(text) - counts = collections.Counter(stext) - denom = len(stext) * (len(text) - 1) / 26 - return ( - sum(max(counts[l] * counts[l] - 1, 0) for l in string.ascii_lowercase) - / - denom - ) - - -transpositions = collections.defaultdict(list) -for word in keywords: - transpositions[transpositions_of(word)] += [word] - -def frequencies(text): - """Count the number of occurrences of each character in text - - >>> sorted(frequencies('abcdefabc').items()) - [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)] - >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \ - 'dog').items()) # doctest: +NORMALIZE_WHITESPACE - [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), - ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), - ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), - ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \ - '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE - [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), - ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), - ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), - ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), - ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\ - 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE - [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), - ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), - ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), - ('w', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> frequencies('abcdefabcdef')['x'] - 0 - """ - return collections.Counter(c for c in text) - - -def caesar_break(message, fitness=Pletters): - """Breaks a Caesar cipher using frequency analysis - - >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \ - 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS - (4, -130.849989015...) - >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \ - 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS - (19, -128.82410410...) - >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \ - 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS - (13, -126.25403935...) - """ - sanitised_message = sanitise(message) - best_shift = 0 - best_fit = float('-inf') - for shift in range(26): - plaintext = caesar_decipher(sanitised_message, shift) - fit = fitness(plaintext) - logger.debug('Caesar break attempt using key {0} gives fit of {1} ' - 'and decrypt starting: {2}'.format(shift, fit, - plaintext[:50])) - if fit > best_fit: - best_fit = fit - best_shift = shift - logger.info('Caesar break best fit: key {0} gives fit of {1} and ' - 'decrypt starting: {2}'.format(best_shift, best_fit, - caesar_decipher(sanitised_message, best_shift)[:50])) - return best_shift, best_fit - -def affine_break(message, fitness=Pletters): - """Breaks an affine cipher using frequency analysis - - >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \ - 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \ - 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \ - 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \ - 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \ - 'kxd clm ckuxj.') # doctest: +ELLIPSIS - ((15, 22, True), -340.601181913...) - """ - sanitised_message = sanitise(message) - best_multiplier = 0 - best_adder = 0 - best_one_based = True - best_fit = float("-inf") - for one_based in [True, False]: - for multiplier in [x for x in range(1, 26, 2) if x != 13]: - for adder in range(26): - plaintext = affine_decipher(sanitised_message, - multiplier, adder, one_based) - fit = fitness(plaintext) - logger.debug('Affine break attempt using key {0}x+{1} ({2}) ' - 'gives fit of {3} and decrypt starting: {4}'. - format(multiplier, adder, one_based, fit, - plaintext[:50])) - if fit > best_fit: - best_fit = fit - best_multiplier = multiplier - best_adder = adder - best_one_based = one_based - logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of ' - '{3} and decrypt starting: {4}'.format( - best_multiplier, best_adder, best_one_based, best_fit, - affine_decipher(sanitised_message, best_multiplier, - best_adder, best_one_based)[:50])) - return (best_multiplier, best_adder, best_one_based), best_fit - -def keyword_break(message, wordlist=keywords, fitness=Pletters): - """Breaks a keyword substitution cipher using a dictionary and - frequency analysis. - - >>> keyword_break(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', ), -52.834575011...) - """ - best_keyword = '' - best_wrap_alphabet = True - best_fit = float("-inf") - for wrap_alphabet in KeywordWrapAlphabet: - for keyword in wordlist: - plaintext = keyword_decipher(message, keyword, wrap_alphabet) - fit = fitness(plaintext) - logger.debug('Keyword break attempt using key {0} (wrap={1}) ' - 'gives fit of {2} and decrypt starting: {3}'.format( - keyword, wrap_alphabet, fit, - sanitise(plaintext)[:50])) - if fit > best_fit: - best_fit = fit - best_keyword = keyword - best_wrap_alphabet = wrap_alphabet - logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(best_keyword, - best_wrap_alphabet, best_fit, sanitise( - keyword_decipher(message, best_keyword, - best_wrap_alphabet))[:50])) - return (best_keyword, best_wrap_alphabet), best_fit - -def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, - number_of_solutions=1, chunksize=500): - """Breaks a keyword substitution cipher using a dictionary and - frequency analysis - - >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - (('elephant', ), -52.834575011...) - >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ - 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ - wordlist=['cat', 'elephant', 'kangaroo'], \ - number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - [(('elephant', ), -52.834575011...), - (('elephant', ), -52.834575011...)] - """ - with Pool() as pool: - helper_args = [(message, word, wrap, fitness) - for word in wordlist - for wrap in KeywordWrapAlphabet] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) - if number_of_solutions == 1: - return max(breaks, key=lambda k: k[1]) - else: - return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] - -def keyword_break_worker(message, keyword, wrap_alphabet, fitness): - plaintext = keyword_decipher(message, keyword, wrap_alphabet) - fit = fitness(plaintext) - logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(keyword, - wrap_alphabet, fit, sanitise(plaintext)[:50])) - return (keyword, wrap_alphabet), fit - -# def monoalphabetic_break_hillclimbing(message, max_iterations=10000000, -# alphabet=None, fitness=Pletters): -# ciphertext = unaccent(message).lower() -# if not alphabet: -# alphabet = list(string.ascii_lowercase) -# random.shuffle(alphabet) -# alphabet = cat(alphabet) -# return monoalphabetic_break_hillclimbing_worker(ciphertext, alphabet, -# max_iterations, fitness) - -# def monoalphabetic_break_hillclimbing_mp(message, workers=10, -# max_iterations = 10000000, alphabet=None, fitness=Pletters, chunksize=1): -# worker_args = [] -# ciphertext = unaccent(message).lower() -# for i in range(workers): -# if alphabet: -# this_alphabet = alphabet -# else: -# this_alphabet = list(string.ascii_lowercase) -# random.shuffle(this_alphabet) -# this_alphabet = cat(this_alphabet) -# worker_args.append((ciphertext, this_alphabet, max_iterations, fitness)) -# with Pool() as pool: -# breaks = pool.starmap(monoalphabetic_break_hillclimbing_worker, -# worker_args, chunksize) -# return max(breaks, key=lambda k: k[1]) - -# def monoalphabetic_break_hillclimbing_worker(message, alphabet, -# max_iterations, fitness): -# def swap(letters, i, j): -# if i > j: -# i, j = j, i -# if i == j: -# return letters -# else: -# return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] + -# letters[j+1:]) -# best_alphabet = alphabet -# best_fitness = float('-inf') -# for i in range(max_iterations): -# alphabet = swap(best_alphabet, random.randrange(26), random.randrange(26)) -# cipher_translation = ''.maketrans(string.ascii_lowercase, alphabet) -# plaintext = message.translate(cipher_translation) -# if fitness(plaintext) > best_fitness: -# best_fitness = fitness(plaintext) -# best_alphabet = alphabet -# print(i, best_alphabet, best_fitness, plaintext[:50]) -# return best_alphabet, best_fitness - - -def monoalphabetic_break_hillclimbing(message, - max_iterations=20000, - plain_alphabet=None, - cipher_alphabet=None, - fitness=Pletters, chunksize=1): - return simulated_annealing_break(message, - workers=1, - initial_temperature=0, - max_iterations=max_iterations, - plain_alphabet=plain_alphabet, - cipher_alphabet=cipher_alphabet, - fitness=fitness, chunksize=chunksize) - - -def monoalphabetic_break_hillclimbing_mp(message, - workers=10, - max_iterations=20000, - plain_alphabet=None, - cipher_alphabet=None, - fitness=Pletters, chunksize=1): - return simulated_annealing_break(message, - workers=workers, - initial_temperature=0, - max_iterations=max_iterations, - plain_alphabet=plain_alphabet, - cipher_alphabet=cipher_alphabet, - fitness=fitness, chunksize=chunksize) - - -def simulated_annealing_break(message, workers=10, - initial_temperature=200, - max_iterations=20000, - plain_alphabet=None, - cipher_alphabet=None, - fitness=Pletters, chunksize=1): - worker_args = [] - ciphertext = sanitise(message) - for i in range(workers): - if not plain_alphabet: - plain_alphabet = string.ascii_lowercase - if not cipher_alphabet: - cipher_alphabet = list(string.ascii_lowercase) - random.shuffle(cipher_alphabet) - cipher_alphabet = cat(cipher_alphabet) - worker_args.append((ciphertext, plain_alphabet, cipher_alphabet, - initial_temperature, max_iterations, fitness)) - with Pool() as pool: - breaks = pool.starmap(simulated_annealing_break_worker, - worker_args, chunksize) - return max(breaks, key=lambda k: k[1]) - - -def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet, - t0, max_iterations, fitness): - def swap(letters, i, j): - if i > j: - i, j = j, i - if i == j: - return letters - else: - return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] + - letters[j+1:]) - - temperature = t0 - - dt = t0 / (0.9 * max_iterations) - - current_alphabet = cipher_alphabet - alphabet = current_alphabet - cipher_translation = ''.maketrans(current_alphabet, plain_alphabet) - plaintext = message.translate(cipher_translation) - current_fitness = fitness(plaintext) - - best_alphabet = current_alphabet - best_fitness = current_fitness - best_plaintext = plaintext - - # print('starting for', max_iterations) - for i in range(max_iterations): - swap_a = random.randrange(26) - swap_b = (swap_a + int(random.gauss(0, 4))) % 26 - alphabet = swap(current_alphabet, swap_a, swap_b) - cipher_translation = ''.maketrans(alphabet, plain_alphabet) - plaintext = message.translate(cipher_translation) - new_fitness = fitness(plaintext) - try: - sa_chance = math.exp((new_fitness - current_fitness) / temperature) - except (OverflowError, ZeroDivisionError): - # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature)) - sa_chance = 0 - if (new_fitness > current_fitness or random.random() < sa_chance): - # logger.debug('Simulated annealing: iteration {}, temperature {}, ' - # 'current alphabet {}, current_fitness {}, ' - # 'best_plaintext {}'.format(i, temperature, current_alphabet, - # current_fitness, best_plaintext[:50])) - - # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance)) - current_fitness = new_fitness - current_alphabet = alphabet - - if current_fitness > best_fitness: - best_alphabet = current_alphabet - best_fitness = current_fitness - best_plaintext = plaintext - if i % 500 == 0: - logger.debug('Simulated annealing: iteration {}, temperature {}, ' - 'current alphabet {}, current_fitness {}, ' - 'best_plaintext {}'.format(i, temperature, current_alphabet, - current_fitness, plaintext[:50])) - temperature = max(temperature - dt, 0.001) - - return best_alphabet, best_fitness # current_alphabet, current_fitness - - -def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters, - chunksize=500): - """Breaks a vigenere cipher using a dictionary and frequency analysis. - - >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \ - 'message for the vigenere decipherment'), 'cat'), \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS - ('cat', -52.9472712...) - """ - with Pool() as pool: - helper_args = [(message, word, fitness) - for word in wordlist] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(vigenere_keyword_break_worker, helper_args, - chunksize) - return max(breaks, key=lambda k: k[1]) -vigenere_keyword_break = vigenere_keyword_break_mp - -def vigenere_keyword_break_worker(message, keyword, fitness): - plaintext = vigenere_decipher(message, keyword) - fit = fitness(plaintext) - logger.debug('Vigenere keyword break attempt using key {0} gives fit of ' - '{1} and decrypt starting: {2}'.format(keyword, - fit, sanitise(plaintext)[:50])) - return keyword, fit - -def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters): - """Breaks a Vigenere cipher with frequency analysis - >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \ - "run. She is ready and so am I. I stole Daniel's pocketbook this " \ - "afternoon when he left his jacket hanging on the easel in the " \ - "attic. I jump every time I hear a footstep on the stairs, " \ - "certain that the theft has been discovered and that I will " \ - "be caught. The SS officer visits less often now that he is " \ - "sure"), 'florence')) # doctest: +ELLIPSIS - ('florence', -307.5473096...) - """ - def worker(message, key_length, fitness): - splits = every_nth(sanitised_message, key_length) - key = cat([unpos(caesar_break(s)[0]) for s in splits]) - plaintext = vigenere_decipher(message, key) - fit = fitness(plaintext) - return key, fit - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(1, max_key_length+1)]) - return max(results, key=lambda k: k[1]) - - -def beaufort_sub_break(message, fitness=Pletters): - """Breaks one chunk of a Beaufort cipher with frequency analysis - - >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \ - 'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS - (0, -117.4492...) - >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \ - 'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS - (17, -114.9598...) - """ - best_shift = 0 - best_fit = float('-inf') - for key in range(26): - plaintext = [unpos(key - pos(l)) for l in message] - fit = fitness(plaintext) - logger.debug('Beaufort sub break attempt using key {0} gives fit of {1} ' - 'and decrypt starting: {2}'.format(key, fit, - plaintext[:50])) - if fit > best_fit: - best_fit = fit - best_key = key - logger.info('Beaufort sub break best fit: key {0} gives fit of {1} and ' - 'decrypt starting: {2}'.format(best_key, best_fit, - cat([unpos(best_key - pos(l)) for l in message[:50]]))) - return best_key, best_fit - - -def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters): - """Breaks a Beaufort cipher with frequency analysis - - >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \ - "run. She is ready and so am I. I stole Daniel's pocketbook this " \ - "afternoon when he left his jacket hanging on the easel in the " \ - "attic. I jump every time I hear a footstep on the stairs, " \ - "certain that the theft has been discovered and that I will " \ - "be caught. The SS officer visits less often now " \ - "that he is sure"), 'florence')) # doctest: +ELLIPSIS - ('florence', -307.5473096791...) - """ - def worker(message, key_length, fitness): - splits = every_nth(message, key_length) - key = cat([unpos(beaufort_sub_break(s)[0]) for s in splits]) - plaintext = beaufort_decipher(message, key) - fit = fitness(plaintext) - return key, fit - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(1, max_key_length+1)]) - return max(results, key=lambda k: k[1]) - - -def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters): - """Breaks a Beaufort cipher with frequency analysis - - >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \ - "run. She is ready and so am I. I stole Daniel's pocketbook this " \ - "afternoon when he left his jacket hanging on the easel in the " \ - "attic. I jump every time I hear a footstep on the stairs, " \ - "certain that the theft has been discovered and that I will " \ - "be caught. The SS officer visits less often now " \ - "that he is sure"), 'florence')) # doctest: +ELLIPSIS - ('florence', -307.5473096791...) - """ - def worker(message, key_length, fitness): - splits = every_nth(sanitised_message, key_length) - key = cat([unpos(-caesar_break(s)[0]) for s in splits]) - plaintext = beaufort_variant_decipher(message, key) - fit = fitness(plaintext) - return key, fit - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(1, max_key_length+1)]) - return max(results, key=lambda k: k[1]) - -def polybius_break_mp(message, column_labels, row_labels, - letters_to_merge=None, - wordlist=keywords, fitness=Pletters, - number_of_solutions=1, chunksize=500): - """Breaks a Polybius substitution cipher using a dictionary and - frequency analysis - - >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', 'abcde', 'abcde'), \ - 'abcde', 'abcde', \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - (('elephant', , 'abcde', 'abcde', False), \ - -54.53880...) - >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \ - 'abcde', 'abcde', \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - (('elephant', , 'abcde', 'abcde', True), \ - -54.53880...) - >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \ - 'abcde', 'abcde', \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - (('elephant', , 'abcde', 'abcde', False), \ - -54.53880...) - >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ - 'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \ - 'abcde', 'pqrst', \ - wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - (('elephant', , 'abcde', 'pqrst', True), \ - -54.53880...) - """ - if letters_to_merge is None: - letters_to_merge = {'j': 'i'} - with Pool() as pool: - helper_args = [(message, word, wrap, - column_labels, row_labels, column_first, - letters_to_merge, - fitness) - for word in wordlist - for wrap in KeywordWrapAlphabet - for column_first in [False, True]] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(polybius_break_worker, helper_args, chunksize) - if number_of_solutions == 1: - return max(breaks, key=lambda k: k[1]) - else: - return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] - -def polybius_break_worker(message, keyword, wrap_alphabet, - column_order, row_order, column_first, - letters_to_merge, - fitness): - plaintext = polybius_decipher(message, keyword, - column_order, row_order, - column_first=column_first, - letters_to_merge=letters_to_merge, - wrap_alphabet=wrap_alphabet) - if plaintext: - fit = fitness(plaintext) - else: - fit = float('-inf') - logger.debug('Polybius break attempt using key {0} (wrap={1}, merging {2}), ' - 'columns as {3}, rows as {4} (column_first={5}) ' - 'gives fit of {6} and decrypt starting: ' - '{7}'.format(keyword, wrap_alphabet, letters_to_merge, - column_order, row_order, column_first, - fit, sanitise(plaintext)[:50])) - return (keyword, wrap_alphabet, column_order, row_order, column_first), fit - - -def column_transposition_break_mp(message, translist=transpositions, - fitness=Pbigrams, chunksize=500): - """Breaks a column transposition cipher using a dictionary and - n-gram frequency analysis - - >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in \ - the minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 'encipher'), \ - translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ - (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ - (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS - (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...) - >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in \ - the minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 'encipher'), \ - translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ - (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ - (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \ - fitness=Ptrigrams) # doctest: +ELLIPSIS - (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...) - """ - with Pool() as pool: - helper_args = [(message, trans, fillcolumnwise, emptycolumnwise, - fitness) - for trans in translist - for fillcolumnwise in [True, False] - for emptycolumnwise in [True, False]] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(column_transposition_break_worker, - helper_args, chunksize) - return max(breaks, key=lambda k: k[1]) -column_transposition_break = column_transposition_break_mp - -def column_transposition_break_worker(message, transposition, - fillcolumnwise, emptycolumnwise, fitness): - plaintext = column_transposition_decipher(message, transposition, - fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise) - fit = fitness(sanitise(plaintext)) - logger.debug('Column transposition break attempt using key {0} ' - 'gives fit of {1} and decrypt starting: {2}'.format( - transposition, fit, - sanitise(plaintext)[:50])) - return (transposition, fillcolumnwise, emptycolumnwise), fit - - -def scytale_break_mp(message, max_key_length=20, - fitness=Pbigrams, chunksize=500): - """Breaks a scytale cipher using a range of lengths and - n-gram frequency analysis - - >>> scytale_break_mp(scytale_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in \ - the minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 5)) # doctest: +ELLIPSIS - (5, -709.4646722...) - >>> scytale_break_mp(scytale_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in \ - the minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 5), \ - fitness=Ptrigrams) # doctest: +ELLIPSIS - (5, -997.0129085...) - """ - with Pool() as pool: - helper_args = [(message, trans, False, True, fitness) - for trans in - [[col for col in range(math.ceil(len(message)/rows))] - for rows in range(1,max_key_length+1)]] - # Gotcha: the helper function here needs to be defined at the top level - # (limitation of Pool.starmap) - breaks = pool.starmap(column_transposition_break_worker, - helper_args, chunksize) - best = max(breaks, key=lambda k: k[1]) - return math.trunc(len(message) / len(best[0][0])), best[1] -scytale_break = scytale_break_mp - - -def railfence_break(message, max_key_length=20, - fitness=Pletters, chunksize=500): - """Breaks a railfence cipher using a matrix of given rank and letter frequencies - - - """ - - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(2, max_key_length+1)]) - return max(results, key=lambda k: k[1]) - - -def railfence_break(message, max_key_length=20, - fitness=Pbigrams, chunksize=500): - """Breaks a railfence cipher using a range of lengths and - n-gram frequency analysis - - >>> railfence_break(railfence_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in \ - the minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 7)) # doctest: +ELLIPSIS - (7, -709.46467226...) - >>> railfence_break(railfence_encipher(sanitise( \ - "It is a truth universally acknowledged, that a single man in \ - possession of a good fortune, must be in want of a wife. However \ - little known the feelings or views of such a man may be on his \ - first entering a neighbourhood, this truth is so well fixed in \ - the minds of the surrounding families, that he is considered the \ - rightful property of some one or other of their daughters."), \ - 7), \ - fitness=Ptrigrams) # doctest: +ELLIPSIS - (7, -997.0129085...) - """ - def worker(message, height, fitness): - plaintext = railfence_decipher(message, height) - fit = fitness(plaintext) - return height, fit - - sanitised_message = sanitise(message) - results = starmap(worker, [(sanitised_message, i, fitness) - for i in range(2, max_key_length+1)]) - return max(results, key=lambda k: k[1]) def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)], fillstyles = [AmscoFillStyle.continuous, diff --git a/column_transposition.py b/column_transposition.py new file mode 100644 index 0000000..ecef331 --- /dev/null +++ b/column_transposition.py @@ -0,0 +1,262 @@ +from utilities import * +from language_models import * +from multiprocessing import Pool + +from logger import logger + +def transpositions_of(keyword): + """Finds the transpostions given by a keyword. For instance, the keyword + 'clever' rearranges to 'celrv', so the first column (0) stays first, the + second column (1) moves to third, the third column (2) moves to second, + and so on. + + If passed a tuple, assume it's already a transposition and just return it. + + >>> transpositions_of('clever') + (0, 2, 1, 4, 3) + >>> transpositions_of('fred') + (3, 2, 0, 1) + >>> transpositions_of((3, 2, 0, 1)) + (3, 2, 0, 1) + """ + if isinstance(keyword, tuple): + return keyword + else: + key = deduplicate(keyword) + transpositions = tuple(key.index(l) for l in sorted(key)) + return transpositions + +def pad(message_len, group_len, fillvalue): + padding_length = group_len - message_len % group_len + if padding_length == group_len: padding_length = 0 + padding = '' + for i in range(padding_length): + if callable(fillvalue): + padding += fillvalue() + else: + padding += fillvalue + return padding + +def column_transposition_encipher(message, keyword, fillvalue=' ', + fillcolumnwise=False, + emptycolumnwise=False): + """Enciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + + >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True) + 'hlohr eltee ' + >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere ' + >>> column_transposition_encipher('hellothere', 'abcdef') + 'hellothere ' + >>> column_transposition_encipher('hellothere', 'abcde') + 'hellothere' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False) + 'hlohreltee' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True) + 'htehlelroe' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True) + 'heotllrehe' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False) + 'holrhetlee' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True) + 'htleehoelr' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False) + 'hleolteher' + >>> column_transposition_encipher('hellothere', 'cleverly') + 'hleolthre e ' + >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') + 'hleolthre!e!' + >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*') + 'hleolthre*e*' + """ + transpositions = transpositions_of(keyword) + message += pad(len(message), len(transpositions), fillvalue) + if fillcolumnwise: + rows = every_nth(message, len(message) // len(transpositions)) + else: + rows = chunks(message, len(transpositions)) + transposed = [transpose(r, transpositions) for r in rows] + if emptycolumnwise: + return combine_every_nth(transposed) + else: + return cat(chain(*transposed)) + +def column_transposition_decipher(message, keyword, fillvalue=' ', + fillcolumnwise=False, + emptycolumnwise=False): + """Deciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + + >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False) + 'hellothere' + """ + transpositions = transpositions_of(keyword) + message += pad(len(message), len(transpositions), fillvalue) + if emptycolumnwise: + rows = every_nth(message, len(message) // len(transpositions)) + else: + rows = chunks(message, len(transpositions)) + untransposed = [untranspose(r, transpositions) for r in rows] + if fillcolumnwise: + return combine_every_nth(untransposed) + else: + return cat(chain(*untransposed)) + +def scytale_encipher(message, rows, fillvalue=' '): + """Enciphers using the scytale transposition cipher. + Message is padded with spaces to allow all rows to be the same length. + + >>> scytale_encipher('thequickbrownfox', 3) + 'tcnhkfeboqrxuo iw ' + >>> scytale_encipher('thequickbrownfox', 4) + 'tubnhirfecooqkwx' + >>> scytale_encipher('thequickbrownfox', 5) + 'tubn hirf ecoo qkwx ' + >>> scytale_encipher('thequickbrownfox', 6) + 'tqcrnxhukof eibwo ' + >>> scytale_encipher('thequickbrownfox', 7) + 'tqcrnx hukof eibwo ' + """ + # transpositions = [i for i in range(math.ceil(len(message) / rows))] + # return column_transposition_encipher(message, transpositions, + # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True) + transpositions = [i for i in range(rows)] + return column_transposition_encipher(message, transpositions, + fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) + +def scytale_decipher(message, rows): + """Deciphers using the scytale transposition cipher. + Assumes the message is padded so that all rows are the same length. + + >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) + 'thequickbrownfox ' + >>> scytale_decipher('tubnhirfecooqkwx', 4) + 'thequickbrownfox' + >>> scytale_decipher('tubn hirf ecoo qkwx ', 5) + 'thequickbrownfox ' + >>> scytale_decipher('tqcrnxhukof eibwo ', 6) + 'thequickbrownfox ' + >>> scytale_decipher('tqcrnx hukof eibwo ', 7) + 'thequickbrownfox ' + """ + # transpositions = [i for i in range(math.ceil(len(message) / rows))] + # return column_transposition_decipher(message, transpositions, + # fillcolumnwise=False, emptycolumnwise=True) + transpositions = [i for i in range(rows)] + return column_transposition_decipher(message, transpositions, + fillcolumnwise=True, emptycolumnwise=False) + + +def column_transposition_break_mp(message, translist=transpositions, + fitness=Pbigrams, chunksize=500): + """Breaks a column transposition cipher using a dictionary and + n-gram frequency analysis + + >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 'encipher'), \ + translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ + (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ + (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS + (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...) + >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 'encipher'), \ + translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \ + (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \ + (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \ + fitness=Ptrigrams) # doctest: +ELLIPSIS + (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...) + """ + with Pool() as pool: + helper_args = [(message, trans, fillcolumnwise, emptycolumnwise, + fitness) + for trans in translist + for fillcolumnwise in [True, False] + for emptycolumnwise in [True, False]] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(column_transposition_break_worker, + helper_args, chunksize) + return max(breaks, key=lambda k: k[1]) +column_transposition_break = column_transposition_break_mp + +def column_transposition_break_worker(message, transposition, + fillcolumnwise, emptycolumnwise, fitness): + plaintext = column_transposition_decipher(message, transposition, + fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise) + fit = fitness(sanitise(plaintext)) + logger.debug('Column transposition break attempt using key {0} ' + 'gives fit of {1} and decrypt starting: {2}'.format( + transposition, fit, + sanitise(plaintext)[:50])) + return (transposition, fillcolumnwise, emptycolumnwise), fit + + +def scytale_break_mp(message, max_key_length=20, + fitness=Pbigrams, chunksize=500): + """Breaks a scytale cipher using a range of lengths and + n-gram frequency analysis + + >>> scytale_break_mp(scytale_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 5)) # doctest: +ELLIPSIS + (5, -709.4646722...) + >>> scytale_break_mp(scytale_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 5), \ + fitness=Ptrigrams) # doctest: +ELLIPSIS + (5, -997.0129085...) + """ + with Pool() as pool: + helper_args = [(message, trans, False, True, fitness) + for trans in + [[col for col in range(math.ceil(len(message)/rows))] + for rows in range(1,max_key_length+1)]] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(column_transposition_break_worker, + helper_args, chunksize) + best = max(breaks, key=lambda k: k[1]) + return math.trunc(len(message) / len(best[0][0])), best[1] +scytale_break = scytale_break_mp + diff --git a/keyword.py b/keyword.py new file mode 100644 index 0000000..91491ba --- /dev/null +++ b/keyword.py @@ -0,0 +1,269 @@ +from utilities import * +from language_models import * +from enum import Enum +# from itertools import starmap +from multiprocessing import Pool + +from logger import logger + + +class KeywordWrapAlphabet(Enum): + from_a = 1 + from_last = 2 + from_largest = 3 + + +def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Find the cipher alphabet given a keyword. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + + >>> keyword_cipher_alphabet_of('bayes') + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a) + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last) + 'bayestuvwxzcdfghijklmnopqr' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest) + 'bayeszcdfghijklmnopqrtuvwx' + """ + if wrap_alphabet == KeywordWrapAlphabet.from_a: + cipher_alphabet = cat(deduplicate(sanitise(keyword) + + string.ascii_lowercase)) + else: + if wrap_alphabet == KeywordWrapAlphabet.from_last: + last_keyword_letter = deduplicate(sanitise(keyword))[-1] + else: + last_keyword_letter = sorted(sanitise(keyword))[-1] + last_keyword_position = string.ascii_lowercase.find( + last_keyword_letter) + 1 + cipher_alphabet = cat( + deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + + string.ascii_lowercase)) + return cipher_alphabet + + +def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Enciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_encipher('test message', 'bayes') + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a) + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last) + 'lskl dskkbus' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest) + 'qspq jsppbcs' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) + return unaccent(message).lower().translate(cipher_translation) + +def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Deciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_decipher('rsqr ksqqbds', 'bayes') + 'test message' + >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a) + 'test message' + >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last) + 'test message' + >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest) + 'test message' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) + return message.lower().translate(cipher_translation) + + +def keyword_break(message, wordlist=keywords, fitness=Pletters): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis. + + >>> keyword_break(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', ), -52.834575011...) + """ + best_keyword = '' + best_wrap_alphabet = True + best_fit = float("-inf") + for wrap_alphabet in KeywordWrapAlphabet: + for keyword in wordlist: + plaintext = keyword_decipher(message, keyword, wrap_alphabet) + fit = fitness(plaintext) + logger.debug('Keyword break attempt using key {0} (wrap={1}) ' + 'gives fit of {2} and decrypt starting: {3}'.format( + keyword, wrap_alphabet, fit, + sanitise(plaintext)[:50])) + if fit > best_fit: + best_fit = fit + best_keyword = keyword + best_wrap_alphabet = wrap_alphabet + logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(best_keyword, + best_wrap_alphabet, best_fit, sanitise( + keyword_decipher(message, best_keyword, + best_wrap_alphabet))[:50])) + return (best_keyword, best_wrap_alphabet), best_fit + +def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, + number_of_solutions=1, chunksize=500): + """Breaks a keyword substitution cipher using a dictionary and + frequency analysis + + >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + (('elephant', ), -52.834575011...) + >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \ + 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \ + wordlist=['cat', 'elephant', 'kangaroo'], \ + number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + [(('elephant', ), -52.834575011...), + (('elephant', ), -52.834575011...)] + """ + with Pool() as pool: + helper_args = [(message, word, wrap, fitness) + for word in wordlist + for wrap in KeywordWrapAlphabet] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) + if number_of_solutions == 1: + return max(breaks, key=lambda k: k[1]) + else: + return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] + +def keyword_break_worker(message, keyword, wrap_alphabet, fitness): + plaintext = keyword_decipher(message, keyword, wrap_alphabet) + fit = fitness(plaintext) + logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of ' + '{2} and decrypt starting: {3}'.format(keyword, + wrap_alphabet, fit, sanitise(plaintext)[:50])) + return (keyword, wrap_alphabet), fit + + +def monoalphabetic_break_hillclimbing(message, + max_iterations=20000, + plain_alphabet=None, + cipher_alphabet=None, + fitness=Pletters, chunksize=1): + return simulated_annealing_break(message, + workers=1, + initial_temperature=0, + max_iterations=max_iterations, + plain_alphabet=plain_alphabet, + cipher_alphabet=cipher_alphabet, + fitness=fitness, chunksize=chunksize) + + +def monoalphabetic_break_hillclimbing_mp(message, + workers=10, + max_iterations=20000, + plain_alphabet=None, + cipher_alphabet=None, + fitness=Pletters, chunksize=1): + return simulated_annealing_break(message, + workers=workers, + initial_temperature=0, + max_iterations=max_iterations, + plain_alphabet=plain_alphabet, + cipher_alphabet=cipher_alphabet, + fitness=fitness, chunksize=chunksize) + + +def simulated_annealing_break(message, workers=10, + initial_temperature=200, + max_iterations=20000, + plain_alphabet=None, + cipher_alphabet=None, + fitness=Pletters, chunksize=1): + worker_args = [] + ciphertext = sanitise(message) + for i in range(workers): + if not plain_alphabet: + plain_alphabet = string.ascii_lowercase + if not cipher_alphabet: + cipher_alphabet = list(string.ascii_lowercase) + random.shuffle(cipher_alphabet) + cipher_alphabet = cat(cipher_alphabet) + worker_args.append((ciphertext, plain_alphabet, cipher_alphabet, + initial_temperature, max_iterations, fitness)) + with Pool() as pool: + breaks = pool.starmap(simulated_annealing_break_worker, + worker_args, chunksize) + return max(breaks, key=lambda k: k[1]) + + +def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet, + t0, max_iterations, fitness): + def swap(letters, i, j): + if i > j: + i, j = j, i + if i == j: + return letters + else: + return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] + + letters[j+1:]) + + temperature = t0 + + dt = t0 / (0.9 * max_iterations) + + current_alphabet = cipher_alphabet + alphabet = current_alphabet + cipher_translation = ''.maketrans(current_alphabet, plain_alphabet) + plaintext = message.translate(cipher_translation) + current_fitness = fitness(plaintext) + + best_alphabet = current_alphabet + best_fitness = current_fitness + best_plaintext = plaintext + + # print('starting for', max_iterations) + for i in range(max_iterations): + swap_a = random.randrange(26) + swap_b = (swap_a + int(random.gauss(0, 4))) % 26 + alphabet = swap(current_alphabet, swap_a, swap_b) + cipher_translation = ''.maketrans(alphabet, plain_alphabet) + plaintext = message.translate(cipher_translation) + new_fitness = fitness(plaintext) + try: + sa_chance = math.exp((new_fitness - current_fitness) / temperature) + except (OverflowError, ZeroDivisionError): + # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature)) + sa_chance = 0 + if (new_fitness > current_fitness or random.random() < sa_chance): + # logger.debug('Simulated annealing: iteration {}, temperature {}, ' + # 'current alphabet {}, current_fitness {}, ' + # 'best_plaintext {}'.format(i, temperature, current_alphabet, + # current_fitness, best_plaintext[:50])) + + # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance)) + current_fitness = new_fitness + current_alphabet = alphabet + + if current_fitness > best_fitness: + best_alphabet = current_alphabet + best_fitness = current_fitness + best_plaintext = plaintext + if i % 500 == 0: + logger.debug('Simulated annealing: iteration {}, temperature {}, ' + 'current alphabet {}, current_fitness {}, ' + 'best_plaintext {}'.format(i, temperature, current_alphabet, + current_fitness, plaintext[:50])) + temperature = max(temperature - dt, 0.001) + + return best_alphabet, best_fitness # current_alphabet, current_fitness diff --git a/language_models.py b/language_models.py index da5d2d0..a6a711f 100644 --- a/language_models.py +++ b/language_models.py @@ -7,51 +7,6 @@ import itertools from math import log10 import os -unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'}) - -def letters(text): - """Remove all non-alphabetic characters from a text - >>> letters('The Quick') - 'TheQuick' - >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG') - 'TheQuickBROWNfoxjumpedoverthelazyDOG' - """ - return ''.join([c for c in text if c in string.ascii_letters]) - -def unaccent(text): - """Remove all accents from letters. - It does this by converting the unicode string to decomposed compatability - form, dropping all the combining accents, then re-encoding the bytes. - - >>> unaccent('hello') - 'hello' - >>> unaccent('HELLO') - 'HELLO' - >>> unaccent('héllo') - 'hello' - >>> unaccent('héllö') - 'hello' - >>> unaccent('HÉLLÖ') - 'HELLO' - """ - translated_text = text.translate(unaccent_specials) - return unicodedata.normalize('NFKD', translated_text).\ - encode('ascii', 'ignore').\ - decode('utf-8') - -def sanitise(text): - """Remove all non-alphabetic characters and convert the text to lowercase - - >>> sanitise('The Quick') - 'thequick' - >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG') - 'thequickbrownfoxjumpedoverthelazydog' - >>> sanitise('HÉLLÖ') - 'hello' - """ - # sanitised = [c.lower() for c in text if c in string.ascii_letters] - # return ''.join(sanitised) - return letters(unaccent(text)).lower() def datafile(name, sep='\t'): diff --git a/logger.py b/logger.py new file mode 100644 index 0000000..bb2239d --- /dev/null +++ b/logger.py @@ -0,0 +1,17 @@ +import logging + +# logging.basicConfig(filename="cipher.log", level=logging.INFO) +# logger = logging.getLogger(__name__) + +logger = logging.getLogger('cipherbreak') +logger.setLevel(logging.WARNING) +# logger.setLevel(logging.INFO) +# logger.setLevel(logging.DEBUG) + +# create the logging file handler +fh = logging.FileHandler("cipher.log") +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +fh.setFormatter(formatter) + +# add handler to logger object +logger.addHandler(fh) diff --git a/polybius.py b/polybius.py new file mode 100644 index 0000000..48509b1 --- /dev/null +++ b/polybius.py @@ -0,0 +1,181 @@ +from utilities import * +from language_models import * +from multiprocessing import Pool + +from logger import logger + +def polybius_grid(keyword, column_order, row_order, letters_to_merge=None, + wrap_alphabet=KeywordWrapAlphabet.from_a): + """Grid for a Polybius cipher, using a keyword to rearrange the + alphabet. + + + >>> polybius_grid('a', 'abcde', 'abcde')['x'] == ('e', 'c') + True + >>> polybius_grid('elephant', 'abcde', 'abcde')['e'] == ('a', 'a') + True + >>> polybius_grid('elephant', 'abcde', 'abcde')['b'] == ('b', 'c') + True + """ + alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet) + if letters_to_merge is None: + letters_to_merge = {'j': 'i'} + grid = {l: k + for k, l in zip([(c, r) for c in column_order for r in row_order], + [l for l in alphabet if l not in letters_to_merge])} + for l in letters_to_merge: + grid[l] = grid[letters_to_merge[l]] + return grid + +def polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge=None, + wrap_alphabet=KeywordWrapAlphabet.from_a): + """Grid for decrypting using a Polybius cipher, using a keyword to + rearrange the alphabet. + + >>> polybius_reverse_grid('a', 'abcde', 'abcde')['e', 'c'] == 'x' + True + >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['a', 'a'] == 'e' + True + >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['b', 'c'] == 'b' + True + """ + alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet) + if letters_to_merge is None: + letters_to_merge = {'j': 'i'} + grid = {k: l + for k, l in zip([(c, r) for c in column_order for r in row_order], + [l for l in alphabet if l not in letters_to_merge])} + return grid + + +def polybius_flatten(pair, column_first): + """Convert a series of pairs into a single list of characters""" + if column_first: + return str(pair[1]) + str(pair[0]) + else: + return str(pair[0]) + str(pair[1]) + +def polybius_encipher(message, keyword, column_order, row_order, + column_first=False, + letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Encipher a message with Polybius cipher, using a keyword to rearrange + the alphabet + + + >>> polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', \ + [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], \ + wrap_alphabet=KeywordWrapAlphabet.from_last) + '2214445544551522115522511155551543114252542214111352123234442355411135441314115451112122' + >>> polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', 'abcde', 'abcde', \ + column_first=False) + 'bbadccddccddaebbaaddbbceaaddddaecbaacadadcbbadaaacdaabedbcccdeddbeaabdccacadaadcceaababb' + >>> polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', 'abcde', 'abcde', \ + column_first=True) + 'bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaacaadbadecbccedddebaadbcccadaaacdecaaabbb' + """ + grid = polybius_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet) + return cat(polybius_flatten(grid[l], column_first) + for l in message + if l in grid) + + +def polybius_decipher(message, keyword, column_order, row_order, + column_first=False, + letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Decipher a message with a Polybius cipher, using a keyword to rearrange + the alphabet + + >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\ + 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \ + column_first=False) + 'toisisvtestxessvbephktoefhnugiysweqifoekxelt' + + >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\ + 'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \ + column_first=True) + 'thisisatestmessageforthepolybiusdecipherment' + """ + grid = polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet) + column_index_type = type(column_order[0]) + row_index_type = type(row_order[0]) + if column_first: + pairs = [(column_index_type(p[1]), row_index_type(p[0])) for p in chunks(message, 2)] + else: + pairs = [(row_index_type(p[0]), column_index_type(p[1])) for p in chunks(message, 2)] + return cat(grid[p] for p in pairs if p in grid) + + +def polybius_break_mp(message, column_labels, row_labels, + letters_to_merge=None, + wordlist=keywords, fitness=Pletters, + number_of_solutions=1, chunksize=500): + """Breaks a Polybius substitution cipher using a dictionary and + frequency analysis + + >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', 'abcde', 'abcde'), \ + 'abcde', 'abcde', \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + (('elephant', , 'abcde', 'abcde', False), \ + -54.53880...) + >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \ + 'abcde', 'abcde', \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + (('elephant', , 'abcde', 'abcde', True), \ + -54.53880...) + >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \ + 'abcde', 'abcde', \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + (('elephant', , 'abcde', 'abcde', False), \ + -54.53880...) + >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \ + 'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \ + 'abcde', 'pqrst', \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + (('elephant', , 'abcde', 'pqrst', True), \ + -54.53880...) + """ + if letters_to_merge is None: + letters_to_merge = {'j': 'i'} + with Pool() as pool: + helper_args = [(message, word, wrap, + column_labels, row_labels, column_first, + letters_to_merge, + fitness) + for word in wordlist + for wrap in KeywordWrapAlphabet + for column_first in [False, True]] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(polybius_break_worker, helper_args, chunksize) + if number_of_solutions == 1: + return max(breaks, key=lambda k: k[1]) + else: + return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions] + +def polybius_break_worker(message, keyword, wrap_alphabet, + column_order, row_order, column_first, + letters_to_merge, + fitness): + plaintext = polybius_decipher(message, keyword, + column_order, row_order, + column_first=column_first, + letters_to_merge=letters_to_merge, + wrap_alphabet=wrap_alphabet) + if plaintext: + fit = fitness(plaintext) + else: + fit = float('-inf') + logger.debug('Polybius break attempt using key {0} (wrap={1}, merging {2}), ' + 'columns as {3}, rows as {4} (column_first={5}) ' + 'gives fit of {6} and decrypt starting: ' + '{7}'.format(keyword, wrap_alphabet, letters_to_merge, + column_order, row_order, column_first, + fit, sanitise(plaintext)[:50])) + return (keyword, wrap_alphabet, column_order, row_order, column_first), fit + diff --git a/railfence.py b/railfence.py new file mode 100644 index 0000000..78154aa --- /dev/null +++ b/railfence.py @@ -0,0 +1,178 @@ +from utilities import * +from language_models import * +from enum import Enum +from itertools import starmap +from itertools import zip_longest + +from logger import logger + +def railfence_encipher(message, height, fillvalue=''): + """Railfence cipher. + Works by splitting the text into sections, then reading across them to + generate the rows in the cipher. The rows are then combined to form the + ciphertext. + + Example: the plaintext "hellotherefriends", with a height of four, written + out in the railfence as + h h i + etere* + lorfns + l e d + (with the * showing the one character to finish the last section). + Each 'section' is two columns, but unfolded. In the example, the first + section is 'hellot'. + + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!') + 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!') + 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!') + 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!') + 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3) + 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5) + 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7) + 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic' + """ + sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue) + n_sections = len(sections) + # Add the top row + rows = [cat([s[0] for s in sections])] + # process the middle rows of the grid + for r in range(1, height-1): + rows += [cat([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])] + # process the bottom row + rows += [cat([s[height - 1:height] for s in sections])] + # rows += [wcat([s[height - 1] for s in sections])] + return cat(rows) + +def railfence_decipher(message, height, fillvalue=''): + """Railfence decipher. + Works by reconstructing the grid used to generate the ciphertext, then + unfolding the sections so the text can be concatenated together. + + Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first + work out that the second row has a character missing, find the rows of the + grid, then split the section into its two columns. + + 'hhieterelorfnsled' is split into + h h i + etere + lorfns + l e d + (spaces added for clarity), which is stored in 'rows'. This is then split + into 'down_rows' and 'up_rows': + + down_rows: + hhi + eee + lrn + led + + up_rows: + tr + ofs + + These are then zipped together (after the up_rows are reversed) to recover + the plaintext. + + Most of the procedure is about finding the correct lengths for each row then + splitting the ciphertext into those rows. + + >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3) + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5) + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7) + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + """ + # find the number and size of the sections, including how many characters + # are missing for a full grid + n_sections = math.ceil(len(message) / ((height - 1) * 2)) + padding_to_add = n_sections * (height - 1) * 2 - len(message) + # row_lengths are for the both up rows and down rows + row_lengths = [n_sections] * (height - 1) * 2 + for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1): + row_lengths[i] -= 1 + # folded_rows are the combined row lengths in the middle of the railfence + folded_row_lengths = [row_lengths[0]] + for i in range(1, height-1): + folded_row_lengths += [row_lengths[i] + row_lengths[-i]] + folded_row_lengths += [row_lengths[height - 1]] + # find the rows that form the railfence grid + rows = [] + row_start = 0 + for i in folded_row_lengths: + rows += [message[row_start:row_start + i]] + row_start += i + # split the rows into the 'down_rows' (those that form the first column of + # a section) and the 'up_rows' (those that ofrm the second column of a + # section). + down_rows = [rows[0]] + up_rows = [] + for i in range(1, height-1): + down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])] + up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])] + down_rows += [rows[-1]] + up_rows.reverse() + return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r) + + +def railfence_break(message, max_key_length=20, + fitness=Pletters, chunksize=500): + """Breaks a railfence cipher using a matrix of given rank and letter frequencies + + + """ + + sanitised_message = sanitise(message) + results = starmap(worker, [(sanitised_message, i, fitness) + for i in range(2, max_key_length+1)]) + return max(results, key=lambda k: k[1]) + + +def railfence_break(message, max_key_length=20, + fitness=Pbigrams, chunksize=500): + """Breaks a railfence cipher using a range of lengths and + n-gram frequency analysis + + >>> railfence_break(railfence_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 7)) # doctest: +ELLIPSIS + (7, -709.46467226...) + >>> railfence_break(railfence_encipher(sanitise( \ + "It is a truth universally acknowledged, that a single man in \ + possession of a good fortune, must be in want of a wife. However \ + little known the feelings or views of such a man may be on his \ + first entering a neighbourhood, this truth is so well fixed in \ + the minds of the surrounding families, that he is considered the \ + rightful property of some one or other of their daughters."), \ + 7), \ + fitness=Ptrigrams) # doctest: +ELLIPSIS + (7, -997.0129085...) + """ + def worker(message, height, fitness): + plaintext = railfence_decipher(message, height) + fit = fitness(plaintext) + return height, fit + + sanitised_message = sanitise(message) + results = starmap(worker, [(sanitised_message, i, fitness) + for i in range(2, max_key_length+1)]) + return max(results, key=lambda k: k[1]) diff --git a/utilities.py b/utilities.py new file mode 100644 index 0000000..d1961a8 --- /dev/null +++ b/utilities.py @@ -0,0 +1,194 @@ +import string +import collections + +# join a a list of letters into a string +cat = ''.join + +# join a list of words into a string, separated by spaces +wcat = ' '.join + +# join a list of lines, separated by newline +lcat = '\n'.join + +def pos(letter): + """Return the position of a letter in the alphabet (0-25)""" + if letter in string.ascii_lowercase: + return ord(letter) - ord('a') + elif letter in string.ascii_uppercase: + return ord(letter) - ord('A') + else: + return 0 + +def unpos(number): + """Return the letter in the given position in the alphabet (mod 26)""" + return chr(number % 26 + ord('a')) + +def every_nth(text, n, fillvalue=''): + """Returns n strings, each of which consists of every nth character, + starting with the 0th, 1st, 2nd, ... (n-1)th character + + >>> every_nth(string.ascii_lowercase, 5) + ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] + >>> every_nth(string.ascii_lowercase, 1) + ['abcdefghijklmnopqrstuvwxyz'] + >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] + >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') + ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] + """ + split_text = chunks(text, n, fillvalue) + return [cat(l) for l in zip_longest(*split_text, fillvalue=fillvalue)] + +def combine_every_nth(split_text): + """Reforms a text split into every_nth strings + + >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) + 'abcdefghijklmnopqrstuvwxyz' + """ + return cat([cat(l) + for l in zip_longest(*split_text, fillvalue='')]) + +def chunks(text, n, fillvalue=None): + """Split a text into chunks of n characters + + >>> chunks('abcdefghi', 3) + ['abc', 'def', 'ghi'] + >>> chunks('abcdefghi', 4) + ['abcd', 'efgh', 'i'] + >>> chunks('abcdefghi', 4, fillvalue='!') + ['abcd', 'efgh', 'i!!!'] + """ + if fillvalue: + padding = fillvalue[0] * (n - len(text) % n) + else: + padding = '' + return [(text+padding)[i:i+n] for i in range(0, len(text), n)] + +def transpose(items, transposition): + """Moves items around according to the given transposition + + >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3)) + ['a', 'b', 'c', 'd'] + >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0)) + ['d', 'b', 'c', 'a'] + >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0)) + [13, 12, 14, 11, 15, 10] + """ + transposed = [''] * len(transposition) + for p, t in enumerate(transposition): + transposed[p] = items[t] + return transposed + +def untranspose(items, transposition): + """Undoes a transpose + + >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) + ['a', 'b', 'c', 'd'] + >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) + ['a', 'b', 'c', 'd'] + >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0]) + [10, 11, 12, 13, 14, 15] + """ + transposed = [''] * len(transposition) + for p, t in enumerate(transposition): + transposed[t] = items[p] + return transposed + +def deduplicate(text): + return list(collections.OrderedDict.fromkeys(text)) + + +def letters(text): + """Remove all non-alphabetic characters from a text + >>> letters('The Quick') + 'TheQuick' + >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG') + 'TheQuickBROWNfoxjumpedoverthelazyDOG' + """ + return ''.join([c for c in text if c in string.ascii_letters]) + +# Special characters for conversion, such as smart quotes. +unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'}) + +def unaccent(text): + """Remove all accents from letters. + It does this by converting the unicode string to decomposed compatability + form, dropping all the combining accents, then re-encoding the bytes. + + >>> unaccent('hello') + 'hello' + >>> unaccent('HELLO') + 'HELLO' + >>> unaccent('héllo') + 'hello' + >>> unaccent('héllö') + 'hello' + >>> unaccent('HÉLLÖ') + 'HELLO' + """ + translated_text = text.translate(unaccent_specials) + return unicodedata.normalize('NFKD', translated_text).\ + encode('ascii', 'ignore').\ + decode('utf-8') + +def sanitise(text): + """Remove all non-alphabetic characters and convert the text to lowercase + + >>> sanitise('The Quick') + 'thequick' + >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG') + 'thequickbrownfoxjumpedoverthelazydog' + >>> sanitise('HÉLLÖ') + 'hello' + """ + return letters(unaccent(text)).lower() + + +def index_of_coincidence(text): + stext = sanitise(text) + counts = collections.Counter(stext) + denom = len(stext) * (len(text) - 1) / 26 + return ( + sum(max(counts[l] * counts[l] - 1, 0) for l in string.ascii_lowercase) + / + denom + ) + + +transpositions = collections.defaultdict(list) +for word in keywords: + transpositions[transpositions_of(word)] += [word] + +def frequencies(text): + """Count the number of occurrences of each character in text + + >>> sorted(frequencies('abcdefabc').items()) + [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)] + >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \ + 'dog').items()) # doctest: +NORMALIZE_WHITESPACE + [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), + ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), + ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), + ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \ + '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE + [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), + ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), + ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), + ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), + ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\ + 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE + [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), + ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), + ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), + ('w', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> frequencies('abcdefabcdef')['x'] + 0 + """ + return collections.Counter(c for c in text) diff --git a/vigenere.py b/vigenere.py new file mode 100644 index 0000000..f0181dd --- /dev/null +++ b/vigenere.py @@ -0,0 +1,170 @@ +from utilities import * +from language_models import * +from enum import Enum +from itertools import starmap +from multiprocessing import Pool + +from logger import logger + +def vigenere_encipher(message, keyword): + """Vigenere encipher + + >>> vigenere_encipher('hello', 'abc') + 'hfnlp' + """ + shifts = [pos(l) for l in sanitise(keyword)] + pairs = zip(message, cycle(shifts)) + return cat([caesar_encipher_letter(l, k) for l, k in pairs]) + +def vigenere_decipher(message, keyword): + """Vigenere decipher + + >>> vigenere_decipher('hfnlp', 'abc') + 'hello' + """ + shifts = [pos(l) for l in sanitise(keyword)] + pairs = zip(message, cycle(shifts)) + return cat([caesar_decipher_letter(l, k) for l, k in pairs]) + + +def beaufort_encipher(message, keyword): + """Beaufort encipher + + >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii') + 'sevsvrusyrrxfayyxuteemazudmpjmmwr' + """ + shifts = [pos(l) for l in sanitise(keyword)] + pairs = zip(message, cycle(shifts)) + return cat([unpos(k - pos(l)) for l, k in pairs]) + +beaufort_decipher = beaufort_encipher + +beaufort_variant_encipher=vigenere_decipher +beaufort_variant_decipher=vigenere_encipher + + +def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters, + chunksize=500): + """Breaks a vigenere cipher using a dictionary and frequency analysis. + + >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \ + 'message for the vigenere decipherment'), 'cat'), \ + wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS + ('cat', -52.9472712...) + """ + with Pool() as pool: + helper_args = [(message, word, fitness) + for word in wordlist] + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(vigenere_keyword_break_worker, helper_args, + chunksize) + return max(breaks, key=lambda k: k[1]) +vigenere_keyword_break = vigenere_keyword_break_mp + +def vigenere_keyword_break_worker(message, keyword, fitness): + plaintext = vigenere_decipher(message, keyword) + fit = fitness(plaintext) + logger.debug('Vigenere keyword break attempt using key {0} gives fit of ' + '{1} and decrypt starting: {2}'.format(keyword, + fit, sanitise(plaintext)[:50])) + return keyword, fit + + +def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters): + """Breaks a Vigenere cipher with frequency analysis + + >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \ + "run. She is ready and so am I. I stole Daniel's pocketbook this " \ + "afternoon when he left his jacket hanging on the easel in the " \ + "attic. I jump every time I hear a footstep on the stairs, " \ + "certain that the theft has been discovered and that I will " \ + "be caught. The SS officer visits less often now that he is " \ + "sure"), 'florence')) # doctest: +ELLIPSIS + ('florence', -307.5473096...) + """ + def worker(message, key_length, fitness): + splits = every_nth(sanitised_message, key_length) + key = cat([unpos(caesar_break(s)[0]) for s in splits]) + plaintext = vigenere_decipher(message, key) + fit = fitness(plaintext) + return key, fit + sanitised_message = sanitise(message) + results = starmap(worker, [(sanitised_message, i, fitness) + for i in range(1, max_key_length+1)]) + return max(results, key=lambda k: k[1]) + + +def beaufort_sub_break(message, fitness=Pletters): + """Breaks one chunk of a Beaufort cipher with frequency analysis + + >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \ + 'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS + (0, -117.4492...) + >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \ + 'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS + (17, -114.9598...) + """ + best_shift = 0 + best_fit = float('-inf') + for key in range(26): + plaintext = [unpos(key - pos(l)) for l in message] + fit = fitness(plaintext) + logger.debug('Beaufort sub break attempt using key {0} gives fit of {1} ' + 'and decrypt starting: {2}'.format(key, fit, + plaintext[:50])) + if fit > best_fit: + best_fit = fit + best_key = key + logger.info('Beaufort sub break best fit: key {0} gives fit of {1} and ' + 'decrypt starting: {2}'.format(best_key, best_fit, + cat([unpos(best_key - pos(l)) for l in message[:50]]))) + return best_key, best_fit + + +def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters): + """Breaks a Beaufort cipher with frequency analysis + + >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \ + "run. She is ready and so am I. I stole Daniel's pocketbook this " \ + "afternoon when he left his jacket hanging on the easel in the " \ + "attic. I jump every time I hear a footstep on the stairs, " \ + "certain that the theft has been discovered and that I will " \ + "be caught. The SS officer visits less often now " \ + "that he is sure"), 'florence')) # doctest: +ELLIPSIS + ('florence', -307.5473096791...) + """ + def worker(message, key_length, fitness): + splits = every_nth(message, key_length) + key = cat([unpos(beaufort_sub_break(s)[0]) for s in splits]) + plaintext = beaufort_decipher(message, key) + fit = fitness(plaintext) + return key, fit + sanitised_message = sanitise(message) + results = starmap(worker, [(sanitised_message, i, fitness) + for i in range(1, max_key_length+1)]) + return max(results, key=lambda k: k[1]) + + +def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters): + """Breaks a Beaufort cipher with frequency analysis + + >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \ + "run. She is ready and so am I. I stole Daniel's pocketbook this " \ + "afternoon when he left his jacket hanging on the easel in the " \ + "attic. I jump every time I hear a footstep on the stairs, " \ + "certain that the theft has been discovered and that I will " \ + "be caught. The SS officer visits less often now " \ + "that he is sure"), 'florence')) # doctest: +ELLIPSIS + ('florence', -307.5473096791...) + """ + def worker(message, key_length, fitness): + splits = every_nth(sanitised_message, key_length) + key = cat([unpos(-caesar_break(s)[0]) for s in splits]) + plaintext = beaufort_variant_decipher(message, key) + fit = fitness(plaintext) + return key, fit + sanitised_message = sanitise(message) + results = starmap(worker, [(sanitised_message, i, fitness) + for i in range(1, max_key_length+1)]) + return max(results, key=lambda k: k[1])