import matplotlib.pyplot as plt
-logging.basicConfig(filename="cipher.log", level=logging.INFO)
-logger = logging.getLogger(__name__)
-# logger.setLevel(logging.WARNING)
+# logging.basicConfig(filename="cipher.log", level=logging.INFO)
+# logger = logging.getLogger(__name__)
+
# Build the file handler first: records go to cipher.log, each one carrying
# a timestamp, the logger name and the severity ahead of the message.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler("cipher.log")
fh.setFormatter(formatter)

# Named logger used by the functions in this file; only WARNING and above
# are emitted unless one of the alternative levels below is enabled.
logger = logging.getLogger('cipherbreak')
logger.setLevel(logging.WARNING)
# logger.setLevel(logging.INFO)
# logger.setLevel(logging.DEBUG)
logger.addHandler(fh)
+
+
from cipher import *
from language_models import *
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
+
def index_of_coincidence(text):
    """Return the normalised index of coincidence of the letters in `text`.

    The text is passed through `sanitise` first, and only the characters in
    `string.ascii_lowercase` are counted. With n_l occurrences of letter l
    and N characters in the sanitised text, the value returned is

        sum(n_l * (n_l - 1)) / (N * (N - 1) / 26)

    that is, the chance that two letters drawn without replacement are
    equal, scaled by the 26-letter alphabet size so that evenly distributed
    letters score about 1.0.

    A text with fewer than two sanitised characters has no pairs to compare,
    so it scores 0.0 rather than dividing by zero.
    """
    stext = sanitise(text)
    length = len(stext)
    if length < 2:
        return 0.0
    counts = collections.Counter(stext)
    # n * (n - 1) is already zero for letters seen once or not at all, so no
    # clamping is needed.
    matching_pairs = sum(counts[l] * (counts[l] - 1)
                         for l in string.ascii_lowercase)
    # Both factors use the sanitised length: spaces and punctuation in the
    # raw text must not inflate the number of possible pairs.
    return matching_pairs / (length * (length - 1) / 26)
+
+
# Group the keywords by the value transpositions_of() gives for each one, so
# that words yielding the same transposition share a single entry.
transpositions = collections.defaultdict(list)
for word in keywords:
    transpositions[transpositions_of(word)].append(word)
wrap_alphabet, fit, sanitise(plaintext)[:50]))
return (keyword, wrap_alphabet), fit
-def monoalphabetic_break_hillclimbing(message, max_iterations=10000000,
- alphabet=None, fitness=Pletters):
- ciphertext = unaccent(message).lower()
- if not alphabet:
- alphabet = list(string.ascii_lowercase)
- random.shuffle(alphabet)
- alphabet = cat(alphabet)
- return monoalphabetic_break_hillclimbing_worker(ciphertext, alphabet,
- max_iterations, fitness)
-
-def monoalphabetic_break_hillclimbing_mp(message, workers=10,
- max_iterations = 10000000, alphabet=None, fitness=Pletters, chunksize=1):
+# def monoalphabetic_break_hillclimbing(message, max_iterations=10000000,
+# alphabet=None, fitness=Pletters):
+# ciphertext = unaccent(message).lower()
+# if not alphabet:
+# alphabet = list(string.ascii_lowercase)
+# random.shuffle(alphabet)
+# alphabet = cat(alphabet)
+# return monoalphabetic_break_hillclimbing_worker(ciphertext, alphabet,
+# max_iterations, fitness)
+
+# def monoalphabetic_break_hillclimbing_mp(message, workers=10,
+# max_iterations = 10000000, alphabet=None, fitness=Pletters, chunksize=1):
+# worker_args = []
+# ciphertext = unaccent(message).lower()
+# for i in range(workers):
+# if alphabet:
+# this_alphabet = alphabet
+# else:
+# this_alphabet = list(string.ascii_lowercase)
+# random.shuffle(this_alphabet)
+# this_alphabet = cat(this_alphabet)
+# worker_args.append((ciphertext, this_alphabet, max_iterations, fitness))
+# with Pool() as pool:
+# breaks = pool.starmap(monoalphabetic_break_hillclimbing_worker,
+# worker_args, chunksize)
+# return max(breaks, key=lambda k: k[1])
+
+# def monoalphabetic_break_hillclimbing_worker(message, alphabet,
+# max_iterations, fitness):
+# def swap(letters, i, j):
+# if i > j:
+# i, j = j, i
+# if i == j:
+# return letters
+# else:
+# return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
+# letters[j+1:])
+# best_alphabet = alphabet
+# best_fitness = float('-inf')
+# for i in range(max_iterations):
+# alphabet = swap(best_alphabet, random.randrange(26), random.randrange(26))
+# cipher_translation = ''.maketrans(string.ascii_lowercase, alphabet)
+# plaintext = message.translate(cipher_translation)
+# if fitness(plaintext) > best_fitness:
+# best_fitness = fitness(plaintext)
+# best_alphabet = alphabet
+# print(i, best_alphabet, best_fitness, plaintext[:50])
+# return best_alphabet, best_fitness
+
+
def monoalphabetic_break_hillclimbing(message,
                                      max_iterations=20000,
                                      plain_alphabet=None,
                                      cipher_alphabet=None,
                                      fitness=Pletters, chunksize=1):
    """Break a monoalphabetic substitution cipher by hill climbing

    Thin wrapper around `simulated_annealing_break`: the search is run with
    a single worker and an initial temperature of 0. Every other argument is
    passed through unchanged, and the result of `simulated_annealing_break`
    is returned as is.
    """
    search_settings = dict(
        workers=1,
        initial_temperature=0,
        max_iterations=max_iterations,
        plain_alphabet=plain_alphabet,
        cipher_alphabet=cipher_alphabet,
        fitness=fitness,
        chunksize=chunksize,
    )
    return simulated_annealing_break(message, **search_settings)
+
+
def monoalphabetic_break_hillclimbing_mp(message,
                                         workers=10,
                                         max_iterations=20000,
                                         plain_alphabet=None,
                                         cipher_alphabet=None,
                                         fitness=Pletters, chunksize=1):
    """Break a monoalphabetic substitution cipher by hill climbing, using
    several workers

    Thin wrapper around `simulated_annealing_break`: `workers` searches are
    requested, each with an initial temperature of 0. Every other argument
    is passed through unchanged, and the result of
    `simulated_annealing_break` is returned as is.
    """
    search_settings = dict(
        workers=workers,
        initial_temperature=0,
        max_iterations=max_iterations,
        plain_alphabet=plain_alphabet,
        cipher_alphabet=cipher_alphabet,
        fitness=fitness,
        chunksize=chunksize,
    )
    return simulated_annealing_break(message, **search_settings)
+
+
def simulated_annealing_break(message, workers=10,
                              initial_temperature=200,
                              max_iterations=20000,
                              plain_alphabet=None,
                              cipher_alphabet=None,
                              fitness=Pletters, chunksize=1):
    """Break a monoalphabetic substitution cipher by simulated annealing

    `workers` independent searches are run through a multiprocessing `Pool`
    and the (alphabet, fitness) result with the highest fitness is returned.

    message: the ciphertext; it is passed through `sanitise` before use.
    workers: how many independent searches to start.
    initial_temperature, max_iterations, fitness: handed unchanged to every
        `simulated_annealing_break_worker` call.
    plain_alphabet: defaults to `string.ascii_lowercase` when not given.
    cipher_alphabet: starting cipher alphabet shared by every worker. When
        not given, each worker gets its own random shuffle of the lowercase
        letters.
    chunksize: passed to `Pool.starmap`.
    """
    ciphertext = sanitise(message)
    if not plain_alphabet:
        plain_alphabet = string.ascii_lowercase

    worker_args = []
    for _ in range(workers):
        if cipher_alphabet:
            start_alphabet = cipher_alphabet
        else:
            # Shuffle afresh for every worker and keep the result in a
            # per-worker name: assigning it back to `cipher_alphabet` would
            # make all later workers reuse the first shuffle.
            shuffled = list(string.ascii_lowercase)
            random.shuffle(shuffled)
            start_alphabet = cat(shuffled)
        worker_args.append((ciphertext, plain_alphabet, start_alphabet,
                            initial_temperature, max_iterations, fitness))
    with Pool() as pool:
        breaks = pool.starmap(simulated_annealing_break_worker,
                              worker_args, chunksize)
    return max(breaks, key=lambda k: k[1])
-def monoalphabetic_break_hillclimbing_worker(message, alphabet,
- max_iterations, fitness):
+
+def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet,
+ t0, max_iterations, fitness):
def swap(letters, i, j):
if i > j:
i, j = j, i
else:
return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
letters[j+1:])
- best_alphabet = alphabet
- best_fitness = float('-inf')
+
+ temperature = t0
+
+ dt = t0 / (0.9 * max_iterations)
+
+ current_alphabet = cipher_alphabet
+ alphabet = current_alphabet
+ cipher_translation = ''.maketrans(current_alphabet, plain_alphabet)
+ plaintext = message.translate(cipher_translation)
+ current_fitness = fitness(plaintext)
+
+ best_alphabet = current_alphabet
+ best_fitness = current_fitness
+ best_plaintext = plaintext
+
+ # print('starting for', max_iterations)
for i in range(max_iterations):
- alphabet = swap(alphabet, random.randrange(26), random.randrange(26))
- cipher_translation = ''.maketrans(string.ascii_lowercase, alphabet)
+ swap_a = random.randrange(26)
+ swap_b = (swap_a + int(random.gauss(0, 4))) % 26
+ alphabet = swap(current_alphabet, swap_a, swap_b)
+ cipher_translation = ''.maketrans(alphabet, plain_alphabet)
plaintext = message.translate(cipher_translation)
- if fitness(plaintext) > best_fitness:
- best_fitness = fitness(plaintext)
- best_alphabet = alphabet
- print(i, best_alphabet, best_fitness, plaintext)
- return best_alphabet, best_fitness
+ new_fitness = fitness(plaintext)
+ try:
+ sa_chance = math.exp((new_fitness - current_fitness) / temperature)
+ except (OverflowError, ZeroDivisionError):
+ # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
+ sa_chance = 0
+ if (new_fitness > current_fitness or random.random() < sa_chance):
+ # logger.debug('Simulated annealing: iteration {}, temperature {}, '
+ # 'current alphabet {}, current_fitness {}, '
+ # 'best_plaintext {}'.format(i, temperature, current_alphabet,
+ # current_fitness, best_plaintext[:50]))
+
+ # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
+ current_fitness = new_fitness
+ current_alphabet = alphabet
+
+ if current_fitness > best_fitness:
+ best_alphabet = current_alphabet
+ best_fitness = current_fitness
+ best_plaintext = plaintext
+ if i % 500 == 0:
+ logger.debug('Simulated annealing: iteration {}, temperature {}, '
+ 'current alphabet {}, current_fitness {}, '
+ 'best_plaintext {}'.format(i, temperature, current_alphabet,
+ current_fitness, plaintext[:50]))
+ temperature = max(temperature - dt, 0.001)
+
+ return best_alphabet, best_fitness # current_alphabet, current_fitness
def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
>>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
'message for the vigenere decipherment'), 'cat'), \
wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
- ('cat', -52.947271216...)
+ ('cat', -52.9472712...)
"""
with Pool() as pool:
helper_args = [(message, word, fitness)
"certain that the theft has been discovered and that I will " \
"be caught. The SS officer visits less often now that he is " \
"sure"), 'florence')) # doctest: +ELLIPSIS
- ('florence', -307.5473096791...)
+ ('florence', -307.5473096...)
"""
def worker(message, key_length, fitness):
splits = every_nth(sanitised_message, key_length)
- key = cat([chr(caesar_break(s)[0] + ord('a')) for s in splits])
+ key = cat([unpos(caesar_break(s)[0]) for s in splits])
plaintext = vigenere_decipher(message, key)
fit = fitness(plaintext)
return key, fit
return max(results, key=lambda k: k[1])
def beaufort_sub_break(message, fitness=Pletters):
    """Breaks one chunk of a Beaufort cipher with frequency analysis

    Each key value from 0 to 25 is tried in turn: every letter `l` of the
    message is deciphered as `unpos(key - pos(l))` and the resulting text is
    scored with `fitness`. Returns the pair (key, fit) for the best-scoring
    key, with the key given as a number; the first key wins a tie.

    >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
      'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
    (0, -117.4492...)
    >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
      'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
    (17, -114.9598...)
    """
    # Start from key 0 so that a result is always defined, even if no
    # candidate ever scores above -inf.
    best_key = 0
    best_fit = float('-inf')
    for key in range(26):
        # Join the letters into a string so that the fitness function and
        # the log line both receive text rather than a list of characters.
        plaintext = cat([unpos(key - pos(l)) for l in message])
        fit = fitness(plaintext)
        logger.debug('Beaufort sub break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(key, fit,
                                                        plaintext[:50]))
        if fit > best_fit:
            best_fit = fit
            best_key = key
    logger.info('Beaufort sub break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    cat([unpos(best_key - pos(l)) for l in message[:50]])))
    return best_key, best_fit
+
+
def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
"""Breaks a Beaufort cipher with frequency analysis
('florence', -307.5473096791...)
"""
def worker(message, key_length, fitness):
- splits = every_nth(sanitised_message, key_length)
- key = cat([chr(-caesar_break(s)[0] % 26 + ord('a'))
- for s in splits])
+ splits = every_nth(message, key_length)
+ key = cat([unpos(beaufort_sub_break(s)[0]) for s in splits])
plaintext = beaufort_decipher(message, key)
fit = fitness(plaintext)
return key, fit
sanitised_message = sanitise(message)
+ results = starmap(worker, [(sanitised_message, i, fitness)
+ for i in range(1, max_key_length+1)])
+ return max(results, key=lambda k: k[1])
+
+
def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters):
    """Breaks a Beaufort variant cipher with frequency analysis

    The message is sanitised, then every key length from 1 to
    `max_key_length` is tried. For each length the pieces returned by
    `every_nth` are broken separately with `caesar_break`; each shift found
    is negated and turned into a key letter with `unpos`. The whole text is
    then deciphered with that key and scored with `fitness`. Returns the
    (key, fit) pair with the highest fit.

    >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
            "afternoon when he left his jacket hanging on the easel in the " \
            "attic. I jump every time I hear a footstep on the stairs, " \
            "certain that the theft has been discovered and that I will " \
            "be caught. The SS officer visits less often now " \
            "that he is sure"), 'florence')) # doctest: +ELLIPSIS
    ('florence', -307.5473096...)
    """
    def worker(message, key_length, fitness):
        # Work from the text this call was given rather than reaching out to
        # the enclosing scope, so the helper only depends on its arguments.
        splits = every_nth(message, key_length)
        key = cat([unpos(-caesar_break(s)[0]) for s in splits])
        plaintext = beaufort_variant_decipher(message, key)
        fit = fitness(plaintext)
        return key, fit
    sanitised_message = sanitise(message)
    results = starmap(worker, [(sanitised_message, i, fitness)
                               for i in range(1, max_key_length+1)])
    return max(results, key=lambda k: k[1])
def polybius_break_mp(message, column_labels, row_labels,
                      letters_to_merge=None,
                      wordlist=keywords, fitness=Pletters,
                      number_of_solutions=1, chunksize=500):
    """Dictionary attack on a Polybius substitution cipher, scored by
    frequency analysis and run across a process pool

    Every combination of a word from `wordlist`, a `KeywordWrapAlphabet`
    member and a column_first setting (False, then True) is scored by
    `polybius_break_worker`. With `number_of_solutions` equal to 1 the single
    best (key, fit) pair is returned; otherwise a list of the
    `number_of_solutions` best pairs, best first. `letters_to_merge`
    defaults to {'j': 'i'}.

    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
          'polybius decipherment', 'elephant', 'abcde', 'abcde'), \
          'abcde', 'abcde', \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
    -54.53880...)
    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
          'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \
          'abcde', 'abcde', \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', True), \
    -54.53880...)
    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
          'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \
          'abcde', 'abcde', \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
    -54.53880...)
    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
          'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \
          'abcde', 'pqrst', \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'pqrst', True), \
    -54.53880...)
    """
    if letters_to_merge is None:
        letters_to_merge = {'j': 'i'}

    def fit_of(scored_key):
        return scored_key[1]

    with Pool() as pool:
        candidates = []
        for word in wordlist:
            for wrap in KeywordWrapAlphabet:
                for column_first in (False, True):
                    candidates.append((message, word, wrap,
                                       column_labels, row_labels, column_first,
                                       letters_to_merge,
                                       fitness))
        # The worker is a module-level function because Pool.starmap cannot
        # dispatch to a function defined inside this one.
        scored = pool.starmap(polybius_break_worker, candidates, chunksize)
        if number_of_solutions != 1:
            ranked = sorted(scored, key=fit_of, reverse=True)
            return ranked[:number_of_solutions]
        return max(scored, key=fit_of)
+
def polybius_break_worker(message, keyword, wrap_alphabet,
                          column_order, row_order, column_first,
                          letters_to_merge,
                          fitness):
    """Score one candidate Polybius key

    Deciphers `message` with `polybius_decipher` using the given keyword,
    wrap setting, column/row labels and column_first flag, then scores the
    result with `fitness`. A falsy decipherment (nothing usable came back)
    is given a fit of -inf so that it can never be chosen as the best key.

    Returns ((keyword, wrap_alphabet, column_order, row_order,
    column_first), fit).
    """
    plaintext = polybius_decipher(message, keyword,
                                  column_order, row_order,
                                  column_first=column_first,
                                  letters_to_merge=letters_to_merge,
                                  wrap_alphabet=wrap_alphabet)
    if plaintext:
        fit = fitness(plaintext)
        decrypt_start = sanitise(plaintext)[:50]
    else:
        fit = float('-inf')
        # Nothing to preview; in particular do not hand a non-string result
        # to sanitise() just to build the log line.
        decrypt_start = ''
    logger.debug('Polybius break attempt using key {0} (wrap={1}, merging {2}), '
                 'columns as {3}, rows as {4} (column_first={5}) '
                 'gives fit of {6} and decrypt starting: '
                 '{7}'.format(keyword, wrap_alphabet, letters_to_merge,
                              column_order, row_order, column_first,
                              fit, decrypt_start))
    return (keyword, wrap_alphabet, column_order, row_order, column_first), fit
+
def column_transposition_break_mp(message, translist=transpositions,
fitness=Pbigrams, chunksize=500):
fit, sanitise(plaintext)[:50]))
return matrix, fit
def bifid_break_mp(message, wordlist=keywords, fitness=Pletters, max_period=10,
                   number_of_solutions=1, chunksize=500):
    """Dictionary attack on a Bifid cipher, scored by frequency analysis and
    run across a process pool

    Every combination of a word from `wordlist`, a `KeywordWrapAlphabet`
    member and a period from 0 to `max_period` inclusive is scored by
    `bifid_break_worker`. With `number_of_solutions` equal to 1 the single
    best ((keyword, wrap, period), fit) pair is returned; otherwise a list
    of the `number_of_solutions` best pairs, best first.

    >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...)
    >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
          wordlist=['cat', 'elephant', 'kangaroo'], \
          number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    [(('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...),
    (('elephant', <KeywordWrapAlphabet.from_largest: 3>, 0), -52.834575011...)]
    """
    def fit_of(scored_key):
        return scored_key[1]

    with Pool() as pool:
        candidates = []
        for word in wordlist:
            for wrap in KeywordWrapAlphabet:
                for period in range(max_period+1):
                    candidates.append((message, word, wrap, period, fitness))
        # The worker is a module-level function because Pool.starmap cannot
        # dispatch to a function defined inside this one.
        scored = pool.starmap(bifid_break_worker, candidates, chunksize)
        if number_of_solutions != 1:
            ranked = sorted(scored, key=fit_of, reverse=True)
            return ranked[:number_of_solutions]
        return max(scored, key=fit_of)
+
def bifid_break_worker(message, keyword, wrap_alphabet, period, fitness):
    """Score one candidate Bifid key

    Deciphers `message` with `bifid_decipher` using the given keyword, wrap
    setting and period, then scores the result with `fitness`.

    Returns ((keyword, wrap_alphabet, period), fit).
    """
    plaintext = bifid_decipher(message, keyword, wrap_alphabet, period=period)
    fit = fitness(plaintext)
    # Name the cipher and include the period, so these records can be told
    # apart from the keyword breaker's and identify the whole candidate.
    logger.debug('Bifid break attempt using key {0} (wrap={1}, period={2}) '
                 'gives fit of {3} and decrypt starting: {4}'.format(
                     keyword, wrap_alphabet, period, fit,
                     sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet, period), fit
+
+
def autokey_sa_break(message,
                     min_keylength=2,
                     max_keylength=20,
                     workers=10,
                     initial_temperature=200,
                     max_iterations=20000,
                     fitness=Pletters,
                     chunksize=1,
                     result_count=1):
    """Break an autokey cipher by simulated annealing

    For every key length from `min_keylength` to `max_keylength` inclusive,
    `workers` searches are started, each from its own random key of
    lowercase letters. The searches are run by `autokey_sa_break_worker`
    through a multiprocessing `Pool`.

    With `result_count` of 1 or less the single best (key, fitness) pair is
    returned; otherwise a list of up to `result_count` distinct pairs, best
    first.
    """
    def random_key(length):
        return cat(random.choice(string.ascii_lowercase) for _ in range(length))

    ciphertext = sanitise(message)
    worker_args = [
        (ciphertext, random_key(keylength),
         initial_temperature, max_iterations, fitness)
        for keylength in range(min_keylength, max_keylength+1)
        for _ in range(workers)
    ]

    with Pool() as pool:
        breaks = pool.starmap(autokey_sa_break_worker,
                              worker_args, chunksize)

    if result_count > 1:
        # set() drops searches that ended on the same key with the same fit.
        distinct = sorted(set(breaks), key=lambda k: k[1], reverse=True)
        return distinct[:result_count]
    return max(breaks, key=lambda k: k[1])
+
+
def autokey_sa_break_worker(message, key,
                            t0, max_iterations, fitness):
    """Run one simulated-annealing search for an autokey cipher key

    Starting from `key`, each iteration replaces one randomly chosen
    position of the current key with a random lowercase letter, deciphers
    `message` with `autokey_decipher` and scores the result with `fitness`.
    A better-scoring key is always accepted; a worse one is accepted with
    probability exp((new_fitness - current_fitness) / temperature).

    message: the ciphertext to decipher on every iteration.
    key: the starting key.
    t0: the starting temperature.
    max_iterations: number of candidate keys to try. With 0 (or fewer) no
        search is done and the starting key is returned with its fitness.
    fitness: scoring function applied to each candidate plaintext.

    Returns (best_key, best_fitness) over everything seen in the search.
    """
    temperature = t0

    # Linear cooling that would reach zero after 90% of the iterations; the
    # temperature is floored at 0.001 at the end of every iteration. With
    # nothing to iterate over there is no cooling step to compute.
    dt = t0 / (0.9 * max_iterations) if max_iterations > 0 else 0

    current_key = key
    current_fitness = fitness(autokey_decipher(message, key))

    best_key = current_key
    best_fitness = current_fitness

    for i in range(max_iterations):
        swap_pos = random.randrange(len(current_key))
        swap_char = random.choice(string.ascii_lowercase)

        new_key = current_key[:swap_pos] + swap_char + current_key[swap_pos+1:]

        plaintext = autokey_decipher(message, new_key)
        new_fitness = fitness(plaintext)
        try:
            sa_chance = math.exp((new_fitness - current_fitness) / temperature)
        except (OverflowError, ZeroDivisionError):
            # Overflow needs a large positive exponent, in which case the
            # candidate is better and is accepted below anyway. Division by
            # zero only happens while the temperature is still exactly 0.
            sa_chance = 0
        if (new_fitness > current_fitness or random.random() < sa_chance):
            current_fitness = new_fitness
            current_key = new_key

            # Only an accepted candidate can improve on the best so far.
            if current_fitness > best_fitness:
                best_key = current_key
                best_fitness = current_fitness
        if i % 500 == 0:
            # The text logged here is the decipherment of the candidate
            # tried in this iteration.
            logger.debug('Simulated annealing: iteration {}, temperature {}, '
                         'current key {}, current_fitness {}, '
                         'best_plaintext {}'.format(i, temperature, current_key,
                                                    current_fitness, plaintext[:50]))
        temperature = max(temperature - dt, 0.001)

    return best_key, best_fitness
+
def pocket_enigma_break_by_crib(message, wheel_spec, crib, crib_position):
"""Break a pocket enigma using a crib (some plaintext that's expected to