import norms
import logging
import math
-from itertools import zip_longest
+from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool
english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
+english_trigram_counts = collections.defaultdict(int)
+with open('count_3l.txt', 'r') as f:
+ for line in f:
+ (trigram, count) = line.split("\t")
+ english_trigram_counts[trigram] = int(count)
+normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
+
+
with open('words.txt', 'r') as f:
keywords = [line.rstrip() for line in f]
"""
return [text[i:i+n] for i in range(len(text)-n+1)]
-def every_nth(text, n):
+def every_nth(text, n, fillvalue=''):
"""Returns n strings, each of which consists of every nth character,
starting with the 0th, 1st, 2nd, ... (n-1)th character
>>> every_nth(string.ascii_lowercase, 5)
['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
- >>> every_nth(string.ascii_lowercase, 1)
+ >>> every_nth(string.ascii_lowercase, 1)
['abcdefghijklmnopqrstuvwxyz']
>>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+ >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
+ ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
"""
split_text = [text[i:i+n] for i in range(0, len(text), n)]
- return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
+ return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
def combine_every_nth(split_text):
"""Reforms a text split into every_nth strings
return ''.join([''.join(l)
for l in zip_longest(*split_text, fillvalue='')])
+def transpose(items, transposition):
+ """Moves items around according to the given transposition
+
+ >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
+ ['a', 'b', 'c', 'd']
+ >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
+ ['d', 'b', 'c', 'a']
+ >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
+ [13, 12, 14, 11, 15, 10]
+ """
+ transposed = list(repeat('', len(transposition)))
+ for p, t in enumerate(transposition):
+ transposed[p] = items[t]
+ return transposed
+
+def untranspose(items, transposition):
+ """Undoes a transpose
+
+ >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
+ ['a', 'b', 'c', 'd']
+ >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
+ ['a', 'b', 'c', 'd']
+ >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
+ [10, 11, 12, 13, 14, 15]
+ """
+ transposed = list(repeat('', len(transposition)))
+ for p, t in enumerate(transposition):
+ transposed[t] = items[p]
+ return transposed
+
def frequencies(text):
"""Count the number of occurrences of each character in text
return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
+def transpositions_of(keyword):
+ """Finds the transpostions given by a keyword. For instance, the keyword
+ 'clever' rearranges to 'celrv', so the first column (0) stays first, the
+ second column (1) moves to third, the third column (2) moves to second,
+ and so on.
+
+ >>> transpositions_of('clever')
+ [0, 2, 1, 4, 3]
+ """
+ key = deduplicate(keyword)
+ transpositions = [key.index(l) for l in sorted(key)]
+ return transpositions
+
+def column_transposition_encipher(message, keyword, fillvalue=' '):
+ """Enciphers using the column transposition cipher.
+ Message is padded to allow all rows to be the same length.
+
+ >>> column_transposition_encipher('hellothere', 'clever')
+ 'hleolteher'
+ >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+ 'hleolthre!e!'
+ """
+ return column_transposition_worker(message, keyword, encipher=True,
+ fillvalue=fillvalue)
+
+def column_transposition_decipher(message, keyword, fillvalue=' '):
+ """Deciphers using the column transposition cipher.
+ Message is padded to allow all rows to be the same length.
+
+ >>> column_transposition_decipher('hleolteher', 'clever')
+ 'hellothere'
+ >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
+ 'hellothere!!'
+ """
+ return column_transposition_worker(message, keyword, encipher=False,
+ fillvalue=fillvalue)
+
+def column_transposition_worker(message, keyword,
+ encipher=True, fillvalue=' '):
+ """Does the actual work of the column transposition cipher.
+ Message is padded with spaces to allow all rows to be the same length.
+
+ >>> column_transposition_worker('hellothere', 'clever')
+ 'hleolteher'
+ >>> column_transposition_worker('hellothere', 'clever', encipher=True)
+ 'hleolteher'
+ >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
+ 'hellothere'
+ """
+ transpositions = transpositions_of(keyword)
+ columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
+ if encipher:
+ transposed_columns = transpose(columns, transpositions)
+ else:
+ transposed_columns = untranspose(columns, transpositions)
+ return combine_every_nth(transposed_columns)
+
+
+
def caesar_break(message,
metric=norms.euclidean_distance,
target_counts=normalised_english_counts,
sanitise(scytale_decipher(message, best_key))[:50]))
return best_key, best_fit
+def column_transposition_break(message,
+ wordlist=keywords,
+ metric=norms.euclidean_distance,
+ #test_ngram_length=2,
+ target_counts=normalised_english_bigram_counts,
+ message_frequency_scaling=norms.normalise):
+ """Breaks a column transposition cipher using a dictionary and
+ n-gram frequency analysis
+
+ >>> column_transposition_break(column_transposition_encipher(sanitise( \
+ "Turing's homosexuality resulted in a criminal prosecution in 1952, \
+ when homosexual acts were still illegal in the United Kingdom. "), \
+ 'encipher'), \
+ wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
+ ('encipher', 0.898128626285...)
+ >>> column_transposition_break(column_transposition_encipher(sanitise( \
+ "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
+ "when homosexual acts were still illegal in the United Kingdom."), \
+ 'encipher'), \
+ wordlist=['encipher', 'keyword', 'fourteen'], \
+ target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
+ ('encipher', 1.1958792913127...)
+ """
+ best_keyword = ''
+ best_fit = float("inf")
+ ngram_length = len(next(iter(target_counts.keys())))
+ for keyword in wordlist:
+ if len(message) % len(deduplicate(keyword)) == 0:
+ plaintext = column_transposition_decipher(message, keyword)
+ counts = message_frequency_scaling(frequencies(
+ ngrams(sanitise(plaintext), ngram_length)))
+ fit = metric(target_counts, counts)
+ logger.debug('Column transposition break attempt using key {0} '
+ 'gives fit of {1} and decrypt starting: {2}'.format(
+ keyword, fit,
+ sanitise(plaintext)[:50]))
+ if fit < best_fit:
+ best_fit = fit
+ best_keyword = keyword
+ logger.info('Column transposition break best fit with key {0} gives fit '
+ 'of {1} and decrypt starting: {2}'.format(best_keyword,
+ best_fit, sanitise(
+ column_transposition_decipher(message,
+ best_keyword))[:50]))
+ return best_keyword, best_fit
+
+
if __name__ == "__main__":
import doctest