c = (a * b) % 26
modular_division_table[b][c] = a
def letters(text):
    """Remove all non-alphabetic characters from a text
    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    # Keep only ASCII letters; everything else (digits, spaces,
    # punctuation) is dropped.
    alphabet = string.ascii_letters
    return ''.join(character for character in text if character in alphabet)
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # Delegate the filtering to letters(), then fold case.
    # (The previous inline comprehension was superseded by this and has
    # been removed rather than left as commented-out dead code.)
    return letters(text).lower()
def ngrams(text, n):
"""Returns all n-grams of a text
('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
('w', 1), ('x', 1), ('y', 1), ('z', 1)]
+ >>> frequencies('abcdefabcdef')['x']
+ 0
"""
- counts = collections.defaultdict(int)
- for c in text:
- counts[c] += 1
- return counts
+ #counts = collections.defaultdict(int)
+ #for c in text:
+ # counts[c] += 1
+ #return counts
+ return collections.Counter(c for c in text)
# Alias for frequencies; presumably kept so callers using the older
# name keep working — TODO confirm against call sites.
letter_frequencies = frequencies
def deduplicate(text):
def column_transposition_break(message,
wordlist=keywords,
metric=norms.euclidean_distance,
- #test_ngram_length=2,
target_counts=normalised_english_bigram_counts,
message_frequency_scaling=norms.normalise):
"""Breaks a column transposition cipher using a dictionary and
return best_keyword, best_fit
def column_transposition_break_mp(message,
                                  wordlist=keywords,
                                  metric=norms.euclidean_distance,
                                  target_counts=normalised_english_bigram_counts,
                                  message_frequency_scaling=norms.normalise,
                                  chunksize=500):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis

    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
        "Turing's homosexuality resulted in a criminal prosecution in 1952, \
        when homosexual acts were still illegal in the United Kingdom. "), \
        'encipher'), \
        wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
    ('encipher', 0.898128626285...)
    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
        "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
        "when homosexual acts were still illegal in the United Kingdom."), \
        'encipher'), \
        wordlist=['encipher', 'keyword', 'fourteen'], \
        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
    ('encipher', 1.1958792913127...)
    """
    # The n-gram length is implied by the keys of the target counts table.
    ngram_length = len(next(iter(target_counts.keys())))
    with Pool() as pool:
        worker_args = [(message, keyword, metric, target_counts, ngram_length,
                        message_frequency_scaling)
                       for keyword in wordlist]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        scored = pool.starmap(column_transposition_break_worker,
                              worker_args, chunksize)
        # Lowest fit score wins.
        return min(scored, key=lambda candidate: candidate[1])
+
def column_transposition_break_worker(message, keyword, metric, target_counts,
                                      ngram_length, message_frequency_scaling):
    """Score a single candidate keyword for the column transposition break.

    Deciphers message with keyword, then measures how far the scaled
    n-gram frequencies of the result are from target_counts under metric.
    Returns a (keyword, fit) pair.
    """
    plaintext = column_transposition_decipher(message, keyword)
    # sanitise() is pure, so compute it once and reuse it for both the
    # frequency counts and the debug excerpt.
    sanitised = sanitise(plaintext)
    counts = message_frequency_scaling(frequencies(
        ngrams(sanitised, ngram_length)))
    fit = metric(target_counts, counts)
    logger.debug('Column transposition break attempt using key {0} '
                 'gives fit of {1} and decrypt starting: {2}'.format(
                     keyword, fit, sanitised[:50]))
    return keyword, fit
+
+
if __name__ == "__main__":
import doctest