logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
-logger.setLevel(logging.INFO)
+#logger.setLevel(logging.INFO)
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
+keywords = []
+with open('words.txt', 'r') as f:
+ keywords = [line.rstrip() for line in f]
+
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
return ''.join(sanitised)
def ngrams(text, n):
+ """Returns all n-grams of a text
+
+ >>> ngrams(sanitise('the quick brown fox'), 2)
+ [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
+ >>> ngrams(sanitise('the quick brown fox'), 4)
+ [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
+ """
return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
def letter_frequencies(text):
counts[c] += 1
return counts
+def deduplicate(text):
+ return list(collections.OrderedDict.fromkeys(text))
+
+
+
def caesar_encipher_letter(letter, shift):
"""Encipher a letter, given a shift amount
return ''.join(enciphered)
+def keyword_encipher(message, keyword):
+ cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
+ cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
+ return message.lower().translate(cipher_translation)
+
+def keyword_decipher(message, keyword):
+ cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
+ cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
+ return message.lower().translate(cipher_translation)
+
+
def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
"""Breaks a Caesar cipher using frequency analysis
return (best_multiplier, best_adder, best_one_based), best_fit
+def keyword_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
+ best_keyword = ''
+ best_fit = float("inf")
+ for keyword in keywords:
+ plaintext = keyword_decipher(message, keyword)
+ frequencies = message_frequency_scaling(letter_frequencies(plaintext))
+ fit = metric(target_frequencies, frequencies)
+ logger.info('Keyword break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(keyword, fit, plaintext[:50]))
+ if fit < best_fit:
+ best_fit = fit
+ best_keyword = keyword
+ logger.info('Keyword break best fit with key {0} gives fit of {1} and decrypt starting: {2}'.format(best_keyword, best_fit, keyword_decipher(message, best_keyword)[:50]))
+ return best_keyword, best_fit
+
+
if __name__ == "__main__":
import doctest
doctest.testmod()