All breaking tests now pass

[cipher-tools.git] / cipherbreak.py
diff --git a/cipherbreak.py b/cipherbreak.py

index 7c27216daf0be2d680168302ecb1f379b65c8fdd..d2c35c93ed77b1c693dd68fd98bce7989b108066 100644 (file)
--- a/cipherbreak.py
+++ b/cipherbreak.py
@@ -2,11 +2,15 @@ import string
  import collections
  import norms
  import logging
-from itertools import zip_longest, cycle
-from segment import segment, Pwords
+from itertools import zip_longest, cycle, permutations
+from segment import segment
  from multiprocessing import Pool
+from math import log10
+
+import matplotlib.pyplot as plt
  
  from cipher import *
+from language_models import *
  
  # To time a run:
  #
@@ -15,32 +19,6 @@ from cipher import *
  # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
  # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
  
-
-english_counts = collections.defaultdict(int)
-with open('count_1l.txt', 'r') as f:
-    for line in f:
-        (letter, count) = line.split("\t")
-        english_counts[letter] = int(count)
-normalised_english_counts = norms.normalise(english_counts)
-
-english_bigram_counts = collections.defaultdict(int)
-with open('count_2l.txt', 'r') as f:
-    for line in f:
-        (bigram, count) = line.split("\t")
-        english_bigram_counts[bigram] = int(count)
-normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
-
-english_trigram_counts = collections.defaultdict(int)
-with open('count_3l.txt', 'r') as f:
-    for line in f:
-        (trigram, count) = line.split("\t")
-        english_trigram_counts[trigram] = int(count)
-normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
-
-
-with open('words.txt', 'r') as f:
-    keywords = [line.rstrip() for line in f]
-
  transpositions = collections.defaultdict(list)
  for word in keywords:
      transpositions[transpositions_of(word)] += [word]
@@ -77,36 +55,30 @@ def frequencies(text):
      #    counts[c] += 1
      #return counts
      return collections.Counter(c for c in text)
-letter_frequencies = frequencies
-
  
  
-def caesar_break(message, 
-                 metric=norms.euclidean_distance, 
-                 target_counts=normalised_english_counts, 
-                 message_frequency_scaling=norms.normalise):
+def caesar_break(message, fitness=Pletters):
      """Breaks a Caesar cipher using frequency analysis
      
      >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
-    (4, 0.080345432737...)
+    (4, -130.849890899...)
      >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
-    (19, 0.11189290326...)
+    (19, -128.82516920...)
      >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
-    (13, 0.08293968842...)
+    (13, -126.25233502...)
      """
      sanitised_message = sanitise(message)
      best_shift = 0
-    best_fit = float("inf")
+    best_fit = float('-inf')
      for shift in range(26):
          plaintext = caesar_decipher(sanitised_message, shift)
-        counts = message_frequency_scaling(letter_frequencies(plaintext))
-        fit = metric(target_counts, counts)
+        fit = fitness(plaintext)
          logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                        'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
-        if fit < best_fit:
+        if fit > best_fit:
              best_fit = fit
              best_shift = shift
      logger.info('Caesar break best fit: key {0} gives fit of {1} and '
@@ -114,37 +86,33 @@ def caesar_break(message,
                      caesar_decipher(sanitised_message, best_shift)[:50]))
      return best_shift, best_fit
  
-def affine_break(message, 
-                 metric=norms.euclidean_distance, 
-                 target_counts=normalised_english_counts, 
-                 message_frequency_scaling=norms.normalise):
+def affine_break(message, fitness=Pletters):
      """Breaks an affine cipher using frequency analysis
      
      >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
            'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
            'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
            'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
-          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
-          'clm ckuxj.') # doctest: +ELLIPSIS
-    ((15, 22, True), 0.0598745365924...)
+          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
+          'kxd clm ckuxj.') # doctest: +ELLIPSIS
+    ((15, 22, True), -340.611412245...)
      """
      sanitised_message = sanitise(message)
      best_multiplier = 0
      best_adder = 0
      best_one_based = True
-    best_fit = float("inf")
+    best_fit = float("-inf")
      for one_based in [True, False]:
-        for multiplier in range(1, 26, 2):
+        for multiplier in [x for x in range(1, 26, 2) if x != 13]:
              for adder in range(26):
                  plaintext = affine_decipher(sanitised_message, 
                                              multiplier, adder, one_based)
-                counts = message_frequency_scaling(letter_frequencies(plaintext))
-                fit = metric(target_counts, counts)
+                fit = fitness(plaintext)
                  logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                               'gives fit of {3} and decrypt starting: {4}'.
                               format(multiplier, adder, one_based, fit, 
                                      plaintext[:50]))
-                if fit < best_fit:
+                if fit > best_fit:
                      best_fit = fit
                      best_multiplier = multiplier
                      best_adder = adder
@@ -156,32 +124,27 @@ def affine_break(message,
                          best_adder, best_one_based)[:50]))
      return (best_multiplier, best_adder, best_one_based), best_fit
  
-def keyword_break(message, 
-                  wordlist=keywords, 
-                  metric=norms.euclidean_distance, 
-                  target_counts=normalised_english_counts, 
-                  message_frequency_scaling=norms.normalise):
+def keyword_break(message, wordlist=keywords, fitness=Pletters):
      """Breaks a keyword substitution cipher using a dictionary and 
      frequency analysis
  
      >>> keyword_break(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
-    (('elephant', 1), 0.1066453448861...)
+    (('elephant', 1), -52.8345642265...)
      """
      best_keyword = ''
      best_wrap_alphabet = True
-    best_fit = float("inf")
+    best_fit = float("-inf")
      for wrap_alphabet in range(3):
          for keyword in wordlist:
              plaintext = keyword_decipher(message, keyword, wrap_alphabet)
-            counts = message_frequency_scaling(letter_frequencies(plaintext))
-            fit = metric(target_counts, counts)
+            fit = fitness(plaintext)
              logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                           'gives fit of {2} and decrypt starting: {3}'.format(
                               keyword, wrap_alphabet, fit, 
                               sanitise(plaintext)[:50]))
-            if fit < best_fit:
+            if fit > best_fit:
                  best_fit = fit
                  best_keyword = keyword
                  best_wrap_alphabet = wrap_alphabet
@@ -192,63 +155,49 @@ def keyword_break(message,
                                           best_wrap_alphabet))[:50]))
      return (best_keyword, best_wrap_alphabet), best_fit
  
-def keyword_break_mp(message, 
-                     wordlist=keywords, 
-                     metric=norms.euclidean_distance, 
-                     target_counts=normalised_english_counts, 
-                     message_frequency_scaling=norms.normalise, 
-                     chunksize=500):
+def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, chunksize=500):
      """Breaks a keyword substitution cipher using a dictionary and 
      frequency analysis
  
      >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
-    (('elephant', 1), 0.106645344886...)
+    (('elephant', 1), -52.834564226507...)
      """
      with Pool() as pool:
-        helper_args = [(message, word, wrap, metric, target_counts, 
-                        message_frequency_scaling) 
+        helper_args = [(message, word, wrap, fitness) 
                         for word in wordlist for wrap in range(3)]
          # Gotcha: the helper function here needs to be defined at the top level 
          #   (limitation of Pool.starmap)
          breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) 
-        return min(breaks, key=lambda k: k[1])
+        return max(breaks, key=lambda k: k[1])
  
-def keyword_break_worker(message, keyword, wrap_alphabet, metric, target_counts, 
-                      message_frequency_scaling):
+def keyword_break_worker(message, keyword, wrap_alphabet, fitness):
      plaintext = keyword_decipher(message, keyword, wrap_alphabet)
-    counts = message_frequency_scaling(letter_frequencies(plaintext))
-    fit = metric(target_counts, counts)
+    fit = fitness(plaintext)
      logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                   '{2} and decrypt starting: {3}'.format(keyword, 
                       wrap_alphabet, fit, sanitise(plaintext)[:50]))
      return (keyword, wrap_alphabet), fit
  
-def scytale_break(message, 
-                  metric=norms.euclidean_distance, 
-                  target_counts=normalised_english_bigram_counts, 
-                  message_frequency_scaling=norms.normalise):
+def scytale_break(message, fitness=Pbigrams):
      """Breaks a Scytale cipher
      
      >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
             'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
             'aek') # doctest: +ELLIPSIS
-    (6, 0.092599933059...)
+    (6, -281.276219108...)
      """
      best_key = 0
-    best_fit = float("inf")
-    ngram_length = len(next(iter(target_counts.keys())))
+    best_fit = float("-inf")
      for key in range(1, 20):
          if len(message) % key == 0:
              plaintext = scytale_decipher(message, key)
-            counts = message_frequency_scaling(frequencies(
-                         ngrams(sanitise(plaintext), ngram_length)))
-            fit = metric(target_counts, counts)
+            fit = fitness(sanitise(plaintext))
              logger.debug('Scytale break attempt using key {0} gives fit of '
                           '{1} and decrypt starting: {2}'.format(key, 
                               fit, sanitise(plaintext)[:50]))
-            if fit < best_fit:
+            if fit > best_fit:
                  best_fit = fit
                  best_key = key
      logger.info('Scytale break best fit with key {0} gives fit of {1} and '
@@ -256,146 +205,100 @@ def scytale_break(message,
                      sanitise(scytale_decipher(message, best_key))[:50]))
      return best_key, best_fit
  
-def column_transposition_break(message, 
-                  translist=transpositions, 
-                  metric=norms.euclidean_distance, 
-                  target_counts=normalised_english_bigram_counts, 
-                  message_frequency_scaling=norms.normalise):
-    """Breaks a column transposition cipher using a dictionary and 
-    n-gram frequency analysis
-
-    >>> column_transposition_break(column_transposition_encipher(sanitise( \
-            "It is a truth universally acknowledged, that a single man in \
-             possession of a good fortune, must be in want of a wife. However \
-             little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
-             rightful property of some one or other of their daughters."), \
-        'encipher'), \
-        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
-                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
-                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
-    ((2, 0, 5, 3, 1, 4, 6), 0.0628106372...)
-    >>> column_transposition_break(column_transposition_encipher(sanitise( \
-            "It is a truth universally acknowledged, that a single man in \
-             possession of a good fortune, must be in want of a wife. However \
-             little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
-             rightful property of some one or other of their daughters."), \
-        'encipher'), \
-        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
-                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
-                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
-        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
-    ((2, 0, 5, 3, 1, 4, 6), 0.0592259560...)
-    """
-    best_transposition = ''
-    best_fit = float("inf")
-    ngram_length = len(next(iter(target_counts.keys())))
-    for transposition in translist.keys():
-        if len(message) % len(transposition) == 0:
-            plaintext = column_transposition_decipher(message, transposition)
-            counts = message_frequency_scaling(frequencies(
-                         ngrams(sanitise(plaintext), ngram_length)))
-            fit = metric(target_counts, counts)
-            logger.debug('Column transposition break attempt using key {0} '
-                         'gives fit of {1} and decrypt starting: {2}'.format(
-                             translist[transposition][0], fit, 
-                             sanitise(plaintext)[:50]))
-            if fit < best_fit:
-                best_fit = fit
-                best_transposition = transposition
-    logger.info('Column transposition break best fit with key {0} gives fit '
-                'of {1} and decrypt starting: {2}'.format(
-                    translist[best_transposition][0], 
-                    best_fit, sanitise(
-                        column_transposition_decipher(message, 
-                            best_transposition))[:50]))
-    return best_transposition, best_fit
-
  
-def column_transposition_break_mp(message, 
-                     translist=transpositions, 
-                     metric=norms.euclidean_distance, 
-                     target_counts=normalised_english_bigram_counts, 
-                     message_frequency_scaling=norms.normalise, 
-                     chunksize=500):
+def column_transposition_break_mp(message, translist=transpositions, 
+                     fitness=Pbigrams, chunksize=500):
      """Breaks a column transposition cipher using a dictionary and 
      n-gram frequency analysis
-
-    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
-            "It is a truth universally acknowledged, that a single man in \
-             possession of a good fortune, must be in want of a wife. However \
-             little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
-             rightful property of some one or other of their daughters."), \
-        'encipher'), \
-        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
-                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
-                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
-    ((2, 0, 5, 3, 1, 4, 6), 0.0628106372...)
-    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
-            "It is a truth universally acknowledged, that a single man in \
-             possession of a good fortune, must be in want of a wife. However \
-             little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
-             rightful property of some one or other of their daughters."), \
-        'encipher'), \
-        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
-                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
-                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
-        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
-    ((2, 0, 5, 3, 1, 4, 6), 0.0592259560...)
      """
-    ngram_length = len(next(iter(target_counts.keys())))
+    # >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+    #         "It is a truth universally acknowledged, that a single man in \
+    #          possession of a good fortune, must be in want of a wife. However \
+    #          little known the feelings or views of such a man may be on his \
+    #          first entering a neighbourhood, this truth is so well fixed in the \
+    #          minds of the surrounding families, that he is considered the \
+    #          rightful property of some one or other of their daughters."), \
+    #     'encipher'), \
+    #     translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+    #                (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+    #                (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
+    # (((2, 0, 5, 3, 1, 4, 6), False), 0.0628106372...)
+    # >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+    #         "It is a truth universally acknowledged, that a single man in \
+    #          possession of a good fortune, must be in want of a wife. However \
+    #          little known the feelings or views of such a man may be on his \
+    #          first entering a neighbourhood, this truth is so well fixed in the \
+    #          minds of the surrounding families, that he is considered the \
+    #          rightful property of some one or other of their daughters."), \
+    #     'encipher'), \
+    #     translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+    #                (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+    #                (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
+    #     target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
+    # (((2, 0, 5, 3, 1, 4, 6), False), 0.0592259560...)
+    # """
      with Pool() as pool:
-        helper_args = [(message, trans, metric, target_counts, ngram_length,
-                        message_frequency_scaling) 
-                       for trans in translist.keys()]
+        helper_args = [(message, trans, columnwise, fitness) 
+                       for trans in translist.keys() 
+                       for columnwise in [True, False]]
          # Gotcha: the helper function here needs to be defined at the top level 
          #   (limitation of Pool.starmap)
-        breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize) 
-        return min(breaks, key=lambda k: k[1])
-
-def column_transposition_break_worker(message, transposition, metric, target_counts, 
-                      ngram_length, message_frequency_scaling):
-    plaintext = column_transposition_decipher(message, transposition)
-    counts = message_frequency_scaling(frequencies(
-                         ngrams(sanitise(plaintext), ngram_length)))
-    fit = metric(target_counts, counts)
+        breaks = pool.starmap(column_transposition_break_worker, 
+          helper_args, chunksize) 
+        return max(breaks, key=lambda k: k[1])
+column_transposition_break = column_transposition_break_mp
+
+def column_transposition_break_worker(message, transposition, columnwise, 
+                                        fitness):
+    plaintext = column_transposition_decipher(message, transposition, columnwise=columnwise)
+    fit = fitness(sanitise(plaintext))
      logger.debug('Column transposition break attempt using key {0} '
                           'gives fit of {1} and decrypt starting: {2}'.format(
                               transposition, fit, 
                               sanitise(plaintext)[:50]))
-    return transposition, fit
+    return (transposition, columnwise), fit
+
+
+def transposition_break_exhaustive(message, fitness=Pbigrams):
+    best_transposition = ''
+    best_pw = float('-inf')
+    for keylength in range(1, 21):
+        if len(message) % keylength == 0:
+            for transposition in permutations(range(keylength)):
+                for columnwise in [True, False]:
+                    plaintext = column_transposition_decipher(message, 
+                        transposition, columnwise=columnwise)
+                    fit=fitness(plaintext)
+                    logger.debug('Column transposition break attempt using key {0} {1} '
+                         'gives fit of {2} and decrypt starting: {3}'.format(
+                             transposition, columnwise, pw, 
+                             sanitise(plaintext)[:50]))
+                    if fit > best_fit:
+                        best_transposition = transposition
+                        best_columnwise = columnwise
+                        best_fit = fit
+    return (best_transposition, best_columnwise), best_pw
+
  
-def vigenere_keyword_break(message, 
-                  wordlist=keywords, 
-                  metric=norms.euclidean_distance, 
-                  target_counts=normalised_english_counts, 
-                  message_frequency_scaling=norms.normalise):
+def vigenere_keyword_break(message, wordlist=keywords, fitness=Pletters):
      """Breaks a vigenere cipher using a dictionary and 
      frequency analysis
      
      >>> vigenere_keyword_break(vigenere_encipher(sanitise('this is a test ' \
               'message for the vigenere decipherment'), 'cat'), \
               wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
-    ('cat', 0.15965224935...)
+    ('cat', -52.9479167030...)
      """
      best_keyword = ''
-    best_fit = float("inf")
+    best_fit = float("-inf")
      for keyword in wordlist:
          plaintext = vigenere_decipher(message, keyword)
-        counts = message_frequency_scaling(letter_frequencies(plaintext))
-        fit = metric(target_counts, counts)
+        fit = fitness(plaintext)
          logger.debug('Vigenere break attempt using key {0} '
                           'gives fit of {1} and decrypt starting: {2}'.format(
                               keyword, fit, 
                               sanitise(plaintext)[:50]))
-        if fit < best_fit:
+        if fit > best_fit:
              best_fit = fit
              best_keyword = keyword
      logger.info('Vigenere break best fit with key {0} gives fit '
@@ -404,11 +307,7 @@ def vigenere_keyword_break(message,
                          vigenere_decipher(message, best_keyword))[:50]))
      return best_keyword, best_fit
  
-def vigenere_keyword_break_mp(message, 
-                     wordlist=keywords, 
-                     metric=norms.euclidean_distance, 
-                     target_counts=normalised_english_counts, 
-                     message_frequency_scaling=norms.normalise, 
+def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters, 
                       chunksize=500):
      """Breaks a vigenere cipher using a dictionary and 
      frequency analysis
@@ -416,22 +315,19 @@ def vigenere_keyword_break_mp(message,
      >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
               'message for the vigenere decipherment'), 'cat'), \
               wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
-    ('cat', 0.159652249358...)
+    ('cat', -52.9479167030...)
      """
      with Pool() as pool:
-        helper_args = [(message, word, metric, target_counts, 
-                        message_frequency_scaling) 
+        helper_args = [(message, word, fitness) 
                         for word in wordlist]
          # Gotcha: the helper function here needs to be defined at the top level 
          #   (limitation of Pool.starmap)
          breaks = pool.starmap(vigenere_keyword_break_worker, helper_args, chunksize) 
-        return min(breaks, key=lambda k: k[1])
+        return max(breaks, key=lambda k: k[1])
  
-def vigenere_keyword_break_worker(message, keyword, metric, target_counts, 
-                      message_frequency_scaling):
+def vigenere_keyword_break_worker(message, keyword, fitness):
      plaintext = vigenere_decipher(message, keyword)
-    counts = message_frequency_scaling(letter_frequencies(plaintext))
-    fit = metric(target_counts, counts)
+    fit = fitness(plaintext)
      logger.debug('Vigenere keyword break attempt using key {0} gives fit of '
                   '{1} and decrypt starting: {2}'.format(keyword, 
                       fit, sanitise(plaintext)[:50]))
@@ -439,30 +335,29 @@ def vigenere_keyword_break_worker(message, keyword, metric, target_counts,
  
  
  
-def vigenere_frequency_break(message,
-                  metric=norms.euclidean_distance, 
-                  target_counts=normalised_english_counts, 
-                  message_frequency_scaling=norms.normalise):
+def vigenere_frequency_break(message, fitness=Pletters):
      """Breaks a Vigenere cipher with frequency analysis
  
      >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
              "run. She is ready and so am I. I stole Daniel's pocketbook this " \
              "afternoon when he left his jacket hanging on the easel in the " \
-            "attic."), 'florence')) # doctest: +ELLIPSIS
-    ('florence', 0.077657073...)
+            "attic. I jump every time I hear a footstep on the stairs, " \
+            "certain that the theft has been discovered and that I will " \
+            "be caught. The SS officer visits less often now that he is " \
+            "sure"), 'florence')) # doctest: +ELLIPSIS
+    ('florence', -307.5549865898...)
      """
-    best_fit = float("inf")
+    best_fit = float("-inf")
      best_key = ''
      sanitised_message = sanitise(message)
      for trial_length in range(1, 20):
          splits = every_nth(sanitised_message, trial_length)
-        key = ''.join([chr(caesar_break(s, target_counts=target_counts)[0] + ord('a')) for s in splits])
+        key = ''.join([chr(caesar_break(s)[0] + ord('a')) for s in splits])
          plaintext = vigenere_decipher(sanitised_message, key)
-        counts = message_frequency_scaling(frequencies(plaintext))
-        fit = metric(target_counts, counts)
+        fit = fitness(plaintext)
          logger.debug('Vigenere key length of {0} ({1}) gives fit of {2}'.
                       format(trial_length, key, fit))
-        if fit < best_fit:
+        if fit > best_fit:
              best_fit = fit
              best_key = key
      logger.info('Vigenere break best fit with key {0} gives fit '
@@ -471,6 +366,48 @@ def vigenere_frequency_break(message,
                          vigenere_decipher(message, best_key))[:50]))
      return best_key, best_fit
  
+def beaufort_frequency_break(message, fitness=Pletters):
+    """Breaks a Beaufort cipher with frequency analysis
+
+    >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
+            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+            "afternoon when he left his jacket hanging on the easel in the " \
+            "attic. I jump every time I hear a footstep on the stairs, " \
+            "certain that the theft has been discovered and that I will " \
+            "be caught. The SS officer visits less often now " \
+            "that he is sure"), 'florence')) # doctest: +ELLIPSIS
+    ('florence', -307.5549865898...)
+    """
+    best_fit = float("-inf")
+    best_key = ''
+    sanitised_message = sanitise(message)
+    for trial_length in range(1, 20):
+        splits = every_nth(sanitised_message, trial_length)
+        key = ''.join([chr(-caesar_break(s)[0] % 26 + ord('a')) for s in splits])
+        plaintext = beaufort_decipher(sanitised_message, key)
+        fit = fitness(plaintext)
+        logger.debug('Beaufort key length of {0} ({1}) gives fit of {2}'.
+                     format(trial_length, key, fit))
+        if fit > best_fit:
+            best_fit = fit
+            best_key = key
+    logger.info('Beaufort break best fit with key {0} gives fit '
+                'of {1} and decrypt starting: {2}'.format(best_key, 
+                    best_fit, sanitise(
+                        beaufort_decipher(message, best_key))[:50]))
+    return best_key, best_fit
+
+
+
+def plot_frequency_histogram(freqs, sort_key=None):
+    x = range(len(freqs.keys()))
+    y = [freqs[l] for l in sorted(freqs.keys(), key=sort_key)]
+    f = plt.figure()
+    ax = f.add_axes([0.1, 0.1, 0.9, 0.9])
+    ax.bar(x, y, align='center')
+    ax.set_xticks(x)
+    ax.set_xticklabels(sorted(freqs.keys(), key=sort_key))
+    f.show()
  
  
  if __name__ == "__main__":