More tweaking to conform with linting

author Neil Smith <neil.git@njae.me.uk>

Sun, 13 Jul 2014 22:13:57 +0000 (23:13 +0100)

committer Neil Smith <neil.git@njae.me.uk>

Sun, 13 Jul 2014 22:13:57 +0000 (23:13 +0100)
author Neil Smith <neil.git@njae.me.uk>
Sun, 13 Jul 2014 22:13:57 +0000 (23:13 +0100)
committer Neil Smith <neil.git@njae.me.uk>
Sun, 13 Jul 2014 22:13:57 +0000 (23:13 +0100)
diff --git a/cipher.py b/cipher.py

index 1656720d64867dc71f547f746f202776792bf633..3783d577d6f1bf4f105bfc83ca5069718650c4ff 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -44,7 +44,7 @@ def combine_every_nth(split_text):
      'abcdefghijklmnopqrstuvwxyz'
      """
      return ''.join([''.join(l)
-        for l in zip_longest(*split_text, fillvalue='')])
+                    for l in zip_longest(*split_text, fillvalue='')])
  
  def chunks(text, n, fillvalue=None):
      """Split a text into chunks of n characters
@@ -79,7 +79,7 @@ def transpose(items, transposition):
  
  def untranspose(items, transposition):
      """Undoes a transpose
-    
+
      >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
      ['a', 'b', 'c', 'd']
      >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
@@ -183,7 +183,7 @@ def caesar_decipher(message, shift):
      return caesar_encipher(message, -shift)
  
  def affine_encipher_letter(accented_letter, multiplier=1, adder=0,
-        one_based=True):
+                           one_based=True):
      """Encipher a letter, given a multiplier and adder
      >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
              for l in string.ascii_uppercase])
@@ -225,7 +225,8 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
          if one_based: cipher_number += 1
          plaintext_number = (
              modular_division_table[multiplier]
-                                  [(cipher_number - adder) % 26] )
+                                  [(cipher_number - adder) % 26]
+                            )
          if one_based: plaintext_number -= 1
          return chr(plaintext_number % 26 + alphabet_start)
      else:
@@ -254,7 +255,7 @@ def affine_decipher(message, multiplier=1, adder=0, one_based=True):
      return ''.join(enciphered)
  
  
-class Keyword_wrap_alphabet(Enum):
+class KeywordWrapAlphabet(Enum):
      """Ways of wrapping the alphabet for keyword-based substitution ciphers."""
      from_a = 1
      from_last = 2
@@ -262,25 +263,25 @@ class Keyword_wrap_alphabet(Enum):
  
  
  def keyword_cipher_alphabet_of(keyword,
-        wrap_alphabet=Keyword_wrap_alphabet.from_a):
+        wrap_alphabet=KeywordWrapAlphabet.from_a):
      """Find the cipher alphabet given a keyword.
      wrap_alphabet controls how the rest of the alphabet is added
      after the keyword.
  
      >>> keyword_cipher_alphabet_of('bayes')
      'bayescdfghijklmnopqrtuvwxz'
-    >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_a)
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
      'bayescdfghijklmnopqrtuvwxz'
-    >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_last)
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
      'bayestuvwxzcdfghijklmnopqr'
-    >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_largest)
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
      'bayeszcdfghijklmnopqrtuvwx'
      """
-    if wrap_alphabet == Keyword_wrap_alphabet.from_a:
+    if wrap_alphabet == KeywordWrapAlphabet.from_a:
          cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                                string.ascii_lowercase))
      else:
-        if wrap_alphabet == Keyword_wrap_alphabet.from_last:
+        if wrap_alphabet == KeywordWrapAlphabet.from_last:
              last_keyword_letter = deduplicate(sanitise(keyword))[-1]
          else:
              last_keyword_letter = sorted(sanitise(keyword))[-1]
@@ -294,7 +295,7 @@ def keyword_cipher_alphabet_of(keyword,
  
  
  def keyword_encipher(message, keyword,
-        wrap_alphabet=Keyword_wrap_alphabet.from_a):
+                     wrap_alphabet=KeywordWrapAlphabet.from_a):
      """Enciphers a message with a keyword substitution cipher.
      wrap_alphabet controls how the rest of the alphabet is added
      after the keyword.
@@ -304,18 +305,19 @@ def keyword_encipher(message, keyword,
  
      >>> keyword_encipher('test message', 'bayes')
      'rsqr ksqqbds'
-    >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_a)
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
      'rsqr ksqqbds'
-    >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_last)
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
      'lskl dskkbus'
-    >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_largest)
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
      'qspq jsppbcs'
      """
      cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
      cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
      return unaccent(message).lower().translate(cipher_translation)
  
-def keyword_decipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a):
+def keyword_decipher(message, keyword, 
+                     wrap_alphabet=KeywordWrapAlphabet.from_a):
      """Deciphers a message with a keyword substitution cipher.
      wrap_alphabet controls how the rest of the alphabet is added
      after the keyword.
@@ -325,11 +327,11 @@ def keyword_decipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_
  
      >>> keyword_decipher('rsqr ksqqbds', 'bayes')
      'test message'
-    >>> keyword_decipher('rsqr ksqqbds', 'bayes', Keyword_wrap_alphabet.from_a)
+    >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
      'test message'
-    >>> keyword_decipher('lskl dskkbus', 'bayes', Keyword_wrap_alphabet.from_last)
+    >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
      'test message'
-    >>> keyword_decipher('qspq jsppbcs', 'bayes', Keyword_wrap_alphabet.from_largest)
+    >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
      'test message'
      """
      cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
@@ -357,14 +359,14 @@ def vigenere_decipher(message, keyword):
      pairs = zip(message, cycle(shifts))
      return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
  
-beaufort_encipher=vigenere_decipher
-beaufort_decipher=vigenere_encipher
+beaufort_encipher = vigenere_decipher
+beaufort_decipher = vigenere_encipher
  
  
  def transpositions_of(keyword):
      """Finds the transpostions given by a keyword. For instance, the keyword
      'clever' rearranges to 'celrv', so the first column (0) stays first, the
-    second column (1) moves to third, the third column (2) moves to second, 
+    second column (1) moves to third, the third column (2) moves to second,
      and so on.
  
      If passed a tuple, assume it's already a transposition and just return it.
@@ -408,8 +410,8 @@ def pad(message_len, group_len, fillvalue):
      return padding
  
  def column_transposition_encipher(message, keyword, fillvalue=' ',
-      fillcolumnwise=False,
-      emptycolumnwise=False):
+                                  fillcolumnwise=False,
+                                  emptycolumnwise=False):
      """Enciphers using the column transposition cipher.
      Message is padded to allow all rows to be the same length.
  
@@ -457,8 +459,8 @@ def column_transposition_encipher(message, keyword, fillvalue=' ',
          return ''.join(chain(*transposed))
  
  def column_transposition_decipher(message, keyword, fillvalue=' ',
-      fillcolumnwise=False,
-      emptycolumnwise=False):
+                                  fillcolumnwise=False,
+                                  emptycolumnwise=False):
      """Deciphers using the column transposition cipher.
      Message is padded to allow all rows to be the same length.
  
@@ -508,12 +510,12 @@ def scytale_encipher(message, rows, fillvalue=' '):
      """
      transpositions = [i for i in range(rows)]
      return column_transposition_encipher(message, transpositions,
-        fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
+            fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
  
  def scytale_decipher(message, rows):
      """Deciphers using the scytale transposition cipher.
      Assumes the message is padded so that all rows are the same length.
-    
+
      >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
      'thequickbrownfox  '
      >>> scytale_decipher('tubnhirfecooqkwx', 4)
@@ -600,14 +602,14 @@ class PocketEnigma(object):
          ValueError: Wheel specification does not contain 26 letters
          """
          if len(wheel_spec) != 13:
-            raise ValueError("Wheel specification has {} pairs, requires 13".
-                format(len(wheel_spec)))
+            raise ValueError("Wheel specification has {} pairs, requires"
+                             " 13".format(len(wheel_spec)))
          for p in wheel_spec:
              if len(p) != 2:
                  raise ValueError("Not all mappings in wheel specification"
-                    "have two elements")
-        if len(set([p[0] for p in wheel_spec] + 
-                    [p[1] for p in wheel_spec])) != 26:
+                                 "have two elements")
+        if len(set([p[0] for p in wheel_spec] +
+                   [p[1] for p in wheel_spec])) != 26:
              raise ValueError("Wheel specification does not contain 26 letters")
  
      def encipher_letter(self, letter):
diff --git a/cipherbreak.py b/cipherbreak.py

index 3639da5c68c1d32621b73129c9495a91ae73539c..b28b763250bd10115545a34a25a86b08c3a634e9 100644 (file)
--- a/cipherbreak.py
+++ b/cipherbreak.py
@@ -1,12 +1,15 @@
+"""A set of functions to break the ciphers give in ciphers.py.
+"""
+
  import string
  import collections
  import norms
  import logging
  import random
-from itertools import zip_longest, cycle, permutations, starmap
+import math
+from itertools import starmap
  from segment import segment
  from multiprocessing import Pool
-from math import log10
  
  import matplotlib.pyplot as plt
  
@@ -32,7 +35,7 @@ for word in keywords:
  
  def frequencies(text):
      """Count the number of occurrences of each character in text
-    
+
      >>> sorted(frequencies('abcdefabc').items())
      [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
      >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
@@ -48,7 +51,7 @@ def frequencies(text):
       ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
       ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
       ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
-         >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
+    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
           'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
      [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
       ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
@@ -80,8 +83,8 @@ def caesar_break(message, fitness=Pletters):
          plaintext = caesar_decipher(sanitised_message, shift)
          fit = fitness(plaintext)
          logger.debug('Caesar break attempt using key {0} gives fit of {1} '
-            'and decrypt starting: {2}'.format(shift, fit,
-                plaintext[:50]))
+                     'and decrypt starting: {2}'.format(shift, fit,
+                                                        plaintext[:50]))
          if fit > best_fit:
              best_fit = fit
              best_shift = shift
@@ -109,12 +112,12 @@ def affine_break(message, fitness=Pletters):
      for one_based in [True, False]:
          for multiplier in [x for x in range(1, 26, 2) if x != 13]:
              for adder in range(26):
-                plaintext = affine_decipher(sanitised_message, 
+                plaintext = affine_decipher(sanitised_message,
                                              multiplier, adder, one_based)
                  fit = fitness(plaintext)
                  logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                               'gives fit of {3} and decrypt starting: {4}'.
-                             format(multiplier, adder, one_based, fit, 
+                             format(multiplier, adder, one_based, fit,
                                      plaintext[:50]))
                  if fit > best_fit:
                      best_fit = fit
@@ -125,22 +128,22 @@ def affine_break(message, fitness=Pletters):
                  '{3} and decrypt starting: {4}'.format(
                      best_multiplier, best_adder, best_one_based, best_fit,
                      affine_decipher(sanitised_message, best_multiplier,
-                        best_adder, best_one_based)[:50]))
+                                    best_adder, best_one_based)[:50]))
      return (best_multiplier, best_adder, best_one_based), best_fit
  
  def keyword_break(message, wordlist=keywords, fitness=Pletters):
-    """Breaks a keyword substitution cipher using a dictionary and 
-    frequency analysis
+    """Breaks a keyword substitution cipher using a dictionary and
+    frequency analysis.
  
      >>> keyword_break(keyword_encipher('this is a test message for the ' \
-          'keyword decipherment', 'elephant', Keyword_wrap_alphabet.from_last), \
+          'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
-    (('elephant', <Keyword_wrap_alphabet.from_last: 2>), -52.834575011...)
+    (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
      """
      best_keyword = ''
      best_wrap_alphabet = True
      best_fit = float("-inf")
-    for wrap_alphabet in Keyword_wrap_alphabet:
+    for wrap_alphabet in KeywordWrapAlphabet:
          for keyword in wordlist:
              plaintext = keyword_decipher(message, keyword, wrap_alphabet)
              fit = fitness(plaintext)
@@ -159,19 +162,20 @@ def keyword_break(message, wordlist=keywords, fitness=Pletters):
                                           best_wrap_alphabet))[:50]))
      return (best_keyword, best_wrap_alphabet), best_fit
  
-def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, chunksize=500):
-    """Breaks a keyword substitution cipher using a dictionary and 
+def keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
+                     chunksize=500):
+    """Breaks a keyword substitution cipher using a dictionary and
      frequency analysis
  
      >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
-          'keyword decipherment', 'elephant', Keyword_wrap_alphabet.from_last), \
+          'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
-    (('elephant', <Keyword_wrap_alphabet.from_last: 2>), -52.834575011...)
+    (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
      """
      with Pool() as pool:
          helper_args = [(message, word, wrap, fitness)
                         for word in wordlist
-                       for wrap in Keyword_wrap_alphabet]
+                       for wrap in KeywordWrapAlphabet]
          # Gotcha: the helper function here needs to be defined at the top level
          #   (limitation of Pool.starmap)
          breaks = pool.starmap(keyword_break_worker, helper_args, chunksize)
@@ -185,14 +189,14 @@ def keyword_break_worker(message, keyword, wrap_alphabet, fitness):
                       wrap_alphabet, fit, sanitise(plaintext)[:50]))
      return (keyword, wrap_alphabet), fit
  
-def monoalphabetic_break_hillclimbing(message, max_iterations = 10000000, 
+def monoalphabetic_break_hillclimbing(message, max_iterations=10000000, 
          fitness=Pletters):
      ciphertext = unaccent(message).lower()
      alphabet = list(string.ascii_lowercase)
      random.shuffle(alphabet)
      alphabet = ''.join(alphabet)
      return monoalphabetic_break_hillclimbing_worker(ciphertext, alphabet,
-        max_iterations, fitness)
+                                                    max_iterations, fitness)
  
  def monoalphabetic_break_hillclimbing_mp(message, workers=10, 
          max_iterations = 10000000, fitness=Pletters, chunksize=1):
@@ -205,10 +209,10 @@ def monoalphabetic_break_hillclimbing_mp(message, workers=10,
          worker_args.append((ciphertext, alphabet, max_iterations, fitness))
      with Pool() as pool:
          breaks = pool.starmap(monoalphabetic_break_hillclimbing_worker,
-            worker_args, chunksize)
+                              worker_args, chunksize)
      return max(breaks, key=lambda k: k[1])
  
-def monoalphabetic_break_hillclimbing_worker(message, alphabet, 
+def monoalphabetic_break_hillclimbing_worker(message, alphabet,
          max_iterations, fitness):
      def swap(letters, i, j):
          if i > j:
@@ -216,8 +220,8 @@ def monoalphabetic_break_hillclimbing_worker(message, alphabet,
          if i == j:
              return letters
          else:
-            return letters[:i] + letters[j] + letters[i+1:j] + 
-                    letters[i] + letters[j+1:]
+            return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
+                    letters[j+1:])
      best_alphabet = alphabet
      best_fitness = float('-inf')
      for i in range(max_iterations):
@@ -231,17 +235,17 @@ def monoalphabetic_break_hillclimbing_worker(message, alphabet,
      return best_alphabet, best_fitness
  
  
-def column_transposition_break_mp(message, translist=transpositions, 
-                     fitness=Pbigrams, chunksize=500):
-    """Breaks a column transposition cipher using a dictionary and 
+def column_transposition_break_mp(message, translist=transpositions,
+                                  fitness=Pbigrams, chunksize=500):
+    """Breaks a column transposition cipher using a dictionary and
      n-gram frequency analysis
  
      >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
              "It is a truth universally acknowledged, that a single man in \
               possession of a good fortune, must be in want of a wife. However \
               little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
               rightful property of some one or other of their daughters."), \
          'encipher'), \
          translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
@@ -252,8 +256,8 @@ def column_transposition_break_mp(message, translist=transpositions,
              "It is a truth universally acknowledged, that a single man in \
               possession of a good fortune, must be in want of a wife. However \
               little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
               rightful property of some one or other of their daughters."), \
          'encipher'), \
          translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
@@ -263,21 +267,21 @@ def column_transposition_break_mp(message, translist=transpositions,
      (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
      """
      with Pool() as pool:
-        helper_args = [(message, trans, fillcolumnwise, emptycolumnwise, 
-                          fitness) 
-                       for trans in translist.keys() 
+        helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
+                        fitness)
+                       for trans in translist.keys()
                         for fillcolumnwise in [True, False]
                         for emptycolumnwise in [True, False]]
-        # Gotcha: the helper function here needs to be defined at the top level 
+        # Gotcha: the helper function here needs to be defined at the top level
          #   (limitation of Pool.starmap)
-        breaks = pool.starmap(column_transposition_break_worker, 
-          helper_args, chunksize) 
+        breaks = pool.starmap(column_transposition_break_worker,
+                              helper_args, chunksize) 
          return max(breaks, key=lambda k: k[1])
  column_transposition_break = column_transposition_break_mp
  
-def column_transposition_break_worker(message, transposition, 
+def column_transposition_break_worker(message, transposition,
          fillcolumnwise, emptycolumnwise, fitness):
-    plaintext = column_transposition_decipher(message, transposition, 
+    plaintext = column_transposition_decipher(message, transposition,
          fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise)
      fit = fitness(sanitise(plaintext))
      logger.debug('Column transposition break attempt using key {0} '
@@ -296,8 +300,8 @@ def scytale_break_mp(message, max_key_length=20,
              "It is a truth universally acknowledged, that a single man in \
               possession of a good fortune, must be in want of a wife. However \
               little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
               rightful property of some one or other of their daughters."), \
          5)) # doctest: +ELLIPSIS
      (5, -709.4646722...)
@@ -305,31 +309,30 @@ def scytale_break_mp(message, max_key_length=20,
              "It is a truth universally acknowledged, that a single man in \
               possession of a good fortune, must be in want of a wife. However \
               little known the feelings or views of such a man may be on his \
-             first entering a neighbourhood, this truth is so well fixed in the \
-             minds of the surrounding families, that he is considered the \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
               rightful property of some one or other of their daughters."), \
          5), \
          fitness=Ptrigrams) # doctest: +ELLIPSIS
      (5, -997.0129085...)
      """
      with Pool() as pool:
-        helper_args = [(message, trans, False, True, fitness) 
-            for trans in 
-                [[col for col in range(math.ceil(len(message)/rows))] 
+        helper_args = [(message, trans, False, True, fitness)
+            for trans in
+                [[col for col in range(math.ceil(len(message)/rows))]
                      for rows in range(1,max_key_length+1)]]
-        # Gotcha: the helper function here needs to be defined at the top level 
+        # Gotcha: the helper function here needs to be defined at the top level
          #   (limitation of Pool.starmap)
-        breaks = pool.starmap(column_transposition_break_worker, 
-          helper_args, chunksize) 
-        best = max(breaks, key=lambda k: k[1]) 
+        breaks = pool.starmap(column_transposition_break_worker,
+                              helper_args, chunksize)
+        best = max(breaks, key=lambda k: k[1])
          return math.trunc(len(message) / len(best[0][0])), best[1]
  scytale_break = scytale_break_mp
  
  
-def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters, 
-                     chunksize=500):
-    """Breaks a vigenere cipher using a dictionary and 
-    frequency analysis
+def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
+                              chunksize=500):
+    """Breaks a vigenere cipher using a dictionary and frequency analysis.
  
      >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
               'message for the vigenere decipherment'), 'cat'), \
@@ -337,11 +340,12 @@ def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
      ('cat', -52.947271216...)
      """
      with Pool() as pool:
-        helper_args = [(message, word, fitness) 
+        helper_args = [(message, word, fitness)
                         for word in wordlist]
-        # Gotcha: the helper function here needs to be defined at the top level 
+        # Gotcha: the helper function here needs to be defined at the top level
          #   (limitation of Pool.starmap)
-        breaks = pool.starmap(vigenere_keyword_break_worker, helper_args, chunksize) 
+        breaks = pool.starmap(vigenere_keyword_break_worker, helper_args,
+                              chunksize)
          return max(breaks, key=lambda k: k[1])
  vigenere_keyword_break = vigenere_keyword_break_mp
  
@@ -349,7 +353,7 @@ def vigenere_keyword_break_worker(message, keyword, fitness):
      plaintext = vigenere_decipher(message, keyword)
      fit = fitness(plaintext)
      logger.debug('Vigenere keyword break attempt using key {0} gives fit of '
-                 '{1} and decrypt starting: {2}'.format(keyword, 
+                 '{1} and decrypt starting: {2}'.format(keyword,
                       fit, sanitise(plaintext)[:50]))
      return keyword, fit
  
@@ -374,8 +378,8 @@ def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters):
          fit = fitness(plaintext)
          return key, fit
      sanitised_message = sanitise(message)
-    results = starmap(worker, [(sanitised_message, i, fitness) 
-        for i in range(1, max_key_length+1)])
+    results = starmap(worker, [(sanitised_message, i, fitness)
+                               for i in range(1, max_key_length+1)])
      return max(results, key=lambda k: k[1])
  
  
@@ -393,13 +397,14 @@ def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
      """
      def worker(message, key_length, fitness):
          splits = every_nth(sanitised_message, key_length)
-        key = ''.join([chr(-caesar_break(s)[0] % 26 + ord('a')) for s in splits])
+        key = ''.join([chr(-caesar_break(s)[0] % 26 + ord('a'))
+                       for s in splits])
          plaintext = beaufort_decipher(message, key)
          fit = fitness(plaintext)
          return key, fit
      sanitised_message = sanitise(message)
-    results = starmap(worker, [(sanitised_message, i, fitness) 
-        for i in range(1, max_key_length+1)])
+    results = starmap(worker, [(sanitised_message, i, fitness)
+                               for i in range(1, max_key_length+1)])
      return max(results, key=lambda k: k[1])
  
  
diff --git a/language_models.py b/language_models.py

index babbea19ceed80bd9d95fee347c64e53256b5626..62219efe54ab2ad20e07c1838d5ade29e6511d7d 100644 (file)
--- a/language_models.py
+++ b/language_models.py
@@ -96,10 +96,10 @@ def ngrams(text, n):
      """Returns all n-grams of a text
      
      >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
-    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', 
+    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
       'nf', 'fo', 'ox']
      >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
-    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', 
+    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
       'rown', 'ownf', 'wnfo', 'nfox']
      """
      return [text[i:i+n] for i in range(len(text)-n+1)]
@@ -129,30 +129,29 @@ Pl = Pdist(datafile('count_1l.txt'), lambda _k, _N: 0)
  P2l = Pdist(datafile('count_2l.txt'), lambda _k, _N: 0)
  P3l = Pdist(datafile('count_3l.txt'), lambda _k, _N: 0)
  
-def Pwords(words): 
+def Pwords(words):
      """The Naive Bayes log probability of a sequence of words.
      """
      return sum(Pw[w.lower()] for w in words)
  
-def Pwords_wrong(words): 
+def Pwords_wrong(words):
      """The Naive Bayes log probability of a sequence of words.
      """
      return sum(Pw_wrong[w.lower()] for w in words)
  
-
  def Pletters(letters):
      """The Naive Bayes log probability of a sequence of letters.
      """
      return sum(Pl[l.lower()] for l in letters)
  
  def Pbigrams(letters):
-    """The Naive Bayes log probability of the bigrams formed from a sequence 
+    """The Naive Bayes log probability of the bigrams formed from a sequence
      of letters.
      """
      return sum(P2l[p] for p in ngrams(letters, 2))
  
  def Ptrigrams(letters):
-    """The Naive Bayes log probability of the trigrams formed from a sequence 
+    """The Naive Bayes log probability of the trigrams formed from a sequence
      of letters.
      """
      return sum(P3l[p] for p in ngrams(letters, 3))
@@ -165,8 +164,8 @@ def cosine_similarity_score(text):
      >>> cosine_similarity_score('abcabc') # doctest: +ELLIPSIS
      0.26228882...
      """
-    return norms.cosine_similarity(english_counts, 
-        collections.Counter(sanitise(text)))
+    return norms.cosine_similarity(english_counts,
+                                   collections.Counter(sanitise(text)))
  
  
  if __name__ == "__main__":
diff --git a/norms.py b/norms.py

index b8e4bf1ef82c8b8ea6f85c0e3c3597d90b7d9b8b..36e7fa4440b41942386e550b4403d2bf7d59e058 100644 (file)
--- a/norms.py
+++ b/norms.py
@@ -1,9 +1,10 @@
+"""Define a variety of norms for finding distances between vectors"""
+
  import collections
-from math import log10
  
  def normalise(frequencies):
      """Scale a set of frequencies so they sum to one
-    
+
      >>> sorted(normalise({1: 1, 2: 0}).items())
      [(1, 1.0), (2, 0.0)]
      >>> sorted(normalise({1: 1, 2: 1}).items())
@@ -14,12 +15,12 @@ def normalise(frequencies):
      [(1, 0.25), (2, 0.5), (3, 0.25)]
      """
      length = sum([f for f in frequencies.values()])
-    return collections.defaultdict(int, ((k, v / length) 
+    return collections.defaultdict(int, ((k, v / length)
          for (k, v) in frequencies.items()))
  
  def euclidean_scale(frequencies):
      """Scale a set of frequencies so they have a unit euclidean length
-    
+
      >>> sorted(euclidean_scale({1: 1, 2: 0}).items())
      [(1, 1.0), (2, 0.0)]
      >>> sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
@@ -30,17 +31,21 @@ def euclidean_scale(frequencies):
      [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
      """
      length = sum([f ** 2 for f in frequencies.values()]) ** 0.5
-    return collections.defaultdict(int, ((k, v / length) 
+    return collections.defaultdict(int, ((k, v / length)
          for (k, v) in frequencies.items()))
  
  def identity_scale(frequencies):
+    """Don't scale a set of frequencies. (For use when a function expects a
+    scaling function but you don't want to supply one.)
+    """
      return frequencies
          
  
  def l2(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as dictionaries.
+    """Finds the distances between two frequency profiles, expressed as
+    dictionaries.
      Assumes every key in frequencies1 is also in frequencies2
-    
+
      >>> l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
      0.0
      >>> l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
@@ -62,7 +67,7 @@ def l2(frequencies1, frequencies2):
  euclidean_distance = l2
  
  def l1(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as 
+    """Finds the distances between two frequency profiles, expressed as
      dictionaries. Assumes every key in frequencies1 is also in frequencies2
  
      >>> l1({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
@@ -82,7 +87,7 @@ def l1(frequencies1, frequencies2):
      return total
  
  def l3(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as 
+    """Finds the distances between two frequency profiles, expressed as
      dictionaries. Assumes every key in frequencies1 is also in frequencies2
  
      >>> l3({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
@@ -105,10 +110,10 @@ def l3(frequencies1, frequencies2):
      return total ** (1/3)
  
  def geometric_mean(frequencies1, frequencies2):
-    """Finds the geometric mean of the absolute differences between two frequency profiles, 
-    expressed as dictionaries.
+    """Finds the geometric mean of the absolute differences between two
+    frequency profiles, expressed as dictionaries.
      Assumes every key in frequencies1 is also in frequencies2
-    
+
      >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
      1
      >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
@@ -131,8 +136,8 @@ def geometric_mean(frequencies1, frequencies2):
      return total
  
  def harmonic_mean(frequencies1, frequencies2):
-    """Finds the harmonic mean of the absolute differences between two frequency profiles, 
-    expressed as dictionaries.
+    """Finds the harmonic mean of the absolute differences between two
+    frequency profiles, expressed as dictionaries.
      Assumes every key in frequencies1 is also in frequencies2
  
      >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
@@ -160,7 +165,8 @@ def harmonic_mean(frequencies1, frequencies2):
  
  
  def cosine_similarity(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as dictionaries.
+    """Finds the distances between two frequency profiles, expressed as 
+    dictionaries.
      Assumes every key in frequencies1 is also in frequencies2
  
      >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
diff --git a/segment.py b/segment.py

index 1af1b62fc8eb3270c35c4bb39a773804faf8da47..a64ea5d4eb4248edaaff12c791df088f8109e21c 100644 (file)
--- a/segment.py
+++ b/segment.py
@@ -1,3 +1,5 @@
+"""Segment a collection of letters into words"""
+
  import language_models
  import sys
  from functools import lru_cache
@@ -8,7 +10,7 @@ def segment(text):
      """Return a list of words that is the best segmentation of text.
      """
      if not text: return []
-    candidates = ([first]+segment(rest) for first,rest in splits(text))
+    candidates = ([first]+segment(rest) for first, rest in splits(text))
      return max(candidates, key=language_models.Pwords)
  
  @lru_cache()
@@ -16,13 +18,13 @@ def segment_wrong(text):
      """Return a list of words that is the best segmentation of text.
      """
      if not text: return []
-    candidates = ([first]+segment(rest) for first,rest in splits(text))
+    candidates = ([first]+segment(rest) for first, rest in splits(text))
      return max(candidates, key=language_models.Pwords_wrong)
  
  
  def splits(text, L=20):
      """Return a list of all possible (first, rest) pairs, len(first)<=L.
      """
-    return [(text[:i+1], text[i+1:]) 
+    return [(text[:i+1], text[i+1:])
              for i in range(min(len(text), L))]
author	Neil Smith <neil.git@njae.me.uk>
	Sun, 13 Jul 2014 22:13:57 +0000 (23:13 +0100)
committer	Neil Smith <neil.git@njae.me.uk>
	Sun, 13 Jul 2014 22:13:57 +0000 (23:13 +0100)
cipher.py		patch \| blob \| history
cipherbreak.py		patch \| blob \| history
language_models.py		patch \| blob \| history
norms.py		patch \| blob \| history
segment.py		patch \| blob \| history