Merge branch 'columns' into neil

[cipher-tools.git] / cipher.py
diff --git a/cipher.py b/cipher.py

index c5cd6060e208dff51eb7a1cec59649558844a62a..ba4a73f7f39ab8d712b9ba9e52d49e269e1999ed 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -3,7 +3,7 @@ import collections
  import norms
  import logging
  import math
-from itertools import zip_longest
+from itertools import zip_longest, repeat
  from segment import segment
  from multiprocessing import Pool
  
@@ -12,7 +12,7 @@ from multiprocessing import Pool
  # import timeit
  # c5a = open('2012/5a.ciphertext', 'r').read()
  # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
-
+# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
  
  logger = logging.getLogger(__name__)
  logger.addHandler(logging.FileHandler('cipher.log'))
@@ -34,6 +34,14 @@ with open('count_2l.txt', 'r') as f:
          english_bigram_counts[bigram] = int(count)
  normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
  
+english_trigram_counts = collections.defaultdict(int)
+with open('count_3l.txt', 'r') as f:
+    for line in f:
+        (trigram, count) = line.split("\t")
+        english_trigram_counts[trigram] = int(count)
+normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
+
+
  with open('words.txt', 'r') as f:
      keywords = [line.rstrip() for line in f]
  
@@ -43,6 +51,14 @@ for a in range(26):
          c = (a * b) % 26
          modular_division_table[b][c] = a
  
+def letters(text):
+    """Remove all non-alphabetic characters from a text
+    >>> letters('The Quick')
+    'TheQuick'
+    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
+    'TheQuickBROWNfoxjumpedoverthelazyDOG'
+    """
+    return ''.join([c for c in text if c in string.ascii_letters])
  
  def sanitise(text):
      """Remove all non-alphabetic characters and convert the text to lowercase
@@ -52,32 +68,38 @@ def sanitise(text):
      >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
      'thequickbrownfoxjumpedoverthelazydog'
      """
-    sanitised = [c.lower() for c in text if c in string.ascii_letters]
-    return ''.join(sanitised)
+    # sanitised = [c.lower() for c in text if c in string.ascii_letters]
+    # return ''.join(sanitised)
+    return letters(text).lower()
  
  def ngrams(text, n):
      """Returns all n-grams of a text
      
-    >>> ngrams(sanitise('the quick brown fox'), 2)
-    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', 'nf', 'fo', 'ox']
-    >>> ngrams(sanitise('the quick brown fox'), 4)
-    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', 'rown', 'ownf', 'wnfo', 'nfox']
+    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
+    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', 
+     'nf', 'fo', 'ox']
+    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
+    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', 
+     'rown', 'ownf', 'wnfo', 'nfox']
      """
      return [text[i:i+n] for i in range(len(text)-n+1)]
  
-def every_nth(text, n):
+def every_nth(text, n, fillvalue=''):
      """Returns n strings, each of which consists of every nth character, 
      starting with the 0th, 1st, 2nd, ... (n-1)th character
      
      >>> every_nth(string.ascii_lowercase, 5)
      ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
-    >>> every_nth(string.ascii_lowercase, 1)                                                                                                              
+    >>> every_nth(string.ascii_lowercase, 1)
      ['abcdefghijklmnopqrstuvwxyz']
-    >>> every_nth(string.ascii_lowercase, 26)
-    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
+    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
+     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
+    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
      """
      split_text = [text[i:i+n] for i in range(0, len(text), n)]
-    return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
+    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
  
  def combine_every_nth(split_text):
      """Reforms a text split into every_nth strings
@@ -89,7 +111,38 @@ def combine_every_nth(split_text):
      >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
      'abcdefghijklmnopqrstuvwxyz'
      """
-    return ''.join([''.join(l) for l in zip_longest(*split_text, fillvalue='')])
+    return ''.join([''.join(l) 
+                    for l in zip_longest(*split_text, fillvalue='')])
+
+def transpose(items, transposition):
+    """Moves items around according to the given transposition
+    
+    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
+    ['a', 'b', 'c', 'd']
+    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
+    ['d', 'b', 'c', 'a']
+    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])  
+    [13, 12, 14, 11, 15, 10]
+    """
+    transposed = list(repeat('', len(transposition)))
+    for p, t in enumerate(transposition):
+       transposed[p] = items[t]
+    return transposed
+
+def untranspose(items, transposition):
+    """Undoes a transpose
+    
+    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
+    ['a', 'b', 'c', 'd']
+    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
+    ['a', 'b', 'c', 'd']
+    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
+    [10, 11, 12, 13, 14, 15]
+    """
+    transposed  = list(repeat('', len(transposition)))
+    for p, t in enumerate(transposition):
+       transposed[t] = items[p]
+    return transposed
  
  
  def frequencies(text):
@@ -97,17 +150,33 @@ def frequencies(text):
      
      >>> sorted(frequencies('abcdefabc').items())
      [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
-    >>> sorted(frequencies('the quick brown fox jumped over the lazy dog').items())
-    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
-    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
-    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
-    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
-    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
-    """
-    counts = collections.defaultdict(int)
-    for c in text: 
-        counts[c] += 1
-    return counts
+    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
+         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
+    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), 
+     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), 
+     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), 
+     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
+    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
+         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
+    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), 
+     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), 
+     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), 
+     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), 
+     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
+    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
+         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
+    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), 
+     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), 
+     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), 
+     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
+    >>> frequencies('abcdefabcdef')['x']
+    0
+    """
+    #counts = collections.defaultdict(int)
+    #for c in text: 
+    #    counts[c] += 1
+    #return counts
+    return collections.Counter(c for c in text)
  letter_frequencies = frequencies
  
  def deduplicate(text):
@@ -140,7 +209,8 @@ def caesar_encipher_letter(letter, shift):
              alphabet_start = ord('A')
          else:
              alphabet_start = ord('a')
-        return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
+        return chr(((ord(letter) - alphabet_start + shift) % 26) + 
+                   alphabet_start)
      else:
          return letter
  
@@ -184,9 +254,11 @@ def caesar_decipher(message, shift):
  def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
      """Encipher a letter, given a multiplier and adder
      
-    >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
+    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
+            for l in string.ascii_uppercase])
      'HKNQTWZCFILORUXADGJMPSVYBE'
-    >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
+    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
+            for l in string.ascii_uppercase])
      'FILORUXADGJMPSVYBEHKNQTWZC'
      """
      if letter in string.ascii_letters:
@@ -205,9 +277,11 @@ def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
  def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
      """Encipher a letter, given a multiplier and adder
      
-    >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
+    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
+            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-    >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
+    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
+            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
      """
      if letter in string.ascii_letters:
@@ -217,7 +291,8 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
              alphabet_start = ord('a')
          cipher_number = ord(letter) - alphabet_start
          if one_based: cipher_number += 1
-        plaintext_number = modular_division_table[multiplier][(cipher_number - adder) % 26]
+        plaintext_number = ( modular_division_table[multiplier]
+                                                   [(cipher_number - adder) % 26] )
          if one_based: plaintext_number -= 1
          return chr(plaintext_number % 26 + alphabet_start) 
      else:
@@ -226,19 +301,23 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
  def affine_encipher(message, multiplier=1, adder=0, one_based=True):
      """Encipher a message
      
-    >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
+    >>> affine_encipher('hours passed during which jerico tried every ' \
+           'trick he could think of', 15, 22, True)
      'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
      """
-    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message]
+    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) 
+                  for l in message]
      return ''.join(enciphered)
  
  def affine_decipher(message, multiplier=1, adder=0, one_based=True):
      """Decipher a message
      
-    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
+    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
+           'jfaoe ls omytd jlaxe mh', 15, 22, True)
      'hours passed during which jerico tried every trick he could think of'
      """
-    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) for l in message]
+    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) 
+                  for l in message]
      return ''.join(enciphered)
  
  
@@ -260,16 +339,19 @@ def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
      'bayeszcdfghijklmnopqrtuvwx'
      """
      if wrap_alphabet == 0:
-        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
+        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + 
+                                              string.ascii_lowercase))
      else:
          if wrap_alphabet == 1:
              last_keyword_letter = deduplicate(sanitise(keyword))[-1]
          else:
              last_keyword_letter = sorted(sanitise(keyword))[-1]
-        last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
-        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + 
-                                              string.ascii_lowercase[last_keyword_position:] + 
-                                              string.ascii_lowercase))
+        last_keyword_position = string.ascii_lowercase.find(
+            last_keyword_letter) + 1
+        cipher_alphabet = ''.join(
+            deduplicate(sanitise(keyword) + 
+                        string.ascii_lowercase[last_keyword_position:] + 
+                        string.ascii_lowercase))
      return cipher_alphabet
  
  
@@ -333,7 +415,8 @@ def scytale_encipher(message, rows):
      if len(message) % rows != 0:
          message += ' '*(rows - len(message) % rows)
      row_length = round(len(message) / rows)
-    slices = [message[i:i+row_length] for i in range(0, len(message), row_length)]
+    slices = [message[i:i+row_length] 
+              for i in range(0, len(message), row_length)]
      return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
  
  def scytale_decipher(message, rows):
@@ -356,14 +439,79 @@ def scytale_decipher(message, rows):
      return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
  
  
-def caesar_break(message, metric=norms.euclidean_distance, target_counts=normalised_english_counts, message_frequency_scaling=norms.normalise):
+def transpositions_of(keyword):
+    """Finds the transpostions given by a keyword. For instance, the keyword
+    'clever' rearranges to 'celrv', so the first column (0) stays first, the
+    second column (1) moves to third, the third column (2) moves to second, 
+    and so on.
+
+    >>> transpositions_of('clever')
+    [0, 2, 1, 4, 3]
+    """
+    key = deduplicate(keyword)
+    transpositions = [key.index(l) for l in sorted(key)]
+    return transpositions
+
+def column_transposition_encipher(message, keyword, fillvalue=' '):
+    """Enciphers using the column transposition cipher.
+    Message is padded to allow all rows to be the same length.
+
+    >>> column_transposition_encipher('hellothere', 'clever')
+    'hleolteher'
+    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+    'hleolthre!e!'
+    """
+    return column_transposition_worker(message, keyword, encipher=True, 
+                                       fillvalue=fillvalue)
+
+def column_transposition_decipher(message, keyword, fillvalue=' '):
+    """Deciphers using the column transposition cipher.
+    Message is padded to allow all rows to be the same length.
+
+    >>> column_transposition_decipher('hleolteher', 'clever')
+    'hellothere'
+    >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
+    'hellothere!!'
+    """
+    return column_transposition_worker(message, keyword, encipher=False, 
+                                       fillvalue=fillvalue)
+
+def column_transposition_worker(message, keyword, 
+                                encipher=True, fillvalue=' '):
+    """Does the actual work of the column transposition cipher.
+    Message is padded with spaces to allow all rows to be the same length.
+
+    >>> column_transposition_worker('hellothere', 'clever')
+    'hleolteher'
+    >>> column_transposition_worker('hellothere', 'clever', encipher=True)
+    'hleolteher'
+    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
+    'hellothere'
+    """
+    transpositions = transpositions_of(keyword)
+    columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
+    if encipher:
+        transposed_columns = transpose(columns, transpositions)
+    else:
+        transposed_columns = untranspose(columns, transpositions)
+    return combine_every_nth(transposed_columns)
+
+
+
+def caesar_break(message, 
+                 metric=norms.euclidean_distance, 
+                 target_counts=normalised_english_counts, 
+                 message_frequency_scaling=norms.normalise):
      """Breaks a Caesar cipher using frequency analysis
      
-    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
+    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
+          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
      (4, 0.31863952890183...)
-    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
+    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
+          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
      (19, 0.42152901235832...)
-    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
+    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
+          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
      (13, 0.316029208075451...)
      """
      sanitised_message = sanitise(message)
@@ -373,17 +521,28 @@ def caesar_break(message, metric=norms.euclidean_distance, target_counts=normali
          plaintext = caesar_decipher(sanitised_message, shift)
          counts = message_frequency_scaling(letter_frequencies(plaintext))
          fit = metric(target_counts, counts)
-        logger.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
+        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
+                      'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
          if fit < best_fit:
              best_fit = fit
              best_shift = shift
-    logger.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift, best_fit, caesar_decipher(sanitised_message, best_shift)[:50]))
+    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
+                'decrypt starting: {2}'.format(best_shift, best_fit, 
+                    caesar_decipher(sanitised_message, best_shift)[:50]))
      return best_shift, best_fit
  
-def affine_break(message, metric=norms.euclidean_distance, target_counts=normalised_english_counts, message_frequency_scaling=norms.normalise):
+def affine_break(message, 
+                 metric=norms.euclidean_distance, 
+                 target_counts=normalised_english_counts, 
+                 message_frequency_scaling=norms.normalise):
      """Breaks an affine cipher using frequency analysis
      
-    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
+    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
+          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
+          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
+          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
+          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
+          'clm ckuxj.') # doctest: +ELLIPSIS
      ((15, 22, True), 0.23570361818655...)
      """
      sanitised_message = sanitise(message)
@@ -394,23 +553,37 @@ def affine_break(message, metric=norms.euclidean_distance, target_counts=normali
      for one_based in [True, False]:
          for multiplier in range(1, 26, 2):
              for adder in range(26):
-                plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
+                plaintext = affine_decipher(sanitised_message, 
+                                            multiplier, adder, one_based)
                  counts = message_frequency_scaling(letter_frequencies(plaintext))
                  fit = metric(target_counts, counts)
-                logger.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
+                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
+                             'gives fit of {3} and decrypt starting: {4}'.
+                             format(multiplier, adder, one_based, fit, 
+                                    plaintext[:50]))
                  if fit < best_fit:
                      best_fit = fit
                      best_multiplier = multiplier
                      best_adder = adder
                      best_one_based = one_based
-    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier, best_adder, best_one_based, best_fit, affine_decipher(sanitised_message, best_multiplier, best_adder, best_one_based)[:50]))
+    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
+                'and decrypt starting: {4}'.format(
+                    best_multiplier, best_adder, best_one_based, best_fit, 
+                    affine_decipher(sanitised_message, best_multiplier, 
+                        best_adder, best_one_based)[:50]))
      return (best_multiplier, best_adder, best_one_based), best_fit
  
-
-def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_counts=normalised_english_counts, message_frequency_scaling=norms.normalise):
-    """Breaks a keyword substitution cipher using a dictionary and frequency analysis
-
-    >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+def keyword_break(message, 
+                  wordlist=keywords, 
+                  metric=norms.euclidean_distance, 
+                  target_counts=normalised_english_counts, 
+                  message_frequency_scaling=norms.normalise):
+    """Breaks a keyword substitution cipher using a dictionary and 
+    frequency analysis
+
+    >>> keyword_break(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', 1), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
      (('elephant', 1), 0.41643991598441...)
      """
      best_keyword = ''
@@ -421,38 +594,63 @@ def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, t
              plaintext = keyword_decipher(message, keyword, wrap_alphabet)
              counts = message_frequency_scaling(letter_frequencies(plaintext))
              fit = metric(target_counts, counts)
-            logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
+            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
+                         'gives fit of {2} and decrypt starting: {3}'.format(
+                             keyword, wrap_alphabet, fit, 
+                             sanitise(plaintext)[:50]))
              if fit < best_fit:
                  best_fit = fit
                  best_keyword = keyword
                  best_wrap_alphabet = wrap_alphabet
-    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50]))
+    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
+                '{2} and decrypt starting: {3}'.format(best_keyword, 
+                    best_wrap_alphabet, best_fit, sanitise(
+                        keyword_decipher(message, best_keyword, 
+                                         best_wrap_alphabet))[:50]))
      return (best_keyword, best_wrap_alphabet), best_fit
  
-def keyword_break_mp(message, wordlist=keywords, metric=norms.euclidean_distance, target_counts=normalised_english_counts, message_frequency_scaling=norms.normalise, chunksize=500):
-    """Breaks a keyword substitution cipher using a dictionary and frequency analysis
-
-    >>> keyword_break_mp(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+def keyword_break_mp(message, 
+                     wordlist=keywords, 
+                     metric=norms.euclidean_distance, 
+                     target_counts=normalised_english_counts, 
+                     message_frequency_scaling=norms.normalise, 
+                     chunksize=500):
+    """Breaks a keyword substitution cipher using a dictionary and 
+    frequency analysis
+
+    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', 1), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
      (('elephant', 1), 0.41643991598441...)
      """
      with Pool() as pool:
-        helper_args = [(message, word, wrap, metric, target_counts, message_frequency_scaling) for word in wordlist for wrap in range(3)]
-        # breaks = map(lambda kw: keyword_break_one(message, kw[0], kw[1], metric, target_counts, message_frequency_scaling), keys)
-        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
+        helper_args = [(message, word, wrap, metric, target_counts, 
+                        message_frequency_scaling) 
+                       for word in wordlist for wrap in range(3)]
+        # Gotcha: the helper function here needs to be defined at the top level 
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(keyword_break_one, helper_args, chunksize) 
          return min(breaks, key=lambda k: k[1])
  
-def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts, message_frequency_scaling):
+def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts, 
+                      message_frequency_scaling):
      plaintext = keyword_decipher(message, keyword, wrap_alphabet)
      counts = message_frequency_scaling(letter_frequencies(plaintext))
      fit = metric(target_counts, counts)
-    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
+    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
+                 '{2} and decrypt starting: {3}'.format(keyword, 
+                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
      return (keyword, wrap_alphabet), fit
  
-
-def scytale_break(message, metric=norms.euclidean_distance, target_counts=normalised_english_bigram_counts, message_frequency_scaling=norms.normalise):
+def scytale_break(message, 
+                  metric=norms.euclidean_distance, 
+                  target_counts=normalised_english_bigram_counts, 
+                  message_frequency_scaling=norms.normalise):
      """Breaks a Scytale cipher
      
-    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoeredcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriiotoaek') # doctest: +ELLIPSIS
+    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
+           'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
+           'aek') # doctest: +ELLIPSIS
      (6, 0.83453041115025...)
      """
      best_key = 0
@@ -460,15 +658,112 @@ def scytale_break(message, metric=norms.euclidean_distance, target_counts=normal
      for key in range(1, 20):
          if len(message) % key == 0:
              plaintext = scytale_decipher(message, key)
-            counts = message_frequency_scaling(frequencies(ngrams(sanitise(plaintext), 2)))
+            counts = message_frequency_scaling(frequencies(
+                         ngrams(sanitise(plaintext), 2)))
              fit = metric(target_counts, counts)
-            logger.debug('Scytale break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(key, fit, sanitise(plaintext)[:50]))
+            logger.debug('Scytale break attempt using key {0} gives fit of '
+                         '{1} and decrypt starting: {2}'.format(key, 
+                             fit, sanitise(plaintext)[:50]))
              if fit < best_fit:
                  best_fit = fit
                  best_key = key
-    logger.info('Scytale break best fit with key {0} gives fit of {1} and decrypt starting: {2}'.format(best_key, best_fit, sanitise(scytale_decipher(message, best_key))[:50]))
+    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
+                'decrypt starting: {2}'.format(best_key, best_fit, 
+                    sanitise(scytale_decipher(message, best_key))[:50]))
      return best_key, best_fit
  
+def column_transposition_break(message, 
+                  wordlist=keywords, 
+                  metric=norms.euclidean_distance, 
+                  target_counts=normalised_english_bigram_counts, 
+                  message_frequency_scaling=norms.normalise):
+    """Breaks a column transposition cipher using a dictionary and 
+    n-gram frequency analysis
+
+    >>> column_transposition_break(column_transposition_encipher(sanitise( \
+        "Turing's homosexuality resulted in a criminal prosecution in 1952, \
+        when homosexual acts were still illegal in the United Kingdom. "), \
+        'encipher'), \
+        wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
+    ('encipher', 0.898128626285...)
+    >>> column_transposition_break(column_transposition_encipher(sanitise( \
+        "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
+        "when homosexual acts were still illegal in the United Kingdom."), \
+        'encipher'), \
+        wordlist=['encipher', 'keyword', 'fourteen'], \
+        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
+    ('encipher', 1.1958792913127...)
+    """
+    best_keyword = ''
+    best_fit = float("inf")
+    ngram_length = len(next(iter(target_counts.keys())))
+    for keyword in wordlist:
+        if len(message) % len(deduplicate(keyword)) == 0:
+            plaintext = column_transposition_decipher(message, keyword)
+            counts = message_frequency_scaling(frequencies(
+                         ngrams(sanitise(plaintext), ngram_length)))
+            fit = metric(target_counts, counts)
+            logger.debug('Column transposition break attempt using key {0} '
+                         'gives fit of {1} and decrypt starting: {2}'.format(
+                             keyword, fit, 
+                             sanitise(plaintext)[:50]))
+            if fit < best_fit:
+                best_fit = fit
+                best_keyword = keyword
+    logger.info('Column transposition break best fit with key {0} gives fit '
+                'of {1} and decrypt starting: {2}'.format(best_keyword, 
+                    best_fit, sanitise(
+                        column_transposition_decipher(message, 
+                            best_keyword))[:50]))
+    return best_keyword, best_fit
+
+
+def column_transposition_break_mp(message, 
+                     wordlist=keywords, 
+                     metric=norms.euclidean_distance, 
+                     target_counts=normalised_english_bigram_counts, 
+                     message_frequency_scaling=norms.normalise, 
+                     chunksize=500):
+    """Breaks a column transposition cipher using a dictionary and 
+    n-gram frequency analysis
+
+    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+        "Turing's homosexuality resulted in a criminal prosecution in 1952, \
+        when homosexual acts were still illegal in the United Kingdom. "), \
+        'encipher'), \
+        wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
+    ('encipher', 0.898128626285...)
+    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+        "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
+        "when homosexual acts were still illegal in the United Kingdom."), \
+        'encipher'), \
+        wordlist=['encipher', 'keyword', 'fourteen'], \
+        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
+    ('encipher', 1.1958792913127...)
+    """
+    ngram_length = len(next(iter(target_counts.keys())))
+    with Pool() as pool:
+        helper_args = [(message, word, metric, target_counts, ngram_length,
+                        message_frequency_scaling) 
+                       for word in wordlist]
+        # Gotcha: the helper function here needs to be defined at the top level 
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize) 
+        return min(breaks, key=lambda k: k[1])
+
+def column_transposition_break_worker(message, keyword, metric, target_counts, 
+                      ngram_length, message_frequency_scaling):
+    plaintext = column_transposition_decipher(message, keyword)
+    counts = message_frequency_scaling(frequencies(
+                         ngrams(sanitise(plaintext), ngram_length)))
+    fit = metric(target_counts, counts)
+    logger.debug('Column transposition break attempt using key {0} '
+                         'gives fit of {1} and decrypt starting: {2}'.format(
+                             keyword, fit, 
+                             sanitise(plaintext)[:50]))
+    return keyword, fit
+
+
  
  if __name__ == "__main__":
      import doctest