Column transposition working, needs more tests

[cipher-tools.git] / cipher.py
diff --git a/cipher.py b/cipher.py

index 024d330f3ef22b438f97b019581380bec09168f9..7d3ba78e4045082e394608620f1ac3c6b16733da 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -3,7 +3,7 @@ import collections
  import norms
  import logging
  import math
-from itertools import zip_longest
+from itertools import zip_longest, repeat
  from segment import segment
  from multiprocessing import Pool
  
@@ -67,20 +67,22 @@ def ngrams(text, n):
      """
      return [text[i:i+n] for i in range(len(text)-n+1)]
  
-def every_nth(text, n):
+def every_nth(text, n, fillvalue=''):
      """Returns n strings, each of which consists of every nth character, 
      starting with the 0th, 1st, 2nd, ... (n-1)th character
      
      >>> every_nth(string.ascii_lowercase, 5)
      ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
-    >>> every_nth(string.ascii_lowercase, 1)                                                                                                              
+    >>> every_nth(string.ascii_lowercase, 1)
      ['abcdefghijklmnopqrstuvwxyz']
      >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
      ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
       'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
+    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
      """
      split_text = [text[i:i+n] for i in range(0, len(text), n)]
-    return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
+    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
  
  def combine_every_nth(split_text):
      """Reforms a text split into every_nth strings
@@ -95,24 +97,57 @@ def combine_every_nth(split_text):
      return ''.join([''.join(l) 
                      for l in zip_longest(*split_text, fillvalue='')])
  
+def transpose(items, transposition):
+    """Moves items around according to the given transposition
+    
+    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
+    ['a', 'b', 'c', 'd']
+    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
+    ['d', 'b', 'c', 'a']
+    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])  
+    [13, 12, 14, 11, 15, 10]
+    """
+    transposed = list(repeat('', len(transposition)))
+    for p, t in enumerate(transposition):
+       transposed[p] = items[t]
+    return transposed
+
+def untranspose(items, transposition):
+    """Undoes a transpose
+    
+    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
+    ['a', 'b', 'c', 'd']
+    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
+    ['a', 'b', 'c', 'd']
+    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
+    [10, 11, 12, 13, 14, 15]
+    """
+    transposed  = list(repeat('', len(transposition)))
+    for p, t in enumerate(transposition):
+       transposed[t] = items[p]
+    return transposed
+
  
  def frequencies(text):
      """Count the number of occurrences of each character in text
      
      >>> sorted(frequencies('abcdefabc').items())
      [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
-    >>> sorted(frequencies('the quick brown fox jumped over the lazy dog').items()) # doctest: +NORMALIZE_WHITESPACE
+    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
+         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
      [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), 
       ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), 
       ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), 
       ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
-    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
+    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
+         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
      [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), 
       ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), 
       ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), 
       ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), 
       ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
-    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
+    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
+         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
      [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), 
       ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), 
       ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), 
@@ -199,9 +234,11 @@ def caesar_decipher(message, shift):
  def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
      """Encipher a letter, given a multiplier and adder
      
-    >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
+    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
+            for l in string.ascii_uppercase])
      'HKNQTWZCFILORUXADGJMPSVYBE'
-    >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
+    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
+            for l in string.ascii_uppercase])
      'FILORUXADGJMPSVYBEHKNQTWZC'
      """
      if letter in string.ascii_letters:
@@ -220,9 +257,11 @@ def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
  def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
      """Encipher a letter, given a multiplier and adder
      
-    >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
+    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
+            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-    >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
+    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
+            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
      """
      if letter in string.ascii_letters:
@@ -242,7 +281,8 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
  def affine_encipher(message, multiplier=1, adder=0, one_based=True):
      """Encipher a message
      
-    >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
+    >>> affine_encipher('hours passed during which jerico tried every ' \
+           'trick he could think of', 15, 22, True)
      'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
      """
      enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) 
@@ -252,7 +292,8 @@ def affine_encipher(message, multiplier=1, adder=0, one_based=True):
  def affine_decipher(message, multiplier=1, adder=0, one_based=True):
      """Decipher a message
      
-    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
+    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
+           'jfaoe ls omytd jlaxe mh', 15, 22, True)
      'hours passed during which jerico tried every trick he could think of'
      """
      enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) 
@@ -378,17 +419,53 @@ def scytale_decipher(message, rows):
      return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
  
  
+def transpositions_of(keyword):
+    """
+    >>> transpositions_of('clever')
+    [0, 2, 1, 4, 3]
+    """
+    transpositions = []
+    key = deduplicate(keyword)
+    for l in sorted(key):
+        transpositions += [key.index(l)]
+    return transpositions
+
+def column_transposition_encipher(message, keyword):
+    """
+    >>> column_transposition_encipher('hellothere', 'clever')
+    'hleolteher'
+    """
+    transpositions = transpositions_of(keyword)
+    columns = every_nth(message, len(transpositions), fillvalue=' ')
+    transposed_columns = transpose(columns, transpositions)
+    return combine_every_nth(transposed_columns)
+
+def column_transposition_decipher(message, keyword):
+    """
+    >>> column_transposition_decipher('hleolteher', 'clever')
+    'hellothere'
+    """
+    transpositions = transpositions_of(keyword)
+    columns = every_nth(message, len(transpositions), fillvalue=' ')
+    transposed_columns = untranspose(columns, transpositions)
+    return combine_every_nth(transposed_columns)
+
+
+
  def caesar_break(message, 
                   metric=norms.euclidean_distance, 
                   target_counts=normalised_english_counts, 
                   message_frequency_scaling=norms.normalise):
      """Breaks a Caesar cipher using frequency analysis
      
-    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
+    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
+          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
      (4, 0.31863952890183...)
-    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
+    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
+          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
      (19, 0.42152901235832...)
-    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
+    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
+          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
      (13, 0.316029208075451...)
      """
      sanitised_message = sanitise(message)
@@ -414,7 +491,12 @@ def affine_break(message,
                   message_frequency_scaling=norms.normalise):
      """Breaks an affine cipher using frequency analysis
      
-    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
+    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
+          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
+          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
+          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
+          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
+          'clm ckuxj.') # doctest: +ELLIPSIS
      ((15, 22, True), 0.23570361818655...)
      """
      sanitised_message = sanitise(message)
@@ -453,7 +535,9 @@ def keyword_break(message,
      """Breaks a keyword substitution cipher using a dictionary and 
      frequency analysis
  
-    >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+    >>> keyword_break(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', 1), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
      (('elephant', 1), 0.41643991598441...)
      """
      best_keyword = ''
@@ -488,14 +572,18 @@ def keyword_break_mp(message,
      """Breaks a keyword substitution cipher using a dictionary and 
      frequency analysis
  
-    >>> keyword_break_mp(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', 1), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
      (('elephant', 1), 0.41643991598441...)
      """
      with Pool() as pool:
          helper_args = [(message, word, wrap, metric, target_counts, 
                          message_frequency_scaling) 
                         for word in wordlist for wrap in range(3)]
-        breaks = pool.starmap(keyword_break_one, helper_args, chunksize) # Gotcha: the helper function here needs to be defined at the top level (limitation of Pool.starmap)
+        # Gotcha: the helper function here needs to be defined at the top level 
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(keyword_break_one, helper_args, chunksize) 
          return min(breaks, key=lambda k: k[1])
  
  def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts, 
@@ -514,7 +602,9 @@ def scytale_break(message,
                    message_frequency_scaling=norms.normalise):
      """Breaks a Scytale cipher
      
-    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoeredcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriiotoaek') # doctest: +ELLIPSIS
+    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
+           'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
+           'aek') # doctest: +ELLIPSIS
      (6, 0.83453041115025...)
      """
      best_key = 0