Hillclimbing to solve monosubstitution ciphers

author Neil Smith <neil.git@njae.me.uk>

Mon, 14 Jul 2014 18:51:17 +0000 (19:51 +0100)

committer Neil Smith <neil.git@njae.me.uk>

Mon, 14 Jul 2014 18:51:17 +0000 (19:51 +0100)
author Neil Smith <neil.git@njae.me.uk>
Mon, 14 Jul 2014 18:51:17 +0000 (19:51 +0100)
committer Neil Smith <neil.git@njae.me.uk>
Mon, 14 Jul 2014 18:51:17 +0000 (19:51 +0100)
diff --git a/cipher.py b/cipher.py

index 5a7781b7e0dc23db854bc8cd57631dcf3de2910a..d284afe94e0534b3e11fe6f819fc81d5fe6061db 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -62,35 +62,6 @@ def chunks(text, n, fillvalue=None):
          padding = ''
      return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
  
-def transpose(items, transposition):
-    """Moves items around according to the given transposition
-
-    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
-    ['a', 'b', 'c', 'd']
-    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
-    ['d', 'b', 'c', 'a']
-    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
-    [13, 12, 14, 11, 15, 10]
-    """
-    transposed = [''] * len(transposition)
-    for p, t in enumerate(transposition):
-        transposed[p] = items[t]
-    return transposed
-
-def untranspose(items, transposition):
-    """Undoes a transpose
-
-    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
-    ['a', 'b', 'c', 'd']
-    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
-    ['a', 'b', 'c', 'd']
-    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
-    [10, 11, 12, 13, 14, 15]
-    """
-    transposed = [''] * len(transposition)
-    for p, t in enumerate(transposition):
-        transposed[t] = items[p]
-    return transposed
  
  def deduplicate(text):
      """If a string contains duplicate letters, remove all but the first. Retain
@@ -363,175 +334,6 @@ beaufort_encipher = vigenere_decipher
  beaufort_decipher = vigenere_encipher
  
  
-def transpositions_of(keyword):
-    """Finds the transpostions given by a keyword. For instance, the keyword
-    'clever' rearranges to 'celrv', so the first column (0) stays first, the
-    second column (1) moves to third, the third column (2) moves to second,
-    and so on.
-
-    If passed a tuple, assume it's already a transposition and just return it.
-
-    >>> transpositions_of('clever')
-    (0, 2, 1, 4, 3)
-    >>> transpositions_of('fred')
-    (3, 2, 0, 1)
-    >>> transpositions_of((3, 2, 0, 1))
-    (3, 2, 0, 1)
-    """
-    if isinstance(keyword, tuple):
-        return keyword
-    else:
-        key = deduplicate(keyword)
-        transpositions = tuple(key.index(l) for l in sorted(key))
-        return transpositions
-
-def pad(message_len, group_len, fillvalue):
-    """Returns the padding required to extend a message of message_len to an
-    even multiple of group_len, by adding repreated copies of fillvalue.
-    fillvalue can either be a character or a function that returns a character.
-
-    >>> pad(10, 4, '!')
-    '!!'
-    >>> pad(8, 4, '!')
-    ''
-    >>> pad(16, 4, '!')
-    ''
-    >>> pad(10, 4, lambda: '*')
-    '**'
-    """
-    padding_length = group_len - message_len % group_len
-    if padding_length == group_len: padding_length = 0
-    padding = ''
-    for _ in range(padding_length):
-        if callable(fillvalue):
-            padding += fillvalue()
-        else:
-            padding += fillvalue
-    return padding
-
-def column_transposition_encipher(message, keyword, fillvalue=' ',
-                                  fillcolumnwise=False,
-                                  emptycolumnwise=False):
-    """Enciphers using the column transposition cipher.
-    Message is padded to allow all rows to be the same length.
-
-    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
-    'hlohr eltee '
-    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
-    'hellothere  '
-    >>> column_transposition_encipher('hellothere', 'abcdef')
-    'hellothere  '
-    >>> column_transposition_encipher('hellothere', 'abcde')
-    'hellothere'
-    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
-    'hellothere'
-    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
-    'hlohreltee'
-    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
-    'htehlelroe'
-    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
-    'hellothere'
-    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
-    'heotllrehe'
-    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
-    'holrhetlee'
-    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
-    'htleehoelr'
-    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
-    'hleolteher'
-    >>> column_transposition_encipher('hellothere', 'cleverly')
-    'hleolthre e '
-    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
-    'hleolthre!e!'
-    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
-    'hleolthre*e*'
-    """
-    transpositions = transpositions_of(keyword)
-    message += pad(len(message), len(transpositions), fillvalue)
-    if fillcolumnwise:
-        rows = every_nth(message, len(message) // len(transpositions))
-    else:
-        rows = chunks(message, len(transpositions))
-    transposed = [transpose(r, transpositions) for r in rows]
-    if emptycolumnwise:
-        return combine_every_nth(transposed)
-    else:
-        return ''.join(chain(*transposed))
-
-def column_transposition_decipher(message, keyword, fillvalue=' ',
-                                  fillcolumnwise=False,
-                                  emptycolumnwise=False):
-    """Deciphers using the column transposition cipher.
-    Message is padded to allow all rows to be the same length.
-
-    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
-    'hellothere'
-    >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
-    'hellothere'
-    >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
-    'hellothere'
-    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
-    'hellothere'
-    >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
-    'hellothere'
-    >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
-    'hellothere'
-    >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
-    'hellothere'
-    >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
-    'hellothere'
-    """
-    transpositions = transpositions_of(keyword)
-    message += pad(len(message), len(transpositions), '*')
-    if emptycolumnwise:
-        rows = every_nth(message, len(message) // len(transpositions))
-    else:
-        rows = chunks(message, len(transpositions))
-    untransposed = [untranspose(r, transpositions) for r in rows]
-    if fillcolumnwise:
-        return combine_every_nth(untransposed)
-    else:
-        return ''.join(chain(*untransposed))
-
-def scytale_encipher(message, rows, fillvalue=' '):
-    """Enciphers using the scytale transposition cipher.
-    Message is padded with spaces to allow all rows to be the same length.
-
-    >>> scytale_encipher('thequickbrownfox', 3)
-    'tcnhkfeboqrxuo iw '
-    >>> scytale_encipher('thequickbrownfox', 4)
-    'tubnhirfecooqkwx'
-    >>> scytale_encipher('thequickbrownfox', 5)
-    'tubn hirf ecoo qkwx '
-    >>> scytale_encipher('thequickbrownfox', 6)
-    'tqcrnxhukof eibwo '
-    >>> scytale_encipher('thequickbrownfox', 7)
-    'tqcrnx hukof  eibwo  '
-    """
-    transpositions = [i for i in range(rows)]
-    return column_transposition_encipher(message, transpositions,
-            fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
-
-def scytale_decipher(message, rows):
-    """Deciphers using the scytale transposition cipher.
-    Assumes the message is padded so that all rows are the same length.
-
-    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
-    'thequickbrownfox  '
-    >>> scytale_decipher('tubnhirfecooqkwx', 4)
-    'thequickbrownfox'
-    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
-    'thequickbrownfox    '
-    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
-    'thequickbrownfox  '
-    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
-    'thequickbrownfox     '
-    """
-    transpositions = [i for i in range(rows)]
-    return column_transposition_decipher(message, transpositions,
-        fillcolumnwise=True, emptycolumnwise=False)
-
-
  if __name__ == "__main__":
      import doctest
      doctest.testmod()
diff --git a/cipherbreak.py b/cipherbreak.py

index 56143891e3290c3e0c481238cc2c41bee4bfa82f..1adebe671c17fdc029d28746c67328b208d462f3 100644 (file)
--- a/cipherbreak.py
+++ b/cipherbreak.py
@@ -29,9 +29,6 @@ from language_models import *
  # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
  # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
  
-transpositions = collections.defaultdict(list)
-for word in keywords:
-    transpositions[transpositions_of(word)] += [word]
  
  def frequencies(text):
      """Count the number of occurrences of each character in text
diff --git a/language_models.py b/language_models.py

index 02d48bd89a3cf820ebb7f62212eb8c6fa18d1ca4..8f4bd9c34eb9f5c148da4da9be0215bab9dd45b1 100644 (file)
--- a/language_models.py
+++ b/language_models.py
@@ -69,23 +69,6 @@ with open('words.txt', 'r') as f:
      keywords = [line.rstrip() for line in f]
  
  
-def weighted_choice(d):
-    """Generate random item from a dictionary of item counts
-    """
-    target = random.uniform(0, sum(d.values()))
-    cuml = 0.0
-    for (l, p) in d.items():
-        cuml += p
-        if cuml > target:
-            return l
-    return None
-
-def random_english_letter():
-    """Generate a random letter based on English letter counts
-    """
-    return weighted_choice(normalised_english_counts)
-
-
  class Pdist(dict):
      """A probability distribution estimated from counts in datafile.
      Values are stored and returned as log probabilities.
author	Neil Smith <neil.git@njae.me.uk>
	Mon, 14 Jul 2014 18:51:17 +0000 (19:51 +0100)
committer	Neil Smith <neil.git@njae.me.uk>
	Mon, 14 Jul 2014 18:51:17 +0000 (19:51 +0100)
cipher.py		patch \| blob \| history
cipherbreak.py		patch \| blob \| history
language_models.py		patch \| blob \| history