Fiddling with cracking dictionaries

[cipher-tools.git] / cipher.py
diff --git a/cipher.py b/cipher.py

index 6ef01c325d81a7055760d924c400f86b8b718340..5f39a780dd608d4385eb9db9d41c0c7b8edb4df7 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -2,8 +2,7 @@ import string
  import collections
  import logging
  from itertools import zip_longest, cycle, chain
-from counts import *
-
+from language_models import *
  
  logger = logging.getLogger(__name__)
  logger.addHandler(logging.FileHandler('cipher.log'))
@@ -12,45 +11,12 @@ logger.setLevel(logging.WARNING)
  #logger.setLevel(logging.DEBUG)
  
  
-english_counts = collections.defaultdict(int)
-with open('count_1l.txt', 'r') as f:
-    for line in f:
-        (letter, count) = line.split("\t")
-        english_counts[letter] = int(count)
-normalised_english_counts = norms.normalise(english_counts)
-
-choices, weights = zip(*weighted_choices)
-cumdist = list(itertools.accumulate(weights))
-x = random.random() * cumdist[-1]
-choices[bisect.bisect(cumdist, x)]
-
-
-modular_division_table = [[0]*26 for x in range(26)]
+modular_division_table = [[0]*26 for _ in range(26)]
  for a in range(26):
      for b in range(26):
          c = (a * b) % 26
          modular_division_table[b][c] = a
  
-def letters(text):
-    """Remove all non-alphabetic characters from a text
-    >>> letters('The Quick')
-    'TheQuick'
-    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
-    'TheQuickBROWNfoxjumpedoverthelazyDOG'
-    """
-    return ''.join([c for c in text if c in string.ascii_letters])
-
-def sanitise(text):
-    """Remove all non-alphabetic characters and convert the text to lowercase
-    
-    >>> sanitise('The Quick')
-    'thequick'
-    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
-    'thequickbrownfoxjumpedoverthelazydog'
-    """
-    # sanitised = [c.lower() for c in text if c in string.ascii_letters]
-    # return ''.join(sanitised)
-    return letters(text).lower()
  
  def ngrams(text, n):
      """Returns all n-grams of a text
@@ -438,14 +404,14 @@ def column_transposition_encipher(message, keyword, fillvalue=' ',
        emptycolumnwise=False):
      """Enciphers using the column transposition cipher.
      Message is padded to allow all rows to be the same length.
-
-    >>> column_transposition_encipher('hellothere', 'clever')
-    'hleolteher'
-    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
-    'hleolthre!e!'
-    >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
-    'htleehoelr'
      """
+    # >>> column_transposition_encipher('hellothere', 'clever')
+    # 'hleolteher'
+    # >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+    # 'hleolthre!e!'
+    # >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
+    # 'htleehoelr'
+    # """
      transpositions = transpositions_of(keyword)
      message += pad(len(message), len(transpositions), fillvalue)
      if fillcolumnwise:
@@ -463,14 +429,14 @@ def column_transposition_decipher(message, keyword, fillvalue=' ',
        columnwise=False):
      """Deciphers using the column transposition cipher.
      Message is padded to allow all rows to be the same length.
-
-    >>> column_transposition_decipher('hleolteher', 'clever')
-    'hellothere'
-    >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
-    'hellothere!!'
-    >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
-    'hellothere'
      """
+    # >>> column_transposition_decipher('hleolteher', 'clever')
+    # 'hellothere'
+    # >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
+    # 'hellothere!!'
+    # >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
+    # 'hellothere'
+    # """
      transpositions = transpositions_of(keyword)
      if columnwise:
          columns = chunks(message, int(len(message) / len(transpositions)))