import norms
import logging
import math
-from itertools import zip_longest
+from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool
c = (a * b) % 26
modular_division_table[b][c] = a
def letters(text):
    """Strip every character that is not an ASCII letter.

    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    # Membership test against ascii_letters keeps behaviour ASCII-only,
    # unlike str.isalpha() which would admit accented letters.
    return ''.join(filter(lambda c: c in string.ascii_letters, text))
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # Delegate the filtering to letters() so both functions agree on
    # what counts as a letter; only the case-folding happens here.
    return letters(text).lower()
def ngrams(text, n):
    """Return every contiguous n-gram of a text, in order.

    >>> ngrams('abcde', 2)
    ['ab', 'bc', 'cd', 'de']
    """
    grams = []
    # One slice per valid starting position; the last start index is
    # len(text) - n, so short texts (len < n) yield an empty list.
    for start in range(len(text) - n + 1):
        grams.append(text[start:start + n])
    return grams
def every_nth(text, n, fillvalue=''):
    """Split text into n strings, the i-th holding every nth character
    starting at offset i (0, 1, ..., n-1).  Short final columns are
    padded with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Cut the text into consecutive rows of length n, then read the
    # columns back out; zip_longest pads the ragged final row.
    rows = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column) for column in zip_longest(*rows, fillvalue=fillvalue)]
def combine_every_nth(split_text):
"""Reforms a text split into every_nth strings
return ''.join([''.join(l)
for l in zip_longest(*split_text, fillvalue='')])
def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    # Output slot p receives the item at source index transposition[p],
    # which is exactly a comprehension over the transposition.
    return [items[source] for source in transposition]
+
def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    # Inverse scatter: the item now sitting at position p originally
    # lived at index transposition[p], so write it back there.
    restored = [''] * len(transposition)
    for position, original_index in enumerate(transposition):
        restored[original_index] = items[position]
    return restored
+
def frequencies(text):
    """Count the number of occurrences of each character in text.
    Missing characters report a count of 0.

    >>> frequencies('abcdef')['a']
    1
    >>> frequencies('abcdefabcdef')['a']
    2
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # Counter counts any iterable directly; wrapping text in a generator
    # expression was redundant.  Missing keys return 0, preserving the
    # defaultdict(int) behaviour of the earlier implementation.
    return collections.Counter(text)

letter_frequencies = frequencies
def deduplicate(text):
return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
def transpositions_of(keyword):
    """Return the column ordering implied by a keyword: for each letter of
    the deduplicated key in alphabetical order, its position in the key.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    return [key.index(letter) for letter in sorted(key)]
+
def column_transposition_encipher(message, keyword):
    """Encipher a message with a columnar transposition keyed by keyword.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    # Thin wrapper: the shared worker does the real work.
    return column_transposition_worker(message, keyword, True)
+
def column_transposition_decipher(message, keyword):
    """Decipher a columnar-transposition message keyed by keyword.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    # Thin wrapper: the shared worker does the real work.
    return column_transposition_worker(message, keyword, False)
+
def column_transposition_worker(message, keyword, encipher=True):
    """Apply (or undo) a keyword-keyed columnar transposition to message.

    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hellothere', 'clever', encipher=True)
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
    'hellothere'
    """
    order = transpositions_of(keyword)
    # Space-pad so every column has equal length before shuffling.
    columns = every_nth(message, len(order), fillvalue=' ')
    shuffle = transpose if encipher else untranspose
    return combine_every_nth(shuffle(columns, order))
+
+
+
def caesar_break(message,
metric=norms.euclidean_distance,
target_counts=normalised_english_counts,