import norms
import logging
import math
-from itertools import zip_longest
+from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool
"""
return [text[i:i+n] for i in range(len(text)-n+1)]
-def every_nth(text, n):
+def every_nth(text, n, fillvalue=''):
"""Returns n strings, each of which consists of every nth character,
starting with the 0th, 1st, 2nd, ... (n-1)th character
>>> every_nth(string.ascii_lowercase, 5)
['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
- >>> every_nth(string.ascii_lowercase, 1)
+ >>> every_nth(string.ascii_lowercase, 1)
['abcdefghijklmnopqrstuvwxyz']
>>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+ >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
+ ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
"""
split_text = [text[i:i+n] for i in range(0, len(text), n)]
- return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
+ return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
def combine_every_nth(split_text):
"""Reforms a text split into every_nth strings
return ''.join([''.join(l)
for l in zip_longest(*split_text, fillvalue='')])
+def transpose(items, transposition):
+ """Moves items around according to the given transposition
+
+ >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
+ ['a', 'b', 'c', 'd']
+ >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
+ ['d', 'b', 'c', 'a']
+ >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
+ [13, 12, 14, 11, 15, 10]
+ """
+ transposed = list(repeat('', len(transposition)))
+ for p, t in enumerate(transposition):
+ transposed[p] = items[t]
+ return transposed
+
+def untranspose(items, transposition):
+ """Undoes a transpose
+
+ >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
+ ['a', 'b', 'c', 'd']
+ >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
+ ['a', 'b', 'c', 'd']
+ >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
+ [10, 11, 12, 13, 14, 15]
+ """
+ transposed = list(repeat('', len(transposition)))
+ for p, t in enumerate(transposition):
+ transposed[t] = items[p]
+ return transposed
+
def frequencies(text):
"""Count the number of occurrences of each character in text
>>> sorted(frequencies('abcdefabc').items())
[('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
- >>> sorted(frequencies('the quick brown fox jumped over the lazy dog').items()) # doctest: +NORMALIZE_WHITESPACE
+ >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
+ 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
[(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
- >>> sorted(frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
+ >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
+ '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
[(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
- >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
+ >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
+ 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
[('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
"""Encipher a letter, given a multiplier and adder
- >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
+ >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
+ for l in string.ascii_uppercase])
'HKNQTWZCFILORUXADGJMPSVYBE'
- >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
+ >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
+ for l in string.ascii_uppercase])
'FILORUXADGJMPSVYBEHKNQTWZC'
"""
if letter in string.ascii_letters:
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
"""Encipher a letter, given a multiplier and adder
- >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
+ >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
+ for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
+ >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
+ for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
"""
if letter in string.ascii_letters:
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
"""Encipher a message
- >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
+ >>> affine_encipher('hours passed during which jerico tried every ' \
+ 'trick he could think of', 15, 22, True)
'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
"""
enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
"""Decipher a message
- >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
+ >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
+ 'jfaoe ls omytd jlaxe mh', 15, 22, True)
'hours passed during which jerico tried every trick he could think of'
"""
enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
+def transpositions_of(keyword):
+ """
+ >>> transpositions_of('clever')
+ [0, 2, 1, 4, 3]
+ """
+ transpositions = []
+ key = deduplicate(keyword)
+ for l in sorted(key):
+ transpositions += [key.index(l)]
+ return transpositions
+
+def column_transposition_encipher(message, keyword):
+ """
+ >>> column_transposition_encipher('hellothere', 'clever')
+ 'hleolteher'
+ """
+ transpositions = transpositions_of(keyword)
+ columns = every_nth(message, len(transpositions), fillvalue=' ')
+ transposed_columns = transpose(columns, transpositions)
+ return combine_every_nth(transposed_columns)
+
+def column_transposition_decipher(message, keyword):
+ """
+ >>> column_transposition_decipher('hleolteher', 'clever')
+ 'hellothere'
+ """
+ transpositions = transpositions_of(keyword)
+ columns = every_nth(message, len(transpositions), fillvalue=' ')
+ transposed_columns = untranspose(columns, transpositions)
+ return combine_every_nth(transposed_columns)
+
+
+
def caesar_break(message,
metric=norms.euclidean_distance,
target_counts=normalised_english_counts,
message_frequency_scaling=norms.normalise):
"""Breaks a Caesar cipher using frequency analysis
- >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
+ >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
+ 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
(4, 0.31863952890183...)
- >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
+ >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
+ 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
(19, 0.42152901235832...)
- >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
+ >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
+ 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
(13, 0.316029208075451...)
"""
sanitised_message = sanitise(message)
message_frequency_scaling=norms.normalise):
"""Breaks an affine cipher using frequency analysis
- >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
+ >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
+ 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
+ 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
+ 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
+ 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
+ 'clm ckuxj.') # doctest: +ELLIPSIS
((15, 22, True), 0.23570361818655...)
"""
sanitised_message = sanitise(message)
"""Breaks a keyword substitution cipher using a dictionary and
frequency analysis
- >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ >>> keyword_break(keyword_encipher('this is a test message for the ' \
+ 'keyword decipherment', 'elephant', 1), \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
(('elephant', 1), 0.41643991598441...)
"""
best_keyword = ''
"""Breaks a keyword substitution cipher using a dictionary and
frequency analysis
- >>> keyword_break_mp(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+ >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+ 'keyword decipherment', 'elephant', 1), \
+ wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
(('elephant', 1), 0.41643991598441...)
"""
with Pool() as pool:
helper_args = [(message, word, wrap, metric, target_counts,
message_frequency_scaling)
for word in wordlist for wrap in range(3)]
- breaks = pool.starmap(keyword_break_one, helper_args, chunksize) # Gotcha: the helper function here needs to be defined at the top level (limitation of Pool.starmap)
+ # Gotcha: the helper function here needs to be defined at the top level
+ # (limitation of Pool.starmap)
+ breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
return min(breaks, key=lambda k: k[1])
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
message_frequency_scaling=norms.normalise):
"""Breaks a Scytale cipher
- >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoeredcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriiotoaek') # doctest: +ELLIPSIS
+ >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
+ 'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
+ 'aek') # doctest: +ELLIPSIS
(6, 0.83453041115025...)
"""
best_key = 0