From: Neil Smith Date: Sun, 13 Jul 2014 14:19:28 +0000 (+0100) Subject: Tweaked in response to some linter suggestions X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=32a4467e6f7ac8ff2e6738118242ec4e4c255e8a;p=cipher-training.git Tweaked in response to some linter suggestions --- diff --git a/cipher.py b/cipher.py index 713db29..1656720 100644 --- a/cipher.py +++ b/cipher.py @@ -1,9 +1,12 @@ +"""A set of ciphers with implementations for both enciphering and deciphering +them. See cipherbreak for automatic breaking of these ciphers +""" + import string import collections -import math from enum import Enum from itertools import zip_longest, cycle, chain -from language_models import * +from language_models import unaccent, sanitise modular_division_table = [[0]*26 for _ in range(26)] @@ -14,15 +17,15 @@ for a in range(26): def every_nth(text, n, fillvalue=''): - """Returns n strings, each of which consists of every nth character, + """Returns n strings, each of which consists of every nth character, starting with the 0th, 1st, 2nd, ... (n-1)th character - + >>> every_nth(string.ascii_lowercase, 5) ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] >>> every_nth(string.ascii_lowercase, 1) ['abcdefghijklmnopqrstuvwxyz'] >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE - ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] @@ -32,7 +35,7 @@ def every_nth(text, n, fillvalue=''): def combine_every_nth(split_text): """Reforms a text split into every_nth strings - + >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) 'abcdefghijklmnopqrstuvwxyz' >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) @@ -40,8 +43,8 @@ def combine_every_nth(split_text): >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) 'abcdefghijklmnopqrstuvwxyz' """ - return ''.join([''.join(l) - for l in zip_longest(*split_text, fillvalue='')]) + return ''.join([''.join(l) + for l in zip_longest(*split_text, fillvalue='')]) def chunks(text, n, fillvalue=None): """Split a text into chunks of n characters @@ -61,7 +64,7 @@ def chunks(text, n, fillvalue=None): def transpose(items, transposition): """Moves items around according to the given transposition - + >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3)) ['a', 'b', 'c', 'd'] >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0)) @@ -71,7 +74,7 @@ def transpose(items, transposition): """ transposed = [''] * len(transposition) for p, t in enumerate(transposition): - transposed[p] = items[t] + transposed[p] = items[t] return transposed def untranspose(items, transposition): @@ -86,10 +89,20 @@ def untranspose(items, transposition): """ transposed = [''] * len(transposition) for p, t in enumerate(transposition): - transposed[t] = items[p] + transposed[t] = items[p] return transposed def deduplicate(text): + """If a string contains duplicate letters, remove all but the first. Retain + the order of the letters. + + >>> deduplicate('cat') + ['c', 'a', 't'] + >>> deduplicate('happy') + ['h', 'a', 'p', 'y'] + >>> deduplicate('cattca') + ['c', 'a', 't'] + """ return list(collections.OrderedDict.fromkeys(text)) @@ -123,14 +136,14 @@ def caesar_encipher_letter(accented_letter, shift): alphabet_start = ord('A') else: alphabet_start = ord('a') - return chr(((ord(letter) - alphabet_start + shift) % 26) + + return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start) else: return letter def caesar_decipher_letter(letter, shift): """Decipher a letter, given a shift amount - + >>> caesar_decipher_letter('b', 1) 'a' >>> caesar_decipher_letter('b', 2) @@ -140,7 +153,7 @@ def caesar_decipher_letter(letter, shift): def caesar_encipher(message, shift): """Encipher a message with the Caesar cipher of given shift - + >>> caesar_encipher('abc', 1) 'bcd' >>> caesar_encipher('abc', 2) @@ -157,7 +170,7 @@ def caesar_encipher(message, shift): def caesar_decipher(message, shift): """Decipher a message with the Caesar cipher of given shift - + >>> caesar_decipher('bcd', 1) 'abc' >>> caesar_decipher('cde', 2) @@ -169,9 +182,9 @@ def caesar_decipher(message, shift): """ return caesar_encipher(message, -shift) -def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True): +def affine_encipher_letter(accented_letter, multiplier=1, adder=0, + one_based=True): """Encipher a letter, given a multiplier and adder - >>> ''.join([affine_encipher_letter(l, 3, 5, True) \ for l in string.ascii_uppercase]) 'HKNQTWZCFILORUXADGJMPSVYBE' @@ -195,7 +208,7 @@ def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=Tru def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): """Encipher a letter, given a multiplier and adder - + >>> ''.join([affine_decipher_letter(l, 3, 5, True) \ for l in 'HKNQTWZCFILORUXADGJMPSVYBE']) 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -210,44 +223,46 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): alphabet_start = ord('a') cipher_number = ord(letter) - alphabet_start if one_based: cipher_number += 1 - plaintext_number = ( + plaintext_number = ( modular_division_table[multiplier] [(cipher_number - adder) % 26] ) if one_based: plaintext_number -= 1 - return chr(plaintext_number % 26 + alphabet_start) + return chr(plaintext_number % 26 + alphabet_start) else: return letter def affine_encipher(message, multiplier=1, adder=0, one_based=True): """Encipher a message - + >>> affine_encipher('hours passed during which jerico tried every ' \ 'trick he could think of', 15, 22, True) 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' """ - enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) + enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message] return ''.join(enciphered) def affine_decipher(message, multiplier=1, adder=0, one_based=True): """Decipher a message - + >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ 'jfaoe ls omytd jlaxe mh', 15, 22, True) 'hours passed during which jerico tried every trick he could think of' """ - enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) + enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) for l in message] return ''.join(enciphered) class Keyword_wrap_alphabet(Enum): + """Ways of wrapping the alphabet for keyword-based substitution ciphers.""" from_a = 1 from_last = 2 from_largest = 3 -def keyword_cipher_alphabet_of(keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a): +def keyword_cipher_alphabet_of(keyword, + wrap_alphabet=Keyword_wrap_alphabet.from_a): """Find the cipher alphabet given a keyword. wrap_alphabet controls how the rest of the alphabet is added after the keyword. @@ -262,7 +277,7 @@ def keyword_cipher_alphabet_of(keyword, wrap_alphabet=Keyword_wrap_alphabet.from 'bayeszcdfghijklmnopqrtuvwx' """ if wrap_alphabet == Keyword_wrap_alphabet.from_a: - cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase)) else: if wrap_alphabet == Keyword_wrap_alphabet.from_last: @@ -272,13 +287,14 @@ def keyword_cipher_alphabet_of(keyword, wrap_alphabet=Keyword_wrap_alphabet.from last_keyword_position = string.ascii_lowercase.find( last_keyword_letter) + 1 cipher_alphabet = ''.join( - deduplicate(sanitise(keyword) + - string.ascii_lowercase[last_keyword_position:] + + deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + string.ascii_lowercase)) return cipher_alphabet -def keyword_encipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a): +def keyword_encipher(message, keyword, + wrap_alphabet=Keyword_wrap_alphabet.from_a): """Enciphers a message with a keyword substitution cipher. wrap_alphabet controls how the rest of the alphabet is added after the keyword. @@ -306,7 +322,7 @@ def keyword_decipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_ 0 : from 'a' 1 : from the last letter in the sanitised keyword 2 : from the largest letter in the sanitised keyword - + >>> keyword_decipher('rsqr ksqqbds', 'bayes') 'test message' >>> keyword_decipher('rsqr ksqqbds', 'bayes', Keyword_wrap_alphabet.from_a) @@ -368,17 +384,30 @@ def transpositions_of(keyword): return transpositions def pad(message_len, group_len, fillvalue): + """Returns the padding required to extend a message of message_len to an + even multiple of group_len, by adding repreated copies of fillvalue. + fillvalue can either be a character or a function that returns a character. + + >>> pad(10, 4, '!') + '!!' + >>> pad(8, 4, '!') + '' + >>> pad(16, 4, '!') + '' + >>> pad(10, 4, lambda: '*') + '**' + """ padding_length = group_len - message_len % group_len if padding_length == group_len: padding_length = 0 padding = '' - for i in range(padding_length): + for _ in range(padding_length): if callable(fillvalue): padding += fillvalue() else: padding += fillvalue return padding -def column_transposition_encipher(message, keyword, fillvalue=' ', +def column_transposition_encipher(message, keyword, fillvalue=' ', fillcolumnwise=False, emptycolumnwise=False): """Enciphers using the column transposition cipher. @@ -427,7 +456,7 @@ def column_transposition_encipher(message, keyword, fillvalue=' ', else: return ''.join(chain(*transposed)) -def column_transposition_decipher(message, keyword, fillvalue=' ', +def column_transposition_decipher(message, keyword, fillvalue=' ', fillcolumnwise=False, emptycolumnwise=False): """Deciphers using the column transposition cipher. @@ -477,11 +506,8 @@ def scytale_encipher(message, rows, fillvalue=' '): >>> scytale_encipher('thequickbrownfox', 7) 'tqcrnx hukof eibwo ' """ - # transpositions = [i for i in range(math.ceil(len(message) / rows))] - # return column_transposition_encipher(message, transpositions, - # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True) transpositions = [i for i in range(rows)] - return column_transposition_encipher(message, transpositions, + return column_transposition_encipher(message, transpositions, fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) def scytale_decipher(message, rows): @@ -499,18 +525,15 @@ def scytale_decipher(message, rows): >>> scytale_decipher('tqcrnx hukof eibwo ', 7) 'thequickbrownfox ' """ - # transpositions = [i for i in range(math.ceil(len(message) / rows))] - # return column_transposition_decipher(message, transpositions, - # fillcolumnwise=False, emptycolumnwise=True) transpositions = [i for i in range(rows)] - return column_transposition_decipher(message, transpositions, + return column_transposition_decipher(message, transpositions, fillcolumnwise=True, emptycolumnwise=False) class PocketEnigma(object): """A pocket enigma machine - The wheel is internally represented as a 26-element list self.wheel_map, - where wheel_map[i] == j shows that the position i places on from the arrow + The wheel is internally represented as a 26-element list self.wheel_map, + where wheel_map[i] == j shows that the position i places on from the arrow maps to the position j places on. """ def __init__(self, wheel=1, position='a'): @@ -527,11 +550,11 @@ class PocketEnigma(object): >>> pe.position 0 """ - self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), - ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), + self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), + ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')] - self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), - ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), + self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), + ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')] if wheel == 1: self.make_wheel_map(self.wheel1) @@ -566,7 +589,7 @@ class PocketEnigma(object): >>> pe.validate_wheel_spec([]) Traceback (most recent call last): ... - ValueError: Wheel specification has 0 pairs, require 13 + ValueError: Wheel specification has 0 pairs, requires 13 >>> pe.validate_wheel_spec([('a', 'b', 'c')]*13) Traceback (most recent call last): ... @@ -612,8 +635,8 @@ class PocketEnigma(object): """ if letter in string.ascii_lowercase: return chr( - (self.wheel_map[(ord(letter) - ord('a') - self.position) % 26] + - self.position) % 26 + + (self.wheel_map[(ord(letter) - ord('a') - self.position) % 26] + + self.position) % 26 + ord('a')) else: return '' diff --git a/cipherbreak.py b/cipherbreak.py index 6e08f49..3639da5 100644 --- a/cipherbreak.py +++ b/cipherbreak.py @@ -37,22 +37,22 @@ def frequencies(text): [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)] >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \ 'dog').items()) # doctest: +NORMALIZE_WHITESPACE - [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), - ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), - ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), + [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), + ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), + ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \ '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE - [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), - ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), - ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), - ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), + [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), + ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), + ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), + ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)] - >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \ + >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\ 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE - [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), - ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), - ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), + [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), + ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), + ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] >>> frequencies('abcdefabcdef')['x'] 0 @@ -62,7 +62,7 @@ def frequencies(text): def caesar_break(message, fitness=Pletters): """Breaks a Caesar cipher using frequency analysis - + >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \ 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS (4, -130.849989015...) @@ -80,7 +80,8 @@ def caesar_break(message, fitness=Pletters): plaintext = caesar_decipher(sanitised_message, shift) fit = fitness(plaintext) logger.debug('Caesar break attempt using key {0} gives fit of {1} ' - 'and decrypt starting: {2}'.format(shift, fit, plaintext[:50])) + 'and decrypt starting: {2}'.format(shift, fit, + plaintext[:50])) if fit > best_fit: best_fit = fit best_shift = shift @@ -91,7 +92,7 @@ def caesar_break(message, fitness=Pletters): def affine_break(message, fitness=Pletters): """Breaks an affine cipher using frequency analysis - + >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \ 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \ 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \ @@ -120,10 +121,10 @@ def affine_break(message, fitness=Pletters): best_multiplier = multiplier best_adder = adder best_one_based = one_based - logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} ' - 'and decrypt starting: {4}'.format( - best_multiplier, best_adder, best_one_based, best_fit, - affine_decipher(sanitised_message, best_multiplier, + logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of ' + '{3} and decrypt starting: {4}'.format( + best_multiplier, best_adder, best_one_based, best_fit, + affine_decipher(sanitised_message, best_multiplier, best_adder, best_one_based)[:50])) return (best_multiplier, best_adder, best_one_based), best_fit @@ -145,16 +146,16 @@ def keyword_break(message, wordlist=keywords, fitness=Pletters): fit = fitness(plaintext) logger.debug('Keyword break attempt using key {0} (wrap={1}) ' 'gives fit of {2} and decrypt starting: {3}'.format( - keyword, wrap_alphabet, fit, + keyword, wrap_alphabet, fit, sanitise(plaintext)[:50])) if fit > best_fit: best_fit = fit best_keyword = keyword best_wrap_alphabet = wrap_alphabet logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of ' - '{2} and decrypt starting: {3}'.format(best_keyword, + '{2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise( - keyword_decipher(message, best_keyword, + keyword_decipher(message, best_keyword, best_wrap_alphabet))[:50])) return (best_keyword, best_wrap_alphabet), best_fit @@ -168,12 +169,12 @@ def keyword_break_mp(message, wordlist=keywords, fitness=Pletters, chunksize=500 (('elephant', ), -52.834575011...) """ with Pool() as pool: - helper_args = [(message, word, wrap, fitness) - for word in wordlist + helper_args = [(message, word, wrap, fitness) + for word in wordlist for wrap in Keyword_wrap_alphabet] - # Gotcha: the helper function here needs to be defined at the top level + # Gotcha: the helper function here needs to be defined at the top level # (limitation of Pool.starmap) - breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) + breaks = pool.starmap(keyword_break_worker, helper_args, chunksize) return max(breaks, key=lambda k: k[1]) def keyword_break_worker(message, keyword, wrap_alphabet, fitness): @@ -215,7 +216,8 @@ def monoalphabetic_break_hillclimbing_worker(message, alphabet, if i == j: return letters else: - return letters[:i] + letters[j] + letters[i+1:j] + letters[i] + letters[j+1:] + return letters[:i] + letters[j] + letters[i+1:j] + + letters[i] + letters[j+1:] best_alphabet = alphabet best_fitness = float('-inf') for i in range(max_iterations): @@ -443,4 +445,3 @@ def plot_frequency_histogram(freqs, sort_key=None): if __name__ == "__main__": import doctest doctest.testmod() - diff --git a/language_models.py b/language_models.py index 59d8588..babbea1 100644 --- a/language_models.py +++ b/language_models.py @@ -1,6 +1,10 @@ +"""Language-specific functions, including models of languages based on data of +its use. +""" + import string -import norms import random +import norms import collections import unicodedata import itertools @@ -16,7 +20,7 @@ def letters(text): return ''.join([c for c in text if c in string.ascii_letters]) def unaccent(text): - """Remove all accents from letters. + """Remove all accents from letters. It does this by converting the unicode string to decomposed compatability form, dropping all the combining accents, then re-encoding the bytes. @@ -37,7 +41,7 @@ def unaccent(text): def sanitise(text): """Remove all non-alphabetic characters and convert the text to lowercase - + >>> sanitise('The Quick') 'thequick' >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG') @@ -72,20 +76,20 @@ with open('words.txt', 'r') as f: def weighted_choice(d): - """Generate random item from a dictionary of item counts - """ - target = random.uniform(0, sum(d.values())) - cuml = 0.0 - for (l, p) in d.items(): - cuml += p - if cuml > target: - return l - return None + """Generate random item from a dictionary of item counts + """ + target = random.uniform(0, sum(d.values())) + cuml = 0.0 + for (l, p) in d.items(): + cuml += p + if cuml > target: + return l + return None def random_english_letter(): - """Generate a random letter based on English letter counts - """ - return weighted_choice(normalised_english_counts) + """Generate a random letter based on English letter counts + """ + return weighted_choice(normalised_english_counts) def ngrams(text, n):