From f8f68f294a4d226bc249a067c0a22ffca502f240 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Wed, 23 Jul 2014 12:47:47 +0100 Subject: [PATCH] Copied tweaks across --- 2013/solutions.txt | 4 +- cipher.py | 179 +++++++++++++++++++-------------------------- norms.py | 38 ++++------ 3 files changed, 95 insertions(+), 126 deletions(-) diff --git a/2013/solutions.txt b/2013/solutions.txt index fbf7502..23c8cb2 100644 --- a/2013/solutions.txt +++ b/2013/solutions.txt @@ -13,8 +13,8 @@ c5a = open('2013/5a.ciphertext').read() c5b = open('2013/5b.ciphertext').read() c6a = open('2013/6a.ciphertext').read() c6b = open('2013/6b.ciphertext').read() -c7a = open('2013/6a.ciphertext').read() -c7b = open('2013/6b.ciphertext').read() +c7a = open('2013/7a.ciphertext').read() +c7b = open('2013/7b.ciphertext').read() p1a = caesar_decipher(c1a, 8) p1b = caesar_decipher(c1b, 14) diff --git a/cipher.py b/cipher.py index 3783d57..b53ae24 100644 --- a/cipher.py +++ b/cipher.py @@ -1,12 +1,9 @@ -"""A set of ciphers with implementations for both enciphering and deciphering -them. See cipherbreak for automatic breaking of these ciphers -""" - import string import collections +import math from enum import Enum from itertools import zip_longest, cycle, chain -from language_models import unaccent, sanitise +from language_models import * modular_division_table = [[0]*26 for _ in range(26)] @@ -17,15 +14,15 @@ for a in range(26): def every_nth(text, n, fillvalue=''): - """Returns n strings, each of which consists of every nth character, + """Returns n strings, each of which consists of every nth character, starting with the 0th, 1st, 2nd, ... (n-1)th character - + >>> every_nth(string.ascii_lowercase, 5) ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] >>> every_nth(string.ascii_lowercase, 1) ['abcdefghijklmnopqrstuvwxyz'] >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE - ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] @@ -35,7 +32,7 @@ def every_nth(text, n, fillvalue=''): def combine_every_nth(split_text): """Reforms a text split into every_nth strings - + >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) 'abcdefghijklmnopqrstuvwxyz' >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) @@ -43,7 +40,7 @@ def combine_every_nth(split_text): >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) 'abcdefghijklmnopqrstuvwxyz' """ - return ''.join([''.join(l) + return ''.join([''.join(l) for l in zip_longest(*split_text, fillvalue='')]) def chunks(text, n, fillvalue=None): @@ -64,7 +61,7 @@ def chunks(text, n, fillvalue=None): def transpose(items, transposition): """Moves items around according to the given transposition - + >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3)) ['a', 'b', 'c', 'd'] >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0)) @@ -74,12 +71,12 @@ def transpose(items, transposition): """ transposed = [''] * len(transposition) for p, t in enumerate(transposition): - transposed[p] = items[t] + transposed[p] = items[t] return transposed def untranspose(items, transposition): """Undoes a transpose - + >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) ['a', 'b', 'c', 'd'] >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) @@ -89,20 +86,10 @@ def untranspose(items, transposition): """ transposed = [''] * len(transposition) for p, t in enumerate(transposition): - transposed[t] = items[p] + transposed[t] = items[p] return transposed def deduplicate(text): - """If a string contains duplicate letters, remove all but the first. Retain - the order of the letters. - - >>> deduplicate('cat') - ['c', 'a', 't'] - >>> deduplicate('happy') - ['h', 'a', 'p', 'y'] - >>> deduplicate('cattca') - ['c', 'a', 't'] - """ return list(collections.OrderedDict.fromkeys(text)) @@ -136,14 +123,14 @@ def caesar_encipher_letter(accented_letter, shift): alphabet_start = ord('A') else: alphabet_start = ord('a') - return chr(((ord(letter) - alphabet_start + shift) % 26) + + return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start) else: return letter def caesar_decipher_letter(letter, shift): """Decipher a letter, given a shift amount - + >>> caesar_decipher_letter('b', 1) 'a' >>> caesar_decipher_letter('b', 2) @@ -153,7 +140,7 @@ def caesar_decipher_letter(letter, shift): def caesar_encipher(message, shift): """Encipher a message with the Caesar cipher of given shift - + >>> caesar_encipher('abc', 1) 'bcd' >>> caesar_encipher('abc', 2) @@ -170,7 +157,7 @@ def caesar_encipher(message, shift): def caesar_decipher(message, shift): """Decipher a message with the Caesar cipher of given shift - + >>> caesar_decipher('bcd', 1) 'abc' >>> caesar_decipher('cde', 2) @@ -182,9 +169,9 @@ def caesar_decipher(message, shift): """ return caesar_encipher(message, -shift) -def affine_encipher_letter(accented_letter, multiplier=1, adder=0, - one_based=True): +def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True): """Encipher a letter, given a multiplier and adder + >>> ''.join([affine_encipher_letter(l, 3, 5, True) \ for l in string.ascii_uppercase]) 'HKNQTWZCFILORUXADGJMPSVYBE' @@ -208,7 +195,7 @@ def affine_encipher_letter(accented_letter, multiplier=1, adder=0, def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): """Encipher a letter, given a multiplier and adder - + >>> ''.join([affine_decipher_letter(l, 3, 5, True) \ for l in 'HKNQTWZCFILORUXADGJMPSVYBE']) 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -223,79 +210,75 @@ def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): alphabet_start = ord('a') cipher_number = ord(letter) - alphabet_start if one_based: cipher_number += 1 - plaintext_number = ( + plaintext_number = ( modular_division_table[multiplier] - [(cipher_number - adder) % 26] - ) + [(cipher_number - adder) % 26]) if one_based: plaintext_number -= 1 - return chr(plaintext_number % 26 + alphabet_start) + return chr(plaintext_number % 26 + alphabet_start) else: return letter def affine_encipher(message, multiplier=1, adder=0, one_based=True): """Encipher a message - + >>> affine_encipher('hours passed during which jerico tried every ' \ 'trick he could think of', 15, 22, True) 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' """ - enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) + enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message] return ''.join(enciphered) def affine_decipher(message, multiplier=1, adder=0, one_based=True): """Decipher a message - + >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ 'jfaoe ls omytd jlaxe mh', 15, 22, True) 'hours passed during which jerico tried every trick he could think of' """ - enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) + enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) for l in message] return ''.join(enciphered) -class KeywordWrapAlphabet(Enum): - """Ways of wrapping the alphabet for keyword-based substitution ciphers.""" +class Keyword_wrap_alphabet(Enum): from_a = 1 from_last = 2 from_largest = 3 -def keyword_cipher_alphabet_of(keyword, - wrap_alphabet=KeywordWrapAlphabet.from_a): +def keyword_cipher_alphabet_of(keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a): """Find the cipher alphabet given a keyword. wrap_alphabet controls how the rest of the alphabet is added after the keyword. >>> keyword_cipher_alphabet_of('bayes') 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a) + >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_a) 'bayescdfghijklmnopqrtuvwxz' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last) + >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_last) 'bayestuvwxzcdfghijklmnopqr' - >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest) + >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_largest) 'bayeszcdfghijklmnopqrtuvwx' """ - if wrap_alphabet == KeywordWrapAlphabet.from_a: - cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + + if wrap_alphabet == Keyword_wrap_alphabet.from_a: + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase)) else: - if wrap_alphabet == KeywordWrapAlphabet.from_last: + if wrap_alphabet == Keyword_wrap_alphabet.from_last: last_keyword_letter = deduplicate(sanitise(keyword))[-1] else: last_keyword_letter = sorted(sanitise(keyword))[-1] last_keyword_position = string.ascii_lowercase.find( last_keyword_letter) + 1 cipher_alphabet = ''.join( - deduplicate(sanitise(keyword) + - string.ascii_lowercase[last_keyword_position:] + + deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + string.ascii_lowercase)) return cipher_alphabet -def keyword_encipher(message, keyword, - wrap_alphabet=KeywordWrapAlphabet.from_a): +def keyword_encipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a): """Enciphers a message with a keyword substitution cipher. wrap_alphabet controls how the rest of the alphabet is added after the keyword. @@ -305,33 +288,32 @@ def keyword_encipher(message, keyword, >>> keyword_encipher('test message', 'bayes') 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a) + >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_a) 'rsqr ksqqbds' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last) + >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_last) 'lskl dskkbus' - >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest) + >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_largest) 'qspq jsppbcs' """ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) return unaccent(message).lower().translate(cipher_translation) -def keyword_decipher(message, keyword, - wrap_alphabet=KeywordWrapAlphabet.from_a): +def keyword_decipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a): """Deciphers a message with a keyword substitution cipher. wrap_alphabet controls how the rest of the alphabet is added after the keyword. 0 : from 'a' 1 : from the last letter in the sanitised keyword 2 : from the largest letter in the sanitised keyword - + >>> keyword_decipher('rsqr ksqqbds', 'bayes') 'test message' - >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a) + >>> keyword_decipher('rsqr ksqqbds', 'bayes', Keyword_wrap_alphabet.from_a) 'test message' - >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last) + >>> keyword_decipher('lskl dskkbus', 'bayes', Keyword_wrap_alphabet.from_last) 'test message' - >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest) + >>> keyword_decipher('qspq jsppbcs', 'bayes', Keyword_wrap_alphabet.from_largest) 'test message' """ cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) @@ -359,14 +341,14 @@ def vigenere_decipher(message, keyword): pairs = zip(message, cycle(shifts)) return ''.join([caesar_decipher_letter(l, k) for l, k in pairs]) -beaufort_encipher = vigenere_decipher -beaufort_decipher = vigenere_encipher +beaufort_encipher=vigenere_decipher +beaufort_decipher=vigenere_encipher def transpositions_of(keyword): """Finds the transpostions given by a keyword. For instance, the keyword 'clever' rearranges to 'celrv', so the first column (0) stays first, the - second column (1) moves to third, the third column (2) moves to second, + second column (1) moves to third, the third column (2) moves to second, and so on. If passed a tuple, assume it's already a transposition and just return it. @@ -386,32 +368,19 @@ def transpositions_of(keyword): return transpositions def pad(message_len, group_len, fillvalue): - """Returns the padding required to extend a message of message_len to an - even multiple of group_len, by adding repreated copies of fillvalue. - fillvalue can either be a character or a function that returns a character. - - >>> pad(10, 4, '!') - '!!' - >>> pad(8, 4, '!') - '' - >>> pad(16, 4, '!') - '' - >>> pad(10, 4, lambda: '*') - '**' - """ padding_length = group_len - message_len % group_len if padding_length == group_len: padding_length = 0 padding = '' - for _ in range(padding_length): + for i in range(padding_length): if callable(fillvalue): padding += fillvalue() else: padding += fillvalue return padding -def column_transposition_encipher(message, keyword, fillvalue=' ', - fillcolumnwise=False, - emptycolumnwise=False): +def column_transposition_encipher(message, keyword, fillvalue=' ', + fillcolumnwise=False, + emptycolumnwise=False): """Enciphers using the column transposition cipher. Message is padded to allow all rows to be the same length. @@ -458,9 +427,9 @@ def column_transposition_encipher(message, keyword, fillvalue=' ', else: return ''.join(chain(*transposed)) -def column_transposition_decipher(message, keyword, fillvalue=' ', - fillcolumnwise=False, - emptycolumnwise=False): +def column_transposition_decipher(message, keyword, fillvalue=' ', + fillcolumnwise=False, + emptycolumnwise=False): """Deciphers using the column transposition cipher. Message is padded to allow all rows to be the same length. @@ -508,14 +477,17 @@ def scytale_encipher(message, rows, fillvalue=' '): >>> scytale_encipher('thequickbrownfox', 7) 'tqcrnx hukof eibwo ' """ + # transpositions = [i for i in range(math.ceil(len(message) / rows))] + # return column_transposition_encipher(message, transpositions, + # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True) transpositions = [i for i in range(rows)] - return column_transposition_encipher(message, transpositions, - fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) + return column_transposition_encipher(message, transpositions, + fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) def scytale_decipher(message, rows): """Deciphers using the scytale transposition cipher. Assumes the message is padded so that all rows are the same length. - + >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) 'thequickbrownfox ' >>> scytale_decipher('tubnhirfecooqkwx', 4) @@ -527,15 +499,18 @@ def scytale_decipher(message, rows): >>> scytale_decipher('tqcrnx hukof eibwo ', 7) 'thequickbrownfox ' """ + # transpositions = [i for i in range(math.ceil(len(message) / rows))] + # return column_transposition_decipher(message, transpositions, + # fillcolumnwise=False, emptycolumnwise=True) transpositions = [i for i in range(rows)] - return column_transposition_decipher(message, transpositions, + return column_transposition_decipher(message, transpositions, fillcolumnwise=True, emptycolumnwise=False) class PocketEnigma(object): """A pocket enigma machine - The wheel is internally represented as a 26-element list self.wheel_map, - where wheel_map[i] == j shows that the position i places on from the arrow + The wheel is internally represented as a 26-element list self.wheel_map, + where wheel_map[i] == j shows that the position i places on from the arrow maps to the position j places on. """ def __init__(self, wheel=1, position='a'): @@ -552,11 +527,11 @@ class PocketEnigma(object): >>> pe.position 0 """ - self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), - ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), + self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), + ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')] - self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), - ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), + self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), + ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')] if wheel == 1: self.make_wheel_map(self.wheel1) @@ -602,14 +577,14 @@ class PocketEnigma(object): ValueError: Wheel specification does not contain 26 letters """ if len(wheel_spec) != 13: - raise ValueError("Wheel specification has {} pairs, requires" - " 13".format(len(wheel_spec))) + raise ValueError("Wheel specification has {} pairs, requires 13". + format(len(wheel_spec))) for p in wheel_spec: if len(p) != 2: raise ValueError("Not all mappings in wheel specification" - "have two elements") - if len(set([p[0] for p in wheel_spec] + - [p[1] for p in wheel_spec])) != 26: + "have two elements") + if len(set([p[0] for p in wheel_spec] + + [p[1] for p in wheel_spec])) != 26: raise ValueError("Wheel specification does not contain 26 letters") def encipher_letter(self, letter): @@ -637,8 +612,8 @@ class PocketEnigma(object): """ if letter in string.ascii_lowercase: return chr( - (self.wheel_map[(ord(letter) - ord('a') - self.position) % 26] + - self.position) % 26 + + (self.wheel_map[(ord(letter) - ord('a') - self.position) % 26] + + self.position) % 26 + ord('a')) else: return '' diff --git a/norms.py b/norms.py index 36e7fa4..6645294 100644 --- a/norms.py +++ b/norms.py @@ -1,10 +1,9 @@ -"""Define a variety of norms for finding distances between vectors""" - import collections +from math import log10 def normalise(frequencies): """Scale a set of frequencies so they sum to one - + >>> sorted(normalise({1: 1, 2: 0}).items()) [(1, 1.0), (2, 0.0)] >>> sorted(normalise({1: 1, 2: 1}).items()) @@ -14,13 +13,13 @@ def normalise(frequencies): >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items()) [(1, 0.25), (2, 0.5), (3, 0.25)] """ - length = sum([f for f in frequencies.values()]) - return collections.defaultdict(int, ((k, v / length) + length = sum(f for f in frequencies.values()) + return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) def euclidean_scale(frequencies): """Scale a set of frequencies so they have a unit euclidean length - + >>> sorted(euclidean_scale({1: 1, 2: 0}).items()) [(1, 1.0), (2, 0.0)] >>> sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS @@ -31,21 +30,17 @@ def euclidean_scale(frequencies): [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)] """ length = sum([f ** 2 for f in frequencies.values()]) ** 0.5 - return collections.defaultdict(int, ((k, v / length) + return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) def identity_scale(frequencies): - """Don't scale a set of frequencies. (For use when a function expects a - scaling function but you don't want to supply one.) - """ return frequencies def l2(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as - dictionaries. + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 - + >>> l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) 0.0 >>> l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS @@ -67,7 +62,7 @@ def l2(frequencies1, frequencies2): euclidean_distance = l2 def l1(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> l1({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) @@ -87,7 +82,7 @@ def l1(frequencies1, frequencies2): return total def l3(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> l3({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) @@ -110,10 +105,10 @@ def l3(frequencies1, frequencies2): return total ** (1/3) def geometric_mean(frequencies1, frequencies2): - """Finds the geometric mean of the absolute differences between two - frequency profiles, expressed as dictionaries. + """Finds the geometric mean of the absolute differences between two frequency profiles, + expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 - + >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) 1 >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) @@ -136,8 +131,8 @@ def geometric_mean(frequencies1, frequencies2): return total def harmonic_mean(frequencies1, frequencies2): - """Finds the harmonic mean of the absolute differences between two - frequency profiles, expressed as dictionaries. + """Finds the harmonic mean of the absolute differences between two frequency profiles, + expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) @@ -165,8 +160,7 @@ def harmonic_mean(frequencies1, frequencies2): def cosine_similarity(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as - dictionaries. + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS -- 2.34.1