Initial commit
authorNeil Smith <neil.git@njae.me.uk>
Thu, 22 Oct 2020 13:12:47 +0000 (14:12 +0100)
committerNeil Smith <neil.git@njae.me.uk>
Thu, 22 Oct 2020 13:12:47 +0000 (14:12 +0100)
21 files changed:
.gitignore [new file with mode: 0644]
LICENCE [new file with mode: 0644]
README.md [new file with mode: 0644]
setup.py [new file with mode: 0644]
szyfrow/__init__.py [new file with mode: 0644]
szyfrow/affine.py [new file with mode: 0644]
szyfrow/amsco.py [new file with mode: 0644]
szyfrow/autokey.py [new file with mode: 0644]
szyfrow/bifid.py [new file with mode: 0644]
szyfrow/bombe.py [new file with mode: 0644]
szyfrow/cadenus.py [new file with mode: 0644]
szyfrow/caesar.py [new file with mode: 0644]
szyfrow/column_transposition.py [new file with mode: 0644]
szyfrow/enigma.py [new file with mode: 0644]
szyfrow/hill.py [new file with mode: 0644]
szyfrow/keyword_cipher.py [new file with mode: 0644]
szyfrow/playfair.py [new file with mode: 0644]
szyfrow/pocket_enigma.py [new file with mode: 0644]
szyfrow/polybius.py [new file with mode: 0644]
szyfrow/railfence.py [new file with mode: 0644]
szyfrow/vigenere.py [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..59c61f4
--- /dev/null
@@ -0,0 +1,28 @@
+*~
+*doc
+*log
+/tmp
+/__pycache__/*
+*pyc
+.ipynb*
+*.sublime-workspace
+.directory/*
+
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
diff --git a/LICENCE b/LICENCE
new file mode 100644 (file)
index 0000000..9ec50e7
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Neil Smith
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..5dc238b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# Szyfrow ciphers
+
+Various tools for enciphering, deciphering, and breaking simple (manual) ciphers.
+
diff --git a/setup.py b/setup.py
new file mode 100644 (file)
index 0000000..cc0b525
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,22 @@
+import setuptools
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="szyfrow",
+    version="0.0.1",
+    author="Neil Smith",
+    author_email="neil.szyfrow@njae.me.uk",
+    description="Tools for using and breaking simple ciphers",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/pypa/sampleproject",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.6',
+)
diff --git a/szyfrow/__init__.py b/szyfrow/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/szyfrow/affine.py b/szyfrow/affine.py
new file mode 100644 (file)
index 0000000..a5647be
--- /dev/null
@@ -0,0 +1,123 @@
+from support.utilities import *
+from support.language_models import *
+from logger import logger
+
+
+modular_division_table = {
+    (multiplier, (multiplier * plaintext) % 26): plaintext
+    for plaintext in range(26) 
+    for multiplier in range(26)
+    }
+
+
+def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
+    """Encipher a letter, given a multiplier and adder
+    
+    >>> cat(affine_encipher_letter(l, 3, 5, True) \
+            for l in string.ascii_letters)
+    'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE'
+    >>> cat(affine_encipher_letter(l, 3, 5, False) \
+            for l in string.ascii_letters)
+    'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC'
+    """
+    letter = unaccent(accented_letter)
+    if letter in string.ascii_letters:
+        letter_number = pos(letter)
+        if one_based: letter_number += 1
+        cipher_number = (letter_number * multiplier + adder) % 26
+        if one_based: cipher_number -= 1
+        if letter in string.ascii_uppercase:
+            return unpos(cipher_number).upper()
+        else:
+            return unpos(cipher_number)
+    else:
+        return letter
+
+def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
+    """Decipher a letter, given the multiplier and adder used to encipher it
+    
+    >>> cat(affine_decipher_letter(l, 3, 5, True) \
+            for l in 'hknqtwzcfiloruxadgjmpsvybeHKNQTWZCFILORUXADGJMPSVYBE')
+    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    >>> cat(affine_decipher_letter(l, 3, 5, False) \
+            for l in 'filoruxadgjmpsvybehknqtwzcFILORUXADGJMPSVYBEHKNQTWZC')
+    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    """
+    if letter in string.ascii_letters:
+        cipher_number = pos(letter)
+        if one_based: cipher_number += 1
+        # Undo the adder, then undo the multiplication with the precomputed
+        # lookup table, which is keyed on
+        # (multiplier, (multiplier * plaintext) % 26) and yields plaintext.
+        plaintext_number = ( 
+            modular_division_table[multiplier, (cipher_number - adder) % 26]
+            )
+        if one_based: plaintext_number -= 1
+        if letter in string.ascii_uppercase:
+            return unpos(plaintext_number).upper()
+        else:
+            return unpos(plaintext_number) 
+    else:
+        return letter
+
+def affine_encipher(message, multiplier=1, adder=0, one_based=True):
+    """Encipher a message
+    
+    >>> affine_encipher('hours passed during which jerico tried every ' \
+           'trick he could think of', 15, 22, True)
+    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
+    """
+    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) 
+                  for l in message]
+    return cat(enciphered)
+
+def affine_decipher(message, multiplier=1, adder=0, one_based=True):
+    """Decipher a message
+    
+    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
+           'jfaoe ls omytd jlaxe mh', 15, 22, True)
+    'hours passed during which jerico tried every trick he could think of'
+    """
+    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) 
+                  for l in message]
+    return cat(enciphered)
+
+
+
+def affine_break(message, fitness=Pletters):
+    """Breaks an affine cipher using frequency analysis
+
+    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
+          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
+          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
+          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
+          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
+          'kxd clm ckuxj.') # doctest: +ELLIPSIS
+    ((15, 22, True), -340.601181913...)
+    """
+    sanitised_message = sanitise(message)
+    best_multiplier = 0
+    best_adder = 0
+    best_one_based = True
+    best_fit = float("-inf")
+    for one_based in [True, False]:
+        for multiplier in [x for x in range(1, 26, 2) if x != 13]:
+            for adder in range(26):
+                plaintext = affine_decipher(sanitised_message,
+                                            multiplier, adder, one_based)
+                fit = fitness(plaintext)
+                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
+                             'gives fit of {3} and decrypt starting: {4}'.
+                             format(multiplier, adder, one_based, fit,
+                                    plaintext[:50]))
+                if fit > best_fit:
+                    best_fit = fit
+                    best_multiplier = multiplier
+                    best_adder = adder
+                    best_one_based = one_based
+    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
+                '{3} and decrypt starting: {4}'.format(
+                    best_multiplier, best_adder, best_one_based, best_fit,
+                    affine_decipher(sanitised_message, best_multiplier,
+                                    best_adder, best_one_based)[:50]))
+    return (best_multiplier, best_adder, best_one_based), best_fit
diff --git a/szyfrow/amsco.py b/szyfrow/amsco.py
new file mode 100644 (file)
index 0000000..3d4e49b
--- /dev/null
@@ -0,0 +1,204 @@
+from enum import Enum
+import multiprocessing 
+import itertools
+
+from support.utilities import *
+from support.language_models import *
+from cipher.column_transposition import transpositions, transpositions_of
+
+from logger import logger
+
+# Where each piece of text ends up in the AMSCO transpositon cipher.
+# 'index' shows where the slice appears in the plaintext, with the slice
+# from 'start' to 'end'
+AmscoSlice = collections.namedtuple('AmscoSlice', ['index', 'start', 'end'])
+
+class AmscoFillStyle(Enum):
+    continuous = 1
+    same_each_row = 2
+    reverse_each_row = 3
+
+def amsco_transposition_positions(message, keyword, 
+      fillpattern=(1, 2),
+      fillstyle=AmscoFillStyle.continuous,
+      fillcolumnwise=False,
+      emptycolumnwise=True):
+    """Creates the grid for the AMSCO transposition cipher. Each element in the
+    grid shows the index of that slice and the start and end positions of the
+    plaintext that go to make it up.
+
+    >>> amsco_transposition_positions(string.ascii_lowercase, 'freddy', \
+        fillpattern=(1, 2)) # doctest:  +NORMALIZE_WHITESPACE
+    [[AmscoSlice(index=3, start=4, end=6),
+     AmscoSlice(index=2, start=3, end=4),
+     AmscoSlice(index=0, start=0, end=1),
+     AmscoSlice(index=1, start=1, end=3),
+     AmscoSlice(index=4, start=6, end=7)],
+    [AmscoSlice(index=8, start=12, end=13),
+     AmscoSlice(index=7, start=10, end=12),
+     AmscoSlice(index=5, start=7, end=9),
+     AmscoSlice(index=6, start=9, end=10),
+     AmscoSlice(index=9, start=13, end=15)],
+    [AmscoSlice(index=13, start=19, end=21),
+     AmscoSlice(index=12, start=18, end=19),
+     AmscoSlice(index=10, start=15, end=16),
+     AmscoSlice(index=11, start=16, end=18),
+     AmscoSlice(index=14, start=21, end=22)],
+    [AmscoSlice(index=18, start=27, end=28),
+     AmscoSlice(index=17, start=25, end=27),
+     AmscoSlice(index=15, start=22, end=24),
+     AmscoSlice(index=16, start=24, end=25),
+     AmscoSlice(index=19, start=28, end=30)]]
+    """
+    transpositions = transpositions_of(keyword)
+    fill_iterator = itertools.cycle(fillpattern)
+    indices = itertools.count()
+    message_length = len(message)
+
+    current_position = 0
+    grid = []
+    current_fillpattern = fillpattern
+    while current_position < message_length:
+        row = []
+        if fillstyle == AmscoFillStyle.same_each_row:
+            fill_iterator = itertools.cycle(fillpattern)
+        if fillstyle == AmscoFillStyle.reverse_each_row:
+            fill_iterator = itertools.cycle(current_fillpattern)
+        for _ in range(len(transpositions)):
+            index = next(indices)
+            gap = next(fill_iterator)
+            row += [AmscoSlice(index, current_position, current_position + gap)]
+            current_position += gap
+        grid += [row]
+        if fillstyle == AmscoFillStyle.reverse_each_row:
+            current_fillpattern = list(reversed(current_fillpattern))
+    return [transpose(r, transpositions) for r in grid]
+
+def amsco_transposition_encipher(message, keyword, 
+    fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row):
+    """AMSCO transposition encipher.
+
+    >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(1, 2))
+    'hoteelhler'
+    >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(2, 1))
+    'hetelhelor'
+    >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(1, 2))
+    'hotelerelh'
+    >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(2, 1))
+    'hetelorlhe'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'encode')
+    'etecstthhomoerereenisxip'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2))
+    'hetcsoeisterereipexthomn'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous)
+    'hecsoisttererteipexhomen'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(2, 1))
+    'heecisoosttrrtepeixhemen'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2))
+    'hxtomephescieretoeisnter'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous)
+    'hxomeiphscerettoisenteer'
+    """
+    grid = amsco_transposition_positions(message, keyword, 
+        fillpattern=fillpattern, fillstyle=fillstyle)
+    ct_as_grid = [[message[s.start:s.end] for s in r] for r in grid]
+    return combine_every_nth(ct_as_grid)
+
+
+def amsco_transposition_decipher(message, keyword, 
+    fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row):
+    """AMSCO transposition decipher
+
+    >>> amsco_transposition_decipher('hoteelhler', 'abc', fillpattern=(1, 2))
+    'hellothere'
+    >>> amsco_transposition_decipher('hetelhelor', 'abc', fillpattern=(2, 1))
+    'hellothere'
+    >>> amsco_transposition_decipher('hotelerelh', 'acb', fillpattern=(1, 2))
+    'hellothere'
+    >>> amsco_transposition_decipher('hetelorlhe', 'acb', fillpattern=(2, 1))
+    'hellothere'
+    >>> amsco_transposition_decipher('etecstthhomoerereenisxip', 'encode')
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hetcsoeisterereipexthomn', 'cipher', fillpattern=(1, 2))
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hecsoisttererteipexhomen', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous)
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('heecisoosttrrtepeixhemen', 'cipher', fillpattern=(2, 1))
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hxtomephescieretoeisnter', 'cipher', fillpattern=(1, 3, 2))
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hxomeiphscerettoisenteer', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous)
+    'hereissometexttoencipher'
+    """
+
+    grid = amsco_transposition_positions(message, keyword, 
+        fillpattern=fillpattern, fillstyle=fillstyle)
+    transposed_sections = [s for c in [l for l in zip(*grid)] for s in c]
+    plaintext_list = [''] * len(transposed_sections)
+    current_pos = 0
+    for slice in transposed_sections:
+        plaintext_list[slice.index] = message[current_pos:current_pos-slice.start+slice.end][:len(message[slice.start:slice.end])]
+        current_pos += len(message[slice.start:slice.end])
+    return cat(plaintext_list)
+
+
+def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)],
+                                  fillstyles = [AmscoFillStyle.continuous, 
+                                                AmscoFillStyle.same_each_row, 
+                                                AmscoFillStyle.reverse_each_row],
+                                  fitness=Pbigrams, 
+                                  chunksize=500):
+    """Breaks an AMSCO transposition cipher using a dictionary and
+    n-gram frequency analysis
+
+    >>> amsco_break(amsco_transposition_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        'encipher'), \
+        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
+        patterns=[(1, 2)]) # doctest: +ELLIPSIS
+    (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
+    >>> amsco_break(amsco_transposition_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        'encipher', fillpattern=(2, 1)), \
+        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
+        patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
+    (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
+    """
+    with multiprocessing.Pool() as pool:
+        helper_args = [(message, trans, pattern, fillstyle, fitness)
+                       for trans in translist
+                       for pattern in patterns
+                       for fillstyle in fillstyles]
+        # Gotcha: the helper function here needs to be defined at the top level
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(amsco_break_worker, helper_args, chunksize) 
+        return max(breaks, key=lambda k: k[1])
+
+def amsco_break_worker(message, transposition,
+        pattern, fillstyle, fitness):
+    plaintext = amsco_transposition_decipher(message, transposition,
+        fillpattern=pattern, fillstyle=fillstyle)
+    fit = fitness(sanitise(plaintext))
+    logger.debug('AMSCO transposition break attempt using key {0} and pattern'
+                         '{1} ({2}) gives fit of {3} and decrypt starting: '
+                         '{4}'.format(
+                             transposition, pattern, fillstyle, fit, 
+                             sanitise(plaintext)[:50]))
+    return (transposition, pattern, fillstyle), fit
+
+if __name__ == "__main__":
+    import doctest
\ No newline at end of file
diff --git a/szyfrow/autokey.py b/szyfrow/autokey.py
new file mode 100644 (file)
index 0000000..5c4a954
--- /dev/null
@@ -0,0 +1,120 @@
+import math
+import multiprocessing 
+from support.utilities import *
+from support.language_models import *
+from cipher.caesar import caesar_encipher_letter, caesar_decipher_letter
+
+from logger import logger
+
+
+def autokey_encipher(message, keyword):
+    """Encipher with the autokey cipher
+
+    >>> autokey_encipher('meetatthefountain', 'kilt')
+    'wmpmmxxaeyhbryoca'
+    """
+    shifts = [pos(l) for l in keyword + message]
+    pairs = zip(message, shifts)
+    return cat([caesar_encipher_letter(l, k) for l, k in pairs])
+
+def autokey_decipher(ciphertext, keyword):
+    """Decipher with the autokey cipher
+
+    >>> autokey_decipher('wmpmmxxaeyhbryoca', 'kilt')
+    'meetatthefountain'
+    """
+    plaintext = []
+    keys = list(keyword)
+    for c in ciphertext:
+        plaintext_letter = caesar_decipher_letter(c, pos(keys[0]))
+        plaintext += [plaintext_letter]
+        keys = keys[1:] + [plaintext_letter]
+    return cat(plaintext)
+
+
+
+def autokey_sa_break( message
+                    , min_keylength=2
+                    , max_keylength=20
+                    , workers=10
+                    , initial_temperature=200
+                    , max_iterations=20000
+                    , fitness=Pletters
+                    , chunksize=1
+                    , result_count=1
+                    ):
+    """Break an autokey cipher by simulated annealing
+    """
+    worker_args = []
+    ciphertext = sanitise(message)
+    for keylength in range(min_keylength, max_keylength+1):
+        for i in range(workers):
+            key = cat(random.choice(string.ascii_lowercase) for _ in range(keylength))
+            worker_args.append((ciphertext, key, 
+                            initial_temperature, max_iterations, fitness))
+            
+    with multiprocessing.Pool() as pool:
+        breaks = pool.starmap(autokey_sa_break_worker,
+                              worker_args, chunksize)
+    if result_count <= 1:
+        return max(breaks, key=lambda k: k[1])
+    else:
+        return sorted(set(breaks), key=lambda k: k[1], reverse=True)[:result_count]
+
+
+def autokey_sa_break_worker(message, key, 
+                                     t0, max_iterations, fitness):
+   
+    temperature = t0
+
+    dt = t0 / (0.9 * max_iterations)
+    
+    plaintext = autokey_decipher(message, key)
+    current_fitness = fitness(plaintext)
+    current_key = key
+
+    best_key = current_key
+    best_fitness = current_fitness
+    best_plaintext = plaintext
+    
+    # print('starting for', max_iterations)
+    for i in range(max_iterations):
+        swap_pos = random.randrange(len(current_key))
+        swap_char = random.choice(string.ascii_lowercase)
+        
+        new_key = current_key[:swap_pos] + swap_char + current_key[swap_pos+1:]
+        
+        plaintext = autokey_decipher(message, new_key)
+        new_fitness = fitness(plaintext)
+        try:
+            sa_chance = math.exp((new_fitness - current_fitness) / temperature)
+        except (OverflowError, ZeroDivisionError):
+            # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
+            sa_chance = 0
+        if (new_fitness > current_fitness or random.random() < sa_chance):
+            # logger.debug('Simulated annealing: iteration {}, temperature {}, '
+            #     'current alphabet {}, current_fitness {}, '
+            #     'best_plaintext {}'.format(i, temperature, current_alphabet, 
+            #     current_fitness, best_plaintext[:50]))
+
+            # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
+#             print(new_fitness, new_key, plaintext[:100])
+            current_fitness = new_fitness
+            current_key = new_key
+            
+        if current_fitness > best_fitness:
+            best_key = current_key
+            best_fitness = current_fitness
+            best_plaintext = plaintext
+        if i % 500 == 0:
+            logger.debug('Simulated annealing: iteration {}, temperature {}, '
+                'current key {}, current_fitness {}, '
+                'best_plaintext {}'.format(i, temperature, current_key, 
+                current_fitness, plaintext[:50]))
+        temperature = max(temperature - dt, 0.001)
+        
+#     print(best_key, best_fitness, best_plaintext[:70])
+    return best_key, best_fitness # current_alphabet, current_fitness
+
+if __name__ == "__main__":
+    import doctest
\ No newline at end of file
diff --git a/szyfrow/bifid.py b/szyfrow/bifid.py
new file mode 100644 (file)
index 0000000..478b239
--- /dev/null
@@ -0,0 +1,123 @@
+import multiprocessing 
+from support.utilities import *
+from support.language_models import *
+from cipher.keyword_cipher import KeywordWrapAlphabet, keyword_cipher_alphabet_of
+
+from logger import logger
+
+def bifid_grid(keyword, wrap_alphabet, letter_mapping):
+    """Create the grids for a Bifid cipher
+    """
+    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+    if letter_mapping is None:
+        letter_mapping = {'j': 'i'}
+    translation = ''.maketrans(letter_mapping)
+    cipher_alphabet = cat(collections.OrderedDict.fromkeys(cipher_alphabet.translate(translation)))
+    f_grid = {k: ((i // 5) + 1, (i % 5) + 1) 
+              for i, k in enumerate(cipher_alphabet)}
+    r_grid = {((i // 5) + 1, (i % 5) + 1): k 
+              for i, k in enumerate(cipher_alphabet)}
+    return translation, f_grid, r_grid
+
+def bifid_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a, 
+                   letter_mapping=None, period=None, fillvalue=None):
+    """Bifid cipher
+
+    >>> bifid_encipher("indiajelly", 'iguana')
+    'ibidonhprm'
+    >>> bifid_encipher("indiacurry", 'iguana', period=4)
+    'ibnhgaqltm'
+    >>> bifid_encipher("indiacurry", 'iguana', period=4, fillvalue='x')
+    'ibnhgaqltzml'
+    """
+    translation, f_grid, r_grid = bifid_grid(keyword, wrap_alphabet, letter_mapping)
+    
+    t_message = message.translate(translation)
+    pairs0 = [f_grid[l] for l in sanitise(t_message)]
+    if period:
+        chunked_pairs = [pairs0[i:i+period] for i in range(0, len(pairs0), period)]
+        if len(chunked_pairs[-1]) < period and fillvalue:
+            chunked_pairs[-1] += [f_grid[fillvalue]] * (period - len(chunked_pairs[-1]))
+    else:
+        chunked_pairs = [pairs0]
+    
+    pairs1 = []
+    for c in chunked_pairs:
+        items = sum(list(list(i) for i in zip(*c)), [])
+        p = [(items[i], items[i+1]) for i in range(0, len(items), 2)]
+        pairs1 += p
+    
+    return cat(r_grid[p] for p in pairs1)
+
+
+def bifid_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a, 
+                   letter_mapping=None, period=None, fillvalue=None):
+    """Decipher with bifid cipher
+
+    >>> bifid_decipher('ibidonhprm', 'iguana')
+    'indiaielly'
+    >>> bifid_decipher("ibnhgaqltm", 'iguana', period=4)
+    'indiacurry'
+    >>> bifid_decipher("ibnhgaqltzml", 'iguana', period=4)
+    'indiacurryxx'
+    """
+    translation, f_grid, r_grid = bifid_grid(keyword, wrap_alphabet, letter_mapping)
+    
+    t_message = message.translate(translation)
+    pairs0 = [f_grid[l] for l in sanitise(t_message)]
+    if period:
+        chunked_pairs = [pairs0[i:i+period] for i in range(0, len(pairs0), period)]
+        if len(chunked_pairs[-1]) < period and fillvalue:
+            chunked_pairs[-1] += [f_grid[fillvalue]] * (period - len(chunked_pairs[-1]))
+    else:
+        chunked_pairs = [pairs0]
+        
+    pairs1 = []
+    for c in chunked_pairs:
+        items = [j for i in c for j in i]
+        gap = len(c)
+        p = [(items[i], items[i+gap]) for i in range(gap)]
+        pairs1 += p
+
+    return cat(r_grid[p] for p in pairs1) 
+
+
+def bifid_break_mp(message, wordlist=keywords, fitness=Pletters, max_period=10,
+                     number_of_solutions=1, chunksize=500):
+    """Breaks a Bifid cipher by trying each dictionary keyword, wrap style
+    and period (0 to max_period) and scoring the decrypts with fitness
+
+    >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+    (('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...)
+    >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
+          wordlist=['cat', 'elephant', 'kangaroo'], \
+          number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    [(('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...), 
+    (('elephant', <KeywordWrapAlphabet.from_largest: 3>, 0), -52.834575011...)]
+    """
+    with multiprocessing.Pool() as pool:
+        helper_args = [(message, word, wrap, period, fitness)
+                       for word in wordlist
+                       for wrap in KeywordWrapAlphabet
+                       for period in range(max_period+1)]
+        # Gotcha: the helper function here needs to be defined at the top level
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(bifid_break_worker, helper_args, chunksize)
+        if number_of_solutions == 1:
+            return max(breaks, key=lambda k: k[1])
+        else:
+            return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
+
+def bifid_break_worker(message, keyword, wrap_alphabet, period, fitness):
+    plaintext = bifid_decipher(message, keyword, wrap_alphabet, period=period)
+    fit = fitness(plaintext)
+    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
+                 '{2} and decrypt starting: {3}'.format(keyword, 
+                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
+    return (keyword, wrap_alphabet, period), fit
+
+if __name__ == "__main__":
+    import doctest
\ No newline at end of file
diff --git a/szyfrow/bombe.py b/szyfrow/bombe.py
new file mode 100644 (file)
index 0000000..74e03ba
--- /dev/null
@@ -0,0 +1,207 @@
+import string
+import collections
+import multiprocessing
+import itertools
+import logging
+
+from cipher.enigma import *
+
+
+# Module-level logger for the bombe simulation; everything below runs when
+# the module is imported.
+logger = logging.getLogger('bombe')
+# logger.setLevel(logging.WARNING)
+# logger.setLevel(logging.INFO)
+# DEBUG is the active level, so the per-signal debug messages emitted by
+# Bombe.propagate are all recorded.
+logger.setLevel(logging.DEBUG)
+
+# create the logging file handler
+# NOTE(review): the path is relative, so "enigma.log" is opened in the
+# current working directory as a side effect of importing this module.
+fh = logging.FileHandler("enigma.log")
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+fh.setFormatter(formatter)
+
+# add handler to logger object
+logger.addHandler(fh)
+
+##################################
+# # Bombe
+##################################
+#
+# Good explanation of [how the bombe worked](http://www.ellsbury.com/enigmabombe.htm) by Graham Ellsbury
+#
+
+# A live wire: `bank` names the bank (a letter) and `wire` the wire within it.
+Signal = collections.namedtuple('Signal', ['bank', 'wire'])
+# Two banks joined through one scrambler; `banks` is a two-element list.
+Connection = collections.namedtuple('Connection', ['banks', 'scrambler'])
+# One crib position: plaintext letter, ciphertext letter, 1-based position.
+# The typename must match the variable name, otherwise the repr is wrong and
+# instances cannot be pickled (pickle looks the class up by its name).
+MenuItem = collections.namedtuple('MenuItem', ['before', 'after', 'number'])
+
+
+def make_menu(plaintext, ciphertext):
+    """Build a bombe menu from a crib.
+
+    Pairs up `plaintext` and `ciphertext` letter by letter (stopping at the
+    shorter of the two) and returns a list of `MenuItem`s whose `number` is
+    the 1-based position of the pair.
+    """
+    return [MenuItem(p, c, i)
+            for i, (p, c) in enumerate(zip(plaintext, ciphertext), start=1)]
+
+
+class Scrambler(object):
+    """Three `SimpleWheel`s and a `Reflector` chained into one scrambler.
+
+    The wheels only move when `advance` or `set_positions` is called;
+    `lookup` itself never steps them.
+    """
+
+    def __init__(self, wheel1_spec, wheel2_spec, wheel3_spec, reflector_spec,
+                 wheel1_pos='a', wheel2_pos='a', wheel3_pos='a'):
+        self.wheel1 = SimpleWheel(wheel1_spec, position=wheel1_pos)
+        self.wheel2 = SimpleWheel(wheel2_spec, position=wheel2_pos)
+        self.wheel3 = SimpleWheel(wheel3_spec, position=wheel3_pos)
+        self.reflector = Reflector(reflector_spec)
+
+    # These were previously served by a __getattribute__ override, which
+    # intercepted every attribute access on the object. Read-only properties
+    # give callers the same `scrambler.wheel_positions` spelling.
+    @property
+    def wheel_positions(self):
+        """The `position` of wheel1, wheel2 and wheel3, as a tuple."""
+        return self.wheel1.position, self.wheel2.position, self.wheel3.position
+
+    @property
+    def wheel_positions_l(self):
+        """The `position_l` of wheel1, wheel2 and wheel3, as a tuple."""
+        return (self.wheel1.position_l, self.wheel2.position_l,
+                self.wheel3.position_l)
+
+    def advance(self, wheel1=False, wheel2=False, wheel3=True):
+        """Advance each wheel whose flag is true (by default only wheel3)."""
+        if wheel1:
+            self.wheel1.advance()
+        if wheel2:
+            self.wheel2.advance()
+        if wheel3:
+            self.wheel3.advance()
+
+    def lookup(self, letter):
+        """Pass `letter` through the scrambler and return the result.
+
+        The path is wheel3, wheel2, wheel1 forwards, the reflector, then
+        wheel1, wheel2, wheel3 backwards.
+        """
+        for wheel in (self.wheel3, self.wheel2, self.wheel1):
+            letter = wheel.forward(letter)
+        letter = self.reflector.forward(letter)
+        for wheel in (self.wheel1, self.wheel2, self.wheel3):
+            letter = wheel.backward(letter)
+        return letter
+
+    def set_positions(self, wheel1_pos, wheel2_pos, wheel3_pos):
+        """Set the position of all three wheels."""
+        self.wheel1.set_position(wheel1_pos)
+        self.wheel2.set_position(wheel2_pos)
+        self.wheel3.set_position(wheel3_pos)
+
+
+class Bombe(object):
+    """A set of scramblers wired between letter banks, driven by a menu.
+
+    Each menu item becomes a `Connection` joining two banks through a
+    `Scrambler`. `test` energises one wire, propagates the signal through
+    every connection (and optionally the diagonal board) and reports whether
+    the current wheel positions are a possible stop.
+
+    Calling the instance with a `(wheel1_pos, wheel2_pos, wheel3_pos)` tuple
+    runs `test` at those positions and returns `(positions, result)`, which
+    lets an instance be mapped over positions by `multiprocessing.Pool.map`.
+    """
+
+    def __init__(self, wheel1_spec, wheel2_spec, wheel3_spec, reflector_spec,
+                menu=None, start_signal=None, use_diagonal_board=True,
+                verify_plugboard=True):
+        self.connections = []
+        self.wheel1_spec = wheel1_spec
+        self.wheel2_spec = wheel2_spec
+        self.wheel3_spec = wheel3_spec
+        self.reflector_spec = reflector_spec
+        # Always define test_start, so a Bombe built without a menu or a
+        # start signal fails with a clear error in test() rather than with
+        # an AttributeError.
+        self.test_start = None
+        if menu:
+            self.read_menu(menu)
+        if start_signal:
+            # An explicit start signal overrides the one read_menu picked.
+            self.test_start = start_signal
+        self.use_diagonal_board = use_diagonal_board
+        self.verify_plugboard = verify_plugboard
+
+    # These were previously served by a __getattribute__ override, which
+    # intercepted every attribute access on the object.
+    @property
+    def wheel_positions(self):
+        """Wheel positions of the first connection's scrambler."""
+        return self.connections[0].scrambler.wheel_positions
+
+    @property
+    def wheel_positions_l(self):
+        """`wheel_positions_l` of the first connection's scrambler."""
+        return self.connections[0].scrambler.wheel_positions_l
+
+    def __call__(self, start_positions):
+        """Test `start_positions`; return `(start_positions, test result)`."""
+        return start_positions, self.test(initial_signal=self.test_start,
+            start_positions=start_positions,
+            use_diagonal_board=self.use_diagonal_board,
+            verify_plugboard=self.verify_plugboard)
+
+    def add_connection(self, bank_before, bank_after, scrambler):
+        """Wire `scrambler` between the two named banks."""
+        self.connections.append(
+            Connection([bank_before, bank_after], scrambler))
+
+    def read_menu(self, menu):
+        """Replace all connections with those described by `menu`.
+
+        Each item gets its own scrambler, with wheel3 offset by the item's
+        position (`item.number - 1`). The test signal is set to the letter
+        that occurs most often in the menu, on the same-named wire.
+        """
+        self.connections = []
+        for item in menu:
+            scrambler = Scrambler(self.wheel1_spec, self.wheel2_spec, self.wheel3_spec,
+                                  self.reflector_spec,
+                                  wheel3_pos=unpos(item.number - 1))
+            self.add_connection(item.before, item.after, scrambler)
+        letter_counts = (collections.Counter(m.before for m in menu) +
+                         collections.Counter(m.after for m in menu))
+        most_common_letter = letter_counts.most_common(1)[0][0]
+        self.test_start = Signal(most_common_letter, most_common_letter)
+
+    def set_positions(self, wheel1_pos, wheel2_pos, wheel3_pos):
+        """Set every scrambler to the given positions.
+
+        Wheel3 of the i-th connection is offset by i from `wheel3_pos`.
+        """
+        # NOTE(review): read_menu offsets wheel3 by `item.number - 1`, this
+        # method by the connection's index. The two agree only when the menu
+        # items are numbered 1, 2, 3, ... in order -- confirm for sparse menus.
+        for i, c in enumerate(self.connections):
+            c.scrambler.set_positions(wheel1_pos, wheel2_pos, unpos(pos(wheel3_pos) + i))
+
+    def test(self, initial_signal=None, start_positions=None, use_diagonal_board=True,
+            verify_plugboard=True):
+        """Energise one wire and report whether this position is a stop.
+
+        Resets all banks, optionally moves the wheels to `start_positions`,
+        injects `initial_signal` (default `self.test_start`) and propagates.
+        Returns False if all 26 wires of the injected bank end up live.
+        Otherwise returns True, or, when `verify_plugboard` is set, whether
+        the implied plugboard pairs are mutually disjoint.
+
+        Raises ValueError if there is no signal to inject.
+        """
+        self.banks = {label: dict.fromkeys(string.ascii_lowercase, False)
+                      for label in string.ascii_lowercase}
+        if start_positions:
+            self.set_positions(*start_positions)
+        if not initial_signal:
+            initial_signal = self.test_start
+        if initial_signal is None:
+            raise ValueError('Bombe.test needs an initial signal: supply one '
+                             'or read a menu first')
+        self.pending = [initial_signal]
+        self.propagate(use_diagonal_board)
+        # Count on the bank the signal was injected into. Counting on
+        # self.test_start.bank gave the wrong answer whenever a different
+        # initial_signal was supplied.
+        live_wire_count = sum(self.banks[initial_signal.bank].values())
+        if live_wire_count >= 26:
+            return False
+        if not verify_plugboard:
+            return True
+        possibles = self.possible_plugboards()
+        return all(s0.isdisjoint(s1)
+                   for s0 in possibles for s1 in possibles if s0 != s1)
+
+    def propagate(self, use_diagonal_board):
+        """Spread every pending signal through the banks until none remain.
+
+        A signal arriving on a wire that is already live is dropped. A newly
+        live wire is mirrored across the diagonal board (if enabled) and sent
+        through every scrambler attached to its bank.
+        """
+        # A deque gives O(1) pops from the front; slicing the list copied
+        # the whole queue for every signal processed.
+        queue = collections.deque(self.pending)
+        while queue:
+            current = queue.popleft()
+            logger.debug("Propogater processing %s", current)
+            if self.banks[current.bank][current.wire]:
+                continue
+            self.banks[current.bank][current.wire] = True
+            if use_diagonal_board:
+                queue.append(Signal(current.wire, current.bank))
+            for c in self.connections:
+                if current.bank in c.banks:
+                    other_bank = [b for b in c.banks if b != current.bank][0]
+                    other_wire = c.scrambler.lookup(current.wire)
+                    logger.debug("Propogator adding %s %s because %s",
+                                 other_bank, other_wire, c.banks)
+                    queue.append(Signal(other_bank, other_wire))
+        self.pending = []
+
+    def run(self, run_start=None, wheel1_pos='a', wheel2_pos='a', wheel3_pos='a', use_diagonal_board=True):
+        """Test all 26*26*26 wheel positions, starting from those given.
+
+        Returns (and stores in `self.solutions`) the `wheel_positions_l` of
+        every position for which `test` succeeds.
+        """
+        if not run_start:
+            run_start = self.test_start
+        self.solutions = []
+        self.set_positions(wheel1_pos, wheel2_pos, wheel3_pos)
+        for run_index in range(26*26*26):
+            if self.test(initial_signal=run_start, use_diagonal_board=use_diagonal_board):
+                self.solutions.append(self.wheel_positions_l)
+            # wheel3 steps every time, wheel2 every 26 steps and wheel1
+            # every 26*26 steps.
+            advance2 = (run_index + 1) % 26 == 0
+            advance1 = (run_index + 1) % (26*26) == 0
+            for c in self.connections:
+                c.scrambler.advance(advance1, advance2, True)
+        return self.solutions
+
+    def possible_plugboards(self):
+        """Return the plugboard pairs implied by the current bank state.
+
+        A bank with exactly one live wire, or exactly one dead wire, pairs
+        the bank's letter with that wire's letter. The result is a set of
+        frozensets.
+        """
+        possibles = set()
+        for b, wires in self.banks.items():
+            active = [w for w in wires if wires[w]]
+            inactive = [w for w in wires if not wires[w]]
+            if len(active) == 1:
+                possibles.add(frozenset((b, active[0])))
+            if len(inactive) == 1:
+                possibles.add(frozenset((b, inactive[0])))
+        return possibles
+
+
+def run_multi_bombe(wheel1_spec, wheel2_spec, wheel3_spec, reflector_spec, menu,
+                    start_signal=None, use_diagonal_board=True, 
+                    verify_plugboard=True):
+    """Test every wheel starting position in parallel.
+
+    Builds one `Bombe` from the arguments and maps it over all three-letter
+    combinations of lowercase letters with a `multiprocessing.Pool`.
+    Returns the starting positions for which the bombe's test succeeded.
+    """
+    candidate_positions = itertools.product(string.ascii_lowercase, repeat=3)
+
+    with multiprocessing.Pool() as pool:
+        bombe = Bombe(wheel1_spec, wheel2_spec, wheel3_spec, reflector_spec,
+                      menu=menu, start_signal=start_signal,
+                      use_diagonal_board=use_diagonal_board,
+                      verify_plugboard=verify_plugboard)
+        outcomes = pool.map(bombe, candidate_positions)
+    return [outcome[0] for outcome in outcomes if outcome[1]]
\ No newline at end of file
diff --git a/szyfrow/cadenus.py b/szyfrow/cadenus.py
new file mode 100644 (file)
index 0000000..433ad19
--- /dev/null
@@ -0,0 +1,125 @@
+from itertools import chain
+import multiprocessing
+from support.utilities import *
+from support.language_models import *
+from cipher.column_transposition import transpositions_of
+
+from logger import logger
+
+def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False):
+    """Makes the key column for a Cadenus cipher (the column down between the
+        rows of letters)
+
+    Returns a dict mapping each lowercase letter to its row number in the
+    key column. The two `doubled_letters` share one row, `start` is the
+    letter on row 0, and `reverse` runs the alphabet backwards.
+
+    >>> make_cadenus_keycolumn()['a']
+    0
+    >>> make_cadenus_keycolumn()['b']
+    1
+    >>> make_cadenus_keycolumn()['c']
+    2
+    >>> make_cadenus_keycolumn()['v']
+    21
+    >>> make_cadenus_keycolumn()['w']
+    21
+    >>> make_cadenus_keycolumn()['z']
+    24
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['a']
+    1
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['b']
+    0
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['c']
+    24
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['i']
+    18
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['j']
+    18
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['v']
+    6
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['z']
+    2
+    >>> make_cadenus_keycolumn(start='v') == make_cadenus_keycolumn(start='w')
+    True
+    """
+    dropped, kept = doubled_letters[0], doubled_letters[1]
+    index_to_remove = string.ascii_lowercase.find(dropped)
+    short_alphabet = (string.ascii_lowercase[:index_to_remove] +
+                      string.ascii_lowercase[index_to_remove+1:])
+    if reverse:
+        short_alphabet = short_alphabet[::-1]
+    # The dropped letter is not in the short alphabet, so find() would return
+    # -1 and silently rotate from the wrong place. It shares a row with its
+    # partner, so start from the partner instead.
+    if start == dropped:
+        start = kept
+    start_pos = short_alphabet.find(start)
+    rotated_alphabet = short_alphabet[start_pos:] + short_alphabet[:start_pos]
+    keycolumn = {l: i for i, l in enumerate(rotated_alphabet)}
+    keycolumn[dropped] = keycolumn[kept]
+    return keycolumn
+
+def cadenus_encipher(message, keyword, keycolumn, fillvalue='a'):
+    """Encipher with the Cadenus cipher
+
+    The message is cut into rows of `len(message) // 25` letters. Each
+    column is rotated by the key-column value of the matching keyword
+    letter, then every row is transposed by the keyword.
+
+    >>> cadenus_encipher(sanitise('Whoever has made a voyage up the Hudson ' \
+                                  'must remember the Kaatskill mountains. ' \
+                                  'They are a dismembered branch of the great'), \
+                'wink', \
+                make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True))
+    'antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned'
+    >>> cadenus_encipher(sanitise('a severe limitation on the usefulness of ' \
+                                  'the cadenus is that every message must be ' \
+                                  'a multiple of twenty-five letters long'), \
+                'easy', \
+                make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True))
+    'systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtofarenuseieeieltarlmentieetogevesitfaisltngeeuvowul'
+    """
+    row_length = len(message) // 25
+    grid = chunks(message, row_length, fillvalue=fillvalue)
+    offsets = [keycolumn[letter] for letter in keyword]
+    shifted_columns = []
+    for offset, column in zip(offsets, zip(*grid)):
+        shifted_columns.append(column[offset:] + column[:offset])
+    column_order = transpositions_of(keyword)
+    shuffled_rows = [transpose(row, column_order)
+                     for row in zip(*shifted_columns)]
+    return cat(chain.from_iterable(shuffled_rows))
+
+def cadenus_decipher(message, keyword, keycolumn, fillvalue='a'):
+    """Decipher with the Cadenus cipher
+
+    Reverses `cadenus_encipher`: each row is untransposed by the keyword,
+    then each column is rotated back by the key-column value of the
+    matching keyword letter.
+
+    >>> cadenus_decipher('antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaa' \
+                         'suvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned', \
+                 'wink', \
+                 make_cadenus_keycolumn(reverse=True))
+    'whoeverhasmadeavoyageupthehudsonmustrememberthekaatskillmountainstheyareadismemberedbranchofthegreat'
+    >>> cadenus_decipher('systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtof' \
+                        'arenuseieeieltarlmentieetogevesitfaisltngeeuvowul', \
+                 'easy', \
+                 make_cadenus_keycolumn(reverse=True))
+    'aseverelimitationontheusefulnessofthecadenusisthateverymessagemustbeamultipleoftwentyfiveletterslong'
+    """
+    row_length = len(message) // 25
+    grid = chunks(message, row_length, fillvalue=fillvalue)
+    column_order = transpositions_of(keyword)
+    restored_rows = [untranspose(row, column_order) for row in grid]
+    offsets = [keycolumn[letter] for letter in keyword]
+    restored_columns = []
+    for offset, column in zip(offsets, zip(*restored_rows)):
+        restored_columns.append(column[-offset:] + column[:-offset])
+    return cat(chain.from_iterable(zip(*restored_columns)))
+
+
+def cadenus_break(message, words=keywords, 
+    doubled_letters='vw', fitness=Pbigrams):
+    """Break a Cadenus cipher by dictionary attack.
+
+    Tries every word in `words` with every key column (each start letter,
+    both directions), scoring each decrypt with `fitness` in a
+    `multiprocessing.Pool`. Returns the best `((keyword, keycolumn), fit)`.
+    """
+    # Build the 26 * 2 candidate key columns once, rather than twice per
+    # (word, start, direction) combination.
+    keycolumns = [make_cadenus_keycolumn(doubled_letters=doubled_letters,
+                                         start=s, reverse=r)
+                  for s in string.ascii_lowercase
+                  for r in [True, False]]
+    # `words` is walked exactly once, so a one-shot iterable works too.
+    candidates = []
+    for w in words:
+        widest = max(transpositions_of(w))
+        candidates.extend((message, w, kc, fitness)
+                          for kc in keycolumns
+                          if widest <= len(kc))
+    with multiprocessing.Pool() as pool:
+        results = pool.starmap(cadenus_break_worker, candidates)
+    return max(results, key=lambda k: k[1])
+
+def cadenus_break_worker(message, keyword, keycolumn, fitness):
+    """Score one candidate Cadenus key.
+
+    Deciphers `message` in 175-letter pieces with `keyword` and `keycolumn`,
+    scores the joined result with `fitness`, and returns
+    `((keyword, keycolumn), fit)`.
+    """
+    # NOTE(review): 175 = 25 * 7, so each piece is a 25-row block only for a
+    # seven-column key -- confirm whether this should follow the keyword.
+    pieces = chunks(message, 175)
+    decrypts = (cadenus_decipher(piece, keyword, keycolumn) for piece in pieces)
+    plaintext = ''.join(decrypts)
+    score = fitness(plaintext)
+    candidate_key = (keyword, keycolumn)
+    return candidate_key, score
+
+if __name__ == "__main__":
+    import doctest
+    # Importing doctest on its own runs nothing; testmod() is what actually
+    # executes the doctests embedded in this module's docstrings.
+    doctest.testmod()
\ No newline at end of file
diff --git a/szyfrow/caesar.py b/szyfrow/caesar.py
new file mode 100644 (file)
index 0000000..ec878fc
--- /dev/null
@@ -0,0 +1,121 @@
+from support.utilities import *
+from support.language_models import *
+
+from logger import logger
+
+def caesar_encipher_letter(accented_letter, shift):
+    """Encipher a letter, given a shift amount
+
+    The letter is unaccented first. Anything that is then not an ASCII
+    letter is returned as it stands; case is preserved.
+
+    >>> caesar_encipher_letter('a', 1)
+    'b'
+    >>> caesar_encipher_letter('a', 2)
+    'c'
+    >>> caesar_encipher_letter('b', 2)
+    'd'
+    >>> caesar_encipher_letter('x', 2)
+    'z'
+    >>> caesar_encipher_letter('y', 2)
+    'a'
+    >>> caesar_encipher_letter('z', 2)
+    'b'
+    >>> caesar_encipher_letter('z', -1)
+    'y'
+    >>> caesar_encipher_letter('a', -1)
+    'z'
+    >>> caesar_encipher_letter('A', 1)
+    'B'
+    >>> caesar_encipher_letter('é', 1)
+    'f'
+    """
+    letter = unaccent(accented_letter)
+    if letter not in string.ascii_letters:
+        return letter
+    shifted = unpos(pos(letter) + shift)
+    return shifted.upper() if letter in string.ascii_uppercase else shifted
+
+def caesar_decipher_letter(letter, shift):
+    """Decipher a letter, given a shift amount
+
+    Deciphering is enciphering with the opposite shift.
+    
+    >>> caesar_decipher_letter('b', 1)
+    'a'
+    >>> caesar_decipher_letter('b', 2)
+    'z'
+    """
+    opposite_shift = -shift
+    return caesar_encipher_letter(letter, opposite_shift)
+
+def caesar_encipher(message, shift):
+    """Encipher a message with the Caesar cipher of given shift
+
+    Every character goes through `caesar_encipher_letter` in turn.
+    
+    >>> caesar_encipher('abc', 1)
+    'bcd'
+    >>> caesar_encipher('abc', 2)
+    'cde'
+    >>> caesar_encipher('abcxyz', 2)
+    'cdezab'
+    >>> caesar_encipher('ab cx yz', 2)
+    'cd ez ab'
+    >>> caesar_encipher('Héllo World!', 2)
+    'Jgnnq Yqtnf!'
+    """
+    return cat(caesar_encipher_letter(plain_letter, shift)
+               for plain_letter in message)
+
+def caesar_decipher(message, shift):
+    """Decipher a message with the Caesar cipher of given shift
+
+    Deciphering is enciphering with the opposite shift.
+    
+    >>> caesar_decipher('bcd', 1)
+    'abc'
+    >>> caesar_decipher('cde', 2)
+    'abc'
+    >>> caesar_decipher('cd ez ab', 2)
+    'ab cx yz'
+    >>> caesar_decipher('Jgnnq Yqtnf!', 2)
+    'Hello World!'
+    """
+    opposite_shift = -shift
+    return caesar_encipher(message, opposite_shift)
+
+
+def caesar_break(message, fitness=Pletters):
+    """Breaks a Caesar cipher using frequency analysis
+
+    Tries all 26 shifts on the sanitised message and returns
+    `(shift, fit)` for the decrypt that `fitness` scores highest; the
+    lowest shift wins a tie.
+
+    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
+          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
+    (4, -130.849989015...)
+    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
+          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
+    (19, -128.82410410...)
+    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
+          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
+    (13, -126.25403935...)
+    """
+    ciphertext = sanitise(message)
+    attempt_template = ('Caesar break attempt using key {0} gives fit of {1} '
+                        'and decrypt starting: {2}')
+    best_shift, best_fit = 0, float('-inf')
+    for shift in range(26):
+        candidate = caesar_decipher(ciphertext, shift)
+        score = fitness(candidate)
+        logger.debug(attempt_template.format(shift, score, candidate[:50]))
+        if score > best_fit:
+            best_shift, best_fit = shift, score
+    best_decrypt = caesar_decipher(ciphertext, best_shift)
+    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
+                'decrypt starting: {2}'.format(best_shift, best_fit,
+                                               best_decrypt[:50]))
+    return best_shift, best_fit
diff --git a/szyfrow/column_transposition.py b/szyfrow/column_transposition.py
new file mode 100644 (file)
index 0000000..7e0fc28
--- /dev/null
@@ -0,0 +1,272 @@
+import math
+import multiprocessing 
+from itertools import chain
+from support.utilities import *
+from support.language_models import *
+
+from logger import logger
+
+def transpositions_of(keyword):
+    """Finds the transpositions given by a keyword. For instance, the keyword
+    'clever' rearranges to 'celrv', so the first column (0) stays first, the
+    second column (1) moves to third, the third column (2) moves to second, 
+    and so on.
+
+    If passed a tuple, assume it's already a transposition and just return it.
+
+    >>> transpositions_of('clever')
+    (0, 2, 1, 4, 3)
+    >>> transpositions_of('fred')
+    (3, 2, 0, 1)
+    >>> transpositions_of((3, 2, 0, 1))
+    (3, 2, 0, 1)
+    """
+    if isinstance(keyword, tuple):
+        return keyword
+    unique_letters = deduplicate(keyword)
+    return tuple(unique_letters.index(letter)
+                 for letter in sorted(unique_letters))
+
+
+# Lookup table built at import time: each transposition maps to the list of
+# known keywords that produce it.
+transpositions = collections.defaultdict(list)
+for word in keywords:
+    transpositions[transpositions_of(word)].append(word)
+
+
+def pad(message_len, group_len, fillvalue):
+    """Return the padding that fills a message up to whole groups.
+
+    The result brings `message_len` up to the next multiple of `group_len`,
+    and is empty if it is one already. `fillvalue` is either a string used
+    for every padding position, or a callable called once per position.
+    """
+    shortfall = -message_len % group_len
+    return ''.join(fillvalue() if callable(fillvalue) else fillvalue
+                   for _ in range(shortfall))
+
+def column_transposition_encipher(message, keyword, fillvalue=' ', 
+      fillcolumnwise=False,
+      emptycolumnwise=False):
+    """Enciphers using the column transposition cipher.
+    Message is padded to allow all rows to be the same length.
+
+    `fillcolumnwise` writes the message into the grid down the columns
+    rather than along the rows; `emptycolumnwise` reads the transposed grid
+    out down the columns rather than along the rows.
+
+    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
+    'hlohr eltee '
+    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere  '
+    >>> column_transposition_encipher('hellothere', 'abcdef')
+    'hellothere  '
+    >>> column_transposition_encipher('hellothere', 'abcde')
+    'hellothere'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
+    'hlohreltee'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
+    'htehlelroe'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
+    'heotllrehe'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
+    'holrhetlee'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
+    'htleehoelr'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
+    'hleolteher'
+    >>> column_transposition_encipher('hellothere', 'cleverly')
+    'hleolthre e '
+    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+    'hleolthre!e!'
+    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
+    'hleolthre*e*'
+    """
+    column_order = transpositions_of(keyword)
+    width = len(column_order)
+    message += pad(len(message), width, fillvalue)
+    if fillcolumnwise:
+        rows = every_nth(message, len(message) // width)
+    else:
+        rows = chunks(message, width)
+    shuffled_rows = [transpose(row, column_order) for row in rows]
+    if emptycolumnwise:
+        return combine_every_nth(shuffled_rows)
+    return cat(chain.from_iterable(shuffled_rows))
+
+def column_transposition_decipher(message, keyword, fillvalue=' ', 
+      fillcolumnwise=False,
+      emptycolumnwise=False):
+    """Deciphers using the column transposition cipher.
+    Message is padded to allow all rows to be the same length.
+
+    `fillcolumnwise` and `emptycolumnwise` must be the values that were
+    used to encipher the message.
+
+    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
+    'hellothere'
+    """
+    column_order = transpositions_of(keyword)
+    width = len(column_order)
+    message += pad(len(message), width, fillvalue)
+    if emptycolumnwise:
+        rows = every_nth(message, len(message) // width)
+    else:
+        rows = chunks(message, width)
+    restored_rows = [untranspose(row, column_order) for row in rows]
+    if fillcolumnwise:
+        return combine_every_nth(restored_rows)
+    return cat(chain.from_iterable(restored_rows))
+
+def scytale_encipher(message, rows, fillvalue=' '):
+    """Enciphers using the scytale transposition cipher.
+    Message is padded with spaces to allow all rows to be the same length.
+
+    Implemented as a column transposition with the identity ordering of
+    `rows` columns, filled column-wise and read out row-wise.
+
+    >>> scytale_encipher('thequickbrownfox', 3)
+    'tcnhkfeboqrxuo iw '
+    >>> scytale_encipher('thequickbrownfox', 4)
+    'tubnhirfecooqkwx'
+    >>> scytale_encipher('thequickbrownfox', 5)
+    'tubn hirf ecoo qkwx '
+    >>> scytale_encipher('thequickbrownfox', 6)
+    'tqcrnxhukof eibwo '
+    >>> scytale_encipher('thequickbrownfox', 7)
+    'tqcrnx hukof  eibwo  '
+    """
+    identity_order = list(range(rows))
+    return column_transposition_encipher(
+        message, identity_order,
+        fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
+
+def scytale_decipher(message, rows):
+    """Deciphers using the scytale transposition cipher.
+    Assumes the message is padded so that all rows are the same length.
+
+    Implemented as a column transposition with the identity ordering of
+    `rows` columns, filled column-wise and read out row-wise.
+    
+    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
+    'thequickbrownfox  '
+    >>> scytale_decipher('tubnhirfecooqkwx', 4)
+    'thequickbrownfox'
+    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
+    'thequickbrownfox    '
+    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
+    'thequickbrownfox  '
+    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
+    'thequickbrownfox     '
+    """
+    identity_order = list(range(rows))
+    return column_transposition_decipher(
+        message, identity_order,
+        fillcolumnwise=True, emptycolumnwise=False)
+
+
+def column_transposition_break_mp(message, translist=transpositions,
+                                  fitness=Pbigrams, chunksize=500):
+    """Breaks a column transposition cipher using a dictionary and
+    n-gram frequency analysis
+
+    Every transposition in `translist` is tried with all four combinations
+    of column-wise filling and emptying. Returns
+    `((transposition, fillcolumnwise, emptycolumnwise), fit)` for the best.
+
+    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        'encipher'), \
+        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
+    (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
+    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        'encipher'), \
+        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
+                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
+                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
+        fitness=Ptrigrams) # doctest: +ELLIPSIS
+    (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
+    """
+    flag_choices = [True, False]
+    with multiprocessing.Pool() as pool:
+        candidates = [(message, candidate, fill_flag, empty_flag, fitness)
+                      for candidate in translist
+                      for fill_flag in flag_choices
+                      for empty_flag in flag_choices]
+        # The worker has to be a top-level function, because Pool.starmap
+        # cannot use one defined inside another function.
+        scored = pool.starmap(column_transposition_break_worker,
+                              candidates, chunksize)
+        return max(scored, key=lambda result: result[1])
+column_transposition_break = column_transposition_break_mp
+
+def column_transposition_break_worker(message, transposition,
+        fillcolumnwise, emptycolumnwise, fitness):
+    """Score one candidate column transposition.
+
+    Deciphers `message` with the given transposition and fill/empty flags,
+    scores the sanitised result with `fitness`, and returns
+    `((transposition, fillcolumnwise, emptycolumnwise), fit)`.
+    """
+    candidate_key = (transposition, fillcolumnwise, emptycolumnwise)
+    decrypt = column_transposition_decipher(
+        message, transposition,
+        fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise)
+    score = fitness(sanitise(decrypt))
+    template = ('Column transposition break attempt using key {0} '
+                'gives fit of {1} and decrypt starting: {2}')
+    logger.debug(template.format(transposition, score,
+                                 sanitise(decrypt)[:50]))
+    return candidate_key, score
+
+
+def scytale_break_mp(message, max_key_length=20,
+                     fitness=Pbigrams, chunksize=500):
+    """Breaks a scytale cipher using a range of lengths and
+    n-gram frequency analysis
+
+    Each row count from 1 to `max_key_length` is tried as an identity
+    transposition of `ceil(len(message) / rows)` columns. Returns
+    `(rows, fit)` for the best-scoring decrypt.
+
+    >>> scytale_break_mp(scytale_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        5)) # doctest: +ELLIPSIS
+    (5, -709.4646722...)
+    >>> scytale_break_mp(scytale_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        5), \
+        fitness=Ptrigrams) # doctest: +ELLIPSIS
+    (5, -997.0129085...)
+    """
+    with multiprocessing.Pool() as pool:
+        identity_orders = [list(range(math.ceil(len(message) / rows)))
+                           for rows in range(1, max_key_length + 1)]
+        candidates = [(message, order, False, True, fitness)
+                      for order in identity_orders]
+        # The worker has to be a top-level function, because Pool.starmap
+        # cannot use one defined inside another function.
+        scored = pool.starmap(column_transposition_break_worker,
+                              candidates, chunksize)
+        best_key, best_fit = max(scored, key=lambda result: result[1])
+        column_count = len(best_key[0])
+        return math.trunc(len(message) / column_count), best_fit
+scytale_break = scytale_break_mp
+
+if __name__ == "__main__":
+    import doctest
+    # Importing doctest on its own runs nothing; testmod() is what actually
+    # executes the doctests embedded in this module's docstrings.
+    doctest.testmod()
\ No newline at end of file
diff --git a/szyfrow/enigma.py b/szyfrow/enigma.py
new file mode 100644 (file)
index 0000000..89758f5
--- /dev/null
@@ -0,0 +1,353 @@
+
+# coding: utf-8
+
+##################################
+# # Enigma machine
+##################################
+# Specification from [Codes and Ciphers](http://www.codesandciphers.org.uk/enigma/rotorspec.htm) page.
+# 
+# Example Enigma machines from [Louise Dade](http://enigma.louisedade.co.uk/enigma.html) (full simulation) and [EnigmaCo](http://enigmaco.de/enigma/enigma.html) (good animation of the wheels, but no ring settings).
+# 
+# There's also the nice Enigma simulator for Android by [Franklin Heath](https://franklinheath.co.uk/2012/02/04/our-first-app-published-enigma-simulator/), available on the [Google Play store](https://play.google.com/store/apps/details?id=uk.co.franklinheath.enigmasim&hl=en_GB).
+
+
+
+import string
+import collections
+import multiprocessing
+import itertools
+import logging
+
+# Module-level logger. Everything below runs at import time.
+# NOTE(review): the logger name is spelled 'engima', which looks like a typo
+#   for 'enigma' -- confirm nothing looks the logger up by this name before
+#   renaming it.
+logger = logging.getLogger('engima')
+logger.setLevel(logging.WARNING)
+# logger.setLevel(logging.INFO)
+# logger.setLevel(logging.DEBUG)
+
+# Create the logging file handler; the log file name is relative, so it is
+# resolved against whatever the current working directory is at import.
+fh = logging.FileHandler("enigma.log")
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+fh.setFormatter(formatter)
+
+# Add the handler to the logger object
+logger.addHandler(fh)
+
+
+# Some convenience functions
+
+cat = ''.join
+
+def clean(text):
+    """Return `text` in lower case with everything except ASCII letters
+    removed."""
+    kept = [character for character in text
+            if character in string.ascii_letters]
+    return ''.join(kept).lower()
+
+def pos(letter):
+    """Return the zero-based alphabet index of an ASCII letter of either case.
+
+    Anything not found in the ASCII alphabets gives the empty string rather
+    than a number.
+    """
+    for alphabet, first_letter in ((string.ascii_lowercase, 'a'),
+                                   (string.ascii_uppercase, 'A')):
+        if letter in alphabet:
+            return ord(letter) - ord(first_letter)
+    return ''
+    
+def unpos(number):
+    """Return the lower-case letter at alphabet index `number`, with the
+    index wrapped modulo 26."""
+    return string.ascii_lowercase[number % 26]
+
+
+# Wheel wirings. Each is a 26-letter string in the form that
+# LetterTransformer.parse_specification accepts: the i-th letter is the
+# letter that the i-th letter of the alphabet is mapped to.
+wheel_i_spec = 'ekmflgdqvzntowyhxuspaibrcj'
+wheel_ii_spec = 'ajdksiruxblhwtmcqgznpyfvoe'
+wheel_iii_spec = 'bdfhjlcprtxvznyeiwgakmusqo'
+wheel_iv_spec = 'esovpzjayquirhxlnftgkdcmwb'
+wheel_v_spec = 'vzbrgityupsdnhlxawmjqofeck'
+wheel_vi_spec = 'jpgvoumfyqbenhzrdkasxlictw'
+wheel_vii_spec = 'nzjhgrcxmyswboufaivlpekqdt'
+wheel_viii_spec = 'fkqhtlxocbjspdzramewniuygv'
+beta_wheel_spec = 'leyjvcnixwpbqmdrtakzgfuhos'
+gamma_wheel_spec = 'fsokanuerhmbtiycwlqpzxvgjd'
+
+# Notch letters for each wheel, in the list-of-letters form that Wheel takes
+# as ring_notch_letters. No notch lists are defined here for the beta and
+# gamma wheels.
+wheel_i_notches = ['q']
+wheel_ii_notches = ['e']
+wheel_iii_notches = ['v']
+wheel_iv_notches = ['j']
+wheel_v_notches = ['z']
+wheel_vi_notches = ['z', 'm']
+wheel_vii_notches = ['z', 'm']
+wheel_viii_notches = ['z', 'm']
+
+# Reflector wirings: 13 whitespace-separated letter pairs, the form parsed by
+# Plugboard.parse_specification and validated by Reflector.
+reflector_b_spec = 'ay br cu dh eq fs gl ip jx kn mo tz vw'
+reflector_c_spec = 'af bv cp dj ei go hy kr lz mx nw tq su'
+
+
+
+class LetterTransformer(object):
+    """A generic substitution cipher that may have different transforms in
+    the forward and backward directions. The transforms for all 26 letters
+    must be provided.
+
+    The specification is a string of letters, paired positionally with the
+    alphabet, or (with raw_transform=True) a ready-made sequence of
+    (from, to) letter pairs.
+    """
+    def __init__(self, specification, raw_transform=False):
+        transform = (specification if raw_transform
+                     else self.parse_specification(specification))
+        self.validate_transform(transform)
+        self.make_transform_map(transform)
+
+    def parse_specification(self, specification):
+        """Pair a..z, in order, with the cleaned letters of `specification`."""
+        return list(zip(string.ascii_lowercase, clean(specification)))
+
+    def validate_transform(self, transform):
+        """Check that `transform` holds 26 two-element pairs with 26 distinct
+        origins and 26 distinct destinations. Raises ValueError if not."""
+        pair_count = len(transform)
+        if pair_count != 26:
+            raise ValueError(
+                "Transform specification has {} pairs, requires 26".format(
+                    pair_count))
+        if any(len(pair) != 2 for pair in transform):
+            raise ValueError("Not all mappings in transform "
+                "have two elements")
+        origins = {pair[0] for pair in transform}
+        if len(origins) != 26:
+            raise ValueError("Transform specification must list 26 origin letters")
+        destinations = {pair[1] for pair in transform}
+        if len(destinations) != 26:
+            raise ValueError("Transform specification must list 26 destination letters")
+
+    def make_empty_transform(self):
+        """Reset both lookup tables to 26 zeros."""
+        self.forward_map = [0] * 26
+        self.backward_map = [0] * 26
+
+    def make_transform_map(self, transform):
+        """Fill the index-to-index lookup tables from the letter pairs and
+        return them as (forward_map, backward_map)."""
+        self.make_empty_transform()
+        for pair in transform:
+            origin, destination = pos(pair[0]), pos(pair[1])
+            self.forward_map[origin] = destination
+            self.backward_map[destination] = origin
+        return self.forward_map, self.backward_map
+
+    def forward(self, letter):
+        """Transform a lower-case letter in the forward direction; any other
+        input gives the empty string."""
+        if letter not in string.ascii_lowercase:
+            return ''
+        return unpos(self.forward_map[pos(letter)])
+
+    def backward(self, letter):
+        """Transform a lower-case letter in the backward direction; any other
+        input gives the empty string."""
+        if letter not in string.ascii_lowercase:
+            return ''
+        return unpos(self.backward_map[pos(letter)])
+
+
+class Plugboard(LetterTransformer):
+    """A plugboard, a type of letter transformer where forward and backward
+    transforms are the same. If a letter isn't explicitly transformed, it is
+    kept as it is.
+
+    The specification is a whitespace-separated list of letter pairs, such
+    as 'ab cd'. Nothing here checks that a letter appears in only one pair.
+    """
+    def parse_specification(self, specification):
+        """Split the specification on whitespace and turn each chunk into a
+        tuple of its cleaned letters."""
+        return [tuple(clean(p)) for p in specification.split()]
+
+    def validate_transform(self, transform):
+        """Check that every mapping is a pair. Raises ValueError if not."""
+        for p in transform:
+            if len(p) != 2:
+                # The trailing space matters: the two literals are joined.
+                raise ValueError("Not all mappings in transform "
+                    "have two elements")
+
+    def make_empty_transform(self):
+        """Start from the identity mapping, so unplugged letters map to
+        themselves."""
+        self.forward_map = list(range(26))
+        self.backward_map = list(range(26))
+
+    def make_transform_map(self, transform):
+        """Build the lookup tables from the pairs plus their reverses, which
+        is what makes the plugboard symmetric."""
+        # Copy to a list so that a raw transform given as a tuple (or other
+        # iterable) can be concatenated with the reversed pairs.
+        pairs = list(transform)
+        expanded_transform = pairs + [tuple(reversed(p)) for p in pairs]
+        return super(Plugboard, self).make_transform_map(expanded_transform)
+
+
+
+
+class Reflector(Plugboard):
+    """A reflector is a plugboard that requires 13 transforms, which between
+    them must use all 26 letters.
+    """
+    def validate_transform(self, transform):
+        """Check for 13 two-letter pairs covering 26 distinct letters.
+        Raises ValueError if any of those conditions fails."""
+        if len(transform) != 13:
+            raise ValueError("Reflector specification has {} pairs, requires 13".
+                format(len(transform)))
+        # Check the shape of each pair first: the letter count below indexes
+        # p[1], which would fail with an IndexError on a one-letter mapping.
+        try:
+            super(Reflector, self).validate_transform(transform)
+        except ValueError as v:
+            raise ValueError("Not all mappings in reflector have two elements") from v
+        if len(set([p[0] for p in transform] +
+                    [p[1] for p in transform])) != 26:
+            raise ValueError("Reflector specification does not contain 26 letters")
+
+
+
+
+class SimpleWheel(LetterTransformer):
+    """A wheel is a transform that rotates.
+
+    Looking from the right, letters go in sequence a-b-c clockwise around the
+    wheel.
+
+    The position of the wheel is the number of spaces anticlockwise the wheel
+    has turned.
+
+    Letter inputs and outputs are given relative to the frame holding the
+    wheel: the input letter is offset by the wheel's position before the
+    wiring is looked up, and the result is offset back again. So if the wheel
+    is advanced three places, an input of 'p' enters the wheel at the
+    position under the wheel's 's' label.
+
+    position_l is a computed attribute: the position expressed as a letter.
+    """
+    def __init__(self, transform, position='a', raw_transform=False):
+        super(SimpleWheel, self).__init__(transform, raw_transform)
+        self.set_position(position)
+
+    def __getattribute__(self, name):
+        # Every name other than the computed 'position_l' is an ordinary
+        # attribute lookup.
+        if name != 'position_l':
+            return object.__getattribute__(self, name)
+        return unpos(self.position)
+
+    def set_position(self, position):
+        """Set the wheel's position from a letter or from a number."""
+        self.position = pos(position) if isinstance(position, str) else position
+
+    def forward(self, letter):
+        """Pass a lower-case letter through the wheel in the forward
+        direction; any other input gives the empty string."""
+        if letter not in string.ascii_lowercase:
+            return ''
+        entry = (pos(letter) + self.position) % 26
+        return unpos(self.forward_map[entry] - self.position)
+
+    def backward(self, letter):
+        """Pass a lower-case letter through the wheel in the backward
+        direction; any other input gives the empty string."""
+        if letter not in string.ascii_lowercase:
+            return ''
+        entry = (pos(letter) + self.position) % 26
+        return unpos(self.backward_map[entry] - self.position)
+
+    def advance(self):
+        """Turn the wheel on by one place, wrapping after 26."""
+        self.position = (self.position + 1) % 26
+
+
+
+class Wheel(SimpleWheel):
+    """A wheel with a movable ring.
+
+    The ring holds the letters and the notches that turn other wheels. The core
+    holds the wiring that does the transformation.
+
+    The ring position is how many steps the core is turned relative to the ring.
+    This is one-based, so a ring setting of 1 means the core and ring are 
+    aligned.
+
+    The position of the wheel is the position of the core (the transforms) 
+    relative to the neutral position. 
+
+    The position_l is the position of the ring, or what would be observed
+    by the user of the Enigma machine. 
+
+    The notch_positions hold, for each notch letter, how far (modulo 26) the
+    letter shown by position_l is past that notch letter. A value of 0 means
+    the wheel is sitting on that notch, which is what Enigma.advance tests
+    for before turning the next wheel.
+
+    """
+    def __init__(self, transform, ring_notch_letters, ring_setting=1, position='a', raw_transform=False):
+        # These two must be set before calling the parent constructor, because
+        # it calls set_position(), and this class's set_position() reads both.
+        self.ring_notch_letters = ring_notch_letters
+        self.ring_setting = ring_setting
+        super(Wheel, self).__init__(transform, position=position, raw_transform=raw_transform)
+        self.set_position(position)
+
+    def __getattribute__(self,name):
+        # 'position_l' is computed: the core position shifted by the ring
+        # setting, as a letter. Everything else is a normal attribute lookup.
+        if name=='position_l':
+            return unpos(self.position + self.ring_setting - 1)
+        else:
+            return object.__getattribute__(self, name)
+
+    def set_position(self, position):
+        """Set the core position from the letter showing on the ring (or from
+        a number), and recompute notch_positions to match."""
+        if isinstance(position, str):
+            self.position = (pos(position) - self.ring_setting + 1) % 26
+        else:
+            # NOTE(review): unlike the letter branch there is no "+ 1" here,
+            #   so the number n gives the same position as the letter with
+            #   index n - 1. Presumably numeric positions are meant to be
+            #   one-based, like ring_setting -- confirm.
+            self.position = (position - self.ring_setting) % 26
+        self.notch_positions = [(self.position + self.ring_setting - 1 - pos(p)) % 26  for p in self.ring_notch_letters]
+
+    def advance(self):
+        """Turn the wheel on by one place, moving every notch position on by
+        one as well. Returns the new position."""
+        super(Wheel, self).advance()
+        self.notch_positions = [(p + 1) % 26 for p in self.notch_positions]
+        return self.position
+
+
+class Enigma(object):
+    """An Enigma machine with a plugboard, three wheels and a reflector.
+
+    Three computed attributes are available, each a (left, middle, right)
+    tuple taken from the wheels: wheel_positions, wheel_positions_l and
+    notch_positions.
+    """
+    def __init__(self, reflector_spec,
+                 left_wheel_spec, left_wheel_notches,
+                 middle_wheel_spec, middle_wheel_notches,
+                 right_wheel_spec, right_wheel_notches,
+                 left_ring_setting, middle_ring_setting, right_ring_setting,
+                 plugboard_setting):
+        self.reflector = Reflector(reflector_spec)
+        self.left_wheel = Wheel(left_wheel_spec, left_wheel_notches,
+                                ring_setting=left_ring_setting)
+        self.middle_wheel = Wheel(middle_wheel_spec, middle_wheel_notches,
+                                  ring_setting=middle_ring_setting)
+        self.right_wheel = Wheel(right_wheel_spec, right_wheel_notches,
+                                 ring_setting=right_ring_setting)
+        self.plugboard = Plugboard(plugboard_setting)
+
+    def __getattribute__(self, name):
+        # Computed attribute name -> the per-wheel attribute it collects.
+        per_wheel_attribute = {'wheel_positions': 'position',
+                               'wheel_positions_l': 'position_l',
+                               'notch_positions': 'notch_positions'}
+        if name not in per_wheel_attribute:
+            return object.__getattribute__(self, name)
+        wheels = (self.left_wheel, self.middle_wheel, self.right_wheel)
+        return tuple(getattr(wheel, per_wheel_attribute[name])
+                     for wheel in wheels)
+
+    def set_wheels(self, left_wheel_position, middle_wheel_position, right_wheel_position):
+        """Set the positions of the three wheels."""
+        for wheel, position in ((self.left_wheel, left_wheel_position),
+                                (self.middle_wheel, middle_wheel_position),
+                                (self.right_wheel, right_wheel_position)):
+            wheel.set_position(position)
+
+    def lookup(self, letter):
+        """Pass a letter through the machine as it currently stands, without
+        moving any wheel: plugboard, the wheels from right to left, the
+        reflector, the wheels from left to right, then the plugboard again."""
+        inbound_wheels = (self.right_wheel, self.middle_wheel, self.left_wheel)
+        signal = self.plugboard.forward(letter)
+        for wheel in inbound_wheels:
+            signal = wheel.forward(signal)
+        signal = self.reflector.forward(signal)
+        for wheel in reversed(inbound_wheels):
+            signal = wheel.backward(signal)
+        return self.plugboard.backward(signal)
+
+    def advance(self):
+        """Step the wheels. The right wheel always turns; the middle wheel
+        turns if either the right or the middle wheel is on a notch; the left
+        wheel turns if the middle wheel is on a notch."""
+        # Read both notch states before any wheel moves.
+        right_on_notch = 0 in self.right_wheel.notch_positions
+        middle_on_notch = 0 in self.middle_wheel.notch_positions
+        self.right_wheel.advance()
+        if right_on_notch or middle_on_notch:
+            self.middle_wheel.advance()
+        if middle_on_notch:
+            self.left_wheel.advance()
+
+    def encipher_letter(self, letter):
+        """Step the wheels, then encipher one letter."""
+        self.advance()
+        return self.lookup(letter)
+
+    def encipher(self, message):
+        """Encipher the cleaned message, one letter at a time."""
+        return cat([self.encipher_letter(letter) for letter in clean(message)])
+
+    decipher = encipher
+
+
+# for i in range(26):
+#     enigma.advance()
+#     print('enigma.advance()')
+#     print("assert(enigma.wheel_positions == {})".format(enigma.wheel_positions))
+#     print("assert(cat(enigma.wheel_positions_l) == '{}')".format(cat(enigma.wheel_positions_l)))
+#     print("assert(enigma.notch_positions == {})".format(enigma.notch_positions))
+#     print("assert(cat(enigma.lookup(l) for l in string.ascii_lowercase) == '{}')".format(cat(enigma.lookup(l) for l in string.ascii_lowercase)))
+#     print()
+
+
+if __name__ == "__main__":
+    # Run this module's doctests when it is executed as a script.
+    import doctest
+    # doctest.testmod(extraglobs={'lt': LetterTransformer(1, 'a')})
+    doctest.testmod()
+
diff --git a/szyfrow/hill.py b/szyfrow/hill.py
new file mode 100644 (file)
index 0000000..8233de7
--- /dev/null
@@ -0,0 +1,81 @@
+import multiprocessing
+import numpy as np
+from numpy import matrix
+from numpy import linalg
+from support.utilities import *
+from support.language_models import *
+from cipher.affine import modular_division_table
+
+from logger import logger
+
+
+def hill_encipher(matrix, message_letters, fillvalue='a'):
+    """Encipher a message with the Hill cipher, using `matrix` as the key.
+
+    The message is passed through sanitise() and, if its length is not a
+    multiple of the matrix size, padded with the first character of
+    `fillvalue`. It is then enciphered one matrix-sized block at a time.
+
+    >>> hill_encipher(np.matrix([[7,8], [11,11]]), 'hellothere')
+    'drjiqzdrvx'
+    >>> hill_encipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \
+        'hello there')
+    'tfjflpznvyac'
+    """
+    block_size = len(matrix)
+    letters = sanitise(message_letters)
+    overhang = len(letters) % block_size
+    if overhang:
+        letters = letters + fillvalue[0] * (block_size - overhang)
+    numbers = [pos(c) for c in letters]
+    enciphered_numbers = []
+    for start in range(0, len(numbers), block_size):
+        block = numbers[start:start + block_size]
+        # Multiply the key by the block as a column vector, then flatten the
+        # resulting column back into a plain list.
+        enciphered_numbers.extend(
+            ((matrix * np.matrix(block).T).T).tolist()[0])
+    return cat([unpos(round(value)) for value in enciphered_numbers])
+
+def hill_decipher(matrix, message, fillvalue='a'):
+    """Decipher a Hill cipher message by enciphering it with the inverse,
+    modulo 26, of the key `matrix`.
+
+    >>> hill_decipher(np.matrix([[7,8], [11,11]]), 'drjiqzdrvx')
+    'hellothere'
+    >>> hill_decipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \
+        'tfjflpznvyac')
+    'hellothereaa'
+    """
+    determinant = linalg.det(matrix)
+    adjoint = determinant * linalg.inv(matrix)
+    # The modular inverse of the determinant is looked up in
+    # modular_division_table under the key (determinant mod 26, 1).
+    inverse_determinant = modular_division_table[
+        int(round(determinant)) % 26, 1]
+    deciphering_matrix = (inverse_determinant * adjoint) % 26
+    return hill_encipher(deciphering_matrix, message, fillvalue)
+
+def hill_break(message, matrix_size=2, fitness=Pletters, 
+    number_of_solutions=1, chunksize=500):
+    """Break a Hill cipher by trying every usable square key matrix of the
+    given size with entries 0..25.
+
+    A matrix is tried only if its determinant is divisible by neither 2 nor
+    13. Each one is scored in a multiprocessing pool by hill_break_worker.
+
+    Returns the best (matrix, fitness) pair if number_of_solutions is 1,
+    otherwise a list of the best number_of_solutions pairs, best first.
+    """
+    # Imported here because this module does not import itertools itself.
+    import itertools
+
+    candidate_rows = [list(r)
+        for r in itertools.product(range(26), repeat=matrix_size)]
+    # Filter while generating, rather than first building every candidate
+    # matrix (26 ** (matrix_size ** 2) of them) in memory.
+    valid_matrices = []
+    for rows in itertools.product(candidate_rows, repeat=matrix_size):
+        m = np.matrix(list(rows))
+        d = int(round(linalg.det(m)))
+        if d != 0 and d % 2 != 0 and d % 13 != 0:
+            valid_matrices.append(m)
+    with multiprocessing.Pool() as pool:
+        helper_args = [(message, m, fitness) for m in valid_matrices]
+        # Gotcha: the helper function here needs to be defined at the top level
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(hill_break_worker, helper_args, chunksize)
+        if number_of_solutions == 1:
+            return max(breaks, key=lambda k: k[1])
+        else:
+            return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
+
+def hill_break_worker(message, matrix, fitness):
+    """Try one key: decipher `message` with `matrix` and return the pair
+    (matrix, fitness of the resulting plaintext)."""
+    candidate_plaintext = hill_decipher(matrix, message)
+    score = fitness(candidate_plaintext)
+    preview = sanitise(candidate_plaintext)[:50]
+    logger.debug('Hill cipher break attempt using key {0} gives fit of '
+                 '{1} and decrypt starting: {2}'.format(matrix, score, preview))
+    return matrix, score
+
+if __name__ == "__main__":
+    # Importing doctest on its own runs nothing: call testmod() so that the
+    # doctests in this module are actually executed when it is run as a script.
+    import doctest
+    doctest.testmod()
\ No newline at end of file
diff --git a/szyfrow/keyword_cipher.py b/szyfrow/keyword_cipher.py
new file mode 100644 (file)
index 0000000..2cf3290
--- /dev/null
@@ -0,0 +1,304 @@
+from enum import Enum
+# from itertools import starmap
+import multiprocessing
+import math
+from support.utilities import *
+from support.language_models import *
+
+from logger import logger
+import logging
+# logger.setLevel(logging.DEBUG)
+
+
+class KeywordWrapAlphabet(Enum):
+    """How the rest of the alphabet is added after the keyword when a
+    keyword cipher alphabet is built (see keyword_cipher_alphabet_of)."""
+    # Continue from 'a'.
+    from_a = 1
+    # Continue from the letter after the last letter of the keyword.
+    from_last = 2
+    # Continue from the letter after the alphabetically largest letter of
+    # the keyword.
+    from_largest = 3
+
+
+def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Build the cipher alphabet for a keyword.
+
+    The alphabet starts with the letters of the keyword, without repeats.
+    wrap_alphabet says where the remaining letters start from: 'a', the
+    letter after the keyword's last letter, or the letter after the
+    keyword's largest letter.
+
+    >>> keyword_cipher_alphabet_of('bayes')
+    'bayescdfghijklmnopqrtuvwxz'
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
+    'bayescdfghijklmnopqrtuvwxz'
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
+    'bayestuvwxzcdfghijklmnopqr'
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
+    'bayeszcdfghijklmnopqrtuvwx'
+    """
+    clean_keyword = sanitise(keyword)
+    if wrap_alphabet == KeywordWrapAlphabet.from_a:
+        return cat(deduplicate(clean_keyword + string.ascii_lowercase))
+
+    if wrap_alphabet == KeywordWrapAlphabet.from_last:
+        pivot_letter = deduplicate(clean_keyword)[-1]
+    else:
+        pivot_letter = sorted(clean_keyword)[-1]
+    restart = string.ascii_lowercase.find(pivot_letter) + 1
+    # The whole alphabet is appended last so that deduplication fills in
+    # whatever letters are still missing.
+    return cat(deduplicate(clean_keyword +
+                           string.ascii_lowercase[restart:] +
+                           string.ascii_lowercase))
+
+
+def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Enciphers a message with a keyword substitution cipher.
+    wrap_alphabet controls how the rest of the alphabet is added
+    after the keyword.
+    KeywordWrapAlphabet.from_a : from 'a'
+    KeywordWrapAlphabet.from_last : from the last letter in the sanitised
+        keyword
+    KeywordWrapAlphabet.from_largest : from the largest letter in the
+        sanitised keyword
+
+    The message is unaccented and lower-cased first; characters outside
+    a-z are left as they are.
+
+    >>> keyword_encipher('test message', 'bayes')
+    'rsqr ksqqbds'
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
+    'rsqr ksqqbds'
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
+    'lskl dskkbus'
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
+    'qspq jsppbcs'
+    """
+    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+    cipher_translation = str.maketrans(string.ascii_lowercase, cipher_alphabet)
+    return unaccent(message).lower().translate(cipher_translation)
+
+def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Deciphers a message with a keyword substitution cipher.
+    wrap_alphabet controls how the rest of the alphabet is added
+    after the keyword.
+    KeywordWrapAlphabet.from_a : from 'a'
+    KeywordWrapAlphabet.from_last : from the last letter in the sanitised
+        keyword
+    KeywordWrapAlphabet.from_largest : from the largest letter in the
+        sanitised keyword
+
+    The message is lower-cased first; characters outside a-z are left as
+    they are.
+
+    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
+    'test message'
+    >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
+    'test message'
+    >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
+    'test message'
+    >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
+    'test message'
+    """
+    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+    cipher_translation = str.maketrans(cipher_alphabet, string.ascii_lowercase)
+    return message.lower().translate(cipher_translation)
+
+
+def keyword_break(message, wordlist=keywords, fitness=Pletters):
+    """Breaks a keyword substitution cipher using a dictionary and
+    frequency analysis.
+
+    Every word in `wordlist` is tried with every KeywordWrapAlphabet
+    setting. Returns ((best keyword, best wrap setting), best fitness); on
+    a tie, the first candidate tried wins. With an empty wordlist the result
+    is (('', KeywordWrapAlphabet.from_a), -inf).
+
+    >>> keyword_break(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+    (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
+    """
+    # Keep a list, so that a one-shot iterable is not used up by the first
+    # wrap setting.
+    candidate_keywords = list(wordlist)
+    best_keyword = ''
+    # Must be a real wrap setting: it is used to decipher the message for
+    # the log line below even when no keyword was tried.
+    best_wrap_alphabet = KeywordWrapAlphabet.from_a
+    best_fit = float("-inf")
+    for wrap_alphabet in KeywordWrapAlphabet:
+        for keyword in candidate_keywords:
+            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
+            fit = fitness(plaintext)
+            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
+                         'gives fit of {2} and decrypt starting: {3}'.format(
+                             keyword, wrap_alphabet, fit,
+                             sanitise(plaintext)[:50]))
+            if fit > best_fit:
+                best_fit = fit
+                best_keyword = keyword
+                best_wrap_alphabet = wrap_alphabet
+    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
+                '{2} and decrypt starting: {3}'.format(best_keyword,
+                    best_wrap_alphabet, best_fit, sanitise(
+                        keyword_decipher(message, best_keyword,
+                                         best_wrap_alphabet))[:50]))
+    return (best_keyword, best_wrap_alphabet), best_fit
+
+def keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
+                     number_of_solutions=1, chunksize=500):
+    """Breaks a keyword substitution cipher using a dictionary and
+    frequency analysis, scoring the candidates in a multiprocessing pool.
+
+    Returns the best ((keyword, wrap setting), fitness) pair if
+    number_of_solutions is 1, otherwise a list of the best
+    number_of_solutions pairs, best first.
+
+    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+    (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
+    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
+          'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
+          wordlist=['cat', 'elephant', 'kangaroo'], \
+          number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    [(('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...), 
+    (('elephant', <KeywordWrapAlphabet.from_largest: 3>), -52.834575011...)]
+    """
+    def fit_of(candidate):
+        return candidate[1]
+
+    helper_args = [(message, word, wrap, fitness)
+                   for word in wordlist
+                   for wrap in KeywordWrapAlphabet]
+    with multiprocessing.Pool() as pool:
+        # Gotcha: the helper function here needs to be defined at the top level
+        #   (limitation of Pool.starmap)
+        breaks = pool.starmap(keyword_break_worker, helper_args, chunksize)
+    if number_of_solutions == 1:
+        return max(breaks, key=fit_of)
+    ranked = sorted(breaks, key=fit_of, reverse=True)
+    return ranked[:number_of_solutions]
+
+def keyword_break_worker(message, keyword, wrap_alphabet, fitness):
+    """Try one key: decipher `message` with the keyword and wrap setting,
+    and return ((keyword, wrap_alphabet), fitness of the plaintext)."""
+    candidate_plaintext = keyword_decipher(message, keyword, wrap_alphabet)
+    score = fitness(candidate_plaintext)
+    preview = sanitise(candidate_plaintext)[:50]
+    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
+                 '{2} and decrypt starting: {3}'.format(keyword,
+                     wrap_alphabet, score, preview))
+    return (keyword, wrap_alphabet), score
+
+
+def monoalphabetic_break_hillclimbing(message, 
+                              max_iterations=20000,
+                              plain_alphabet=None, 
+                              cipher_alphabet=None, 
+                              swap_index_finder=None,
+                              fitness=Pletters, chunksize=1):
+    """Break a monoalphabetic substitution cipher by hill climbing: a call
+    to simulated_annealing_break with one worker and an initial temperature
+    of 0. All other arguments are passed straight through."""
+    settings = dict(workers=1,
+                    initial_temperature=0,
+                    max_iterations=max_iterations,
+                    plain_alphabet=plain_alphabet,
+                    cipher_alphabet=cipher_alphabet,
+                    swap_index_finder=swap_index_finder,
+                    fitness=fitness,
+                    chunksize=chunksize)
+    return simulated_annealing_break(message, **settings)
+
+
+def monoalphabetic_break_hillclimbing_mp(message, 
+                              workers=10, 
+                              max_iterations=20000,
+                              plain_alphabet=None, 
+                              cipher_alphabet=None, 
+                              swap_index_finder=None,
+                              fitness=Pletters, chunksize=1):
+    """Break a monoalphabetic substitution cipher by hill climbing with
+    several workers: a call to simulated_annealing_break with an initial
+    temperature of 0. All other arguments are passed straight through."""
+    settings = dict(workers=workers,
+                    initial_temperature=0,
+                    max_iterations=max_iterations,
+                    plain_alphabet=plain_alphabet,
+                    cipher_alphabet=cipher_alphabet,
+                    swap_index_finder=swap_index_finder,
+                    fitness=fitness,
+                    chunksize=chunksize)
+    return simulated_annealing_break(message, **settings)
+
+
+def gaussian_swap_index(a):
+    """Return an index near `a`: `a` plus a normally distributed offset
+    (mean 0, standard deviation 4, truncated to an integer), wrapped
+    modulo 26."""
+    offset = int(random.gauss(0, 4))
+    return (a + offset) % 26
+
+def uniform_swap_index(a):
+    """Return an index chosen uniformly from 0..25. The argument is ignored;
+    it is there so this function can be used wherever gaussian_swap_index
+    can."""
+    chosen = random.randrange(26)
+    return chosen
+
+def simulated_annealing_break(message, workers=10, 
+                              initial_temperature=200,
+                              max_iterations=20000,
+                              plain_alphabet=None, 
+                              cipher_alphabet=None, 
+                              swap_index_finder=None,
+                              fitness=Ptrigrams, chunksize=1):
+    """Break a monoalphabetic substitution cipher by simulated annealing.
+
+    One set of arguments is prepared per worker and the runs are carried
+    out by simulated_annealing_break_worker in a multiprocessing pool. If no
+    cipher alphabet is given, each worker starts from its own random shuffle
+    of the alphabet. The plain alphabet defaults to a-z and the swap index
+    finder to gaussian_swap_index.
+
+    Returns the (alphabet, fitness) pair with the highest fitness.
+    """
+    ciphertext = sanitise(message)
+    if swap_index_finder is None:
+        swap_index_finder = gaussian_swap_index
+    if plain_alphabet is None:
+        used_plain_alphabet = string.ascii_lowercase
+    else:
+        used_plain_alphabet = plain_alphabet
+
+    worker_args = []
+    for worker_id in range(workers):
+        if cipher_alphabet is None:
+            shuffled_letters = list(string.ascii_lowercase)
+            random.shuffle(shuffled_letters)
+            used_cipher_alphabet = cat(shuffled_letters)
+        else:
+            used_cipher_alphabet = cipher_alphabet
+        worker_args.append((ciphertext, used_plain_alphabet,
+                            used_cipher_alphabet,
+                            swap_index_finder,
+                            initial_temperature, max_iterations, fitness,
+                            worker_id))
+    with multiprocessing.Pool() as pool:
+        breaks = pool.starmap(simulated_annealing_break_worker,
+                              worker_args, chunksize)
+    return max(breaks, key=lambda k: k[1])
+
+
+def simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet, 
+                                     swap_index_finder,
+                                     t0, max_iterations, fitness,
+                                     logID):
+    """Run one simulated annealing search for a substitution alphabet.
+
+    Starting from `cipher_alphabet`, each iteration swaps two letters of the
+    current alphabet and scores the resulting decipherment with `fitness`.
+    A better candidate is always accepted; a worse one is accepted with
+    probability exp(change in fitness / temperature). The temperature falls
+    linearly from `t0`, with a floor of 0.001.
+
+    `logID` identifies this worker in the debug log. Returns the best
+    (alphabet, fitness) pair seen. With max_iterations of 0, that is the
+    starting alphabet and its fitness.
+    """
+    def swap(letters, i, j):
+        # Return `letters` with the characters at i and j exchanged.
+        if i > j:
+            i, j = j, i
+        if i == j:
+            return letters
+        else:
+            return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
+                    letters[j+1:])
+    
+    temperature = t0
+
+    # The temperature reaches its floor after 90% of the iterations. Guard
+    # the division: with no iterations there is nothing to cool.
+    dt = t0 / (0.9 * max_iterations) if max_iterations else 0
+    
+    current_alphabet = cipher_alphabet
+    cipher_translation = ''.maketrans(current_alphabet, plain_alphabet)
+    plaintext = message.translate(cipher_translation)
+    current_fitness = fitness(plaintext)
+
+    best_alphabet = current_alphabet
+    best_fitness = current_fitness
+    
+    for i in range(max_iterations):
+        swap_a = random.randrange(26)
+        swap_b = swap_index_finder(swap_a)
+        alphabet = swap(current_alphabet, swap_a, swap_b)
+        cipher_translation = ''.maketrans(alphabet, plain_alphabet)
+        plaintext = message.translate(cipher_translation)
+        new_fitness = fitness(plaintext)
+        try:
+            sa_chance = math.exp((new_fitness - current_fitness) / temperature)
+        except (OverflowError, ZeroDivisionError):
+            # A temperature of 0 (hill climbing) or an enormous exponent:
+            # never accept a worse candidate by chance.
+            sa_chance = 0
+        if (new_fitness > current_fitness or random.random() < sa_chance):
+            current_fitness = new_fitness
+            current_alphabet = alphabet
+            
+        if current_fitness > best_fitness:
+            best_alphabet = current_alphabet
+            best_fitness = current_fitness
+        if i % 500 == 0:
+            logger.debug('Simulated annealing worker {}: iteration {}, temperature {}, '
+                'current alphabet {}, plain alphabet {}, current_fitness {}, '
+                'best_plaintext {}'.format(logID, i, temperature, current_alphabet, plain_alphabet,
+                current_fitness, plaintext[:50]))
+        temperature = max(temperature - dt, 0.001)
+
+    return best_alphabet, best_fitness # current_alphabet, current_fitness
+
+if __name__ == "__main__":
+    # Importing doctest on its own runs nothing: call testmod() so that the
+    # doctests in this module are actually executed when it is run as a script.
+    import doctest
+    doctest.testmod()
diff --git a/szyfrow/playfair.py b/szyfrow/playfair.py
new file mode 100644 (file)
index 0000000..0c0bc6e
--- /dev/null
@@ -0,0 +1,298 @@
+from support.utilities import *
+from support.language_models import *
+from cipher.keyword_cipher import KeywordWrapAlphabet, keyword_cipher_alphabet_of
+from cipher.polybius import polybius_grid
+import multiprocessing
+
+from logger import logger
+
+def playfair_wrap(n, lowest, highest):
+    """Wrap `n` around into the inclusive range [lowest, highest].
+
+    Values already in range are returned unchanged; values outside are
+    shifted by whole multiples of the range size, so that stepping off one
+    edge of a Playfair grid re-enters at the opposite edge.
+
+    Raises ValueError if the range is empty (highest < lowest), a case in
+    which repeated shifting could never terminate.
+
+    >>> playfair_wrap(5, 0, 4)
+    0
+    >>> playfair_wrap(-1, 0, 4)
+    4
+    >>> playfair_wrap(3, 0, 4)
+    3
+    """
+    skip = highest - lowest + 1
+    if skip <= 0:
+        raise ValueError("Cannot wrap into empty range {}..{}".format(
+            lowest, highest))
+    # Closed form of "add or subtract skip until n is in range".
+    return lowest + (n - lowest) % skip
+
+def playfair_encipher_bigram(ab, grid, padding_letter='x'):
+    """Encipher a single bigram using the Playfair rules.
+
+    `grid` maps each letter to a (row, column) pair. If both letters of the
+    bigram are the same, the second is replaced by `padding_letter`.
+    Letters sharing a row are each replaced by the letter one column on
+    (wrapping round); letters sharing a column by the letter one row on
+    (wrapping round); otherwise each letter is replaced by the letter in its
+    own row and the other letter's column.
+    """
+    first, second = ab
+    rows = [cell[0] for cell in grid.values()]
+    cols = [cell[1] for cell in grid.values()]
+    top, bottom = min(rows), max(rows)
+    left, right = min(cols), max(cols)
+    if first == second:
+        second = padding_letter
+
+    def letter_at(cell):
+        # Merged letters share a cell: take the first one in grid order.
+        return [letter for letter, where in grid.items() if where == cell][0]
+
+    if grid[first][0] == grid[second][0]:
+        # Same row: step one column along.
+        here, there = grid[first], grid[second]
+        new_first = (here[0], playfair_wrap(here[1] + 1, left, right))
+        new_second = (there[0], playfair_wrap(there[1] + 1, left, right))
+    elif grid[first][1] == grid[second][1]:
+        # Same column: step one row along.
+        here, there = grid[first], grid[second]
+        new_first = (playfair_wrap(here[0] + 1, top, bottom), here[1])
+        new_second = (playfair_wrap(there[0] + 1, top, bottom), there[1])
+    else:
+        # Opposite corners of the rectangle, keeping each letter's own row.
+        new_first = (grid[first][0], grid[second][1])
+        new_second = (grid[second][0], grid[first][1])
+    return letter_at(new_first) + letter_at(new_second)
+
+def playfair_decipher_bigram(ab, grid, padding_letter='x'):
+    """Decipher a single bigram using the Playfair rules.
+
+    `grid` maps each letter to a (row, column) pair. If both letters of the
+    bigram are the same, the second is replaced by `padding_letter`.
+    Letters sharing a row are each replaced by the letter one column back
+    (wrapping round); letters sharing a column by the letter one row back
+    (wrapping round); otherwise each letter is replaced by the letter in its
+    own row and the other letter's column.
+    """
+    first, second = ab
+    rows = [cell[0] for cell in grid.values()]
+    cols = [cell[1] for cell in grid.values()]
+    top, bottom = min(rows), max(rows)
+    left, right = min(cols), max(cols)
+    if first == second:
+        second = padding_letter
+
+    def letter_at(cell):
+        # Merged letters share a cell: take the first one in grid order.
+        return [letter for letter, where in grid.items() if where == cell][0]
+
+    if grid[first][0] == grid[second][0]:
+        # Same row: step one column back.
+        here, there = grid[first], grid[second]
+        new_first = (here[0], playfair_wrap(here[1] - 1, left, right))
+        new_second = (there[0], playfair_wrap(there[1] - 1, left, right))
+    elif grid[first][1] == grid[second][1]:
+        # Same column: step one row back.
+        here, there = grid[first], grid[second]
+        new_first = (playfair_wrap(here[0] - 1, top, bottom), here[1])
+        new_second = (playfair_wrap(there[0] - 1, top, bottom), there[1])
+    else:
+        # Opposite corners of the rectangle, keeping each letter's own row.
+        new_first = (grid[first][0], grid[second][1])
+        new_second = (grid[second][0], grid[first][1])
+    return letter_at(new_first) + letter_at(new_second)
+
+def playfair_bigrams(text, padding_letter='x', padding_replaces_repeat=True):
+    """Split `text` into the bigrams that Playfair operates on.
+
+    A bigram of two identical letters becomes that letter followed by
+    `padding_letter`. If `padding_replaces_repeat` is true the padding takes
+    the place of the repeated letter (which is dropped); otherwise the
+    padding is inserted and the repeated letter starts the next bigram.
+    A single trailing letter is completed with `padding_letter`.
+
+    >>> playfair_bigrams('hello')
+    ['he', 'lx', 'ox']
+    >>> playfair_bigrams('hello', padding_replaces_repeat=False)
+    ['he', 'lx', 'lo']
+    """
+    pairs = []
+    cursor = 0
+    while cursor < len(text):
+        pair = text[cursor:cursor + 2]
+        step = 2
+        if len(pair) == 1:
+            # Lone final letter: pad it out (the step of 2 ends the loop).
+            pair = pair + padding_letter
+        elif pair[0] == pair[1]:
+            pair = pair[0] + padding_letter
+            if not padding_replaces_repeat:
+                # Padding was inserted, so re-read the repeated letter.
+                step = 1
+        cursor += step
+        pairs.append(pair)
+    return pairs
+
+def playfair_encipher(message, keyword, padding_letter='x',
+                      padding_replaces_repeat=False, letters_to_merge=None, 
+                      wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Encipher a message with the Playfair cipher.
+
+    A 5x5 grid is built from `keyword` (via polybius_grid, with rows and
+    columns numbered 0-4). The sanitised message is split into bigrams,
+    each bigram is enciphered against the grid, and the results are joined.
+    `letters_to_merge` defaults to merging 'j' into 'i'.
+    """
+    if letters_to_merge is None:
+        letters_to_merge = {'j': 'i'}
+    grid = polybius_grid(keyword, list(range(5)), list(range(5)),
+                         letters_to_merge=letters_to_merge,
+                         wrap_alphabet=wrap_alphabet)
+    plain_pairs = playfair_bigrams(
+        sanitise(message),
+        padding_letter=padding_letter,
+        padding_replaces_repeat=padding_replaces_repeat)
+    cipher_pairs = []
+    for pair in plain_pairs:
+        cipher_pairs.append(
+            playfair_encipher_bigram(pair, grid, padding_letter=padding_letter))
+    return cat(cipher_pairs)
+
+def playfair_decipher(message, keyword, padding_letter='x',
+                      padding_replaces_repeat=False, letters_to_merge=None, 
+                      wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Decipher a message with the Playfair cipher.
+
+    A 5x5 grid is built from `keyword` (via polybius_grid, with rows and
+    columns numbered 0-4). The sanitised message is split into bigrams,
+    each bigram is deciphered against the grid, and the results are joined.
+    `letters_to_merge` defaults to merging 'j' into 'i'.
+    """
+    if letters_to_merge is None:
+        letters_to_merge = {'j': 'i'}
+    grid = polybius_grid(keyword, list(range(5)), list(range(5)),
+                         letters_to_merge=letters_to_merge,
+                         wrap_alphabet=wrap_alphabet)
+    cipher_pairs = playfair_bigrams(
+        sanitise(message),
+        padding_letter=padding_letter,
+        padding_replaces_repeat=padding_replaces_repeat)
+    plain_pairs = []
+    for pair in cipher_pairs:
+        plain_pairs.append(
+            playfair_decipher_bigram(pair, grid, padding_letter=padding_letter))
+    return cat(plain_pairs)
+
+def playfair_break_mp(message, 
+                      letters_to_merge=None, padding_letter='x',
+                      wordlist=keywords, fitness=Pletters,
+                      number_of_solutions=1, chunksize=500):
+    """Break a Playfair cipher by trying every keyword in `wordlist`.
+
+    Each word is tried with every KeywordWrapAlphabet value and with the
+    padding both replacing and not replacing repeated letters; the trials
+    are shared across a multiprocessing pool. Returns the single best
+    (settings, fitness) pair when `number_of_solutions` is 1, otherwise a
+    list of the best `number_of_solutions` pairs, fittest first.
+    """
+    if letters_to_merge is None:
+        letters_to_merge = {'j': 'i'}
+
+    def fit_of(result):
+        return result[1]
+
+    with multiprocessing.Pool() as pool:
+        jobs = []
+        for word in wordlist:
+            for wrap in KeywordWrapAlphabet:
+                for pad_replace in [False, True]:
+                    jobs.append((message, word, wrap,
+                                 letters_to_merge, padding_letter,
+                                 pad_replace,
+                                 fitness))
+        # Gotcha: the worker must be defined at the top level of the module
+        #   (limitation of Pool.starmap)
+        results = pool.starmap(playfair_break_worker, jobs, chunksize)
+        if number_of_solutions != 1:
+            ranked = sorted(results, key=fit_of, reverse=True)
+            return ranked[:number_of_solutions]
+        return max(results, key=fit_of)
+
+def playfair_break_worker(message, keyword, wrap, 
+                          letters_to_merge, padding_letter,
+                          pad_replace,
+                          fitness):
+    """Score one candidate set of Playfair settings.
+
+    Deciphers `message` with the given settings and returns a pair of
+    (settings tuple, fitness). An empty decipherment scores minus infinity.
+    """
+    plaintext = playfair_decipher(message, keyword, padding_letter,
+                                  pad_replace, letters_to_merge, wrap)
+    fit = fitness(plaintext) if plaintext else float('-inf')
+    report = ('Playfair break attempt using key {0} (wrap={1}, merging {2}, '
+              'pad replaces={3}), '
+              'gives fit of {4} and decrypt starting: '
+              '{5}')
+    logger.debug(report.format(keyword, wrap, letters_to_merge, pad_replace,
+                               fit, sanitise(plaintext)[:50]))
+    settings = (keyword, wrap, letters_to_merge, padding_letter, pad_replace)
+    return settings, fit
+
+def playfair_simulated_annealing_break(message, workers=10, 
+                              initial_temperature=200,
+                              max_iterations=20000,
+                              plain_alphabet=None, 
+                              cipher_alphabet=None, 
+                              fitness=Pletters, chunksize=1):
+    """Break a Playfair cipher with several simulated-annealing searches.
+
+    `workers` independent searches are run through a multiprocessing pool
+    on the sanitised message. Unless `cipher_alphabet` is given, each search
+    starts from a keyword chosen at random from `keywords`. The result of
+    the search with the highest fitness is returned.
+    """
+    ciphertext = sanitise(message)
+    if plain_alphabet is None:
+        used_plain_alphabet = string.ascii_lowercase
+    else:
+        used_plain_alphabet = plain_alphabet
+
+    worker_args = []
+    for _ in range(workers):
+        # A fresh random starting keyword per worker, unless one was given.
+        if cipher_alphabet is None:
+            starting_alphabet = random.choice(keywords)
+        else:
+            starting_alphabet = cipher_alphabet
+        worker_args.append((ciphertext, used_plain_alphabet, starting_alphabet,
+                            initial_temperature, max_iterations, fitness))
+
+    with multiprocessing.Pool() as pool:
+        results = pool.starmap(playfair_simulated_annealing_break_worker,
+                               worker_args, chunksize)
+    return max(results, key=lambda result: result[1])
+
+def playfair_simulated_annealing_break_worker(message, plain_alphabet, cipher_alphabet, 
+                                     t0, max_iterations, fitness):
+    """Run one simulated-annealing search for a Playfair keyword.
+
+    Starting from `cipher_alphabet` (used as the Playfair keyword), each
+    iteration proposes a mutated keyword: two letters swapped, a random
+    letter appended and swapped into a random position, or one letter
+    deleted. The message is deciphered with the candidate, which is accepted
+    if it scores better under `fitness`, or otherwise with probability
+    exp((new - current) / temperature). The temperature starts at `t0` and
+    falls by a fixed step each iteration, down to a floor of 0.001.
+
+    `plain_alphabet` is accepted to match the caller's argument tuple but is
+    not used.
+
+    Returns a pair (settings, best_fitness), where settings is a dict with
+    the keys 'alphabet', 'wrap', 'letters_to_merge', 'pad_replace' and
+    'padding_letter'.
+    """
+    def swap(letters, i, j):
+        """Return `letters` with the characters at i and j exchanged."""
+        if i > j:
+            i, j = j, i
+        if i == j:
+            return letters
+        return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
+                letters[j+1:])
+
+    # These settings are held fixed during the search: only the keyword is
+    # mutated. Binding them up front means the result is always defined,
+    # even if no candidate ever beats the starting keyword.
+    wrap = KeywordWrapAlphabet.from_a
+    letters_to_merge = {'j': 'i'}
+    pad_replace = False
+    padding_letter = 'x'
+
+    temperature = t0
+    # Linear cooling that reaches the floor after 90% of the iterations.
+    dt = t0 / (0.9 * max_iterations)
+
+    current_alphabet = cipher_alphabet
+    plaintext = playfair_decipher(message, current_alphabet, padding_letter,
+                                  pad_replace,
+                                  letters_to_merge, 
+                                  wrap)
+    current_fitness = fitness(plaintext)
+
+    best_alphabet = current_alphabet
+    best_fitness = current_fitness
+    best_plaintext = plaintext
+
+    for i in range(max_iterations):
+        chosen = random.random()
+        if chosen < 0.7:
+            # Swap a random letter with a nearby one.
+            swap_a = random.randrange(len(current_alphabet))
+            swap_b = (swap_a + int(random.gauss(0, 4))) % len(current_alphabet)
+            alphabet = swap(current_alphabet, swap_a, swap_b)
+        elif chosen < 0.85:
+            # Append a random letter, then swap it into a random position.
+            new_letter = random.choice(string.ascii_lowercase)
+            alphabet = swap(current_alphabet + new_letter, 
+                            random.randrange(len(current_alphabet)), 
+                            len(current_alphabet))
+        elif len(current_alphabet) > 1:
+            # Delete one letter, but never empty the keyword.
+            deletion_position = random.randrange(len(current_alphabet))
+            alphabet = (current_alphabet[:deletion_position] + 
+                        current_alphabet[deletion_position+1:])
+        else:
+            alphabet = current_alphabet
+
+        try:
+            plaintext = playfair_decipher(message, alphabet, padding_letter,
+                                          pad_replace,
+                                          letters_to_merge, 
+                                          wrap)
+        except Exception:
+            # Record the failing settings, then let the error propagate.
+            logger.error('Error deciphering with alphabet {}, padding_letter {}, '
+                'pad_replace {}, letters_to_merge {}, wrap {}'.format(
+                    alphabet, padding_letter, pad_replace, 
+                    letters_to_merge, wrap))
+            raise
+
+        new_fitness = fitness(plaintext)
+        try:
+            sa_chance = math.exp((new_fitness - current_fitness) / temperature)
+        except (OverflowError, ZeroDivisionError):
+            sa_chance = 0
+        if (new_fitness > current_fitness or random.random() < sa_chance):
+            current_fitness = new_fitness
+            current_alphabet = alphabet
+
+        if current_fitness > best_fitness:
+            # current_fitness can only have risen through the acceptance just
+            # above, so `plaintext` belongs to current_alphabet here.
+            best_alphabet = current_alphabet
+            best_fitness = current_fitness
+            best_plaintext = plaintext
+        if i % 500 == 0:
+            logger.debug('Simulated annealing: iteration {}, temperature {}, '
+                'current alphabet {}, current_fitness {}, '
+                'best_plaintext {}'.format(i, temperature, current_alphabet, 
+                current_fitness, best_plaintext[:50]))
+        temperature = max(temperature - dt, 0.001)
+
+    return { 'alphabet': best_alphabet
+           , 'wrap': wrap
+           , 'letters_to_merge': letters_to_merge
+           , 'pad_replace': pad_replace
+           , 'padding_letter': padding_letter
+           }, best_fitness
diff --git a/szyfrow/pocket_enigma.py b/szyfrow/pocket_enigma.py
new file mode 100644 (file)
index 0000000..a51955a
--- /dev/null
@@ -0,0 +1,194 @@
+from support.utilities import *
+from support.language_models import *
+
+from logger import logger
+
+
+class PocketEnigma(object):
+    """A pocket enigma machine
+    The wheel is internally represented as a 26-element list self.wheel_map, 
+    where wheel_map[i] == j shows that the position i places on from the arrow 
+    maps to the position j places on.
+
+    The doctests in this class refer to a machine called `pe`, which is
+    supplied when the module's doctests are run.
+    """
+    def __init__(self, wheel=1, position='a'):
+        """initialise the pocket enigma, including which wheel to use and the
+        starting position of the wheel.
+
+        The wheel is either 1 or 2 (the predefined wheels) or a list of letter
+        pairs.
+
+        The position is the letter pointed to by the arrow on the wheel. Any
+        other value (such as an integer offset) is stored unchanged.
+
+        >>> pe.wheel_map
+        [25, 4, 23, 10, 1, 7, 9, 5, 12, 6, 3, 17, 8, 14, 13, 21, 19, 11, 20, 16, 18, 15, 24, 2, 22, 0]
+        >>> pe.position
+        0
+        """
+        self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), 
+            ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), 
+            ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')]
+        self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), 
+            ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), 
+            ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')]
+        if wheel == 1:
+            self.make_wheel_map(self.wheel1)
+        elif wheel == 2:
+            self.make_wheel_map(self.wheel2)
+        else:
+            # make_wheel_map validates the specification before using it.
+            self.make_wheel_map(wheel)
+        # `in` on a string only accepts a string on the left, so a non-string
+        # position must bypass the membership test rather than raise TypeError.
+        if isinstance(position, str) and position in string.ascii_lowercase:
+            self.position = pos(position)
+        else:
+            self.position = position
+
+    def make_wheel_map(self, wheel_spec):
+        """Expands a wheel specification from a list of letter-letter pairs
+        into a full wheel_map.
+
+        Raises ValueError (from validate_wheel_spec) if the specification is
+        not valid.
+
+        >>> pe.make_wheel_map(pe.wheel2)
+        [2, 3, 0, 1, 22, 8, 15, 12, 5, 10, 9, 13, 7, 11, 16, 6, 14, 25, 20, 21, 18, 19, 4, 24, 23, 17]
+        """
+        self.validate_wheel_spec(wheel_spec)
+        self.wheel_map = [0] * 26
+        for p in wheel_spec:
+            # Each pair is a two-way connection across the wheel.
+            self.wheel_map[pos(p[0])] = pos(p[1])
+            self.wheel_map[pos(p[1])] = pos(p[0])
+        return self.wheel_map
+
+    def validate_wheel_spec(self, wheel_spec):
+        """Validates that a wheel specification will turn into a valid wheel
+        map.
+
+        >>> pe.validate_wheel_spec([])
+        Traceback (most recent call last):
+            ...
+        ValueError: Wheel specification has 0 pairs, requires 13
+        >>> pe.validate_wheel_spec([('a', 'b', 'c')]*13)
+        Traceback (most recent call last):
+            ...
+        ValueError: Not all mappings in wheel specification have two elements
+        >>> pe.validate_wheel_spec([('a', 'b')]*13)
+        Traceback (most recent call last):
+            ...
+        ValueError: Wheel specification does not contain 26 letters
+        """
+        if len(wheel_spec) != 13:
+            raise ValueError("Wheel specification has {} pairs, requires 13".
+                format(len(wheel_spec)))
+        for p in wheel_spec:
+            if len(p) != 2:
+                raise ValueError("Not all mappings in wheel specification "
+                    "have two elements")
+        if len(set([p[0] for p in wheel_spec] + 
+                    [p[1] for p in wheel_spec])) != 26:
+            raise ValueError("Wheel specification does not contain 26 letters")
+
+    def encipher_letter(self, letter):
+        """Enciphers a single letter, by advancing the wheel before looking up
+        the letter on the wheel.
+
+        >>> pe.set_position('f')
+        5
+        >>> pe.encipher_letter('k')
+        'h'
+        """
+        self.advance()
+        return self.lookup(letter)
+    # Deciphering is done by the same operation as enciphering.
+    decipher_letter = encipher_letter
+
+    def lookup(self, letter):
+        """Look up what a letter enciphers to, without turning the wheel.
+        Anything that is not a lowercase letter enciphers to the empty string.
+
+        >>> pe.set_position('f')
+        5
+        >>> cat([pe.lookup(l) for l in string.ascii_lowercase])
+        'udhbfejcpgmokrliwntsayqzvx'
+        >>> pe.lookup('A')
+        ''
+        """
+        if letter in string.ascii_lowercase:
+            # Convert to an offset from the arrow, map across the wheel, then
+            # convert back to an absolute letter.
+            return unpos(
+                (self.wheel_map[(pos(letter) - self.position) % 26] + 
+                    self.position))
+        else:
+            return ''
+
+    def advance(self):
+        """Advances the wheel one position.
+
+        >>> pe.set_position('f')
+        5
+        >>> pe.advance()
+        6
+        """
+        self.position = (self.position + 1) % 26
+        return self.position
+
+    def encipher(self, message, starting_position=None):
+        """Enciphers a whole message. If a starting position is given, the
+        wheel is set to it first; otherwise enciphering continues from the
+        wheel's current position.
+
+        >>> pe.set_position('f')
+        5
+        >>> pe.encipher('helloworld')
+        'kjsglcjoqc'
+        >>> pe.set_position('f')
+        5
+        >>> pe.encipher('kjsglcjoqc')
+        'helloworld'
+        >>> pe.encipher('helloworld', starting_position = 'x')
+        'egrekthnnf'
+        """
+        if starting_position:
+            self.set_position(starting_position)
+        # The list is built in message order, so the wheel advances once per
+        # character, exactly as in a letter-by-letter loop.
+        return ''.join([self.encipher_letter(l) for l in message])
+    # Deciphering is done by the same operation as enciphering.
+    decipher = encipher
+
+    def set_position(self, position):
+        """Sets the position of the wheel, by specifying the letter the arrow
+        points to.
+
+        >>> pe.set_position('a')
+        0
+        >>> pe.set_position('m')
+        12
+        >>> pe.set_position('z')
+        25
+        """
+        self.position = pos(position)
+        return self.position
+
+
+def pocket_enigma_break_by_crib(message, wheel_spec, crib, crib_position):
+    """Break a pocket enigma using a crib (some plaintext that's expected to
+    be in a certain position). Every starting wheel position is tried in
+    turn; the result is the list of those whose decipherment contains the
+    crib at `crib_position`.
+
+    >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
+    ['a', 'f', 'q']
+    >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
+    ['a']
+    >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
+    ['a']
+    >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
+    ['a']
+    >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
+    ['a', 'j', 'n']
+    >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
+    []
+    """
+    machine = PocketEnigma(wheel=wheel_spec)
+    crib_end = crib_position + len(crib)
+    matches = []
+    for start in string.ascii_lowercase:
+        machine.set_position(start)
+        candidate = machine.decipher(message)
+        if candidate[crib_position:crib_end] == crib:
+            matches.append(start)
+    return matches
+
+if __name__ == "__main__":
+    # Run this module's doctests. They refer to a shared machine named `pe`
+    # (wheel 1, arrow at 'a'), which is supplied here through extraglobs.
+    import doctest
+    doctest.testmod(extraglobs={'pe': PocketEnigma(1, 'a')})
\ No newline at end of file
diff --git a/szyfrow/polybius.py b/szyfrow/polybius.py
new file mode 100644 (file)
index 0000000..965c3bb
--- /dev/null
@@ -0,0 +1,184 @@
+import multiprocessing 
+from support.utilities import *
+from support.language_models import *
+from cipher.keyword_cipher import KeywordWrapAlphabet, keyword_cipher_alphabet_of
+
+from logger import logger
+
+def polybius_grid(keyword, column_order, row_order, letters_to_merge=None,
+                  wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Grid for a Polybius cipher, using a keyword to rearrange the
+    alphabet.
+
+    Returns a dict from each letter to its pair of labels, the first taken
+    from `column_order` and the second from `row_order`. Letters that are
+    keys of `letters_to_merge` (by default 'j', merged into 'i') share the
+    cell of the letter they merge into.
+
+    >>> polybius_grid('a', 'abcde', 'abcde')['x'] == ('e', 'c')
+    True
+    >>> polybius_grid('elephant', 'abcde', 'abcde')['e'] == ('a', 'a')
+    True
+    >>> polybius_grid('elephant', 'abcde', 'abcde')['b'] == ('b', 'c')
+    True
+    """
+    alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet)
+    if letters_to_merge is None:
+        letters_to_merge = {'j': 'i'}
+    cells = [(c, r) for c in column_order for r in row_order]
+    placed_letters = [l for l in alphabet if l not in letters_to_merge]
+    grid = dict(zip(placed_letters, cells))
+    # Merged letters are added last, so they follow their targets in the grid.
+    for merged in letters_to_merge:
+        grid[merged] = grid[letters_to_merge[merged]]
+    return grid
+
+def polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge=None,
+                  wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Grid for decrypting using a Polybius cipher, using a keyword to 
+    rearrange the alphabet.
+
+    Returns a dict from each pair of labels (the first from `column_order`,
+    the second from `row_order`) to the letter in that cell. Letters that
+    are keys of `letters_to_merge` (by default 'j') are left out.
+
+    >>> polybius_reverse_grid('a', 'abcde', 'abcde')['e', 'c'] == 'x'
+    True
+    >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['a', 'a'] == 'e'
+    True
+    >>> polybius_reverse_grid('elephant', 'abcde', 'abcde')['b', 'c'] == 'b'
+    True
+    """
+    alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet=wrap_alphabet)
+    if letters_to_merge is None:
+        letters_to_merge = {'j': 'i'}
+    cells = [(c, r) for c in column_order for r in row_order]
+    placed_letters = [l for l in alphabet if l not in letters_to_merge]
+    return dict(zip(cells, placed_letters))
+
+
+def polybius_flatten(pair, column_first):
+    """Convert one pair of grid labels into a two-part string.
+
+    The labels are converted with str() and joined in the order given when
+    `column_first` is false, or in the reverse order when it is true.
+
+    >>> polybius_flatten(('a', 'b'), False)
+    'ab'
+    >>> polybius_flatten((1, 2), True)
+    '21'
+    """
+    leading, trailing = (1, 0) if column_first else (0, 1)
+    return str(pair[leading]) + str(pair[trailing])
+
+def polybius_encipher(message, keyword, column_order, row_order, 
+                      column_first=False,
+                      letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a): 
+    """Encipher a message with Polybius cipher, using a keyword to rearrange
+    the alphabet. Characters of the message that are not in the grid are
+    dropped.
+
+
+    >>> polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', \
+          [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], \
+          wrap_alphabet=KeywordWrapAlphabet.from_last)
+    '2214445544551522115522511155551543114252542214111352123234442355411135441314115451112122'
+    >>> polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', 'abcde', 'abcde', \
+          column_first=False)
+    'bbadccddccddaebbaaddbbceaaddddaecbaacadadcbbadaaacdaabedbcccdeddbeaabdccacadaadcceaababb'
+    >>> polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', 'abcde', 'abcde', \
+          column_first=True)
+    'bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaacaadbadecbccedddebaadbcccadaaacdecaaabbb'
+    """
+    grid = polybius_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet)
+    pieces = []
+    for letter in message:
+        if letter in grid:
+            pieces.append(polybius_flatten(grid[letter], column_first))
+    return cat(pieces)
+
+
+def polybius_decipher(message, keyword, column_order, row_order, 
+                      column_first=False,
+                      letters_to_merge=None, wrap_alphabet=KeywordWrapAlphabet.from_a):    
+    """Decipher a message with a Polybius cipher, using a keyword to rearrange
+    the alphabet
+
+    The message is read two characters at a time. Each character is
+    converted to the type of the labels it stands for (taken from the first
+    element of `column_order` and of `row_order`), so that the column and row
+    labels may be of different types. Pairs that are not in the grid are
+    dropped.
+
+    >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\
+    'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \
+    column_first=False)
+    'toisisvtestxessvbephktoefhnugiysweqifoekxelt'
+
+    >>> polybius_decipher('bbdaccddccddeabbaaddbbecaaddddeabcaaacadcdbbdaaaca'\
+    'adbadecbccedddebaadbcccadaaacdecaaabbb', 'elephant', 'abcde', 'abcde', \
+    column_first=True)
+    'thisisatestmessageforthepolybiusdecipherment'
+
+    >>> polybius_decipher('a1', 'elephant', 'abcde', [1, 2, 3, 4, 5])
+    'e'
+    """
+    grid = polybius_reverse_grid(keyword, column_order, row_order, letters_to_merge, wrap_alphabet)
+    column_index_type = type(column_order[0])
+    row_index_type = type(row_order[0])
+    # Grid keys are (column label, row label). polybius_flatten writes the
+    # key in that order unless column_first is set, when it writes it reversed.
+    if column_first:
+        pairs = [(column_index_type(p[1]), row_index_type(p[0])) for p in chunks(message, 2)]
+    else:
+        pairs = [(column_index_type(p[0]), row_index_type(p[1])) for p in chunks(message, 2)]
+    return cat(grid[p] for p in pairs if p in grid)
+
+
+def polybius_break_mp(message, column_labels, row_labels,
+                      letters_to_merge=None,
+                      wordlist=keywords, fitness=Pletters,
+                      number_of_solutions=1, chunksize=500):
+    """Breaks a Polybius substitution cipher using a dictionary and
+    frequency analysis
+
+    Each word in the wordlist is tried with every KeywordWrapAlphabet value
+    and with both settings of column_first; the trials are shared across a
+    multiprocessing pool. Returns the single best (settings, fitness) pair
+    when number_of_solutions is 1, otherwise a list of the best
+    number_of_solutions pairs, fittest first.
+
+    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', 'abcde', 'abcde'), \
+          'abcde', 'abcde', \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
+    -54.53880...)
+    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \
+          'abcde', 'abcde', \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', True), \
+    -54.53880...)
+    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \
+          'abcde', 'abcde', \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
+    -54.53880...)
+    >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
+          'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \
+          'abcde', 'pqrst', \
+          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'pqrst', True), \
+    -54.53880...)
+    """
+    if letters_to_merge is None:
+        letters_to_merge = {'j': 'i'}
+
+    def fit_of(result):
+        return result[1]
+
+    with multiprocessing.Pool() as pool:
+        jobs = []
+        for word in wordlist:
+            for wrap in KeywordWrapAlphabet:
+                for column_first in [False, True]:
+                    jobs.append((message, word, wrap,
+                                 column_labels, row_labels, column_first,
+                                 letters_to_merge,
+                                 fitness))
+        # Gotcha: the worker must be defined at the top level of the module
+        #   (limitation of Pool.starmap)
+        results = pool.starmap(polybius_break_worker, jobs, chunksize)
+        if number_of_solutions != 1:
+            ranked = sorted(results, key=fit_of, reverse=True)
+            return ranked[:number_of_solutions]
+        return max(results, key=fit_of)
+
+def polybius_break_worker(message, keyword, wrap_alphabet, 
+                          column_order, row_order, column_first, 
+                          letters_to_merge, 
+                          fitness):
+    """Score one candidate set of Polybius settings.
+
+    Deciphers `message` with the given settings and returns a pair of
+    (settings tuple, fitness). An empty decipherment scores minus infinity.
+    """
+    plaintext = polybius_decipher(message, keyword,
+                                  column_order, row_order,
+                                  column_first=column_first,
+                                  letters_to_merge=letters_to_merge,
+                                  wrap_alphabet=wrap_alphabet)
+    fit = fitness(plaintext) if plaintext else float('-inf')
+    report = ('Polybius break attempt using key {0} (wrap={1}, merging {2}), '
+              'columns as {3}, rows as {4} (column_first={5}) '
+              'gives fit of {6} and decrypt starting: '
+              '{7}')
+    logger.debug(report.format(keyword, wrap_alphabet, letters_to_merge,
+                               column_order, row_order, column_first,
+                               fit, sanitise(plaintext)[:50]))
+    settings = (keyword, wrap_alphabet, column_order, row_order, column_first)
+    return settings, fit
+
+if __name__ == "__main__":
+    # Importing doctest alone does nothing: run the module's doctests when
+    # this file is executed as a script.
+    import doctest
+    doctest.testmod()
\ No newline at end of file
diff --git a/szyfrow/railfence.py b/szyfrow/railfence.py
new file mode 100644 (file)
index 0000000..7d6ac31
--- /dev/null
@@ -0,0 +1,179 @@
+import math
+from enum import Enum
+from itertools import starmap, zip_longest
+from support.utilities import *
+from support.language_models import *
+
+
+from logger import logger
+
+def railfence_encipher(message, height, fillvalue=''):
+    """Railfence cipher.
+    Works by splitting the text into sections, then reading across them to
+    generate the rows in the cipher. The rows are then combined to form the
+    ciphertext.
+
+    Example: the plaintext "hellotherefriends", with a height of four, written
+    out in the railfence as
+       h h i
+       etere*
+       lorfns
+       l e d
+    (with the * showing the one character to finish the last section).
+    Each 'section' is two columns, but unfolded. In the example, the first
+    section is 'hellot'.
+
+    Parameters:
+      message   -- the text to encipher
+      height    -- the number of rows (rails) in the fence
+      fillvalue -- passed to `chunks` when splitting the message into
+                   sections; the doctests below show it padding out the
+                   final section
+
+    Returns the rows of the fence concatenated from top to bottom.
+
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!')
+    'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!')
+    'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!')
+    'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!')
+    'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3)
+    'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5)
+    'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7)
+    'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic'
+    """
+    # One section is a full down-and-up pass over the rails: `height`
+    # characters going down plus `height - 2` coming back up.
+    section_length = (height - 1) * 2
+    sections = chunks(message, section_length, fillvalue=fillvalue)
+    # The top row takes the first character of every section.
+    rows = [cat([s[0] for s in sections])]
+    # Each middle row takes one character from the way down (index r) and one
+    # from the way up (index section_length - r). One-element slices are used
+    # instead of indexing so that a short final section contributes nothing
+    # rather than raising IndexError.
+    for r in range(1, height - 1):
+        up = section_length - r
+        rows += [cat([s[r:r + 1] + s[up:up + 1] for s in sections])]
+    # The bottom row takes the character at the turning point of each section.
+    rows += [cat([s[height - 1:height] for s in sections])]
+    return cat(rows)
+
+def railfence_decipher(message, height, fillvalue=''):
+    """Railfence decipher.
+    Works by reconstructing the grid used to generate the ciphertext, then
+    unfolding the sections so the text can be concatenated together.
+
+    Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first
+    work out that the second row has a character missing, find the rows of the
+    grid, then split the section into its two columns.
+
+    'hhieterelorfnsled' is split into
+        h h i
+        etere
+        lorfns
+        l e d
+    (spaces added for clarity), which is stored in 'rows'. This is then split
+    into 'down_rows' and 'up_rows':
+
+    down_rows:
+       hhi
+       eee
+       lrn
+       led
+
+    up_rows:
+       tr
+       ofs
+
+    These are then zipped together (after the up_rows are reversed) to recover
+    the plaintext.
+
+    Most of the procedure is about finding the correct lengths for each row then
+    splitting the ciphertext into those rows.
+
+    Parameters:
+      message   -- the ciphertext to decipher
+      height    -- the number of rows (rails) used when enciphering
+      fillvalue -- not referenced in the body of this function; any padding
+                   characters in the ciphertext are returned as part of the
+                   plaintext (the doctests strip them afterwards)
+
+    >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3)
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5)
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7)
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    """
+    # find the number and size of the sections, including how many characters
+    #   are missing for a full grid
+    n_sections = math.ceil(len(message) / ((height - 1) * 2))
+    padding_to_add = n_sections * (height - 1) * 2 - len(message)
+    # row_lengths holds one length per position within a section, covering
+    #   both the down rows and the up rows. The last `padding_to_add`
+    #   positions are one character short, because the final section stops
+    #   before reaching them.
+    row_lengths = [n_sections] * (height - 1) * 2
+    for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1):
+        row_lengths[i] -= 1
+    # folded_row_lengths are the lengths of the rows as they appear in the
+    #   ciphertext: each middle row combines down position i with the matching
+    #   up position -i; the top and bottom rows have a single position each
+    folded_row_lengths = [row_lengths[0]]
+    for i in range(1, height-1):
+        folded_row_lengths += [row_lengths[i] + row_lengths[-i]]
+    folded_row_lengths += [row_lengths[height - 1]]
+    # find the rows that form the railfence grid by slicing the ciphertext
+    #   into consecutive pieces of those lengths
+    rows = []
+    row_start = 0
+    for i in folded_row_lengths:
+        rows += [message[row_start:row_start + i]]
+        row_start += i
+    # split the rows into the 'down_rows' (those that form the first column of
+    #   a section) and the 'up_rows' (those that form the second column of a
+    #   section). Within a middle row the characters alternate down, up, ...
+    down_rows = [rows[0]]
+    up_rows = []
+    for i in range(1, height-1):
+        down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])]
+        up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])]
+    down_rows += [rows[-1]]
+    # the up stroke visits the middle rows from the bottom upwards
+    up_rows.reverse()
+    # read the grid one section at a time; zip_longest pads the short rows
+    #   with '' so the incomplete final section is still emitted
+    return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r)
+
+
+def railfence_break(message, max_key_length=20,
+                     fitness=Pbigrams, chunksize=500):
+    """Breaks a railfence cipher using a range of lengths and
+    n-gram frequency analysis
+
+    The message is sanitised, then deciphered with every height from 2 up to
+    and including `max_key_length`. Each decipherment is scored with
+    `fitness` and the best-scoring `(height, fit)` pair is returned.
+
+    `chunksize` is part of the signature but is not used by this function.
+
+    >>> railfence_break(railfence_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        7)) # doctest: +ELLIPSIS
+    (7, -709.46467226...)
+    >>> railfence_break(railfence_encipher(sanitise( \
+            "It is a truth universally acknowledged, that a single man in \
+             possession of a good fortune, must be in want of a wife. However \
+             little known the feelings or views of such a man may be on his \
+             first entering a neighbourhood, this truth is so well fixed in \
+             the minds of the surrounding families, that he is considered the \
+             rightful property of some one or other of their daughters."), \
+        7), \
+        fitness=Ptrigrams) # doctest: +ELLIPSIS
+    (7, -997.0129085...)
+    """
+    def worker(message, height, fitness):
+        # Score the decipherment produced by one candidate height.
+        plaintext = railfence_decipher(message, height)
+        fit = fitness(plaintext)
+        return height, fit
+
+    sanitised_message = sanitise(message)
+    results = starmap(worker, [(sanitised_message, i, fitness)
+                               for i in range(2, max_key_length+1)])
+    return max(results, key=lambda k: k[1])
diff --git a/szyfrow/vigenere.py b/szyfrow/vigenere.py
new file mode 100644 (file)
index 0000000..f1cfe99
--- /dev/null
@@ -0,0 +1,184 @@
+from enum import Enum
+from itertools import starmap, cycle
+import multiprocessing
+from cipher.caesar import *
+from support.utilities import *
+from support.language_models import *
+
+from logger import logger
+
+def vigenere_encipher(message, keyword):
+    """Vigenere encipher
+
+    Each character of `message` is passed to `caesar_encipher_letter` with a
+    shift taken from `pos` of the matching letter of the sanitised keyword;
+    the keyword is repeated for as long as the message lasts.
+
+    >>> vigenere_encipher('hello', 'abc')
+    'hfnlp'
+    """
+    key_shifts = [pos(key_letter) for key_letter in sanitise(keyword)]
+    enciphered = []
+    for letter, shift in zip(message, cycle(key_shifts)):
+        enciphered.append(caesar_encipher_letter(letter, shift))
+    return cat(enciphered)
+
+def vigenere_decipher(message, keyword):
+    """Vigenere decipher
+
+    Each character of `message` is passed to `caesar_decipher_letter` with a
+    shift taken from `pos` of the matching letter of the sanitised keyword;
+    the keyword is repeated for as long as the message lasts.
+
+    >>> vigenere_decipher('hfnlp', 'abc')
+    'hello'
+    """
+    key_shifts = [pos(key_letter) for key_letter in sanitise(keyword)]
+    deciphered = []
+    for letter, shift in zip(message, cycle(key_shifts)):
+        deciphered.append(caesar_decipher_letter(letter, shift))
+    return cat(deciphered)
+
+
+def beaufort_encipher(message, keyword):
+    """Beaufort encipher
+
+    Each message letter is replaced by `unpos(k - pos(letter))`, where `k`
+    is `pos` of the matching letter of the sanitised keyword; the keyword is
+    repeated for as long as the message lasts.
+
+    >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii')
+    'sevsvrusyrrxfayyxuteemazudmpjmmwr'
+    """
+    key_positions = [pos(key_letter) for key_letter in sanitise(keyword)]
+    enciphered = []
+    for letter, key in zip(message, cycle(key_positions)):
+        enciphered.append(unpos(key - pos(letter)))
+    return cat(enciphered)
+
+# Deciphering reuses the enciphering function unchanged. It maps a letter at
+# position p to k - p, so applying it twice gives k - (k - p) = p
+# (presumably modulo the alphabet size -- depends on `unpos`; confirm there).
+beaufort_decipher = beaufort_encipher    
+
+# The Beaufort variant is provided as Vigenere with the two directions
+# swapped: enciphering is Vigenere deciphering and vice versa.
+beaufort_variant_encipher=vigenere_decipher
+beaufort_variant_decipher=vigenere_encipher
+
+
+def index_of_coincidence_scan(text, max_key_length=20):
+    """Finds the index of coincidence of the text, using different chunk sizes.
+
+    For every size from 1 to `max_key_length` inclusive, the sanitised text
+    is split with `every_nth`, and the indices of coincidence of the pieces
+    are summed and divided by the size. Returns a dict mapping each size to
+    that value.
+    """
+    cleaned = sanitise(text)
+    return {size: sum(index_of_coincidence(piece)
+                      for piece in every_nth(cleaned, size)) / size
+            for size in range(1, max_key_length + 1)}
+
+def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
+                              chunksize=500):
+    """Breaks a vigenere cipher using a dictionary and frequency analysis.
+
+    Every word in `wordlist` is tried as the keyword, in a multiprocessing
+    pool, and the `(keyword, fit)` pair with the highest fit is returned.
+
+    >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
+             'message for the vigenere decipherment'), 'cat'), \
+             wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
+    ('cat', -52.9472712...)
+    """
+    candidates = [(message, word, fitness) for word in wordlist]
+    with multiprocessing.Pool() as pool:
+        # The worker must be a module-level function: Pool.starmap cannot
+        #   use a function defined inside this one.
+        scored = pool.starmap(vigenere_keyword_break_worker, candidates,
+                              chunksize)
+    return max(scored, key=lambda k: k[1])
+vigenere_keyword_break = vigenere_keyword_break_mp
+
+def vigenere_keyword_break_worker(message, keyword, fitness):
+    """Decipher `message` with one candidate keyword and score the result.
+
+    Logs the attempt at debug level and returns `(keyword, fit)`.
+    """
+    decrypt = vigenere_decipher(message, keyword)
+    score = fitness(decrypt)
+    preview = sanitise(decrypt)[:50]
+    logger.debug('Vigenere keyword break attempt using key {0} gives fit of '
+                 '{1} and decrypt starting: {2}'.format(keyword,
+                                                        score, preview))
+    return keyword, score
+
+
+def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters):
+    """Breaks a Vigenere cipher with frequency analysis
+
+    For each key length from 1 to `max_key_length` inclusive, the sanitised
+    message is split with `every_nth`, each piece is broken with
+    `caesar_break`, and the resulting shifts are turned into a key with
+    `unpos`. The message is deciphered with that key and scored with
+    `fitness`; the best-scoring `(key, fit)` pair is returned.
+
+    >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
+            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+            "afternoon when he left his jacket hanging on the easel in the " \
+            "attic. I jump every time I hear a footstep on the stairs, " \
+            "certain that the theft has been discovered and that I will " \
+            "be caught. The SS officer visits less often now that he is " \
+            "sure"), 'florence')) # doctest: +ELLIPSIS
+    ('florence', -307.5473096...)
+    """
+    def worker(message, key_length, fitness):
+        # Use the `message` argument rather than a variable from the
+        #   enclosing scope, so the worker depends only on what it is given.
+        splits = every_nth(message, key_length)
+        key = cat([unpos(caesar_break(s)[0]) for s in splits])
+        plaintext = vigenere_decipher(message, key)
+        fit = fitness(plaintext)
+        return key, fit
+    sanitised_message = sanitise(message)
+    results = starmap(worker, [(sanitised_message, i, fitness)
+                               for i in range(1, max_key_length+1)])
+    return max(results, key=lambda k: k[1])
+
+
+def beaufort_sub_break(message, fitness=Pletters):
+    """Breaks one chunk of a Beaufort cipher with frequency analysis
+
+    Tries each of the 26 possible keys, deciphering every letter `l` of
+    `message` as `unpos(key - pos(l))`, and scores each result with
+    `fitness`. Returns `(best_key, best_fit)`; if no key scores above
+    negative infinity the result is `(0, float('-inf'))`.
+
+    >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
+      'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
+    (0, -117.4492...)
+    >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
+      'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
+    (17, -114.9598...)
+    """
+    # Initialise the name that is actually read below, so that it is always
+    #   bound even if no candidate beats the starting fit.
+    best_key = 0
+    best_fit = float('-inf')
+    for key in range(26):
+        # Join the letters so the fitness function and the log message get a
+        #   string, as in the other break functions.
+        plaintext = cat([unpos(key - pos(l)) for l in message])
+        fit = fitness(plaintext)
+        logger.debug('Beaufort sub break attempt using key {0} gives fit of {1} '
+                     'and decrypt starting: {2}'.format(key, fit,
+                                                        plaintext[:50]))
+        if fit > best_fit:
+            best_fit = fit
+            best_key = key
+    logger.info('Beaufort sub break best fit: key {0} gives fit of {1} and '
+                'decrypt starting: {2}'.format(best_key, best_fit,
+                    cat([unpos(best_key - pos(l)) for l in message[:50]])))
+    return best_key, best_fit
+
+
+def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
+    """Breaks a Beaufort cipher with frequency analysis
+
+    For each key length from 1 to `max_key_length` inclusive, the sanitised
+    message is split with `every_nth` and each piece is broken with
+    `beaufort_sub_break` to build a candidate key. The message is deciphered
+    with that key and scored with `fitness`; the best-scoring `(key, fit)`
+    pair is returned.
+
+    >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
+            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+            "afternoon when he left his jacket hanging on the easel in the " \
+            "attic. I jump every time I hear a footstep on the stairs, " \
+            "certain that the theft has been discovered and that I will " \
+            "be caught. The SS officer visits less often now " \
+            "that he is sure"), 'florence')) # doctest: +ELLIPSIS
+    ('florence', -307.5473096791...)
+    """
+    cleaned = sanitise(message)
+
+    def attempt(key_length):
+        # Recover one key letter per piece, then score the full decipherment.
+        pieces = every_nth(cleaned, key_length)
+        key = cat([unpos(beaufort_sub_break(piece)[0]) for piece in pieces])
+        return key, fitness(beaufort_decipher(cleaned, key))
+
+    candidates = (attempt(length) for length in range(1, max_key_length + 1))
+    return max(candidates, key=lambda k: k[1])
+
+
+def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters):
+    """Breaks a Beaufort variant cipher with frequency analysis
+
+    For each key length from 1 to `max_key_length` inclusive, the sanitised
+    message is split with `every_nth`, each piece is broken with
+    `caesar_break`, and the negated shifts are turned into a key with
+    `unpos`. The message is deciphered with `beaufort_variant_decipher` and
+    scored with `fitness`; the best-scoring `(key, fit)` pair is returned.
+
+    >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
+            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
+            "afternoon when he left his jacket hanging on the easel in the " \
+            "attic. I jump every time I hear a footstep on the stairs, " \
+            "certain that the theft has been discovered and that I will " \
+            "be caught. The SS officer visits less often now " \
+            "that he is sure"), 'florence')) # doctest: +ELLIPSIS
+    ('florence', -307.5473096791...)
+    """
+    def worker(message, key_length, fitness):
+        # Use the `message` argument rather than a variable from the
+        #   enclosing scope, so the worker depends only on what it is given.
+        splits = every_nth(message, key_length)
+        key = cat([unpos(-caesar_break(s)[0]) for s in splits])
+        plaintext = beaufort_variant_decipher(message, key)
+        fit = fitness(plaintext)
+        return key, fit
+    sanitised_message = sanitise(message)
+    results = starmap(worker, [(sanitised_message, i, fitness)
+                               for i in range(1, max_key_length+1)])
+    return max(results, key=lambda k: k[1])
+
+if __name__ == "__main__":
+    # Run the doctests embedded in this module's docstrings when the file is
+    # executed directly.
+    import doctest
+    doctest.testmod()
\ No newline at end of file