Updated code from development branch
authorNeil Smith <neil.git@njae.me.uk>
Thu, 7 Jan 2016 16:45:30 +0000 (16:45 +0000)
committerNeil Smith <neil.git@njae.me.uk>
Thu, 7 Jan 2016 16:45:30 +0000 (16:45 +0000)
SIGNED.md [new file with mode: 0644]
cipher.py
language_models.py
norms.py

diff --git a/SIGNED.md b/SIGNED.md
new file mode 100644 (file)
index 0000000..3204992
--- /dev/null
+++ b/SIGNED.md
@@ -0,0 +1,158 @@
+##### Signed by https://keybase.io/neilnjae
+```
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v2
+
+iQIcBAABCAAGBQJWjpYtAAoJEJPB2e07PgbqRXUP/jIADsBXAgHR7JTnNp11G6Bj
+ne+9n8CJ/Kqf2rRVzVWLePPildK6AkDtKKulJsVNJ+cD7Kt+C2v6GR7DoXReh3ak
+Dp14k5TQ7ta9kP01e+p7eNZ5khn7sHJy1atBVZ7NYP5oSvr20Wjs5Hc/v33sRPAq
+2D0P1hRD7lrv/M5PG1WBXuhnE+QsuC63g7FQu/4/J7uaFmoOCq75seYMWJAL0LPY
+VZCuNB4/45f/WoPY0M9LqG4uOBI0snmoVLFJV0y9GesP9qmzxSsNGsWymeBIU/+x
+i7vc45Txb+KQjX9crl+A3PBrm8NpTxEQGi4+zt9B+3TwsmcAgwnM3316JJrsLG0V
+cVlqpZ0CmBcKj2R+Z2+UDe5dfPIb4rIk9b3KY7e2nYLh5pBjyFiPxJf/hkit56vs
+ENLyvXx1IoEoR9VJaTRCjTz+ViwMFEQFU6pqsj6yH6qS6pekYIENggRyol6q2qPt
+dHG7SgZtB2ynowJ7kQHs90S7zHhY7XCOENLT93gRSKfYbu4lOni5P2OFrfMH6tjR
+fbB5rNPTJA0k6i57fi22i7I5kFt90EyitXgBZaoR23HlXIhIKATIrd3BWLB68VFd
+1x2VrcgjNoDPQWGoNoRorJP4p68FVqtbKXbGEXYfLAJfNwW6FCEQ9Kg83OFkUOyP
+r7Hd2gCiTxL10XeabFEJ
+=exxP
+-----END PGP SIGNATURE-----
+
+```
+
+<!-- END SIGNATURES -->
+
+### Begin signed statement 
+
+#### Expect
+
+```
+size     exec  file                                            contents                                                                                                                         
+               ./                                                                                                                                                                               
+384              .gitignore                                    a93de2ae5c2a47a38599751d1f914566569dfa09dd1778e207117db6c71421dd                                                                 
+                 2012/                                                                                                                                                                          
+1678               1a.ciphertext                               c7c16198650e7d91577683acca664a0e588ac5474046a028dea3d9dc4b388df8                                                                 
+1678               1a.plaintext                                1b34de5b1d1422a3089c54d751dfd3938449c98526137a0b8b3b6561a700f9b5                                                                 
+3312               1b.ciphertext                               ec263c5cb415f99924e5744846c618b2566a072ac79f6b3cfdde294b99c491fd                                                                 
+3323               1b.plaintext                                a7fde879259e64fccca52618a9c008f2111e485ed1c6bfad863c90d8e6ceacae                                                                 
+1716               2a.ciphertext                               2203112c5b43e09b61d8e2918554831559efc7a1157aafda85c25d5ae67f2f0f                                                                 
+1716               2a.plaintext                                28afae88a3b3f0e553f326be7efb896587cb4ac43b9fd664d2cc3f9b2489de35                                                                 
+2268               2b.ciphertext                               587ba7bb4c39d6bd23cbefdcef63d1211d81f6340d401027f9b8e97c97a71b6c                                                                 
+2268               2b.plaintext                                4ee16e37ec82a6ea5446c3d03689bcf608d7408cf79542accaabc297fe6d9f97                                                                 
+1549               3a.ciphertext                               fcfff1c90fb5e0cb4240b0617cd7a0bfb1f81ee513a572b44562a0e9db9b4a1b                                                                 
+1549               3a.plaintext                                54c6b1ec3364c34a824ee693c00fa41c466f4bfccad281c4111ce1966232c9f4                                                                 
+2009               3b.ciphertext                               4a568ac5e67430cd30a2bfd73214a7b3984b3117be16f649ea99c40d3de551c3                                                                 
+2118               3b.plaintext                                a2b398c99fe1f00d3e7c558c5a9ea9d6739120d63e84b17f19d8f524ae9adf26                                                                 
+1495               4a.ciphertext                               4cbf7aa5d463087f40c3ca724a3860f38b8cad1fc281374eb4567df004d3b5ca                                                                 
+1559               4a.plaintext                                b1c02b4ed9df29fcefb046aec75a7702f6e6c1cd28bd56cc67bec1f4ae2af4fb                                                                 
+2137               4b.ciphertext                               829ef4812a023567b9652c57f92da6f48c0b854694eac1a7c9452645723e15db                                                                 
+2212               4b.plaintext                                d811c945afab0212c346531fd557c0f1d9a3fb0207835152a1426deda9e0b2a1                                                                 
+1449               5a.ciphertext                               9fb9fbe92a26b3d1b2f54909e732a0d1afde90f97483affcfa9bbacb4edf3d77                                                                 
+1524               5a.plaintext                                203ea3702e17269cbed804f019dd887ae7fc27b2585f1551fc758692e5ba14dd                                                                 
+2561               5b.ciphertext                               c0bc2532c853e723fb7c3529c60ca859b7d61af238a98ddaabe0c01423e2a3ab                                                                 
+2618               5b.plaintext                                61431a5bfd9a0e1e21e8ac7ba764ba46cc4b2a3e7adf22fff13153fdc278a451                                                                 
+1225               6a.ciphertext                               d22c65b2445ab1228c8351239c37310188d4ec9e173825da43f79509d957aada                                                                 
+1285               6a.plaintext                                27e574c224ad11d7b6258ef6212e8961decac5af3e93c9b08b0919c82c376f98                                                                 
+1478               6b.ciphertext                               e80fc065648e8a4207a7d698cd0f39722d69df819fdf756a7d30e2886670542c                                                                 
+1544               6b.plaintext                                424fc647b26f1ef153ec73eda2745896813bc1c6b498a5a0b0b05ff931ec5bf6                                                                 
+1331               7a.ciphertext                               d73fe6633bafb097a283396bb5a9b8a5f025031453d1df14156f9c41e898fac3                                                                 
+1412               7a.plaintext                                e13149a6f2df3d7d466f761d58c4a8ca3a82fbbc4f6dca983e83442e5caa748f                                                                 
+2551               7b.ciphertext                               15200bf858dede4f6cabffaac54c14d37171cd4f7641a059258cd44b6cb98e7d                                                                 
+2661               7b.plaintext                                430d167225a8f8c98d6e3f69aaca5846ba7d112567b987f3481832b07b03fc05                                                                 
+1125               8a.ciphertext                               5c47ee5314dc118b515b2ab8dc5ed14c492294e70a67f9329ed10d3486fd9ba6                                                                 
+1195               8a.plaintext                                a1deee4fa908fb536035435eb066d5c1fed91f962cfd0390f5c51fbb60caceb7                                                                 
+1783               8b.ciphertext                               c782e2765c9e3cb0ec72cc77f063ad7f90f2a003d5078b452c24ce3a4549f696                                                                 
+1783               8b.plaintext                                925b270f35dce522f0447085f78795a6f8101013648c836b029f0535f4d752e6                                                                 
+                 2013/                                                                                                                                                                          
+1340               1a.ciphertext                               6b69f06bd684fdaa79ff35497715af6ded3c176277859d1d774bb2569dd53a97                                                                 
+1340               1a.plaintext                                cbdaac33a7943a752ba5dd21cf7af23bdbdd22e4a5feb0bccecce45d605d0581                                                                 
+1495               1b.ciphertext                               2e0389e2c7d3156892f7d361ee79f86eb79d34812fca37d8ce692cb6f3976446                                                                 
+1495               1b.plaintext                                717b1f8955515a5d5d2f049f0ffe137820d71f8b2a1689e1a29ec30ed7c9d473                                                                 
+1135               2a.ciphertext                               85ad3dac4751a8db90f391d0f06935a5ef2118d9ac72e05fe0518d7f77bff058                                                                 
+1135               2a.plaintext                                11cdf7c462c536085daced417023adaeff2f6b7a6162ccd12b340b079bd36b48                                                                 
+926                2b.ciphertext                               0fc59b28e4cd2db0aa9f28a603e2b25988a6688f05f90a59b205088093721340                                                                 
+973                2b.plaintext                                019c012f87531e2f9173acae952e7d4a304ed1a62ef341e71455a128d3e7056f                                                                 
+983                3a.ciphertext                               20ab3ad9342e51d82243a678eaf2bc9187553bcedb50c830541a54f34dc9b3dd                                                                 
+1090               3b.ciphertext                               c35efccfb68ce3eaa5e7c699ed36a09f23aa719125e4df9f2faef11da1cd5d8a                                                                 
+1134               4a.ciphertext                               523d35a8b412bf2e2a88b160529cae76c512d08f2a589ba53b2ac3c4d4184858                                                                 
+1061               4b.ciphertext                               2cdebce220d9d20ef4c85f208999f81940383a9181bbb0e63b37b7031edcb688                                                                 
+875                5a.ciphertext                               b841414d1fbf8af991df389b1b9466d0401c39074f7433dcf3e90648b4d9ab5f                                                                 
+957                5b.ciphertext                               5b6303144fde83cdcdb74976da515933444b7c29efd8ad15d33e65edd2ef78fb                                                                 
+712                6a.ciphertext                               1313d5ffe4e6d2ee82b4f4bbffbe522d5e073775baa53a11bec10c2a693bc7da                                                                 
+1888               6b.ciphertext                               137618d9d92a8a28b7b7fff9a385c4c2db76dff10e0d326895cb0723a63a6adc                                                                 
+2132               7a.ciphertext                               d604c136b793701d7501c72821d4069975ce098c2da55fac29331f0045c62b5f                                                                 
+1565               7b.ciphertext                               adeef9d9e985c534c40acee5c6734d04195895a32efd9452a89cafef1fdc91b8                                                                 
+182                mona-lisa-words.txt                         d7d05c9c86f6282fa66df5f4ca795c89f01cef88a9ce8c921ce4484b3d6078a7                                                                 
+1190               solutions.txt                               d9575ed8f930520da2e84047ad06f0b51b7d98e1d122f140142b3674bbdbf44f                                                                 
+                 2014/                                                                                                                                                                          
+                 2015/                                                                                                                                                                          
+10000              pysol.zip                                   64e25c247e877f37285db635fcb5dea101e0c531fdbfe5000bcc088c22caad79|68d878fb019198e691d321be1dd6744fae0b48da9c4e705c34f96e351b00fbed
+2195               sol.py.zip                                  4c22e57eaacd8a91ef9b0c338429e400210a03dec9fbacec4a45fe1faed50720|1fd0a62af53125eacfa11016088451e2c58269d98aaef74c320730ebc7601b66
+18025            LICENSE                                       a01259a1b522cf0de95824f9860613b453153eebac468e96196d5d7dba84786c                                                                 
+61               README.md                                     277247b410300ee16477b12ca54ad878d81c8061f6134e2e1cadccaf299de3a3                                                                 
+469              affine_break_parameter_trials.csv             1a9d635d0af2f41fc6f1e83ae87d6372034259321ba288a11fb024e98ed52f4f|dd9c840434de596a30c84e79de26a9824b36c217a84876c2aab0579b76999735
+6488666          big.txt                                       fa066c7d40f0f201ac4144e652aa62430e58a6b3805ec70650f678da5804e87b                                                                 
+514              caesar_break_parameter_trials.csv             6586223bcc00e06e3ff79d107202d6c29ef962a6dd544add00610c5907407e85|1cb7cc77831ef3ef4f994a9ea77e82a841b38acdde45ede9cedbe7a54f1e8e46
+135303           challenge6.ipynb                              5b37a8b10db4c8d9831827a2acdffdcdb65369557d15b3e08a900ee8e088da73                                                                 
+75506            challenge7.ipynb                              ee9a99fa7a9845ad7db77199927fe8ad1d05a6b0272e50d8175e874e7a3e4cdf                                                                 
+42922            cipher.py                                     58637b8946b4fb973b19a374a2066a896d86c928dacaa1ccd2252e6f8bb6e810                                                                 
+120              cipher.sublime-project                        6eeb5379b3486e9b65567acb8ce7d3fe56261019c13c99d10fa00818df4d0409                                                                 
+11564            count_1edit.txt                               3bf563ef032ba151ec1a4b2d1f33f50c49f4a47e4dc5b8152394bc5b63f57655|b5fbacbebcc25f5011ce97bc9ac967a09c50eef28b4aa98379a6c426df6ac08b
+4956241          count_1w.txt                                  51df159fd3de12b20e403c108f526e96dbd723d9cabdd5f17955cdc16059e690                                                                 
+9270             count_2l.txt                                  bc2895f800189070c193907cd8bca956ad65fed2e25c14300d4bb5b6a243ba99                                                                 
+5566017          count_2w.txt                                  781c0596c3eea532d30bef9f3dba1d5137d652f00376260822c761a7584dfb8c                                                                 
+220441           count_3l.txt                                  8702c95530c7d0d182ab94dc03ed7681fcf969819f6db011a58de31411dc6365                                                                 
+320508           count_big.txt                                 3ba257fba1934bd138413d8274e79b56c5992431a27692fd562929aa43ec01a3                                                                 
+3355             find_best_affine_break_parameters.py          6b11004bb93ac26ec7d42d33504e758edbaf9d55365ae2e4ca2fca7589263f25                                                                 
+3027             find_best_caesar_break_parameters.py          0347d80309179d937a88fd1c8684490a513ccd086366c5a0dd55b8a2fe5c565f                                                                 
+1236             find_wikipedia_titles.py                      f040bf855dfec7fff9d8e5eba2fb509179bc53bc02a20b26b7fc61fef983aa45                                                                 
+5645             language_models.py                            bfd5b60cdef8af20cdb061b24a1691f569984be3be333782c3d76e3370e16d14                                                                 
+592              make-cracking-dictionary.py                   71791e64e4853cd9ca292cb436bbe8c72dd60f509811174df93ed2067683d5c1                                                                 
+7077             norms.py                                      a657a36c1741e6f3a513386b318fcc99e6b11f98ec64a48284b47462ff2acf30                                                                 
+8411             norms.pyc                                     ac7a18765c7bcc27e406d8f38d943408097b3384a271502185d53482e6ec0da7|002b186e716cec64869a00bd2d72e16614931e696daa0cf3529d634a0f270e42
+112847           plot-caesar-parameters.ipynb                  639459b4b2e434f9f0852c012ed9a8a8d87bd1cb6c2d65ca5abfdb0e42c3dea6                                                                 
+4538523          shakespeare.txt                               6f9c770efced5c3d87efa6197cd3091b982341372e36c6357f865df91ddecde6                                                                 
+592309           sherlock-holmes.txt                           0027de6f4110440ea51d67a2f3af3484898c630808f13b1d4db108e6283e67a3|2034ee1ebdec47e839607124d22b674d4614e1cc6209d758f7b6e99e69ae8e08
+451530           spell-errors.txt                              a4abe6ce6c24280f9a8d0485cbf78ddd2e58279ca01293692630a08ba4b13407                                                                 
+69351            unknown-word-probability-investigation.ipynb  8a9cd7163f10bf2bfb3e286445eddcfc953f80abfdef4e29dac27617a53c3d41                                                                 
+3291641          war-and-peace.txt                             3ed0f41cfdf660846878943bad5b9d575bcae1e4a92ee9a7f43d3c9dba2af344|6799e48d3fd0a6f4c40b9951ec86de6da81f0b9cd36e413490ac511542ca54d3
+868202           words.txt                                     aa77abbcba3c6dee1306d93adcedc2b2ccb8a4e0344a39d0676732ff58ebd5e5                                                                 
+868384           words_2013.txt                                57faa4841fe28dd82a5da4488b6381c194df6e1ecc04e61fb9f60e842bbca18c                                                                 
+```
+
+#### Ignore
+
+```
+/SIGNED.md
+```
+
+#### Presets
+
+```
+git  # ignore .git and anything as described by .gitignore files
+```
+
+<!-- summarize version = 0.0.9 -->
+
+### End signed statement
+
+<hr>
+
+#### Notes
+
+With keybase you can sign any directory's contents, whether it's a git repo,
+source code distribution, or a personal documents folder. It aims to replace the drudgery of:
+
+  1. comparing a zipped file to a detached statement
+  2. downloading a public key
+  3. confirming it is in fact the author's by reviewing public statements they've made, using it
+
+All in one simple command:
+
+```bash
+keybase dir verify
+```
+
+There are lots of options, including assertions for automating your checks.
+
+For more info, check out https://keybase.io/docs/command_line/code_signing
\ No newline at end of file
index 68472bee600516a5e58d5005e2933bc9765ef567..266237a65a70b070e2ea8bc41d49f273477eda2c 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -1,10 +1,99 @@
-"""A set of ciphers with implementations for both enciphering and deciphering
-them. See cipherbreak for automatic breaking of these ciphers
-"""
-
 import string
 import collections
-from language_models import unaccent, sanitise
+import math
+from enum import Enum
+from itertools import zip_longest, cycle, chain, count
+import numpy as np
+from numpy import matrix
+from numpy import linalg
+from language_models import *
+
+
+modular_division_table = [[0]*26 for _ in range(26)]
+for a in range(26):
+    for b in range(26):
+        c = (a * b) % 26
+        modular_division_table[b][c] = a
+
+
+def every_nth(text, n, fillvalue=''):
+    """Returns n strings, each of which consists of every nth character, 
+    starting with the 0th, 1st, 2nd, ... (n-1)th character
+    
+    >>> every_nth(string.ascii_lowercase, 5)
+    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
+    >>> every_nth(string.ascii_lowercase, 1)
+    ['abcdefghijklmnopqrstuvwxyz']
+    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
+    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
+     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
+    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
+    """
+    split_text = chunks(text, n, fillvalue)
+    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
+
+def combine_every_nth(split_text):
+    """Reforms a text split into every_nth strings
+    
+    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
+    'abcdefghijklmnopqrstuvwxyz'
+    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
+    'abcdefghijklmnopqrstuvwxyz'
+    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
+    'abcdefghijklmnopqrstuvwxyz'
+    """
+    return ''.join([''.join(l) 
+                    for l in zip_longest(*split_text, fillvalue='')])
+
+def chunks(text, n, fillvalue=None):
+    """Split a text into chunks of n characters
+
+    >>> chunks('abcdefghi', 3)
+    ['abc', 'def', 'ghi']
+    >>> chunks('abcdefghi', 4)
+    ['abcd', 'efgh', 'i']
+    >>> chunks('abcdefghi', 4, fillvalue='!')
+    ['abcd', 'efgh', 'i!!!']
+    """
+    if fillvalue:
+        padding = fillvalue[0] * (n - len(text) % n)
+    else:
+        padding = ''
+    return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
+
+def transpose(items, transposition):
+    """Moves items around according to the given transposition
+    
+    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
+    ['a', 'b', 'c', 'd']
+    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
+    ['d', 'b', 'c', 'a']
+    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
+    [13, 12, 14, 11, 15, 10]
+    """
+    transposed = [''] * len(transposition)
+    for p, t in enumerate(transposition):
+       transposed[p] = items[t]
+    return transposed
+
+def untranspose(items, transposition):
+    """Undoes a transpose
+    
+    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
+    ['a', 'b', 'c', 'd']
+    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
+    ['a', 'b', 'c', 'd']
+    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
+    [10, 11, 12, 13, 14, 15]
+    """
+    transposed = [''] * len(transposition)
+    for p, t in enumerate(transposition):
+       transposed[t] = items[p]
+    return transposed
+
+def deduplicate(text):
+    return list(collections.OrderedDict.fromkeys(text))
 
 
 def caesar_encipher_letter(accented_letter, shift):
@@ -37,14 +126,14 @@ def caesar_encipher_letter(accented_letter, shift):
             alphabet_start = ord('A')
         else:
             alphabet_start = ord('a')
-        return chr(((ord(letter) - alphabet_start + shift) % 26) +
+        return chr(((ord(letter) - alphabet_start + shift) % 26) + 
                    alphabet_start)
     else:
         return letter
 
 def caesar_decipher_letter(letter, shift):
     """Decipher a letter, given a shift amount
-
+    
     >>> caesar_decipher_letter('b', 1)
     'a'
     >>> caesar_decipher_letter('b', 2)
@@ -54,7 +143,7 @@ def caesar_decipher_letter(letter, shift):
 
 def caesar_encipher(message, shift):
     """Encipher a message with the Caesar cipher of given shift
-
+    
     >>> caesar_encipher('abc', 1)
     'bcd'
     >>> caesar_encipher('abc', 2)
@@ -71,7 +160,7 @@ def caesar_encipher(message, shift):
 
 def caesar_decipher(message, shift):
     """Decipher a message with the Caesar cipher of given shift
-
+    
     >>> caesar_decipher('bcd', 1)
     'abc'
     >>> caesar_decipher('cde', 2)
@@ -83,7 +172,885 @@ def caesar_decipher(message, shift):
     """
     return caesar_encipher(message, -shift)
 
+def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
+    """Encipher a letter, given a multiplier and adder
+    
+    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
+            for l in string.ascii_uppercase])
+    'HKNQTWZCFILORUXADGJMPSVYBE'
+    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
+            for l in string.ascii_uppercase])
+    'FILORUXADGJMPSVYBEHKNQTWZC'
+    """
+    letter = unaccent(accented_letter)
+    if letter in string.ascii_letters:
+        if letter in string.ascii_uppercase:
+            alphabet_start = ord('A')
+        else:
+            alphabet_start = ord('a')
+        letter_number = ord(letter) - alphabet_start
+        if one_based: letter_number += 1
+        cipher_number = (letter_number * multiplier + adder) % 26
+        if one_based: cipher_number -= 1
+        return chr(cipher_number % 26 + alphabet_start)
+    else:
+        return letter
+
+def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
+    """Encipher a letter, given a multiplier and adder
+    
+    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
+            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
+    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
+            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
+    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    """
+    if letter in string.ascii_letters:
+        if letter in string.ascii_uppercase:
+            alphabet_start = ord('A')
+        else:
+            alphabet_start = ord('a')
+        cipher_number = ord(letter) - alphabet_start
+        if one_based: cipher_number += 1
+        plaintext_number = ( 
+            modular_division_table[multiplier]
+                                  [(cipher_number - adder) % 26])
+        if one_based: plaintext_number -= 1
+        return chr(plaintext_number % 26 + alphabet_start) 
+    else:
+        return letter
+
+def affine_encipher(message, multiplier=1, adder=0, one_based=True):
+    """Encipher a message
+    
+    >>> affine_encipher('hours passed during which jerico tried every ' \
+           'trick he could think of', 15, 22, True)
+    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
+    """
+    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) 
+                  for l in message]
+    return ''.join(enciphered)
+
+def affine_decipher(message, multiplier=1, adder=0, one_based=True):
+    """Decipher a message
+    
+    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
+           'jfaoe ls omytd jlaxe mh', 15, 22, True)
+    'hours passed during which jerico tried every trick he could think of'
+    """
+    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) 
+                  for l in message]
+    return ''.join(enciphered)
+
+
+class KeywordWrapAlphabet(Enum):
+    from_a = 1
+    from_last = 2
+    from_largest = 3
+
+
+def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Find the cipher alphabet given a keyword.
+    wrap_alphabet controls how the rest of the alphabet is added
+    after the keyword.
+
+    >>> keyword_cipher_alphabet_of('bayes')
+    'bayescdfghijklmnopqrtuvwxz'
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
+    'bayescdfghijklmnopqrtuvwxz'
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
+    'bayestuvwxzcdfghijklmnopqr'
+    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
+    'bayeszcdfghijklmnopqrtuvwx'
+    """
+    if wrap_alphabet == KeywordWrapAlphabet.from_a:
+        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + 
+                                              string.ascii_lowercase))
+    else:
+        if wrap_alphabet == KeywordWrapAlphabet.from_last:
+            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
+        else:
+            last_keyword_letter = sorted(sanitise(keyword))[-1]
+        last_keyword_position = string.ascii_lowercase.find(
+            last_keyword_letter) + 1
+        cipher_alphabet = ''.join(
+            deduplicate(sanitise(keyword) + 
+                        string.ascii_lowercase[last_keyword_position:] + 
+                        string.ascii_lowercase))
+    return cipher_alphabet
+
+
+def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Enciphers a message with a keyword substitution cipher.
+    wrap_alphabet controls how the rest of the alphabet is added
+    after the keyword.
+    0 : from 'a'
+    1 : from the last letter in the sanitised keyword
+    2 : from the largest letter in the sanitised keyword
+
+    >>> keyword_encipher('test message', 'bayes')
+    'rsqr ksqqbds'
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
+    'rsqr ksqqbds'
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
+    'lskl dskkbus'
+    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
+    'qspq jsppbcs'
+    """
+    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
+    return unaccent(message).lower().translate(cipher_translation)
+
+def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a):
+    """Deciphers a message with a keyword substitution cipher.
+    wrap_alphabet controls how the rest of the alphabet is added
+    after the keyword.
+    0 : from 'a'
+    1 : from the last letter in the sanitised keyword
+    2 : from the largest letter in the sanitised keyword
+    
+    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
+    'test message'
+    >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
+    'test message'
+    >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
+    'test message'
+    >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
+    'test message'
+    """
+    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
+    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
+    return message.lower().translate(cipher_translation)
+
+
+def vigenere_encipher(message, keyword):
+    """Vigenere encipher
+
+    >>> vigenere_encipher('hello', 'abc')
+    'hfnlp'
+    """
+    shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
+    pairs = zip(message, cycle(shifts))
+    return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
+
+def vigenere_decipher(message, keyword):
+    """Vigenere decipher
+
+    >>> vigenere_decipher('hfnlp', 'abc')
+    'hello'
+    """
+    shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
+    pairs = zip(message, cycle(shifts))
+    return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
+
+beaufort_encipher=vigenere_decipher
+beaufort_decipher=vigenere_encipher
+
+
+def transpositions_of(keyword):
+    """Finds the transpostions given by a keyword. For instance, the keyword
+    'clever' rearranges to 'celrv', so the first column (0) stays first, the
+    second column (1) moves to third, the third column (2) moves to second, 
+    and so on.
+
+    If passed a tuple, assume it's already a transposition and just return it.
+
+    >>> transpositions_of('clever')
+    (0, 2, 1, 4, 3)
+    >>> transpositions_of('fred')
+    (3, 2, 0, 1)
+    >>> transpositions_of((3, 2, 0, 1))
+    (3, 2, 0, 1)
+    """
+    if isinstance(keyword, tuple):
+        return keyword
+    else:
+        key = deduplicate(keyword)
+        transpositions = tuple(key.index(l) for l in sorted(key))
+        return transpositions
+
+def pad(message_len, group_len, fillvalue):
+    padding_length = group_len - message_len % group_len
+    if padding_length == group_len: padding_length = 0
+    padding = ''
+    for i in range(padding_length):
+        if callable(fillvalue):
+            padding += fillvalue()
+        else:
+            padding += fillvalue
+    return padding
+
+def column_transposition_encipher(message, keyword, fillvalue=' ', 
+      fillcolumnwise=False,
+      emptycolumnwise=False):
+    """Enciphers using the column transposition cipher.
+    Message is padded to allow all rows to be the same length.
+
+    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
+    'hlohr eltee '
+    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere  '
+    >>> column_transposition_encipher('hellothere', 'abcdef')
+    'hellothere  '
+    >>> column_transposition_encipher('hellothere', 'abcde')
+    'hellothere'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
+    'hlohreltee'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
+    'htehlelroe'
+    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
+    'heotllrehe'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
+    'holrhetlee'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
+    'htleehoelr'
+    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
+    'hleolteher'
+    >>> column_transposition_encipher('hellothere', 'cleverly')
+    'hleolthre e '
+    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
+    'hleolthre!e!'
+    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
+    'hleolthre*e*'
+    """
+    transpositions = transpositions_of(keyword)
+    message += pad(len(message), len(transpositions), fillvalue)
+    if fillcolumnwise:
+        rows = every_nth(message, len(message) // len(transpositions))
+    else:
+        rows = chunks(message, len(transpositions))
+    transposed = [transpose(r, transpositions) for r in rows]
+    if emptycolumnwise:
+        return combine_every_nth(transposed)
+    else:
+        return ''.join(chain(*transposed))
+
+def column_transposition_decipher(message, keyword, fillvalue=' ', 
+      fillcolumnwise=False,
+      emptycolumnwise=False):
+    """Deciphers using the column transposition cipher.
+    Message is padded to allow all rows to be the same length.
+
+    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
+    'hellothere'
+    >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
+    'hellothere'
+    >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
+    'hellothere'
+    """
+    transpositions = transpositions_of(keyword)
+    message += pad(len(message), len(transpositions), fillvalue)
+    if emptycolumnwise:
+        rows = every_nth(message, len(message) // len(transpositions))
+    else:
+        rows = chunks(message, len(transpositions))
+    untransposed = [untranspose(r, transpositions) for r in rows]
+    if fillcolumnwise:
+        return combine_every_nth(untransposed)
+    else:
+        return ''.join(chain(*untransposed))
+
+def scytale_encipher(message, rows, fillvalue=' '):
+    """Enciphers using the scytale transposition cipher.
+    Message is padded with spaces to allow all rows to be the same length.
+
+    >>> scytale_encipher('thequickbrownfox', 3)
+    'tcnhkfeboqrxuo iw '
+    >>> scytale_encipher('thequickbrownfox', 4)
+    'tubnhirfecooqkwx'
+    >>> scytale_encipher('thequickbrownfox', 5)
+    'tubn hirf ecoo qkwx '
+    >>> scytale_encipher('thequickbrownfox', 6)
+    'tqcrnxhukof eibwo '
+    >>> scytale_encipher('thequickbrownfox', 7)
+    'tqcrnx hukof  eibwo  '
+    """
+    # transpositions = [i for i in range(math.ceil(len(message) / rows))]
+    # return column_transposition_encipher(message, transpositions, 
+    #     fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True)
+    transpositions = [i for i in range(rows)]
+    return column_transposition_encipher(message, transpositions, 
+        fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
+
+def scytale_decipher(message, rows):
+    """Deciphers using the scytale transposition cipher.
+    Assumes the message is padded so that all rows are the same length.
+    
+    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
+    'thequickbrownfox  '
+    >>> scytale_decipher('tubnhirfecooqkwx', 4)
+    'thequickbrownfox'
+    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
+    'thequickbrownfox    '
+    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
+    'thequickbrownfox  '
+    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
+    'thequickbrownfox     '
+    """
+    # transpositions = [i for i in range(math.ceil(len(message) / rows))]
+    # return column_transposition_decipher(message, transpositions, 
+    #     fillcolumnwise=False, emptycolumnwise=True)
+    transpositions = [i for i in range(rows)]
+    return column_transposition_decipher(message, transpositions, 
+        fillcolumnwise=True, emptycolumnwise=False)
+
+
+def railfence_encipher(message, height, fillvalue=''):
+    """Railfence cipher.
+    Works by splitting the text into sections, then reading across them to
+    generate the rows in the cipher. The rows are then combined to form the
+    ciphertext.
+
+    Example: the plaintext "hellotherefriends", with a height of four, written 
+    out in the railfence as 
+       h h i
+       etere*
+       lorfns
+       l e d
+    (with the * showing the one character to finish the last section). 
+    Each 'section' is two columns, but unfolded. In the example, the first
+    section is 'hellot'.
+
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!')
+    'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!')
+    'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!')
+    'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!')
+    'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3)
+    'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5)
+    'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp'
+    >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7)
+    'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic'
+    """
+    sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue)
+    n_sections = len(sections)
+    # Add the top row
+    rows = [''.join([s[0] for s in sections])]
+    # process the middle rows of the grid
+    for r in range(1, height-1):
+        rows += [''.join([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])]
+    # process the bottom row
+    rows += [''.join([s[height - 1:height] for s in sections])]
+    # rows += [' '.join([s[height - 1] for s in sections])]
+    return ''.join(rows)
+
+def railfence_decipher(message, height, fillvalue=''):
+    """Railfence decipher. 
+    Works by reconstructing the grid used to generate the ciphertext, then
+    unfolding the sections so the text can be concatenated together.
+
+    Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first
+    work out that the second row has a character missing, find the rows of the
+    grid, then split the section into its two columns.
+
+    'hhieterelorfnsled' is split into
+        h h i
+        etere
+        lorfns
+        l e d
+    (spaces added for clarity), which is stored in 'rows'. This is then split
+    into 'down_rows' and 'up_rows':
+
+    down_rows:
+       hhi
+       eee
+       lrn
+       led
+
+    up_rows:
+       tr
+       ofs
+
+    These are then zipped together (after the up_rows are reversed) to recover 
+    the plaintext.
+
+    Most of the procedure is about finding the correct lengths for each row then
+    splitting the ciphertext into those rows.
+
+    >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!')
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3)
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5)
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7)
+    'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
+    """
+    # find the number and size of the sections, including how many characters
+    #   are missing for a full grid
+    n_sections = math.ceil(len(message) / ((height - 1) * 2))
+    padding_to_add = n_sections * (height - 1) * 2 - len(message)
+    # row_lengths are for the both up rows and down rows
+    row_lengths = [n_sections] * (height - 1) * 2
+    for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1):
+        row_lengths[i] -= 1
+    # folded_rows are the combined row lengths in the middle of the railfence
+    folded_row_lengths = [row_lengths[0]]
+    for i in range(1, height-1):
+        folded_row_lengths += [row_lengths[i] + row_lengths[-i]]
+    folded_row_lengths += [row_lengths[height - 1]]
+    # find the rows that form the railfence grid
+    rows = []
+    row_start = 0
+    for i in folded_row_lengths:
+        rows += [message[row_start:row_start + i]]
+        row_start += i
+    # split the rows into the 'down_rows' (those that form the first column of
+    #   a section) and the 'up_rows' (those that ofrm the second column of a 
+    #   section).
+    down_rows = [rows[0]]
+    up_rows = []
+    for i in range(1, height-1):
+        down_rows += [''.join([c for n, c in enumerate(rows[i]) if n % 2 == 0])]
+        up_rows += [''.join([c for n, c in enumerate(rows[i]) if n % 2 == 1])]
+    down_rows += [rows[-1]]
+    up_rows.reverse()
+    return ''.join(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r)
+
+def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False):
+    """Makes the key column for a Cadenus cipher (the column down between the
+        rows of letters)
+
+    >>> make_cadenus_keycolumn()['a']
+    0
+    >>> make_cadenus_keycolumn()['b']
+    1
+    >>> make_cadenus_keycolumn()['c']
+    2
+    >>> make_cadenus_keycolumn()['v']
+    21
+    >>> make_cadenus_keycolumn()['w']
+    21
+    >>> make_cadenus_keycolumn()['z']
+    24
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['a']
+    1
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['b']
+    0
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['c']
+    24
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['i']
+    18
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['j']
+    18
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['v']
+    6
+    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['z']
+    2
+    """
+    index_to_remove = string.ascii_lowercase.find(doubled_letters[0])
+    short_alphabet = string.ascii_lowercase[:index_to_remove] + string.ascii_lowercase[index_to_remove+1:]
+    if reverse:
+        short_alphabet = ''.join(reversed(short_alphabet))
+    start_pos = short_alphabet.find(start)
+    rotated_alphabet = short_alphabet[start_pos:] + short_alphabet[:start_pos]
+    keycolumn = {l: i for i, l in enumerate(rotated_alphabet)}
+    keycolumn[doubled_letters[0]] = keycolumn[doubled_letters[1]]
+    return keycolumn
+
+def cadenus_encipher(message, keyword, keycolumn, fillvalue='a'):
+    """Encipher with the Cadenus cipher
+
+    >>> cadenus_encipher(sanitise('Whoever has made a voyage up the Hudson ' \
+                                  'must remember the Kaatskill mountains. ' \
+                                  'They are a dismembered branch of the great'), \
+                'wink', \
+                make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True))
+    'antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned'
+    >>> cadenus_encipher(sanitise('a severe limitation on the usefulness of ' \
+                                  'the cadenus is that every message must be ' \
+                                  'a multiple of twenty-five letters long'), \
+                'easy', \
+                make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True))
+    'systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtofarenuseieeieltarlmentieetogevesitfaisltngeeuvowul'
+    """
+    rows = chunks(message, len(message) // 25, fillvalue=fillvalue)
+    columns = zip(*rows)
+    rotated_columns = [col[start:] + col[:start] for start, col in zip([keycolumn[l] for l in keyword], columns)]    
+    rotated_rows = zip(*rotated_columns)
+    transpositions = transpositions_of(keyword)
+    transposed = [transpose(r, transpositions) for r in rotated_rows]
+    return ''.join(chain(*transposed))
+
+def cadenus_decipher(message, keyword, keycolumn, fillvalue='a'):
+    """
+    >>> cadenus_decipher('antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaa' \
+                         'suvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned', \
+                 'wink', \
+                 make_cadenus_keycolumn(reverse=True))
+    'whoeverhasmadeavoyageupthehudsonmustrememberthekaatskillmountainstheyareadismemberedbranchofthegreat'
+    >>> cadenus_decipher('systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtof' \
+                        'arenuseieeieltarlmentieetogevesitfaisltngeeuvowul', \
+                 'easy', \
+                 make_cadenus_keycolumn(reverse=True))
+    'aseverelimitationontheusefulnessofthecadenusisthateverymessagemustbeamultipleoftwentyfiveletterslong'
+    """
+    rows = chunks(message, len(message) // 25, fillvalue=fillvalue)
+    transpositions = transpositions_of(keyword)
+    untransposed_rows = [untranspose(r, transpositions) for r in rows]
+    columns = zip(*untransposed_rows)
+    rotated_columns = [col[-start:] + col[:-start] for start, col in zip([keycolumn[l] for l in keyword], columns)]    
+    rotated_rows = zip(*rotated_columns)
+    # return rotated_columns
+    return ''.join(chain(*rotated_rows))
+
+
+def hill_encipher(matrix, message_letters, fillvalue='a'):
+    """Hill cipher
+
+    >>> hill_encipher(np.matrix([[7,8], [11,11]]), 'hellothere')
+    'drjiqzdrvx'
+    >>> hill_encipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \
+        'hello there')
+    'tfjflpznvyac'
+    """
+    n = len(matrix)
+    sanitised_message = sanitise(message_letters)
+    if len(sanitised_message) % n != 0:
+        padding = fillvalue[0] * (n - len(sanitised_message) % n)
+    else:
+        padding = ''
+    message = [ord(c) - ord('a') for c in sanitised_message + padding]
+    message_chunks = [message[i:i+n] for i in range(0, len(message), n)]
+    # message_chunks = chunks(message, len(matrix), fillvalue=None)
+    enciphered_chunks = [((matrix * np.matrix(c).T).T).tolist()[0] 
+            for c in message_chunks]
+    return ''.join([chr(int(round(l)) % 26 + ord('a')) 
+            for l in sum(enciphered_chunks, [])])
+
+def hill_decipher(matrix, message, fillvalue='a'):
+    """Hill cipher
+
+    >>> hill_decipher(np.matrix([[7,8], [11,11]]), 'drjiqzdrvx')
+    'hellothere'
+    >>> hill_decipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \
+        'tfjflpznvyac')
+    'hellothereaa'
+    """
+    adjoint = linalg.det(matrix)*linalg.inv(matrix)
+    inverse_determinant = modular_division_table[int(round(linalg.det(matrix))) % 26][1]
+    inverse_matrix = (inverse_determinant * adjoint) % 26
+    return hill_encipher(inverse_matrix, message, fillvalue)          
+
+
+# Where each piece of text ends up in the AMSCO transpositon cipher.
+# 'index' shows where the slice appears in the plaintext, with the slice
+# from 'start' to 'end'
+AmscoSlice = collections.namedtuple('AmscoSlice', ['index', 'start', 'end'])
+
+class AmscoFillStyle(Enum):
+    continuous = 1
+    same_each_row = 2
+    reverse_each_row = 3
+
+def amsco_transposition_positions(message, keyword, 
+      fillpattern=(1, 2),
+      fillstyle=AmscoFillStyle.continuous,
+      fillcolumnwise=False,
+      emptycolumnwise=True):
+    """Creates the grid for the AMSCO transposition cipher. Each element in the
+    grid shows the index of that slice and the start and end positions of the
+    plaintext that go to make it up.
+
+    >>> amsco_transposition_positions(string.ascii_lowercase, 'freddy', \
+        fillpattern=(1, 2)) # doctest:  +NORMALIZE_WHITESPACE
+    [[AmscoSlice(index=3, start=4, end=6),
+     AmscoSlice(index=2, start=3, end=4),
+     AmscoSlice(index=0, start=0, end=1),
+     AmscoSlice(index=1, start=1, end=3),
+     AmscoSlice(index=4, start=6, end=7)],
+    [AmscoSlice(index=8, start=12, end=13),
+     AmscoSlice(index=7, start=10, end=12),
+     AmscoSlice(index=5, start=7, end=9),
+     AmscoSlice(index=6, start=9, end=10),
+     AmscoSlice(index=9, start=13, end=15)],
+    [AmscoSlice(index=13, start=19, end=21),
+     AmscoSlice(index=12, start=18, end=19),
+     AmscoSlice(index=10, start=15, end=16),
+     AmscoSlice(index=11, start=16, end=18),
+     AmscoSlice(index=14, start=21, end=22)],
+    [AmscoSlice(index=18, start=27, end=28),
+     AmscoSlice(index=17, start=25, end=27),
+     AmscoSlice(index=15, start=22, end=24),
+     AmscoSlice(index=16, start=24, end=25),
+     AmscoSlice(index=19, start=28, end=30)]]
+    """
+    transpositions = transpositions_of(keyword)
+    fill_iterator = cycle(fillpattern)
+    indices = count()
+    message_length = len(message)
+
+    current_position = 0
+    grid = []
+    current_fillpattern = fillpattern
+    while current_position < message_length:
+        row = []
+        if fillstyle == AmscoFillStyle.same_each_row:
+            fill_iterator = cycle(fillpattern)
+        if fillstyle == AmscoFillStyle.reverse_each_row:
+            fill_iterator = cycle(current_fillpattern)
+        for _ in range(len(transpositions)):
+            index = next(indices)
+            gap = next(fill_iterator)
+            row += [AmscoSlice(index, current_position, current_position + gap)]
+            current_position += gap
+        grid += [row]
+        if fillstyle == AmscoFillStyle.reverse_each_row:
+            current_fillpattern = list(reversed(current_fillpattern))
+    return [transpose(r, transpositions) for r in grid]
+
+def amsco_transposition_encipher(message, keyword, 
+    fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row):
+    """AMSCO transposition encipher.
+
+    >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(1, 2))
+    'hoteelhler'
+    >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(2, 1))
+    'hetelhelor'
+    >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(1, 2))
+    'hotelerelh'
+    >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(2, 1))
+    'hetelorlhe'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'encode')
+    'etecstthhomoerereenisxip'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2))
+    'hetcsoeisterereipexthomn'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous)
+    'hecsoisttererteipexhomen'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(2, 1))
+    'heecisoosttrrtepeixhemen'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2))
+    'hxtomephescieretoeisnter'
+    >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous)
+    'hxomeiphscerettoisenteer'
+    """
+    grid = amsco_transposition_positions(message, keyword, 
+        fillpattern=fillpattern, fillstyle=fillstyle)
+    ct_as_grid = [[message[s.start:s.end] for s in r] for r in grid]
+    return combine_every_nth(ct_as_grid)
+
+
+def amsco_transposition_decipher(message, keyword, 
+    fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row):
+    """AMSCO transposition decipher
+
+    >>> amsco_transposition_decipher('hoteelhler', 'abc', fillpattern=(1, 2))
+    'hellothere'
+    >>> amsco_transposition_decipher('hetelhelor', 'abc', fillpattern=(2, 1))
+    'hellothere'
+    >>> amsco_transposition_decipher('hotelerelh', 'acb', fillpattern=(1, 2))
+    'hellothere'
+    >>> amsco_transposition_decipher('hetelorlhe', 'acb', fillpattern=(2, 1))
+    'hellothere'
+    >>> amsco_transposition_decipher('etecstthhomoerereenisxip', 'encode')
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hetcsoeisterereipexthomn', 'cipher', fillpattern=(1, 2))
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hecsoisttererteipexhomen', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous)
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('heecisoosttrrtepeixhemen', 'cipher', fillpattern=(2, 1))
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hxtomephescieretoeisnter', 'cipher', fillpattern=(1, 3, 2))
+    'hereissometexttoencipher'
+    >>> amsco_transposition_decipher('hxomeiphscerettoisenteer', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous)
+    'hereissometexttoencipher'
+    """
+
+    grid = amsco_transposition_positions(message, keyword, 
+        fillpattern=fillpattern, fillstyle=fillstyle)
+    transposed_sections = [s for c in [l for l in zip(*grid)] for s in c]
+    plaintext_list = [''] * len(transposed_sections)
+    current_pos = 0
+    for slice in transposed_sections:
+        plaintext_list[slice.index] = message[current_pos:current_pos-slice.start+slice.end][:len(message[slice.start:slice.end])]
+        current_pos += len(message[slice.start:slice.end])
+    return ''.join(plaintext_list)
+
+
+class PocketEnigma(object):
+    """A pocket enigma machine
+    The wheel is internally represented as a 26-element list self.wheel_map, 
+    where wheel_map[i] == j shows that the position i places on from the arrow 
+    maps to the position j places on.
+    """
+    def __init__(self, wheel=1, position='a'):
+        """initialise the pocket enigma, including which wheel to use and the
+        starting position of the wheel.
+
+        The wheel is either 1 or 2 (the predefined wheels) or a list of letter
+        pairs.
+
+        The position is the letter pointed to by the arrow on the wheel.
+
+        >>> pe.wheel_map
+        [25, 4, 23, 10, 1, 7, 9, 5, 12, 6, 3, 17, 8, 14, 13, 21, 19, 11, 20, 16, 18, 15, 24, 2, 22, 0]
+        >>> pe.position
+        0
+        """
+        self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), 
+            ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), 
+            ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')]
+        self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), 
+            ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), 
+            ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')]
+        if wheel == 1:
+            self.make_wheel_map(self.wheel1)
+        elif wheel == 2:
+            self.make_wheel_map(self.wheel2)
+        else:
+            self.validate_wheel_spec(wheel)
+            self.make_wheel_map(wheel)
+        if position in string.ascii_lowercase:
+            self.position = ord(position) - ord('a')
+        else:
+            self.position = position
+
+    def make_wheel_map(self, wheel_spec):
+        """Expands a wheel specification from a list of letter-letter pairs
+        into a full wheel_map.
+
+        >>> pe.make_wheel_map(pe.wheel2)
+        [2, 3, 0, 1, 22, 8, 15, 12, 5, 10, 9, 13, 7, 11, 16, 6, 14, 25, 20, 21, 18, 19, 4, 24, 23, 17]
+        """
+        self.validate_wheel_spec(wheel_spec)
+        self.wheel_map = [0] * 26
+        for p in wheel_spec:
+            self.wheel_map[ord(p[0]) - ord('a')] = ord(p[1]) - ord('a')
+            self.wheel_map[ord(p[1]) - ord('a')] = ord(p[0]) - ord('a')
+        return self.wheel_map
+
+    def validate_wheel_spec(self, wheel_spec):
+        """Validates that a wheel specificaiton will turn into a valid wheel
+        map.
+
+        >>> pe.validate_wheel_spec([])
+        Traceback (most recent call last):
+            ...
+        ValueError: Wheel specification has 0 pairs, requires 13
+        >>> pe.validate_wheel_spec([('a', 'b', 'c')]*13)
+        Traceback (most recent call last):
+            ...
+        ValueError: Not all mappings in wheel specificationhave two elements
+        >>> pe.validate_wheel_spec([('a', 'b')]*13)
+        Traceback (most recent call last):
+            ...
+        ValueError: Wheel specification does not contain 26 letters
+        """
+        if len(wheel_spec) != 13:
+            raise ValueError("Wheel specification has {} pairs, requires 13".
+                format(len(wheel_spec)))
+        for p in wheel_spec:
+            if len(p) != 2:
+                raise ValueError("Not all mappings in wheel specification"
+                    "have two elements")
+        if len(set([p[0] for p in wheel_spec] + 
+                    [p[1] for p in wheel_spec])) != 26:
+            raise ValueError("Wheel specification does not contain 26 letters")
+
+    def encipher_letter(self, letter):
+        """Enciphers a single letter, by advancing the wheel before looking up
+        the letter on the wheel.
+
+        >>> pe.set_position('f')
+        5
+        >>> pe.encipher_letter('k')
+        'h'
+        """
+        self.advance()
+        return self.lookup(letter)
+    decipher_letter = encipher_letter
+
+    def lookup(self, letter):
+        """Look up what a letter enciphers to, without turning the wheel.
+
+        >>> pe.set_position('f')
+        5
+        >>> ''.join([pe.lookup(l) for l in string.ascii_lowercase])
+        'udhbfejcpgmokrliwntsayqzvx'
+        >>> pe.lookup('A')
+        ''
+        """
+        if letter in string.ascii_lowercase:
+            return chr(
+                (self.wheel_map[(ord(letter) - ord('a') - self.position) % 26] + 
+                    self.position) % 26 + 
+                ord('a'))
+        else:
+            return ''
+
+    def advance(self):
+        """Advances the wheel one position.
+
+        >>> pe.set_position('f')
+        5
+        >>> pe.advance()
+        6
+        """
+        self.position = (self.position + 1) % 26
+        return self.position
+
+    def encipher(self, message, starting_position=None):
+        """Enciphers a whole message.
+
+        >>> pe.set_position('f')
+        5
+        >>> pe.encipher('helloworld')
+        'kjsglcjoqc'
+        >>> pe.set_position('f')
+        5
+        >>> pe.encipher('kjsglcjoqc')
+        'helloworld'
+        >>> pe.encipher('helloworld', starting_position = 'x')
+        'egrekthnnf'
+        """
+        if starting_position:
+            self.set_position(starting_position)
+        transformed = ''
+        for l in message:
+            transformed += self.encipher_letter(l)
+        return transformed
+    decipher = encipher
+
+    def set_position(self, position):
+        """Sets the position of the wheel, by specifying the letter the arrow
+        points to.
+
+        >>> pe.set_position('a')
+        0
+        >>> pe.set_position('m')
+        12
+        >>> pe.set_position('z')
+        25
+        """
+        self.position = ord(position) - ord('a')
+        return self.position
+
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    doctest.testmod(extraglobs={'pe': PocketEnigma(1, 'a')})
index b84e43cc3e185b90bac4b540b4dacf7822b23539..bf00875c43e134fd2b46327e80c56c8468c60e58 100644 (file)
@@ -3,9 +3,15 @@ its use.
 """
 
 import string
+import random
 import norms
 import collections
 import unicodedata
+import itertools
+from math import log10
+import os 
+
+unaccent_specials = ''.maketrans({"’": "'"})
 
 def letters(text):
     """Remove all non-alphabetic characters from a text
@@ -32,7 +38,8 @@ def unaccent(text):
     >>> unaccent('HÉLLÖ')
     'HELLO'
     """
-    return unicodedata.normalize('NFKD', text).\
+    translated_text = text.translate(unaccent_specials)
+    return unicodedata.normalize('NFKD', translated_text).\
         encode('ascii', 'ignore').\
         decode('utf-8')
 
@@ -51,6 +58,120 @@ def sanitise(text):
     return letters(unaccent(text)).lower()
 
 
+def datafile(name, sep='\t'):
+    """Read key,value pairs from file.
+    """
+    with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), name), 'r') as f:
+        for line in f:
+            splits = line.split(sep)
+            yield [splits[0], int(splits[1])]
+
+english_counts = collections.Counter(dict(datafile('count_1l.txt')))
+normalised_english_counts = norms.normalise(english_counts)
+
+english_bigram_counts = collections.Counter(dict(datafile('count_2l.txt')))
+normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
+
+english_trigram_counts = collections.Counter(dict(datafile('count_3l.txt')))
+normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
+
+with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'words.txt'), 'r') as f:
+    keywords = [line.rstrip() for line in f]
+
+
+def weighted_choice(d):
+    """Generate random item from a dictionary of item counts
+    """
+    target = random.uniform(0, sum(d.values()))
+    cuml = 0.0
+    for (l, p) in d.items():
+        cuml += p
+        if cuml > target:
+            return l
+    return None
+
+def random_english_letter():
+    """Generate a random letter based on English letter counts
+    """
+    return weighted_choice(normalised_english_counts)
+
+
+def ngrams(text, n):
+    """Returns all n-grams of a text
+    
+    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
+    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
+     'nf', 'fo', 'ox']
+    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
+    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
+     'rown', 'ownf', 'wnfo', 'nfox']
+    """
+    return [text[i:i+n] for i in range(len(text)-n+1)]
+
+
+class Pdist(dict):
+    """A probability distribution estimated from counts in datafile.
+    Values are stored and returned as log probabilities.
+    """
+    def __init__(self, data=[], estimate_of_missing=None):
+        data1, data2 = itertools.tee(data)
+        self.total = sum([d[1] for d in data1])
+        for key, count in data2:
+            self[key] = log10(count / self.total)
+        self.estimate_of_missing = estimate_of_missing or (lambda k, N: 1./N)
+    def __missing__(self, key):
+        return self.estimate_of_missing(key, self.total)
+
+def log_probability_of_unknown_word(key, N):
+    """Estimate the probability of an unknown word.
+    """
+    return -log10(N * 10**((len(key) - 2) * 1.4))
+
+Pw = Pdist(datafile('count_1w.txt'), log_probability_of_unknown_word)
+Pw_wrong = Pdist(datafile('count_1w.txt'), lambda _k, N: log10(1/N))
+Pl = Pdist(datafile('count_1l.txt'), lambda _k, _N: 0)
+P2l = Pdist(datafile('count_2l.txt'), lambda _k, _N: 0)
+P3l = Pdist(datafile('count_3l.txt'), lambda _k, _N: 0)
+
+def Pwords(words):
+    """The Naive Bayes log probability of a sequence of words.
+    """
+    return sum(Pw[w.lower()] for w in words)
+
+def Pwords_wrong(words):
+    """The Naive Bayes log probability of a sequence of words.
+    """
+    return sum(Pw_wrong[w.lower()] for w in words)
+
+def Pletters(letters):
+    """The Naive Bayes log probability of a sequence of letters.
+    """
+    return sum(Pl[l.lower()] for l in letters)
+
+def Pbigrams(letters):
+    """The Naive Bayes log probability of the bigrams formed from a sequence
+    of letters.
+    """
+    return sum(P2l[p] for p in ngrams(letters, 2))
+
+def Ptrigrams(letters):
+    """The Naive Bayes log probability of the trigrams formed from a sequence
+    of letters.
+    """
+    return sum(P3l[p] for p in ngrams(letters, 3))
+
+
+def cosine_similarity_score(text):
+    """Finds the dissimilarity of a text to English, using the cosine distance
+    of the frequency distribution.
+
+    >>> cosine_similarity_score('abcabc') # doctest: +ELLIPSIS
+    0.26228882...
+    """
+    return norms.cosine_similarity(english_counts,
+                                   collections.Counter(sanitise(text)))
+
+
 if __name__ == "__main__":
     import doctest
     doctest.testmod()
index 36e7fa4440b41942386e550b4403d2bf7d59e058..66452940c376658e446bb704c7198ee1ace2be22 100644 (file)
--- a/norms.py
+++ b/norms.py
@@ -1,10 +1,9 @@
-"""Define a variety of norms for finding distances between vectors"""
-
 import collections
+from math import log10
 
 def normalise(frequencies):
     """Scale a set of frequencies so they sum to one
-
+    
     >>> sorted(normalise({1: 1, 2: 0}).items())
     [(1, 1.0), (2, 0.0)]
     >>> sorted(normalise({1: 1, 2: 1}).items())
@@ -14,13 +13,13 @@ def normalise(frequencies):
     >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items())
     [(1, 0.25), (2, 0.5), (3, 0.25)]
     """
-    length = sum([f for f in frequencies.values()])
-    return collections.defaultdict(int, ((k, v / length)
+    length = sum(f for f in frequencies.values())
+    return collections.defaultdict(int, ((k, v / length) 
         for (k, v) in frequencies.items()))
 
 def euclidean_scale(frequencies):
     """Scale a set of frequencies so they have a unit euclidean length
-
+    
     >>> sorted(euclidean_scale({1: 1, 2: 0}).items())
     [(1, 1.0), (2, 0.0)]
     >>> sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
@@ -31,21 +30,17 @@ def euclidean_scale(frequencies):
     [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
     """
     length = sum([f ** 2 for f in frequencies.values()]) ** 0.5
-    return collections.defaultdict(int, ((k, v / length)
+    return collections.defaultdict(int, ((k, v / length) 
         for (k, v) in frequencies.items()))
 
 def identity_scale(frequencies):
-    """Don't scale a set of frequencies. (For use when a function expects a
-    scaling function but you don't want to supply one.)
-    """
     return frequencies
         
 
 def l2(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as
-    dictionaries.
+    """Finds the distances between two frequency profiles, expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
-
+    
     >>> l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
     0.0
     >>> l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
@@ -67,7 +62,7 @@ def l2(frequencies1, frequencies2):
 euclidean_distance = l2
 
 def l1(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as
+    """Finds the distances between two frequency profiles, expressed as 
     dictionaries. Assumes every key in frequencies1 is also in frequencies2
 
     >>> l1({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
@@ -87,7 +82,7 @@ def l1(frequencies1, frequencies2):
     return total
 
 def l3(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as
+    """Finds the distances between two frequency profiles, expressed as 
     dictionaries. Assumes every key in frequencies1 is also in frequencies2
 
     >>> l3({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
@@ -110,10 +105,10 @@ def l3(frequencies1, frequencies2):
     return total ** (1/3)
 
 def geometric_mean(frequencies1, frequencies2):
-    """Finds the geometric mean of the absolute differences between two
-    frequency profiles, expressed as dictionaries.
+    """Finds the geometric mean of the absolute differences between two frequency profiles, 
+    expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
-
+    
     >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
     1
     >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
@@ -136,8 +131,8 @@ def geometric_mean(frequencies1, frequencies2):
     return total
 
 def harmonic_mean(frequencies1, frequencies2):
-    """Finds the harmonic mean of the absolute differences between two
-    frequency profiles, expressed as dictionaries.
+    """Finds the harmonic mean of the absolute differences between two frequency profiles, 
+    expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
 
     >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
@@ -165,8 +160,7 @@ def harmonic_mean(frequencies1, frequencies2):
 
 
 def cosine_similarity(frequencies1, frequencies2):
-    """Finds the distances between two frequency profiles, expressed as 
-    dictionaries.
+    """Finds the distances between two frequency profiles, expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
 
     >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS