From e516b576576969e82892483e18bea2b3017d498f Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Thu, 7 Jan 2016 16:45:30 +0000 Subject: [PATCH] Updated code from development branch --- SIGNED.md | 158 ++++++++ cipher.py | 987 ++++++++++++++++++++++++++++++++++++++++++++- language_models.py | 123 +++++- norms.py | 38 +- 4 files changed, 1273 insertions(+), 33 deletions(-) create mode 100644 SIGNED.md diff --git a/SIGNED.md b/SIGNED.md new file mode 100644 index 0000000..3204992 --- /dev/null +++ b/SIGNED.md @@ -0,0 +1,158 @@ +##### Signed by https://keybase.io/neilnjae +``` +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v2 + +iQIcBAABCAAGBQJWjpYtAAoJEJPB2e07PgbqRXUP/jIADsBXAgHR7JTnNp11G6Bj +ne+9n8CJ/Kqf2rRVzVWLePPildK6AkDtKKulJsVNJ+cD7Kt+C2v6GR7DoXReh3ak +Dp14k5TQ7ta9kP01e+p7eNZ5khn7sHJy1atBVZ7NYP5oSvr20Wjs5Hc/v33sRPAq +2D0P1hRD7lrv/M5PG1WBXuhnE+QsuC63g7FQu/4/J7uaFmoOCq75seYMWJAL0LPY +VZCuNB4/45f/WoPY0M9LqG4uOBI0snmoVLFJV0y9GesP9qmzxSsNGsWymeBIU/+x +i7vc45Txb+KQjX9crl+A3PBrm8NpTxEQGi4+zt9B+3TwsmcAgwnM3316JJrsLG0V +cVlqpZ0CmBcKj2R+Z2+UDe5dfPIb4rIk9b3KY7e2nYLh5pBjyFiPxJf/hkit56vs +ENLyvXx1IoEoR9VJaTRCjTz+ViwMFEQFU6pqsj6yH6qS6pekYIENggRyol6q2qPt +dHG7SgZtB2ynowJ7kQHs90S7zHhY7XCOENLT93gRSKfYbu4lOni5P2OFrfMH6tjR +fbB5rNPTJA0k6i57fi22i7I5kFt90EyitXgBZaoR23HlXIhIKATIrd3BWLB68VFd +1x2VrcgjNoDPQWGoNoRorJP4p68FVqtbKXbGEXYfLAJfNwW6FCEQ9Kg83OFkUOyP +r7Hd2gCiTxL10XeabFEJ +=exxP +-----END PGP SIGNATURE----- + +``` + + + +### Begin signed statement + +#### Expect + +``` +size exec file contents + ./ +384 .gitignore a93de2ae5c2a47a38599751d1f914566569dfa09dd1778e207117db6c71421dd + 2012/ +1678 1a.ciphertext c7c16198650e7d91577683acca664a0e588ac5474046a028dea3d9dc4b388df8 +1678 1a.plaintext 1b34de5b1d1422a3089c54d751dfd3938449c98526137a0b8b3b6561a700f9b5 +3312 1b.ciphertext ec263c5cb415f99924e5744846c618b2566a072ac79f6b3cfdde294b99c491fd +3323 1b.plaintext a7fde879259e64fccca52618a9c008f2111e485ed1c6bfad863c90d8e6ceacae +1716 2a.ciphertext 2203112c5b43e09b61d8e2918554831559efc7a1157aafda85c25d5ae67f2f0f +1716 2a.plaintext 28afae88a3b3f0e553f326be7efb896587cb4ac43b9fd664d2cc3f9b2489de35 +2268 2b.ciphertext 587ba7bb4c39d6bd23cbefdcef63d1211d81f6340d401027f9b8e97c97a71b6c +2268 2b.plaintext 4ee16e37ec82a6ea5446c3d03689bcf608d7408cf79542accaabc297fe6d9f97 +1549 3a.ciphertext fcfff1c90fb5e0cb4240b0617cd7a0bfb1f81ee513a572b44562a0e9db9b4a1b +1549 3a.plaintext 54c6b1ec3364c34a824ee693c00fa41c466f4bfccad281c4111ce1966232c9f4 +2009 3b.ciphertext 4a568ac5e67430cd30a2bfd73214a7b3984b3117be16f649ea99c40d3de551c3 +2118 3b.plaintext a2b398c99fe1f00d3e7c558c5a9ea9d6739120d63e84b17f19d8f524ae9adf26 +1495 4a.ciphertext 4cbf7aa5d463087f40c3ca724a3860f38b8cad1fc281374eb4567df004d3b5ca +1559 4a.plaintext b1c02b4ed9df29fcefb046aec75a7702f6e6c1cd28bd56cc67bec1f4ae2af4fb +2137 4b.ciphertext 829ef4812a023567b9652c57f92da6f48c0b854694eac1a7c9452645723e15db +2212 4b.plaintext d811c945afab0212c346531fd557c0f1d9a3fb0207835152a1426deda9e0b2a1 +1449 5a.ciphertext 9fb9fbe92a26b3d1b2f54909e732a0d1afde90f97483affcfa9bbacb4edf3d77 +1524 5a.plaintext 203ea3702e17269cbed804f019dd887ae7fc27b2585f1551fc758692e5ba14dd +2561 5b.ciphertext c0bc2532c853e723fb7c3529c60ca859b7d61af238a98ddaabe0c01423e2a3ab +2618 5b.plaintext 61431a5bfd9a0e1e21e8ac7ba764ba46cc4b2a3e7adf22fff13153fdc278a451 +1225 6a.ciphertext d22c65b2445ab1228c8351239c37310188d4ec9e173825da43f79509d957aada +1285 6a.plaintext 27e574c224ad11d7b6258ef6212e8961decac5af3e93c9b08b0919c82c376f98 +1478 6b.ciphertext e80fc065648e8a4207a7d698cd0f39722d69df819fdf756a7d30e2886670542c +1544 6b.plaintext 424fc647b26f1ef153ec73eda2745896813bc1c6b498a5a0b0b05ff931ec5bf6 +1331 7a.ciphertext d73fe6633bafb097a283396bb5a9b8a5f025031453d1df14156f9c41e898fac3 +1412 7a.plaintext e13149a6f2df3d7d466f761d58c4a8ca3a82fbbc4f6dca983e83442e5caa748f +2551 7b.ciphertext 15200bf858dede4f6cabffaac54c14d37171cd4f7641a059258cd44b6cb98e7d +2661 7b.plaintext 430d167225a8f8c98d6e3f69aaca5846ba7d112567b987f3481832b07b03fc05 +1125 8a.ciphertext 5c47ee5314dc118b515b2ab8dc5ed14c492294e70a67f9329ed10d3486fd9ba6 +1195 8a.plaintext a1deee4fa908fb536035435eb066d5c1fed91f962cfd0390f5c51fbb60caceb7 +1783 8b.ciphertext c782e2765c9e3cb0ec72cc77f063ad7f90f2a003d5078b452c24ce3a4549f696 +1783 8b.plaintext 925b270f35dce522f0447085f78795a6f8101013648c836b029f0535f4d752e6 + 2013/ +1340 1a.ciphertext 6b69f06bd684fdaa79ff35497715af6ded3c176277859d1d774bb2569dd53a97 +1340 1a.plaintext cbdaac33a7943a752ba5dd21cf7af23bdbdd22e4a5feb0bccecce45d605d0581 +1495 1b.ciphertext 2e0389e2c7d3156892f7d361ee79f86eb79d34812fca37d8ce692cb6f3976446 +1495 1b.plaintext 717b1f8955515a5d5d2f049f0ffe137820d71f8b2a1689e1a29ec30ed7c9d473 +1135 2a.ciphertext 85ad3dac4751a8db90f391d0f06935a5ef2118d9ac72e05fe0518d7f77bff058 +1135 2a.plaintext 11cdf7c462c536085daced417023adaeff2f6b7a6162ccd12b340b079bd36b48 +926 2b.ciphertext 0fc59b28e4cd2db0aa9f28a603e2b25988a6688f05f90a59b205088093721340 +973 2b.plaintext 019c012f87531e2f9173acae952e7d4a304ed1a62ef341e71455a128d3e7056f +983 3a.ciphertext 20ab3ad9342e51d82243a678eaf2bc9187553bcedb50c830541a54f34dc9b3dd +1090 3b.ciphertext c35efccfb68ce3eaa5e7c699ed36a09f23aa719125e4df9f2faef11da1cd5d8a +1134 4a.ciphertext 523d35a8b412bf2e2a88b160529cae76c512d08f2a589ba53b2ac3c4d4184858 +1061 4b.ciphertext 2cdebce220d9d20ef4c85f208999f81940383a9181bbb0e63b37b7031edcb688 +875 5a.ciphertext b841414d1fbf8af991df389b1b9466d0401c39074f7433dcf3e90648b4d9ab5f +957 5b.ciphertext 5b6303144fde83cdcdb74976da515933444b7c29efd8ad15d33e65edd2ef78fb +712 6a.ciphertext 1313d5ffe4e6d2ee82b4f4bbffbe522d5e073775baa53a11bec10c2a693bc7da +1888 6b.ciphertext 137618d9d92a8a28b7b7fff9a385c4c2db76dff10e0d326895cb0723a63a6adc +2132 7a.ciphertext d604c136b793701d7501c72821d4069975ce098c2da55fac29331f0045c62b5f +1565 7b.ciphertext adeef9d9e985c534c40acee5c6734d04195895a32efd9452a89cafef1fdc91b8 +182 mona-lisa-words.txt d7d05c9c86f6282fa66df5f4ca795c89f01cef88a9ce8c921ce4484b3d6078a7 +1190 solutions.txt d9575ed8f930520da2e84047ad06f0b51b7d98e1d122f140142b3674bbdbf44f + 2014/ + 2015/ +10000 pysol.zip 64e25c247e877f37285db635fcb5dea101e0c531fdbfe5000bcc088c22caad79|68d878fb019198e691d321be1dd6744fae0b48da9c4e705c34f96e351b00fbed +2195 sol.py.zip 4c22e57eaacd8a91ef9b0c338429e400210a03dec9fbacec4a45fe1faed50720|1fd0a62af53125eacfa11016088451e2c58269d98aaef74c320730ebc7601b66 +18025 LICENSE a01259a1b522cf0de95824f9860613b453153eebac468e96196d5d7dba84786c +61 README.md 277247b410300ee16477b12ca54ad878d81c8061f6134e2e1cadccaf299de3a3 +469 affine_break_parameter_trials.csv 1a9d635d0af2f41fc6f1e83ae87d6372034259321ba288a11fb024e98ed52f4f|dd9c840434de596a30c84e79de26a9824b36c217a84876c2aab0579b76999735 +6488666 big.txt fa066c7d40f0f201ac4144e652aa62430e58a6b3805ec70650f678da5804e87b +514 caesar_break_parameter_trials.csv 6586223bcc00e06e3ff79d107202d6c29ef962a6dd544add00610c5907407e85|1cb7cc77831ef3ef4f994a9ea77e82a841b38acdde45ede9cedbe7a54f1e8e46 +135303 challenge6.ipynb 5b37a8b10db4c8d9831827a2acdffdcdb65369557d15b3e08a900ee8e088da73 +75506 challenge7.ipynb ee9a99fa7a9845ad7db77199927fe8ad1d05a6b0272e50d8175e874e7a3e4cdf +42922 cipher.py 58637b8946b4fb973b19a374a2066a896d86c928dacaa1ccd2252e6f8bb6e810 +120 cipher.sublime-project 6eeb5379b3486e9b65567acb8ce7d3fe56261019c13c99d10fa00818df4d0409 +11564 count_1edit.txt 3bf563ef032ba151ec1a4b2d1f33f50c49f4a47e4dc5b8152394bc5b63f57655|b5fbacbebcc25f5011ce97bc9ac967a09c50eef28b4aa98379a6c426df6ac08b +4956241 count_1w.txt 51df159fd3de12b20e403c108f526e96dbd723d9cabdd5f17955cdc16059e690 +9270 count_2l.txt bc2895f800189070c193907cd8bca956ad65fed2e25c14300d4bb5b6a243ba99 +5566017 count_2w.txt 781c0596c3eea532d30bef9f3dba1d5137d652f00376260822c761a7584dfb8c +220441 count_3l.txt 8702c95530c7d0d182ab94dc03ed7681fcf969819f6db011a58de31411dc6365 +320508 count_big.txt 3ba257fba1934bd138413d8274e79b56c5992431a27692fd562929aa43ec01a3 +3355 find_best_affine_break_parameters.py 6b11004bb93ac26ec7d42d33504e758edbaf9d55365ae2e4ca2fca7589263f25 +3027 find_best_caesar_break_parameters.py 0347d80309179d937a88fd1c8684490a513ccd086366c5a0dd55b8a2fe5c565f +1236 find_wikipedia_titles.py f040bf855dfec7fff9d8e5eba2fb509179bc53bc02a20b26b7fc61fef983aa45 +5645 language_models.py bfd5b60cdef8af20cdb061b24a1691f569984be3be333782c3d76e3370e16d14 +592 make-cracking-dictionary.py 71791e64e4853cd9ca292cb436bbe8c72dd60f509811174df93ed2067683d5c1 +7077 norms.py a657a36c1741e6f3a513386b318fcc99e6b11f98ec64a48284b47462ff2acf30 +8411 norms.pyc ac7a18765c7bcc27e406d8f38d943408097b3384a271502185d53482e6ec0da7|002b186e716cec64869a00bd2d72e16614931e696daa0cf3529d634a0f270e42 +112847 plot-caesar-parameters.ipynb 639459b4b2e434f9f0852c012ed9a8a8d87bd1cb6c2d65ca5abfdb0e42c3dea6 +4538523 shakespeare.txt 6f9c770efced5c3d87efa6197cd3091b982341372e36c6357f865df91ddecde6 +592309 sherlock-holmes.txt 0027de6f4110440ea51d67a2f3af3484898c630808f13b1d4db108e6283e67a3|2034ee1ebdec47e839607124d22b674d4614e1cc6209d758f7b6e99e69ae8e08 +451530 spell-errors.txt a4abe6ce6c24280f9a8d0485cbf78ddd2e58279ca01293692630a08ba4b13407 +69351 unknown-word-probability-investigation.ipynb 8a9cd7163f10bf2bfb3e286445eddcfc953f80abfdef4e29dac27617a53c3d41 +3291641 war-and-peace.txt 3ed0f41cfdf660846878943bad5b9d575bcae1e4a92ee9a7f43d3c9dba2af344|6799e48d3fd0a6f4c40b9951ec86de6da81f0b9cd36e413490ac511542ca54d3 +868202 words.txt aa77abbcba3c6dee1306d93adcedc2b2ccb8a4e0344a39d0676732ff58ebd5e5 +868384 words_2013.txt 57faa4841fe28dd82a5da4488b6381c194df6e1ecc04e61fb9f60e842bbca18c +``` + +#### Ignore + +``` +/SIGNED.md +``` + +#### Presets + +``` +git # ignore .git and anything as described by .gitignore files +``` + + + +### End signed statement + +
+ +#### Notes + +With keybase you can sign any directory's contents, whether it's a git repo, +source code distribution, or a personal documents folder. It aims to replace the drudgery of: + + 1. comparing a zipped file to a detached statement + 2. downloading a public key + 3. confirming it is in fact the author's by reviewing public statements they've made, using it + +All in one simple command: + +```bash +keybase dir verify +``` + +There are lots of options, including assertions for automating your checks. + +For more info, check out https://keybase.io/docs/command_line/code_signing \ No newline at end of file diff --git a/cipher.py b/cipher.py index 68472be..266237a 100644 --- a/cipher.py +++ b/cipher.py @@ -1,10 +1,99 @@ -"""A set of ciphers with implementations for both enciphering and deciphering -them. See cipherbreak for automatic breaking of these ciphers -""" - import string import collections -from language_models import unaccent, sanitise +import math +from enum import Enum +from itertools import zip_longest, cycle, chain, count +import numpy as np +from numpy import matrix +from numpy import linalg +from language_models import * + + +modular_division_table = [[0]*26 for _ in range(26)] +for a in range(26): + for b in range(26): + c = (a * b) % 26 + modular_division_table[b][c] = a + + +def every_nth(text, n, fillvalue=''): + """Returns n strings, each of which consists of every nth character, + starting with the 0th, 1st, 2nd, ... (n-1)th character + + >>> every_nth(string.ascii_lowercase, 5) + ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'] + >>> every_nth(string.ascii_lowercase, 1) + ['abcdefghijklmnopqrstuvwxyz'] + >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] + >>> every_nth(string.ascii_lowercase, 5, fillvalue='!') + ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!'] + """ + split_text = chunks(text, n, fillvalue) + return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)] + +def combine_every_nth(split_text): + """Reforms a text split into every_nth strings + + >>> combine_every_nth(every_nth(string.ascii_lowercase, 5)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 1)) + 'abcdefghijklmnopqrstuvwxyz' + >>> combine_every_nth(every_nth(string.ascii_lowercase, 26)) + 'abcdefghijklmnopqrstuvwxyz' + """ + return ''.join([''.join(l) + for l in zip_longest(*split_text, fillvalue='')]) + +def chunks(text, n, fillvalue=None): + """Split a text into chunks of n characters + + >>> chunks('abcdefghi', 3) + ['abc', 'def', 'ghi'] + >>> chunks('abcdefghi', 4) + ['abcd', 'efgh', 'i'] + >>> chunks('abcdefghi', 4, fillvalue='!') + ['abcd', 'efgh', 'i!!!'] + """ + if fillvalue: + padding = fillvalue[0] * (n - len(text) % n) + else: + padding = '' + return [(text+padding)[i:i+n] for i in range(0, len(text), n)] + +def transpose(items, transposition): + """Moves items around according to the given transposition + + >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3)) + ['a', 'b', 'c', 'd'] + >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0)) + ['d', 'b', 'c', 'a'] + >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0)) + [13, 12, 14, 11, 15, 10] + """ + transposed = [''] * len(transposition) + for p, t in enumerate(transposition): + transposed[p] = items[t] + return transposed + +def untranspose(items, transposition): + """Undoes a transpose + + >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3]) + ['a', 'b', 'c', 'd'] + >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0]) + ['a', 'b', 'c', 'd'] + >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0]) + [10, 11, 12, 13, 14, 15] + """ + transposed = [''] * len(transposition) + for p, t in enumerate(transposition): + transposed[t] = items[p] + return transposed + +def deduplicate(text): + return list(collections.OrderedDict.fromkeys(text)) def caesar_encipher_letter(accented_letter, shift): @@ -37,14 +126,14 @@ def caesar_encipher_letter(accented_letter, shift): alphabet_start = ord('A') else: alphabet_start = ord('a') - return chr(((ord(letter) - alphabet_start + shift) % 26) + + return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start) else: return letter def caesar_decipher_letter(letter, shift): """Decipher a letter, given a shift amount - + >>> caesar_decipher_letter('b', 1) 'a' >>> caesar_decipher_letter('b', 2) @@ -54,7 +143,7 @@ def caesar_decipher_letter(letter, shift): def caesar_encipher(message, shift): """Encipher a message with the Caesar cipher of given shift - + >>> caesar_encipher('abc', 1) 'bcd' >>> caesar_encipher('abc', 2) @@ -71,7 +160,7 @@ def caesar_encipher(message, shift): def caesar_decipher(message, shift): """Decipher a message with the Caesar cipher of given shift - + >>> caesar_decipher('bcd', 1) 'abc' >>> caesar_decipher('cde', 2) @@ -83,7 +172,885 @@ def caesar_decipher(message, shift): """ return caesar_encipher(message, -shift) +def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True): + """Encipher a letter, given a multiplier and adder + + >>> ''.join([affine_encipher_letter(l, 3, 5, True) \ + for l in string.ascii_uppercase]) + 'HKNQTWZCFILORUXADGJMPSVYBE' + >>> ''.join([affine_encipher_letter(l, 3, 5, False) \ + for l in string.ascii_uppercase]) + 'FILORUXADGJMPSVYBEHKNQTWZC' + """ + letter = unaccent(accented_letter) + if letter in string.ascii_letters: + if letter in string.ascii_uppercase: + alphabet_start = ord('A') + else: + alphabet_start = ord('a') + letter_number = ord(letter) - alphabet_start + if one_based: letter_number += 1 + cipher_number = (letter_number * multiplier + adder) % 26 + if one_based: cipher_number -= 1 + return chr(cipher_number % 26 + alphabet_start) + else: + return letter + +def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True): + """Encipher a letter, given a multiplier and adder + + >>> ''.join([affine_decipher_letter(l, 3, 5, True) \ + for l in 'HKNQTWZCFILORUXADGJMPSVYBE']) + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + >>> ''.join([affine_decipher_letter(l, 3, 5, False) \ + for l in 'FILORUXADGJMPSVYBEHKNQTWZC']) + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + """ + if letter in string.ascii_letters: + if letter in string.ascii_uppercase: + alphabet_start = ord('A') + else: + alphabet_start = ord('a') + cipher_number = ord(letter) - alphabet_start + if one_based: cipher_number += 1 + plaintext_number = ( + modular_division_table[multiplier] + [(cipher_number - adder) % 26]) + if one_based: plaintext_number -= 1 + return chr(plaintext_number % 26 + alphabet_start) + else: + return letter + +def affine_encipher(message, multiplier=1, adder=0, one_based=True): + """Encipher a message + + >>> affine_encipher('hours passed during which jerico tried every ' \ + 'trick he could think of', 15, 22, True) + 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh' + """ + enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) + for l in message] + return ''.join(enciphered) + +def affine_decipher(message, multiplier=1, adder=0, one_based=True): + """Decipher a message + + >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \ + 'jfaoe ls omytd jlaxe mh', 15, 22, True) + 'hours passed during which jerico tried every trick he could think of' + """ + enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) + for l in message] + return ''.join(enciphered) + + +class KeywordWrapAlphabet(Enum): + from_a = 1 + from_last = 2 + from_largest = 3 + + +def keyword_cipher_alphabet_of(keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Find the cipher alphabet given a keyword. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + + >>> keyword_cipher_alphabet_of('bayes') + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a) + 'bayescdfghijklmnopqrtuvwxz' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last) + 'bayestuvwxzcdfghijklmnopqr' + >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest) + 'bayeszcdfghijklmnopqrtuvwx' + """ + if wrap_alphabet == KeywordWrapAlphabet.from_a: + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + + string.ascii_lowercase)) + else: + if wrap_alphabet == KeywordWrapAlphabet.from_last: + last_keyword_letter = deduplicate(sanitise(keyword))[-1] + else: + last_keyword_letter = sorted(sanitise(keyword))[-1] + last_keyword_position = string.ascii_lowercase.find( + last_keyword_letter) + 1 + cipher_alphabet = ''.join( + deduplicate(sanitise(keyword) + + string.ascii_lowercase[last_keyword_position:] + + string.ascii_lowercase)) + return cipher_alphabet + + +def keyword_encipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Enciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_encipher('test message', 'bayes') + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a) + 'rsqr ksqqbds' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last) + 'lskl dskkbus' + >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest) + 'qspq jsppbcs' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) + return unaccent(message).lower().translate(cipher_translation) + +def keyword_decipher(message, keyword, wrap_alphabet=KeywordWrapAlphabet.from_a): + """Deciphers a message with a keyword substitution cipher. + wrap_alphabet controls how the rest of the alphabet is added + after the keyword. + 0 : from 'a' + 1 : from the last letter in the sanitised keyword + 2 : from the largest letter in the sanitised keyword + + >>> keyword_decipher('rsqr ksqqbds', 'bayes') + 'test message' + >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a) + 'test message' + >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last) + 'test message' + >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest) + 'test message' + """ + cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet) + cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) + return message.lower().translate(cipher_translation) + + +def vigenere_encipher(message, keyword): + """Vigenere encipher + + >>> vigenere_encipher('hello', 'abc') + 'hfnlp' + """ + shifts = [ord(l) - ord('a') for l in sanitise(keyword)] + pairs = zip(message, cycle(shifts)) + return ''.join([caesar_encipher_letter(l, k) for l, k in pairs]) + +def vigenere_decipher(message, keyword): + """Vigenere decipher + + >>> vigenere_decipher('hfnlp', 'abc') + 'hello' + """ + shifts = [ord(l) - ord('a') for l in sanitise(keyword)] + pairs = zip(message, cycle(shifts)) + return ''.join([caesar_decipher_letter(l, k) for l, k in pairs]) + +beaufort_encipher=vigenere_decipher +beaufort_decipher=vigenere_encipher + + +def transpositions_of(keyword): + """Finds the transpostions given by a keyword. For instance, the keyword + 'clever' rearranges to 'celrv', so the first column (0) stays first, the + second column (1) moves to third, the third column (2) moves to second, + and so on. + + If passed a tuple, assume it's already a transposition and just return it. + + >>> transpositions_of('clever') + (0, 2, 1, 4, 3) + >>> transpositions_of('fred') + (3, 2, 0, 1) + >>> transpositions_of((3, 2, 0, 1)) + (3, 2, 0, 1) + """ + if isinstance(keyword, tuple): + return keyword + else: + key = deduplicate(keyword) + transpositions = tuple(key.index(l) for l in sorted(key)) + return transpositions + +def pad(message_len, group_len, fillvalue): + padding_length = group_len - message_len % group_len + if padding_length == group_len: padding_length = 0 + padding = '' + for i in range(padding_length): + if callable(fillvalue): + padding += fillvalue() + else: + padding += fillvalue + return padding + +def column_transposition_encipher(message, keyword, fillvalue=' ', + fillcolumnwise=False, + emptycolumnwise=False): + """Enciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + + >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True) + 'hlohr eltee ' + >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere ' + >>> column_transposition_encipher('hellothere', 'abcdef') + 'hellothere ' + >>> column_transposition_encipher('hellothere', 'abcde') + 'hellothere' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False) + 'hlohreltee' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True) + 'htehlelroe' + >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True) + 'heotllrehe' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False) + 'holrhetlee' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True) + 'htleehoelr' + >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False) + 'hleolteher' + >>> column_transposition_encipher('hellothere', 'cleverly') + 'hleolthre e ' + >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!') + 'hleolthre!e!' + >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*') + 'hleolthre*e*' + """ + transpositions = transpositions_of(keyword) + message += pad(len(message), len(transpositions), fillvalue) + if fillcolumnwise: + rows = every_nth(message, len(message) // len(transpositions)) + else: + rows = chunks(message, len(transpositions)) + transposed = [transpose(r, transpositions) for r in rows] + if emptycolumnwise: + return combine_every_nth(transposed) + else: + return ''.join(chain(*transposed)) + +def column_transposition_decipher(message, keyword, fillvalue=' ', + fillcolumnwise=False, + emptycolumnwise=False): + """Deciphers using the column transposition cipher. + Message is padded to allow all rows to be the same length. + + >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False) + 'hellothere' + >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True) + 'hellothere' + >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False) + 'hellothere' + """ + transpositions = transpositions_of(keyword) + message += pad(len(message), len(transpositions), fillvalue) + if emptycolumnwise: + rows = every_nth(message, len(message) // len(transpositions)) + else: + rows = chunks(message, len(transpositions)) + untransposed = [untranspose(r, transpositions) for r in rows] + if fillcolumnwise: + return combine_every_nth(untransposed) + else: + return ''.join(chain(*untransposed)) + +def scytale_encipher(message, rows, fillvalue=' '): + """Enciphers using the scytale transposition cipher. + Message is padded with spaces to allow all rows to be the same length. + + >>> scytale_encipher('thequickbrownfox', 3) + 'tcnhkfeboqrxuo iw ' + >>> scytale_encipher('thequickbrownfox', 4) + 'tubnhirfecooqkwx' + >>> scytale_encipher('thequickbrownfox', 5) + 'tubn hirf ecoo qkwx ' + >>> scytale_encipher('thequickbrownfox', 6) + 'tqcrnxhukof eibwo ' + >>> scytale_encipher('thequickbrownfox', 7) + 'tqcrnx hukof eibwo ' + """ + # transpositions = [i for i in range(math.ceil(len(message) / rows))] + # return column_transposition_encipher(message, transpositions, + # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True) + transpositions = [i for i in range(rows)] + return column_transposition_encipher(message, transpositions, + fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False) + +def scytale_decipher(message, rows): + """Deciphers using the scytale transposition cipher. + Assumes the message is padded so that all rows are the same length. + + >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3) + 'thequickbrownfox ' + >>> scytale_decipher('tubnhirfecooqkwx', 4) + 'thequickbrownfox' + >>> scytale_decipher('tubn hirf ecoo qkwx ', 5) + 'thequickbrownfox ' + >>> scytale_decipher('tqcrnxhukof eibwo ', 6) + 'thequickbrownfox ' + >>> scytale_decipher('tqcrnx hukof eibwo ', 7) + 'thequickbrownfox ' + """ + # transpositions = [i for i in range(math.ceil(len(message) / rows))] + # return column_transposition_decipher(message, transpositions, + # fillcolumnwise=False, emptycolumnwise=True) + transpositions = [i for i in range(rows)] + return column_transposition_decipher(message, transpositions, + fillcolumnwise=True, emptycolumnwise=False) + + +def railfence_encipher(message, height, fillvalue=''): + """Railfence cipher. + Works by splitting the text into sections, then reading across them to + generate the rows in the cipher. The rows are then combined to form the + ciphertext. + + Example: the plaintext "hellotherefriends", with a height of four, written + out in the railfence as + h h i + etere* + lorfns + l e d + (with the * showing the one character to finish the last section). + Each 'section' is two columns, but unfolded. In the example, the first + section is 'hellot'. + + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!') + 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!') + 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!') + 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!') + 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3) + 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5) + 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp' + >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7) + 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic' + """ + sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue) + n_sections = len(sections) + # Add the top row + rows = [''.join([s[0] for s in sections])] + # process the middle rows of the grid + for r in range(1, height-1): + rows += [''.join([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])] + # process the bottom row + rows += [''.join([s[height - 1:height] for s in sections])] + # rows += [' '.join([s[height - 1] for s in sections])] + return ''.join(rows) + +def railfence_decipher(message, height, fillvalue=''): + """Railfence decipher. + Works by reconstructing the grid used to generate the ciphertext, then + unfolding the sections so the text can be concatenated together. + + Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first + work out that the second row has a character missing, find the rows of the + grid, then split the section into its two columns. + + 'hhieterelorfnsled' is split into + h h i + etere + lorfns + l e d + (spaces added for clarity), which is stored in 'rows'. This is then split + into 'down_rows' and 'up_rows': + + down_rows: + hhi + eee + lrn + led + + up_rows: + tr + ofs + + These are then zipped together (after the up_rows are reversed) to recover + the plaintext. + + Most of the procedure is about finding the correct lengths for each row then + splitting the ciphertext into those rows. + + >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!') + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3) + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5) + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7) + 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers' + """ + # find the number and size of the sections, including how many characters + # are missing for a full grid + n_sections = math.ceil(len(message) / ((height - 1) * 2)) + padding_to_add = n_sections * (height - 1) * 2 - len(message) + # row_lengths are for the both up rows and down rows + row_lengths = [n_sections] * (height - 1) * 2 + for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1): + row_lengths[i] -= 1 + # folded_rows are the combined row lengths in the middle of the railfence + folded_row_lengths = [row_lengths[0]] + for i in range(1, height-1): + folded_row_lengths += [row_lengths[i] + row_lengths[-i]] + folded_row_lengths += [row_lengths[height - 1]] + # find the rows that form the railfence grid + rows = [] + row_start = 0 + for i in folded_row_lengths: + rows += [message[row_start:row_start + i]] + row_start += i + # split the rows into the 'down_rows' (those that form the first column of + # a section) and the 'up_rows' (those that ofrm the second column of a + # section). + down_rows = [rows[0]] + up_rows = [] + for i in range(1, height-1): + down_rows += [''.join([c for n, c in enumerate(rows[i]) if n % 2 == 0])] + up_rows += [''.join([c for n, c in enumerate(rows[i]) if n % 2 == 1])] + down_rows += [rows[-1]] + up_rows.reverse() + return ''.join(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r) + +def make_cadenus_keycolumn(doubled_letters = 'vw', start='a', reverse=False): + """Makes the key column for a Cadenus cipher (the column down between the + rows of letters) + + >>> make_cadenus_keycolumn()['a'] + 0 + >>> make_cadenus_keycolumn()['b'] + 1 + >>> make_cadenus_keycolumn()['c'] + 2 + >>> make_cadenus_keycolumn()['v'] + 21 + >>> make_cadenus_keycolumn()['w'] + 21 + >>> make_cadenus_keycolumn()['z'] + 24 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['a'] + 1 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['b'] + 0 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['c'] + 24 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['i'] + 18 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['j'] + 18 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['v'] + 6 + >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['z'] + 2 + """ + index_to_remove = string.ascii_lowercase.find(doubled_letters[0]) + short_alphabet = string.ascii_lowercase[:index_to_remove] + string.ascii_lowercase[index_to_remove+1:] + if reverse: + short_alphabet = ''.join(reversed(short_alphabet)) + start_pos = short_alphabet.find(start) + rotated_alphabet = short_alphabet[start_pos:] + short_alphabet[:start_pos] + keycolumn = {l: i for i, l in enumerate(rotated_alphabet)} + keycolumn[doubled_letters[0]] = keycolumn[doubled_letters[1]] + return keycolumn + +def cadenus_encipher(message, keyword, keycolumn, fillvalue='a'): + """Encipher with the Cadenus cipher + + >>> cadenus_encipher(sanitise('Whoever has made a voyage up the Hudson ' \ + 'must remember the Kaatskill mountains. ' \ + 'They are a dismembered branch of the great'), \ + 'wink', \ + make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True)) + 'antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned' + >>> cadenus_encipher(sanitise('a severe limitation on the usefulness of ' \ + 'the cadenus is that every message must be ' \ + 'a multiple of twenty-five letters long'), \ + 'easy', \ + make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True)) + 'systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtofarenuseieeieltarlmentieetogevesitfaisltngeeuvowul' + """ + rows = chunks(message, len(message) // 25, fillvalue=fillvalue) + columns = zip(*rows) + rotated_columns = [col[start:] + col[:start] for start, col in zip([keycolumn[l] for l in keyword], columns)] + rotated_rows = zip(*rotated_columns) + transpositions = transpositions_of(keyword) + transposed = [transpose(r, transpositions) for r in rotated_rows] + return ''.join(chain(*transposed)) + +def cadenus_decipher(message, keyword, keycolumn, fillvalue='a'): + """ + >>> cadenus_decipher('antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaa' \ + 'suvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned', \ + 'wink', \ + make_cadenus_keycolumn(reverse=True)) + 'whoeverhasmadeavoyageupthehudsonmustrememberthekaatskillmountainstheyareadismemberedbranchofthegreat' + >>> cadenus_decipher('systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtof' \ + 'arenuseieeieltarlmentieetogevesitfaisltngeeuvowul', \ + 'easy', \ + make_cadenus_keycolumn(reverse=True)) + 'aseverelimitationontheusefulnessofthecadenusisthateverymessagemustbeamultipleoftwentyfiveletterslong' + """ + rows = chunks(message, len(message) // 25, fillvalue=fillvalue) + transpositions = transpositions_of(keyword) + untransposed_rows = [untranspose(r, transpositions) for r in rows] + columns = zip(*untransposed_rows) + rotated_columns = [col[-start:] + col[:-start] for start, col in zip([keycolumn[l] for l in keyword], columns)] + rotated_rows = zip(*rotated_columns) + # return rotated_columns + return ''.join(chain(*rotated_rows)) + + +def hill_encipher(matrix, message_letters, fillvalue='a'): + """Hill cipher + + >>> hill_encipher(np.matrix([[7,8], [11,11]]), 'hellothere') + 'drjiqzdrvx' + >>> hill_encipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \ + 'hello there') + 'tfjflpznvyac' + """ + n = len(matrix) + sanitised_message = sanitise(message_letters) + if len(sanitised_message) % n != 0: + padding = fillvalue[0] * (n - len(sanitised_message) % n) + else: + padding = '' + message = [ord(c) - ord('a') for c in sanitised_message + padding] + message_chunks = [message[i:i+n] for i in range(0, len(message), n)] + # message_chunks = chunks(message, len(matrix), fillvalue=None) + enciphered_chunks = [((matrix * np.matrix(c).T).T).tolist()[0] + for c in message_chunks] + return ''.join([chr(int(round(l)) % 26 + ord('a')) + for l in sum(enciphered_chunks, [])]) + +def hill_decipher(matrix, message, fillvalue='a'): + """Hill cipher + + >>> hill_decipher(np.matrix([[7,8], [11,11]]), 'drjiqzdrvx') + 'hellothere' + >>> hill_decipher(np.matrix([[6, 24, 1], [13, 16, 10], [20, 17, 15]]), \ + 'tfjflpznvyac') + 'hellothereaa' + """ + adjoint = linalg.det(matrix)*linalg.inv(matrix) + inverse_determinant = modular_division_table[int(round(linalg.det(matrix))) % 26][1] + inverse_matrix = (inverse_determinant * adjoint) % 26 + return hill_encipher(inverse_matrix, message, fillvalue) + + +# Where each piece of text ends up in the AMSCO transpositon cipher. +# 'index' shows where the slice appears in the plaintext, with the slice +# from 'start' to 'end' +AmscoSlice = collections.namedtuple('AmscoSlice', ['index', 'start', 'end']) + +class AmscoFillStyle(Enum): + continuous = 1 + same_each_row = 2 + reverse_each_row = 3 + +def amsco_transposition_positions(message, keyword, + fillpattern=(1, 2), + fillstyle=AmscoFillStyle.continuous, + fillcolumnwise=False, + emptycolumnwise=True): + """Creates the grid for the AMSCO transposition cipher. Each element in the + grid shows the index of that slice and the start and end positions of the + plaintext that go to make it up. + + >>> amsco_transposition_positions(string.ascii_lowercase, 'freddy', \ + fillpattern=(1, 2)) # doctest: +NORMALIZE_WHITESPACE + [[AmscoSlice(index=3, start=4, end=6), + AmscoSlice(index=2, start=3, end=4), + AmscoSlice(index=0, start=0, end=1), + AmscoSlice(index=1, start=1, end=3), + AmscoSlice(index=4, start=6, end=7)], + [AmscoSlice(index=8, start=12, end=13), + AmscoSlice(index=7, start=10, end=12), + AmscoSlice(index=5, start=7, end=9), + AmscoSlice(index=6, start=9, end=10), + AmscoSlice(index=9, start=13, end=15)], + [AmscoSlice(index=13, start=19, end=21), + AmscoSlice(index=12, start=18, end=19), + AmscoSlice(index=10, start=15, end=16), + AmscoSlice(index=11, start=16, end=18), + AmscoSlice(index=14, start=21, end=22)], + [AmscoSlice(index=18, start=27, end=28), + AmscoSlice(index=17, start=25, end=27), + AmscoSlice(index=15, start=22, end=24), + AmscoSlice(index=16, start=24, end=25), + AmscoSlice(index=19, start=28, end=30)]] + """ + transpositions = transpositions_of(keyword) + fill_iterator = cycle(fillpattern) + indices = count() + message_length = len(message) + + current_position = 0 + grid = [] + current_fillpattern = fillpattern + while current_position < message_length: + row = [] + if fillstyle == AmscoFillStyle.same_each_row: + fill_iterator = cycle(fillpattern) + if fillstyle == AmscoFillStyle.reverse_each_row: + fill_iterator = cycle(current_fillpattern) + for _ in range(len(transpositions)): + index = next(indices) + gap = next(fill_iterator) + row += [AmscoSlice(index, current_position, current_position + gap)] + current_position += gap + grid += [row] + if fillstyle == AmscoFillStyle.reverse_each_row: + current_fillpattern = list(reversed(current_fillpattern)) + return [transpose(r, transpositions) for r in grid] + +def amsco_transposition_encipher(message, keyword, + fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row): + """AMSCO transposition encipher. + + >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(1, 2)) + 'hoteelhler' + >>> amsco_transposition_encipher('hellothere', 'abc', fillpattern=(2, 1)) + 'hetelhelor' + >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(1, 2)) + 'hotelerelh' + >>> amsco_transposition_encipher('hellothere', 'acb', fillpattern=(2, 1)) + 'hetelorlhe' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'encode') + 'etecstthhomoerereenisxip' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2)) + 'hetcsoeisterereipexthomn' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous) + 'hecsoisttererteipexhomen' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(2, 1)) + 'heecisoosttrrtepeixhemen' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2)) + 'hxtomephescieretoeisnter' + >>> amsco_transposition_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous) + 'hxomeiphscerettoisenteer' + """ + grid = amsco_transposition_positions(message, keyword, + fillpattern=fillpattern, fillstyle=fillstyle) + ct_as_grid = [[message[s.start:s.end] for s in r] for r in grid] + return combine_every_nth(ct_as_grid) + + +def amsco_transposition_decipher(message, keyword, + fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row): + """AMSCO transposition decipher + + >>> amsco_transposition_decipher('hoteelhler', 'abc', fillpattern=(1, 2)) + 'hellothere' + >>> amsco_transposition_decipher('hetelhelor', 'abc', fillpattern=(2, 1)) + 'hellothere' + >>> amsco_transposition_decipher('hotelerelh', 'acb', fillpattern=(1, 2)) + 'hellothere' + >>> amsco_transposition_decipher('hetelorlhe', 'acb', fillpattern=(2, 1)) + 'hellothere' + >>> amsco_transposition_decipher('etecstthhomoerereenisxip', 'encode') + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hetcsoeisterereipexthomn', 'cipher', fillpattern=(1, 2)) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hecsoisttererteipexhomen', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('heecisoosttrrtepeixhemen', 'cipher', fillpattern=(2, 1)) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hxtomephescieretoeisnter', 'cipher', fillpattern=(1, 3, 2)) + 'hereissometexttoencipher' + >>> amsco_transposition_decipher('hxomeiphscerettoisenteer', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous) + 'hereissometexttoencipher' + """ + + grid = amsco_transposition_positions(message, keyword, + fillpattern=fillpattern, fillstyle=fillstyle) + transposed_sections = [s for c in [l for l in zip(*grid)] for s in c] + plaintext_list = [''] * len(transposed_sections) + current_pos = 0 + for slice in transposed_sections: + plaintext_list[slice.index] = message[current_pos:current_pos-slice.start+slice.end][:len(message[slice.start:slice.end])] + current_pos += len(message[slice.start:slice.end]) + return ''.join(plaintext_list) + + +class PocketEnigma(object): + """A pocket enigma machine + The wheel is internally represented as a 26-element list self.wheel_map, + where wheel_map[i] == j shows that the position i places on from the arrow + maps to the position j places on. + """ + def __init__(self, wheel=1, position='a'): + """initialise the pocket enigma, including which wheel to use and the + starting position of the wheel. + + The wheel is either 1 or 2 (the predefined wheels) or a list of letter + pairs. + + The position is the letter pointed to by the arrow on the wheel. + + >>> pe.wheel_map + [25, 4, 23, 10, 1, 7, 9, 5, 12, 6, 3, 17, 8, 14, 13, 21, 19, 11, 20, 16, 18, 15, 24, 2, 22, 0] + >>> pe.position + 0 + """ + self.wheel1 = [('a', 'z'), ('b', 'e'), ('c', 'x'), ('d', 'k'), + ('f', 'h'), ('g', 'j'), ('i', 'm'), ('l', 'r'), ('n', 'o'), + ('p', 'v'), ('q', 't'), ('s', 'u'), ('w', 'y')] + self.wheel2 = [('a', 'c'), ('b', 'd'), ('e', 'w'), ('f', 'i'), + ('g', 'p'), ('h', 'm'), ('j', 'k'), ('l', 'n'), ('o', 'q'), + ('r', 'z'), ('s', 'u'), ('t', 'v'), ('x', 'y')] + if wheel == 1: + self.make_wheel_map(self.wheel1) + elif wheel == 2: + self.make_wheel_map(self.wheel2) + else: + self.validate_wheel_spec(wheel) + self.make_wheel_map(wheel) + if position in string.ascii_lowercase: + self.position = ord(position) - ord('a') + else: + self.position = position + + def make_wheel_map(self, wheel_spec): + """Expands a wheel specification from a list of letter-letter pairs + into a full wheel_map. + + >>> pe.make_wheel_map(pe.wheel2) + [2, 3, 0, 1, 22, 8, 15, 12, 5, 10, 9, 13, 7, 11, 16, 6, 14, 25, 20, 21, 18, 19, 4, 24, 23, 17] + """ + self.validate_wheel_spec(wheel_spec) + self.wheel_map = [0] * 26 + for p in wheel_spec: + self.wheel_map[ord(p[0]) - ord('a')] = ord(p[1]) - ord('a') + self.wheel_map[ord(p[1]) - ord('a')] = ord(p[0]) - ord('a') + return self.wheel_map + + def validate_wheel_spec(self, wheel_spec): + """Validates that a wheel specificaiton will turn into a valid wheel + map. + + >>> pe.validate_wheel_spec([]) + Traceback (most recent call last): + ... + ValueError: Wheel specification has 0 pairs, requires 13 + >>> pe.validate_wheel_spec([('a', 'b', 'c')]*13) + Traceback (most recent call last): + ... + ValueError: Not all mappings in wheel specificationhave two elements + >>> pe.validate_wheel_spec([('a', 'b')]*13) + Traceback (most recent call last): + ... + ValueError: Wheel specification does not contain 26 letters + """ + if len(wheel_spec) != 13: + raise ValueError("Wheel specification has {} pairs, requires 13". + format(len(wheel_spec))) + for p in wheel_spec: + if len(p) != 2: + raise ValueError("Not all mappings in wheel specification" + "have two elements") + if len(set([p[0] for p in wheel_spec] + + [p[1] for p in wheel_spec])) != 26: + raise ValueError("Wheel specification does not contain 26 letters") + + def encipher_letter(self, letter): + """Enciphers a single letter, by advancing the wheel before looking up + the letter on the wheel. + + >>> pe.set_position('f') + 5 + >>> pe.encipher_letter('k') + 'h' + """ + self.advance() + return self.lookup(letter) + decipher_letter = encipher_letter + + def lookup(self, letter): + """Look up what a letter enciphers to, without turning the wheel. + + >>> pe.set_position('f') + 5 + >>> ''.join([pe.lookup(l) for l in string.ascii_lowercase]) + 'udhbfejcpgmokrliwntsayqzvx' + >>> pe.lookup('A') + '' + """ + if letter in string.ascii_lowercase: + return chr( + (self.wheel_map[(ord(letter) - ord('a') - self.position) % 26] + + self.position) % 26 + + ord('a')) + else: + return '' + + def advance(self): + """Advances the wheel one position. + + >>> pe.set_position('f') + 5 + >>> pe.advance() + 6 + """ + self.position = (self.position + 1) % 26 + return self.position + + def encipher(self, message, starting_position=None): + """Enciphers a whole message. + + >>> pe.set_position('f') + 5 + >>> pe.encipher('helloworld') + 'kjsglcjoqc' + >>> pe.set_position('f') + 5 + >>> pe.encipher('kjsglcjoqc') + 'helloworld' + >>> pe.encipher('helloworld', starting_position = 'x') + 'egrekthnnf' + """ + if starting_position: + self.set_position(starting_position) + transformed = '' + for l in message: + transformed += self.encipher_letter(l) + return transformed + decipher = encipher + + def set_position(self, position): + """Sets the position of the wheel, by specifying the letter the arrow + points to. + + >>> pe.set_position('a') + 0 + >>> pe.set_position('m') + 12 + >>> pe.set_position('z') + 25 + """ + self.position = ord(position) - ord('a') + return self.position + if __name__ == "__main__": import doctest - doctest.testmod() + doctest.testmod(extraglobs={'pe': PocketEnigma(1, 'a')}) diff --git a/language_models.py b/language_models.py index b84e43c..bf00875 100644 --- a/language_models.py +++ b/language_models.py @@ -3,9 +3,15 @@ its use. """ import string +import random import norms import collections import unicodedata +import itertools +from math import log10 +import os + +unaccent_specials = ''.maketrans({"’": "'"}) def letters(text): """Remove all non-alphabetic characters from a text @@ -32,7 +38,8 @@ def unaccent(text): >>> unaccent('HÉLLÖ') 'HELLO' """ - return unicodedata.normalize('NFKD', text).\ + translated_text = text.translate(unaccent_specials) + return unicodedata.normalize('NFKD', translated_text).\ encode('ascii', 'ignore').\ decode('utf-8') @@ -51,6 +58,120 @@ def sanitise(text): return letters(unaccent(text)).lower() +def datafile(name, sep='\t'): + """Read key,value pairs from file. + """ + with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), name), 'r') as f: + for line in f: + splits = line.split(sep) + yield [splits[0], int(splits[1])] + +english_counts = collections.Counter(dict(datafile('count_1l.txt'))) +normalised_english_counts = norms.normalise(english_counts) + +english_bigram_counts = collections.Counter(dict(datafile('count_2l.txt'))) +normalised_english_bigram_counts = norms.normalise(english_bigram_counts) + +english_trigram_counts = collections.Counter(dict(datafile('count_3l.txt'))) +normalised_english_trigram_counts = norms.normalise(english_trigram_counts) + +with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'words.txt'), 'r') as f: + keywords = [line.rstrip() for line in f] + + +def weighted_choice(d): + """Generate random item from a dictionary of item counts + """ + target = random.uniform(0, sum(d.values())) + cuml = 0.0 + for (l, p) in d.items(): + cuml += p + if cuml > target: + return l + return None + +def random_english_letter(): + """Generate a random letter based on English letter counts + """ + return weighted_choice(normalised_english_counts) + + +def ngrams(text, n): + """Returns all n-grams of a text + + >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE + ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', + 'nf', 'fo', 'ox'] + >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE + ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', + 'rown', 'ownf', 'wnfo', 'nfox'] + """ + return [text[i:i+n] for i in range(len(text)-n+1)] + + +class Pdist(dict): + """A probability distribution estimated from counts in datafile. + Values are stored and returned as log probabilities. + """ + def __init__(self, data=[], estimate_of_missing=None): + data1, data2 = itertools.tee(data) + self.total = sum([d[1] for d in data1]) + for key, count in data2: + self[key] = log10(count / self.total) + self.estimate_of_missing = estimate_of_missing or (lambda k, N: 1./N) + def __missing__(self, key): + return self.estimate_of_missing(key, self.total) + +def log_probability_of_unknown_word(key, N): + """Estimate the probability of an unknown word. + """ + return -log10(N * 10**((len(key) - 2) * 1.4)) + +Pw = Pdist(datafile('count_1w.txt'), log_probability_of_unknown_word) +Pw_wrong = Pdist(datafile('count_1w.txt'), lambda _k, N: log10(1/N)) +Pl = Pdist(datafile('count_1l.txt'), lambda _k, _N: 0) +P2l = Pdist(datafile('count_2l.txt'), lambda _k, _N: 0) +P3l = Pdist(datafile('count_3l.txt'), lambda _k, _N: 0) + +def Pwords(words): + """The Naive Bayes log probability of a sequence of words. + """ + return sum(Pw[w.lower()] for w in words) + +def Pwords_wrong(words): + """The Naive Bayes log probability of a sequence of words. + """ + return sum(Pw_wrong[w.lower()] for w in words) + +def Pletters(letters): + """The Naive Bayes log probability of a sequence of letters. + """ + return sum(Pl[l.lower()] for l in letters) + +def Pbigrams(letters): + """The Naive Bayes log probability of the bigrams formed from a sequence + of letters. + """ + return sum(P2l[p] for p in ngrams(letters, 2)) + +def Ptrigrams(letters): + """The Naive Bayes log probability of the trigrams formed from a sequence + of letters. + """ + return sum(P3l[p] for p in ngrams(letters, 3)) + + +def cosine_similarity_score(text): + """Finds the dissimilarity of a text to English, using the cosine distance + of the frequency distribution. + + >>> cosine_similarity_score('abcabc') # doctest: +ELLIPSIS + 0.26228882... + """ + return norms.cosine_similarity(english_counts, + collections.Counter(sanitise(text))) + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/norms.py b/norms.py index 36e7fa4..6645294 100644 --- a/norms.py +++ b/norms.py @@ -1,10 +1,9 @@ -"""Define a variety of norms for finding distances between vectors""" - import collections +from math import log10 def normalise(frequencies): """Scale a set of frequencies so they sum to one - + >>> sorted(normalise({1: 1, 2: 0}).items()) [(1, 1.0), (2, 0.0)] >>> sorted(normalise({1: 1, 2: 1}).items()) @@ -14,13 +13,13 @@ def normalise(frequencies): >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items()) [(1, 0.25), (2, 0.5), (3, 0.25)] """ - length = sum([f for f in frequencies.values()]) - return collections.defaultdict(int, ((k, v / length) + length = sum(f for f in frequencies.values()) + return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) def euclidean_scale(frequencies): """Scale a set of frequencies so they have a unit euclidean length - + >>> sorted(euclidean_scale({1: 1, 2: 0}).items()) [(1, 1.0), (2, 0.0)] >>> sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS @@ -31,21 +30,17 @@ def euclidean_scale(frequencies): [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)] """ length = sum([f ** 2 for f in frequencies.values()]) ** 0.5 - return collections.defaultdict(int, ((k, v / length) + return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) def identity_scale(frequencies): - """Don't scale a set of frequencies. (For use when a function expects a - scaling function but you don't want to supply one.) - """ return frequencies def l2(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as - dictionaries. + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 - + >>> l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) 0.0 >>> l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS @@ -67,7 +62,7 @@ def l2(frequencies1, frequencies2): euclidean_distance = l2 def l1(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> l1({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) @@ -87,7 +82,7 @@ def l1(frequencies1, frequencies2): return total def l3(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> l3({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) @@ -110,10 +105,10 @@ def l3(frequencies1, frequencies2): return total ** (1/3) def geometric_mean(frequencies1, frequencies2): - """Finds the geometric mean of the absolute differences between two - frequency profiles, expressed as dictionaries. + """Finds the geometric mean of the absolute differences between two frequency profiles, + expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 - + >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) 1 >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) @@ -136,8 +131,8 @@ def geometric_mean(frequencies1, frequencies2): return total def harmonic_mean(frequencies1, frequencies2): - """Finds the harmonic mean of the absolute differences between two - frequency profiles, expressed as dictionaries. + """Finds the harmonic mean of the absolute differences between two frequency profiles, + expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) @@ -165,8 +160,7 @@ def harmonic_mean(frequencies1, frequencies2): def cosine_similarity(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as - dictionaries. + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS -- 2.34.1