More tweaking
authorNeil Smith <neil.git@njae.me.uk>
Sun, 10 Jan 2016 15:35:04 +0000 (15:35 +0000)
committerNeil Smith <neil.git@njae.me.uk>
Wed, 4 Oct 2017 08:20:14 +0000 (09:20 +0100)
SIGNED.md
caesar_break_parameter_trials.csv
cipherbreak.py
find_best_caesar_break_parameters.py
norms.py

index 5b46ad58dfb3ac020da7176de388d8927481d088..9e2c50b4f89215f708dc5ca9de30dac8fe511f01 100644 (file)
--- a/SIGNED.md
+++ b/SIGNED.md
@@ -3,19 +3,19 @@
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
-iQIcBAABCAAGBQJWjpTaAAoJEJPB2e07Pgbqh6IP/3ZEzWXxCdqrHoXYRRuRvP3U
-7Q8FJro7m+dW0gV3R+0BvcJx7hmepvU8Hr4O4RLTQ3AWnlVSorMGFgOoZ3XLYvZl
-YACQU4q8Cy3PB2nm8/Mcj/yLVyiyGanxYaxGhBoYSUnaE6ncJ8wod3iGHwQouIgA
-sQmaJSoBj0RuYCcTlg8ipOmwZ/doNjKcFbmJQx7BRdlaQ1QQtqyDJgX6y0e4snU0
-ktcu0dCwroyAzu+HXQE5prhsB3x3UHzTq3VwIIgeNnO/Hye9LXgrT8fSdY54YndU
-E/S/gUD8/jB5TFTdHVmAEQAqg/YHXc49o1taPU+NU6hSl9EDmhM7CacBu+//VUy4
-VXjFFWJV9YFX6yhaNHSS5Vuz7h8oXKOQwGYa4dMlrGl912MnOHl+WjCDjcHL+0ht
-cFfEq3OgQDL4Q7Eln1bW7VwYDcpId1yNYcuHvd+h3bSzAxNtMcjH3Sim7EDcFP2z
-n6nuwFNdfY+drpAllw1df8QotFP8vkiA3O9sUtTIJkECmu9vGqJDfujhBOhPNQxk
-9bwSIWhXz0yvt3V7P/ds2PRGBB67ejkWOEEMhI/94yu/prZaeZQsm49ULoqz+6z/
-3eGlgz/Y0RRjlgorl7VwOwacuqb5w++WZ2CX4O5oylkxOtC/rkuOTFvwsas2c3sy
-as4QzMxXGkYvRCKzUvR7
-=gSfT
+iQIcBAABCAAGBQJWknovAAoJEJPB2e07Pgbqd8YQALEujIWisxG31Hy25uWt9dpL
+Fp1HeRaqTPe4bEZHVwrMLsKsqRpQWjoCLbSrtssuAVd1UD+IHCMT5pW39hnSASsf
+euNQW9MUrDP9CydAkNMfXEWG9Jp3fAUtm7SIiQc7D7n+/r8vrW5aa0uRCG9H56tn
+LeRLGLbkEA1dPOW08ehd772k/jMIf2Z7jEIhVoBm2tSE+1GpvvDXdbVm8iB1Yv3r
+7fCTQKTtsGqxtbhzcOABRIHze7m+U72nULK2Q24e165MNBPXPBu93J+JBphOPz4u
+apwq463oRAXA0LcjX1iU7t5rHJxngUV8VoXIWhqplaPoekmEGpH6zePYxLnpUrfT
++R/JxeGpPFEHcpHN7oUmNmeD1Xgir+7tKCy5mfI+ltTiw1ftaUthX8MImughMDGx
+KkMRkntleeAe5nIrk9NlaBEFVyah62CEE6F5h/EoZJS9UsiXF7HBpPdbtfar4glE
+t1iYNRg5zuEJEVN7j1ktmwrMtw/QufMCTinUqWhjNKlA+orRvgXgSAywpWNS9BBc
+n2Kf8HgtjeCkF9IjqfbB1yRlIwwnVN1Zfn2UsFs2/5lH6YXCSjdD7iMJLTTOgbBY
+mkHajKR3rR4uizA/jJ24MmqC+78/dldGQ1BwwYXOiTnpQEzZ52NpiTMAsmlXxt85
+5sbthAzP9pIV+/svQYEm
+=R1hh
 -----END PGP SIGNATURE-----
 
 ```
 -----END PGP SIGNATURE-----
 
 ```
@@ -139,19 +139,17 @@ size     exec  file                                               contents
 1771               8b.ciphertext                                  669a7144433b0554e1de51583abb048ca396f2ffe6a79e53dad6e6ffbf06bc8c                                                                 
 7607               Solitaire.py                                   abb966d0aa4f23599204a1a75f3179903c175eac637c0847bcc772374f380126|9e3ee44bd5d7ec196b513afad0856ed4f2cd6bf3e0fe9b6bc0130bb7731d04c8
 63970              cards2.png                                     f2959fc211d97c9b16bf049eb5c9e99167b033f887c43f9ae20adb13717c7899|b42371c35b39715277627114132f8b261283b2b1d59e47b7066e06f4df5a4ad6
 1771               8b.ciphertext                                  669a7144433b0554e1de51583abb048ca396f2ffe6a79e53dad6e6ffbf06bc8c                                                                 
 7607               Solitaire.py                                   abb966d0aa4f23599204a1a75f3179903c175eac637c0847bcc772374f380126|9e3ee44bd5d7ec196b513afad0856ed4f2cd6bf3e0fe9b6bc0130bb7731d04c8
 63970              cards2.png                                     f2959fc211d97c9b16bf049eb5c9e99167b033f887c43f9ae20adb13717c7899|b42371c35b39715277627114132f8b261283b2b1d59e47b7066e06f4df5a4ad6
-10000              pysol.zip                                      64e25c247e877f37285db635fcb5dea101e0c531fdbfe5000bcc088c22caad79|68d878fb019198e691d321be1dd6744fae0b48da9c4e705c34f96e351b00fbed
 5281               sol.py                                         dbea0e836eb158a26407ca3b74289d24fa7eae16ed1093eb7c598767a150fccc|e50319aa995f934a4cbd52bc28d8f49f376e62470170690cdbed7352f67a7942
 5281               sol.py                                         dbea0e836eb158a26407ca3b74289d24fa7eae16ed1093eb7c598767a150fccc|e50319aa995f934a4cbd52bc28d8f49f376e62470170690cdbed7352f67a7942
-2195               sol.py.zip                                     4c22e57eaacd8a91ef9b0c338429e400210a03dec9fbacec4a45fe1faed50720|1fd0a62af53125eacfa11016088451e2c58269d98aaef74c320730ebc7601b66
 18025            LICENSE                                          a01259a1b522cf0de95824f9860613b453153eebac468e96196d5d7dba84786c                                                                 
 7999             LJ!-Qt!-Fghxft-dferts%3B-hsjeukaxxn-sfedw.ipynb  429b6c6995096ff19c28a5ee342bef8ea4774200bdf9aaf6268de3cb8b28df28                                                                 
 61               README.md                                        277247b410300ee16477b12ca54ad878d81c8061f6134e2e1cadccaf299de3a3                                                                 
 18025            LICENSE                                          a01259a1b522cf0de95824f9860613b453153eebac468e96196d5d7dba84786c                                                                 
 7999             LJ!-Qt!-Fghxft-dferts%3B-hsjeukaxxn-sfedw.ipynb  429b6c6995096ff19c28a5ee342bef8ea4774200bdf9aaf6268de3cb8b28df28                                                                 
 61               README.md                                        277247b410300ee16477b12ca54ad878d81c8061f6134e2e1cadccaf299de3a3                                                                 
-469              affine_break_parameter_trials.csv                1a9d635d0af2f41fc6f1e83ae87d6372034259321ba288a11fb024e98ed52f4f|dd9c840434de596a30c84e79de26a9824b36c217a84876c2aab0579b76999735
+569              affine_break_parameter_trials.csv                6593e15e63e3e5632222fece8971ab473a7cdcf43020cd279e6f926cac0445fd|80705a44f33ed99ee08ed7d80e18945e84a2ae60afa1b39c331fa6bf58e21115
 6488666          big.txt                                          fa066c7d40f0f201ac4144e652aa62430e58a6b3805ec70650f678da5804e87b                                                                 
 16832            cadenus-ciphers.ipynb                            50e49b3ec5e6440b86fe13472b9f4ab9e133e5665c7280b2abf7a6b57bb8a89a                                                                 
 6488666          big.txt                                          fa066c7d40f0f201ac4144e652aa62430e58a6b3805ec70650f678da5804e87b                                                                 
 16832            cadenus-ciphers.ipynb                            50e49b3ec5e6440b86fe13472b9f4ab9e133e5665c7280b2abf7a6b57bb8a89a                                                                 
-514              caesar_break_parameter_trials.csv                6586223bcc00e06e3ff79d107202d6c29ef962a6dd544add00610c5907407e85|1cb7cc77831ef3ef4f994a9ea77e82a841b38acdde45ede9cedbe7a54f1e8e46
+514              caesar_break_parameter_trials.csv                cafee5b7e752807f856090b527870fc3a189de325e41c09c2933fe7bf5efcbe5|54761ff222587ae788d3213505308b631a8a3f17793beb3fbf492bfdaba320c3
 318              cipher-training.sublime-project                  58e5d5b4e54fb29abecaef2d41266e3355adccb8b6a70bd595e509bd07c16587                                                                 
 42922            cipher.py                                        58637b8946b4fb973b19a374a2066a896d86c928dacaa1ccd2252e6f8bb6e810                                                                 
 318              cipher-training.sublime-project                  58e5d5b4e54fb29abecaef2d41266e3355adccb8b6a70bd595e509bd07c16587                                                                 
 42922            cipher.py                                        58637b8946b4fb973b19a374a2066a896d86c928dacaa1ccd2252e6f8bb6e810                                                                 
-28908            cipherbreak.py                                   0fb22645ddce4e04c7e441a1f7bdc0e4a397a3c9b2cfb3098bcb213e79a361c9                                                                 
+28937            cipherbreak.py                                   5ba9424badfed9721f8f6f044e636b693105fbbdb03ad65907c10c25a26ac760                                                                 
 11564            count_1edit.txt                                  3bf563ef032ba151ec1a4b2d1f33f50c49f4a47e4dc5b8152394bc5b63f57655|b5fbacbebcc25f5011ce97bc9ac967a09c50eef28b4aa98379a6c426df6ac08b
 223              count_1l.txt                                     335388d457db6ef1da05d8b55ab879e9be7d4e021085efc8d9dfeac0e4a79aa9                                                                 
 4956241          count_1w.txt                                     51df159fd3de12b20e403c108f526e96dbd723d9cabdd5f17955cdc16059e690                                                                 
 11564            count_1edit.txt                                  3bf563ef032ba151ec1a4b2d1f33f50c49f4a47e4dc5b8152394bc5b63f57655|b5fbacbebcc25f5011ce97bc9ac967a09c50eef28b4aa98379a6c426df6ac08b
 223              count_1l.txt                                     335388d457db6ef1da05d8b55ab879e9be7d4e021085efc8d9dfeac0e4a79aa9                                                                 
 4956241          count_1w.txt                                     51df159fd3de12b20e403c108f526e96dbd723d9cabdd5f17955cdc16059e690                                                                 
@@ -159,20 +157,21 @@ size     exec  file                                               contents
 5566017          count_2w.txt                                     781c0596c3eea532d30bef9f3dba1d5137d652f00376260822c761a7584dfb8c                                                                 
 220441           count_3l.txt                                     8702c95530c7d0d182ab94dc03ed7681fcf969819f6db011a58de31411dc6365                                                                 
 320508           count_big.txt                                    3ba257fba1934bd138413d8274e79b56c5992431a27692fd562929aa43ec01a3                                                                 
 5566017          count_2w.txt                                     781c0596c3eea532d30bef9f3dba1d5137d652f00376260822c761a7584dfb8c                                                                 
 220441           count_3l.txt                                     8702c95530c7d0d182ab94dc03ed7681fcf969819f6db011a58de31411dc6365                                                                 
 320508           count_big.txt                                    3ba257fba1934bd138413d8274e79b56c5992431a27692fd562929aa43ec01a3                                                                 
-3355             find_best_affine_break_parameters.py             6b11004bb93ac26ec7d42d33504e758edbaf9d55365ae2e4ca2fca7589263f25                                                                 
-3027             find_best_caesar_break_parameters.py             0347d80309179d937a88fd1c8684490a513ccd086366c5a0dd55b8a2fe5c565f                                                                 
+3348             find_best_affine_break_parameters.py             b08fbccd56f7a3243cde14bd895e8ca417b89cd0faaced9a0e68ecb372ffcc14                                                                 
+3020             find_best_caesar_break_parameters.py             7119e2eea7c138e133b6f2df691af9e1128ff10d2b6ae16f32b0c0b35e488d76                                                                 
 1236             find_wikipedia_titles.py                         f040bf855dfec7fff9d8e5eba2fb509179bc53bc02a20b26b7fc61fef983aa45                                                                 
 37128            hill-ciphers.ipynb                               ce802c2be807b4565858b568d3a82c65a3957aa625344189f8f2a055237b3fdd                                                                 
 5645             language_models.py                               bfd5b60cdef8af20cdb061b24a1691f569984be3be333782c3d76e3370e16d14                                                                 
 368              lettercount.py                                   ed36497d62cf75b91994055e4a18848b2fabe5ce793cd76a77fabfc94d81d4f3                                                                 
 592              make-cracking-dictionary.py                      71791e64e4853cd9ca292cb436bbe8c72dd60f509811174df93ed2067683d5c1                                                                 
 1236             find_wikipedia_titles.py                         f040bf855dfec7fff9d8e5eba2fb509179bc53bc02a20b26b7fc61fef983aa45                                                                 
 37128            hill-ciphers.ipynb                               ce802c2be807b4565858b568d3a82c65a3957aa625344189f8f2a055237b3fdd                                                                 
 5645             language_models.py                               bfd5b60cdef8af20cdb061b24a1691f569984be3be333782c3d76e3370e16d14                                                                 
 368              lettercount.py                                   ed36497d62cf75b91994055e4a18848b2fabe5ce793cd76a77fabfc94d81d4f3                                                                 
 592              make-cracking-dictionary.py                      71791e64e4853cd9ca292cb436bbe8c72dd60f509811174df93ed2067683d5c1                                                                 
-7077             norms.py                                         a657a36c1741e6f3a513386b318fcc99e6b11f98ec64a48284b47462ff2acf30                                                                 
+7070             norms.py                                         c80289c5769e518ea40a7de85c869febab2e6d40c64a596bcd2a13ea2a9ece1a                                                                 
 8411             norms.pyc                                        ac7a18765c7bcc27e406d8f38d943408097b3384a271502185d53482e6ec0da7|002b186e716cec64869a00bd2d72e16614931e696daa0cf3529d634a0f270e42
 112847           plot-caesar-parameters.ipynb                     639459b4b2e434f9f0852c012ed9a8a8d87bd1cb6c2d65ca5abfdb0e42c3dea6                                                                 
 23863            railfence-experiment-1.ipynb                     e34a61cef19b3249a3d6d731fe054769c02233b0be661161ebf45bada3b7b97b                                                                 
 18628            railfence-experiment-2.ipynb                     8c4d90c1c934b764deba0956bb4be81868e463789dc8fe02e5621ab3a95c0889                                                                 
 881              segment.py                                       94d257cc6151861ef3d3033c4d2d03d8c121b0a982344abf400f65fd507fed28                                                                 
 4538523          shakespeare.txt                                  6f9c770efced5c3d87efa6197cd3091b982341372e36c6357f865df91ddecde6                                                                 
 8411             norms.pyc                                        ac7a18765c7bcc27e406d8f38d943408097b3384a271502185d53482e6ec0da7|002b186e716cec64869a00bd2d72e16614931e696daa0cf3529d634a0f270e42
 112847           plot-caesar-parameters.ipynb                     639459b4b2e434f9f0852c012ed9a8a8d87bd1cb6c2d65ca5abfdb0e42c3dea6                                                                 
 23863            railfence-experiment-1.ipynb                     e34a61cef19b3249a3d6d731fe054769c02233b0be661161ebf45bada3b7b97b                                                                 
 18628            railfence-experiment-2.ipynb                     8c4d90c1c934b764deba0956bb4be81868e463789dc8fe02e5621ab3a95c0889                                                                 
 881              segment.py                                       94d257cc6151861ef3d3033c4d2d03d8c121b0a982344abf400f65fd507fed28                                                                 
 4538523          shakespeare.txt                                  6f9c770efced5c3d87efa6197cd3091b982341372e36c6357f865df91ddecde6                                                                 
+592309           sherlock-holmes.txt                              0027de6f4110440ea51d67a2f3af3484898c630808f13b1d4db108e6283e67a3|2034ee1ebdec47e839607124d22b674d4614e1cc6209d758f7b6e99e69ae8e08
 451530           spell-errors.txt                                 a4abe6ce6c24280f9a8d0485cbf78ddd2e58279ca01293692630a08ba4b13407                                                                 
 69351            unknown-word-probability-investigation.ipynb     8a9cd7163f10bf2bfb3e286445eddcfc953f80abfdef4e29dac27617a53c3d41                                                                 
 3291641          war-and-peace.txt                                3ed0f41cfdf660846878943bad5b9d575bcae1e4a92ee9a7f43d3c9dba2af344|6799e48d3fd0a6f4c40b9951ec86de6da81f0b9cd36e413490ac511542ca54d3
 451530           spell-errors.txt                                 a4abe6ce6c24280f9a8d0485cbf78ddd2e58279ca01293692630a08ba4b13407                                                                 
 69351            unknown-word-probability-investigation.ipynb     8a9cd7163f10bf2bfb3e286445eddcfc953f80abfdef4e29dac27617a53c3d41                                                                 
 3291641          war-and-peace.txt                                3ed0f41cfdf660846878943bad5b9d575bcae1e4a92ee9a7f43d3c9dba2af344|6799e48d3fd0a6f4c40b9951ec86de6da81f0b9cd36e413490ac511542ca54d3
index 6f71f0779797eb302a00da9557f6b23fd20447ae..e18f92c5604d97855b83cff0fdce99d17c7385a1 100644 (file)
@@ -1,3 +1,4 @@
+<<<<<<< HEAD
 ,message_length
 scoring, 300, 100, 50, 30, 20, 10, 5
 Pletters, 0.9994, 0.9994, 0.9994, 0.9966, 0.9778, 0.8174, 0.4712
 ,message_length
 scoring, 300, 100, 50, 30, 20, 10, 5
 Pletters, 0.9994, 0.9994, 0.9994, 0.9966, 0.9778, 0.8174, 0.4712
@@ -91,3 +92,15 @@ l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
 l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
 l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
 l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
 l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
 l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
 l3 + normalised, 1.0, 0.9986, 0.9932, 0.963, 0.8696, 0.594, 0.4122
+=======
+"name",100,50,30,20,10,5\r
+"Pletters",4996,4997,4984,4900,4063,2358\r
+"cosine_similarity + euclidean_scaled",4998,4986,4914,4659,3528,2198\r
+"cosine_similarity + normalised",4997,4993,4917,4659,3557,2084\r
+"l1 + euclidean_scaled",4998,4992,4951,4755,3767,2192\r
+"l1 + normalised",4998,4996,4936,4767,3596,2161\r
+"l2 + euclidean_scaled",4998,4990,4926,4683,3567,2179\r
+"l2 + normalised",4995,4993,4920,4672,3610,2135\r
+"l3 + euclidean_scaled",4996,4964,4822,4457,3167,2018\r
+"l3 + normalised",4999,4973,4797,4351,2872,1989\r
+>>>>>>> 883806c... More tweaking
index 0ac8ae57f7ed11a443dfedc8366ddc51086eda8c..1a589c2ef1c5d1720f44d82651eda6b6a6cfe434 100644 (file)
@@ -359,7 +359,7 @@ def column_transposition_break_mp(message, translist=transpositions,
     with Pool() as pool:
         helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
                         fitness)
     with Pool() as pool:
         helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
                         fitness)
-                       for trans in translist.keys()
+                       for trans in translist
                        for fillcolumnwise in [True, False]
                        for emptycolumnwise in [True, False]]
         # Gotcha: the helper function here needs to be defined at the top level
                        for fillcolumnwise in [True, False]
                        for emptycolumnwise in [True, False]]
         # Gotcha: the helper function here needs to be defined at the top level
@@ -489,7 +489,7 @@ def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)],
                    (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
                    (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
         patterns=[(1, 2)]) # doctest: +ELLIPSIS
                    (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
                    (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
         patterns=[(1, 2)]) # doctest: +ELLIPSIS
-    (((2, 0, 5, 3, 1, 4, 6), (1, 2)), -709.4646722...)
+    (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
     >>> amsco_break(amsco_transposition_encipher(sanitise( \
             "It is a truth universally acknowledged, that a single man in \
              possession of a good fortune, must be in want of a wife. However \
     >>> amsco_break(amsco_transposition_encipher(sanitise( \
             "It is a truth universally acknowledged, that a single man in \
              possession of a good fortune, must be in want of a wife. However \
@@ -502,11 +502,11 @@ def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)],
                    (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
                    (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
         patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
                    (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
                    (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
         patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
-    (((2, 0, 5, 3, 1, 4, 6), (2, 1)), -997.0129085...)
+    (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
     """
     with Pool() as pool:
         helper_args = [(message, trans, pattern, fillstyle, fitness)
     """
     with Pool() as pool:
         helper_args = [(message, trans, pattern, fillstyle, fitness)
-                       for trans in translist.keys()
+                       for trans in translist
                        for pattern in patterns
                        for fillstyle in fillstyles]
         # Gotcha: the helper function here needs to be defined at the top level
                        for pattern in patterns
                        for fillstyle in fillstyles]
         # Gotcha: the helper function here needs to be defined at the top level
@@ -588,13 +588,13 @@ def pocket_enigma_break_by_crib(message, wheel_spec, crib, crib_position):
 
 
 def plot_frequency_histogram(freqs, sort_key=None):
 
 
 def plot_frequency_histogram(freqs, sort_key=None):
-    x = range(len(freqs.keys()))
-    y = [freqs[l] for l in sorted(freqs.keys(), key=sort_key)]
+    x = range(len(freqs))
+    y = [freqs[l] for l in sorted(freqs, key=sort_key)]
     f = plt.figure()
     ax = f.add_axes([0.1, 0.1, 0.9, 0.9])
     ax.bar(x, y, align='center')
     ax.set_xticks(x)
     f = plt.figure()
     ax = f.add_axes([0.1, 0.1, 0.9, 0.9])
     ax.bar(x, y, align='center')
     ax.set_xticks(x)
-    ax.set_xticklabels(sorted(freqs.keys(), key=sort_key))
+    ax.set_xticklabels(sorted(freqs, key=sort_key))
     f.show()
 
 
     f.show()
 
 
index 9ed53488dde8161ea13b1b6caa35b8aa60eb0fa0..7a8ddc9dc0a4dd3340a84c3fb021d44131d1ab87 100644 (file)
@@ -3,6 +3,7 @@ import collections
 from cipher import *
 from cipherbreak import *
 import itertools
 from cipher import *
 from cipherbreak import *
 import itertools
+import csv
 
 corpus = sanitise(''.join([open('shakespeare.txt', 'r').read(), 
     open('sherlock-holmes.txt', 'r').read(), 
 
 corpus = sanitise(''.join([open('shakespeare.txt', 'r').read(), 
     open('sherlock-holmes.txt', 'r').read(), 
@@ -11,27 +12,19 @@ corpus_length = len(corpus)
 
 euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)
 
 
 euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)
 
-# def frequency_compare(text, target_frequency, frequency_scaling, metric):
-#     counts = frequency_scaling(frequencies(text))
-#     return -1 * metric(target_frequency, counts)
-
-# def euclidean_compare(text):
-#     return frequency_compare(text, norms.euclidean_scale(english_counts),
-#             norms.euclidean_scale, norms.euclidean_distance)
-
 metrics = [{'func': norms.l1, 'invert': True, 'name': 'l1'}, 
     {'func': norms.l2, 'invert': True, 'name': 'l2'},
     {'func': norms.l3, 'invert': True, 'name': 'l3'},
 metrics = [{'func': norms.l1, 'invert': True, 'name': 'l1'}, 
     {'func': norms.l2, 'invert': True, 'name': 'l2'},
     {'func': norms.l3, 'invert': True, 'name': 'l3'},
-    {'func': norms.cosine_distance, 'invert': False, 'name': 'cosine_distance'},
-    {'func': norms.harmonic_mean, 'invert': True, 'name': 'harmonic_mean'},
-    {'func': norms.geometric_mean, 'invert': True, 'name': 'geometric_mean'}]
+    {'func': norms.cosine_similarity, 'invert': False, 'name': 'cosine_similarity'}]
+    {'func': norms.harmonic_mean, 'invert': True, 'name': 'harmonic_mean'},
+    {'func': norms.geometric_mean, 'invert': True, 'name': 'geometric_mean'}]
 scalings = [{'corpus_frequency': normalised_english_counts, 
          'scaling': norms.normalise,
          'name': 'normalised'},
         {'corpus_frequency': euclidean_scaled_english_counts, 
          'scaling': norms.euclidean_scale,
          'name': 'euclidean_scaled'}]
 scalings = [{'corpus_frequency': normalised_english_counts, 
          'scaling': norms.normalise,
          'name': 'normalised'},
         {'corpus_frequency': euclidean_scaled_english_counts, 
          'scaling': norms.euclidean_scale,
          'name': 'euclidean_scaled'}]
-message_lengths = [300, 100, 50, 30, 20, 10, 5]
+message_lengths = [100, 50, 30, 20, 10, 5]
 
 trials = 5000
 
 
 trials = 5000
 
@@ -48,7 +41,6 @@ def make_frequency_compare_function(target_frequency, frequency_scaling, metric,
         return score
     return frequency_compare
 
         return score
     return frequency_compare
 
-
 def scoring_functions():
     return [{'func': make_frequency_compare_function(s['corpus_frequency'], 
                 s['scaling'], m['func'], m['invert']),
 def scoring_functions():
     return [{'func': make_frequency_compare_function(s['corpus_frequency'], 
                 s['scaling'], m['func'], m['invert']),
@@ -77,14 +69,12 @@ def eval_one_score(scoring_function, message_length):
 
 def show_results():
     with open('caesar_break_parameter_trials.csv', 'w') as f:
 
 def show_results():
     with open('caesar_break_parameter_trials.csv', 'w') as f:
-        print(',message_length', file = f)
-        print('scoring,', ', '.join([str(l) for l in message_lengths]), file = f)
-        for scoring in sorted(scores.keys()):
-            for length in message_lengths:
-                print(scoring, end='', sep='', file=f)
-                for l in message_lengths:
-                    print(',', scores[scoring][l] / trials, end='', file=f)
-                print('', file = f)
+        writer = csv.DictWriter(f, ['name'] + message_lengths, 
+            quoting=csv.QUOTE_NONNUMERIC)
+        writer.writeheader()
+        for scoring in sorted(scores):
+            scores[scoring]['name'] = scoring
+            writer.writerow(scores[scoring])
 
 eval_scores()
 show_results()
 
 eval_scores()
 show_results()
index 3d6d37df7f2e4f9f576c9cd4ef1a2341aa48d016..eb436c3b8163141a3ada1f1f02f8be741d6f47fb 100644 (file)
--- a/norms.py
+++ b/norms.py
@@ -13,7 +13,7 @@ def normalise(frequencies):
     >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items())
     [(1, 0.25), (2, 0.5), (3, 0.25)]
     """
     >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items())
     [(1, 0.25), (2, 0.5), (3, 0.25)]
     """
-    length = sum([f for f in frequencies.values()])
+    length = sum(f for f in frequencies.values())
     return collections.defaultdict(int, ((k, v / length) 
         for (k, v) in frequencies.items()))
 
     return collections.defaultdict(int, ((k, v / length) 
         for (k, v) in frequencies.items()))
 
@@ -159,17 +159,17 @@ def harmonic_mean(frequencies1, frequencies2):
     return len(frequencies1) / total
 
 
     return len(frequencies1) / total
 
 
-def cosine_distance(frequencies1, frequencies2):
+def cosine_similarity(frequencies1, frequencies2):
     """Finds the distances between two frequency profiles, expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
 
     """Finds the distances between two frequency profiles, expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
 
-    >>> cosine_distance({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+    >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
     1.0000000000...
     1.0000000000...
-    >>> cosine_distance({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+    >>> cosine_similarity({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
     1.0000000000...
     1.0000000000...
-    >>> cosine_distance({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+    >>> cosine_similarity({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
     0.5773502691...
     0.5773502691...
-    >>> cosine_distance({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
+    >>> cosine_similarity({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
     0.7071067811...
     """
     numerator = 0
     0.7071067811...
     """
     numerator = 0
@@ -178,8 +178,8 @@ def cosine_distance(frequencies1, frequencies2):
     for k in frequencies1:
         numerator += frequencies1[k] * frequencies2[k]
         length1 += frequencies1[k]**2
     for k in frequencies1:
         numerator += frequencies1[k] * frequencies2[k]
         length1 += frequencies1[k]**2
-    for k in frequencies2.keys():
-        length2 += frequencies2[k]
+    for k in frequencies2:
+        length2 += frequencies2[k]**2
     return numerator / (length1 ** 0.5 * length2 ** 0.5)
 
 
     return numerator / (length1 ** 0.5 * length2 ** 0.5)