From: Neil Smith <neil.git@njae.me.uk> Date: Sat, 5 Jul 2014 13:12:19 +0000 (+0100) Subject: Done transposition slides X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=00bd8aa9109f421a9fda6bdc5cb4c1fb33bc79c6;hp=d1bdce3c3fa3db803c0cf4574c8dff22f42b58d8;p=cipher-training.git Done transposition slides --- diff --git a/language_models.py b/language_models.py index 52e7ac4..59d8588 100644 --- a/language_models.py +++ b/language_models.py @@ -123,6 +123,7 @@ Pw = Pdist(datafile('count_1w.txt'), log_probability_of_unknown_word) Pw_wrong = Pdist(datafile('count_1w.txt'), lambda _k, N: log10(1/N)) Pl = Pdist(datafile('count_1l.txt'), lambda _k, _N: 0) P2l = Pdist(datafile('count_2l.txt'), lambda _k, _N: 0) +P3l = Pdist(datafile('count_3l.txt'), lambda _k, _N: 0) def Pwords(words): """The Naive Bayes log probability of a sequence of words. @@ -146,6 +147,12 @@ def Pbigrams(letters): """ return sum(P2l[p] for p in ngrams(letters, 2)) +def Ptrigrams(letters): + """The Naive Bayes log probability of the trigrams formed from a sequence + of letters. + """ + return sum(P3l[p] for p in ngrams(letters, 3)) + def cosine_similarity_score(text): """Finds the dissimilarity of a text to English, using the cosine distance diff --git a/slides/index.html b/slides/index.html new file mode 100644 index 0000000..dad47e7 --- /dev/null +++ b/slides/index.html @@ -0,0 +1,83 @@ +<!DOCTYPE html> +<html> + <head> + <title>Keyword ciphers</title> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> + <style type="text/css"> + /* Slideshow styles */ + body { + font-size: 20px; + } + h1, h2, h3 { + font-weight: 400; + margin-bottom: 0; + } + h1 { font-size: 3em; } + h2 { font-size: 2em; } + h3 { font-size: 1.6em; } + a, a > code { + text-decoration: none; + } + code { + -moz-border-radius: 5px; + -web-border-radius: 5px; + background: #e7e8e2; + border-radius: 5px; + font-size: 16px; + } + .plaintext { + background: #272822; + color: #80ff80; + text-shadow: 0 0 20px #333; + padding: 2px 5px; + } + .ciphertext { + background: #272822; + color: #ff6666; + text-shadow: 0 0 20px #333; + padding: 2px 5px; + } + .float-right { + float: right; + } + </style> + </head> + <body> + <textarea id="source"> + +# Cipher programming training + +* [Aims](aims.html) +* Caesar ciphers: [Making](caesar-encipher.html) and [Breaking](caesar-break.html) +* Affine ciphers: [Making](affine-encipher.html) and [Breaking](affine-break.html) +* [Word segmentation](word-segmentation.html) +* Keyword ciphers: [Making](keyword-encipher.html) and [Breaking](keyword-break.html) +* Transposition ciphers: [Making](transposition-encipher.html) and [Breaking](transposition-break.html) +* [Alternative plausability scoring](alternative-plaintext-scoring.html) +* [Further work](further-work.html) + + </textarea> + <script src="http://gnab.github.io/remark/downloads/remark-0.6.0.min.js" type="text/javascript"> + </script> + + <script type="text/javascript" + src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML&delayStartupUntil=configured"></script> + + <script type="text/javascript"> + var slideshow = remark.create({ ratio: "16:9" }); + + // Setup MathJax + MathJax.Hub.Config({ + tex2jax: { + skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] + } + }); + MathJax.Hub.Queue(function() { + $(MathJax.Hub.getAllJax()).map(function(index, elem) { + return(elem.SourceElement()); + }).parent().addClass('has-jax'); + }); + MathJax.Hub.Configured(); + </script> + </body> +</html> diff --git a/slides/transposition-break.html b/slides/transposition-break.html new file mode 100644 index 0000000..5d8202a --- /dev/null +++ b/slides/transposition-break.html @@ -0,0 +1,157 @@ +<!DOCTYPE html> +<html> + <head> + <title>Keyword ciphers</title> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> + <style type="text/css"> + /* Slideshow styles */ + body { + font-size: 20px; + } + h1, h2, h3 { + font-weight: 400; + margin-bottom: 0; + } + h1 { font-size: 3em; } + h2 { font-size: 2em; } + h3 { font-size: 1.6em; } + a, a > code { + text-decoration: none; + } + code { + -moz-border-radius: 5px; + -web-border-radius: 5px; + background: #e7e8e2; + border-radius: 5px; + font-size: 16px; + } + .plaintext { + background: #272822; + color: #80ff80; + text-shadow: 0 0 20px #333; + padding: 2px 5px; + } + .ciphertext { + background: #272822; + color: #ff6666; + text-shadow: 0 0 20px #333; + padding: 2px 5px; + } + .float-right { + float: right; + } + </style> + </head> + <body> + <textarea id="source"> + +# Breaking transposition ciphers + + attack the fort at dawn + + a t t a c + k t h e f + o r t a t + d a w n + + akod ttra aean cft + +Generally quite familiar... + +## Try all the keys, pick the one that looks most like Englilsh + +--- + +# ...Pick one that looks most like English + +But the naïve Bayes score will always be the same! + +* Same letters, just a different order. + +Score by probability of substrings of letters + +* Bigrams, trigrams, _n_-grams + +--- + +# Finding _n_-grams + +Given `count_2l.txt` and `count_3l.txt`, counts of bigrams and trigrams in English + +# Write a function that returns all the _n_-grams for a text, given _n_ + * Assume the text is already sanitised + +# Build `P2l`, `P3l` (after `Pl`), `Pbigrams`, `Ptrigrams` (after `Pletters`) + +--- + +# Breaking scytale + +What are the possible keys? + +--- + +# Try all the keys... + +*All* the keys? + +What's the transposition of 'cat'? + +* 'bat'? +* 'car'? +* 'wry'? +* 'babe'? +* 'powwow'? + +--- + +# Equivalence classes and canonical forms + +Lots of words yield the same transposition + +* They're all in the same equivalence class +* Only need to test one from the class + +General idea: if there are different ways to represent something, pick one to make comparisons easier + +* Canonical form, canonical representation + +--- + +# Finding the transpositions to try + +``` +For each word: + if it's a new transposition: + add it to the list +``` + +What data structure to use to store the transpositions? + + + </textarea> + <script src="http://gnab.github.io/remark/downloads/remark-0.6.0.min.js" type="text/javascript"> + </script> + + <script type="text/javascript" + src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML&delayStartupUntil=configured"></script> + + <script type="text/javascript"> + var slideshow = remark.create({ ratio: "16:9" }); + + // Setup MathJax + MathJax.Hub.Config({ + tex2jax: { + skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] + } + }); + MathJax.Hub.Queue(function() { + $(MathJax.Hub.getAllJax()).map(function(index, elem) { + return(elem.SourceElement()); + }).parent().addClass('has-jax'); + }); + MathJax.Hub.Configured(); + </script> + </body> +</html> + diff --git a/slides/transposition-encipher.html b/slides/transposition-encipher.html index 0c09a4b..2fea8e3 100644 --- a/slides/transposition-encipher.html +++ b/slides/transposition-encipher.html @@ -223,6 +223,20 @@ Put it all together --- +# Back to the scytale + +Key is number of rows. + +No transposition of columns. + +* What does a null transposition look like? + +How to fill and empty? + +(Transposing the grid is easier) + +--- + # Masking the fill characters Padding characters can be distinctive.