Made a few tweaks

author Neil Smith <neil.git@njae.me.uk>

Wed, 16 Jul 2014 15:28:07 +0000 (16:28 +0100)

committer Neil Smith <neil.git@njae.me.uk>

Wed, 16 Jul 2014 15:28:07 +0000 (16:28 +0100)
author Neil Smith <neil.git@njae.me.uk>
Wed, 16 Jul 2014 15:28:07 +0000 (16:28 +0100)
committer Neil Smith <neil.git@njae.me.uk>
Wed, 16 Jul 2014 15:28:07 +0000 (16:28 +0100)
diff --git a/slides/caesar-break.html b/slides/caesar-break.html

index 4d2ebfa0d01d556dbdd37599f3e4320f92fc4031..7a2fbf6d550cbd8e8dc90bcea3b694c0e4dcd293 100644 (file)
--- a/slides/caesar-break.html
+++ b/slides/caesar-break.html
@@ -128,11 +128,11 @@ Use this to predict the probability of each letter, and hence the probability of
  
  ---
  
-# An infinite number of monkeys
+.float-right[![right-aligned Typing monkey](typingmonkeylarge.jpg)]
  
-What is the probability that this string of letters is a sample of English?
+# Naive Bayes, or the bag of letters
  
-## Naive Bayes, or the bag of letters
+What is the probability that this string of letters is a sample of English?
  
  Ignore letter order, just treat each letter individually.
  
@@ -234,13 +234,20 @@ def unaccent(text):
  
  1. Read from `shakespeare.txt`, `sherlock-holmes.txt`, and `war-and-peace.txt`.
  2. Find the frequencies (`.update()`)
-3. Sort by count 
-4. Write counts to `count_1l.txt` (`'text{}\n'.format()`)
+3. Sort by count (read the docs...)
+4. Write counts to `count_1l.txt` 
+```python
+with open('count_1l.txt', 'w') as f:
+    for each letter...:
+        f.write('text\t{}\n'.format(count))
+```
  
  ---
  
  # Reading letter probabilities
  
+New file: `language_models.py`
+
  1. Load the file `count_1l.txt` into a dict, with letters as keys.
  
  2. Normalise the counts (components of vector sum to 1): `$$ \hat{\mathbf{x}} = \frac{\mathbf{x}}{\| \mathbf{x} \|} = \frac{\mathbf{x}}{ \mathbf{x}_1 + \mathbf{x}_2 + \mathbf{x}_3 + \dots }$$`
@@ -257,6 +264,8 @@ def unaccent(text):
  
  # Breaking caesar ciphers
  
+New file: `cipherbreak.py`
+
  ## Remember the basic idea
  
  ```
diff --git a/slides/caesar-encipher.html b/slides/caesar-encipher.html

index 4ef1d341125d73a444a6d2cbbc01a7e7dce08c7c..4afd78dad4f3367c5107a99d0f40ff8f1e0aa037 100644 (file)
--- a/slides/caesar-encipher.html
+++ b/slides/caesar-encipher.html
@@ -82,6 +82,34 @@ chr()
  
  ---
  
+# Using the tools
+
+Before doing anything, create a new branch in Git
+
+* This will keep your changes isolated
+
+Experiment in IPython (ephemeral, for us)
+
+Once you've got something working, copy the code into a `.py` file (permanent and reusable)
+
+```python
+from imp import reload
+
+import test
+reload(test)
+from test import *
+```
+
+Re-evaluate the second cell to reload the file into the IPython notebook
+
+When you've made progress, make a Git commit
+
+* Commit early and often!
+
+When you've finished, change back to the `master` branch and `merge` the development branch
+
+---
+
  # The [string module](http://docs.python.org/3.3/library/string.html) is your friend
  
  ```python
@@ -95,6 +123,7 @@ string.punctuation
  ```
  
  ---
+
  # DRY and YAGNI
  
  Is your code DRY?
@@ -131,7 +160,7 @@ if __name__ == "__main__":
  
  ---
  
-# Doing all the letters
+# Doing the whole message
  
  ## Test-first development
  
@@ -142,7 +171,7 @@ if __name__ == "__main__":
  
  ---
  
-# Doing all the letters
+# Doing the whole message
  
  ## Abysmal
  
@@ -152,9 +181,11 @@ for i in range(len(plaintext)):
      ciphertext += caesar_encipher_letter(plaintext[i], key)
  ```
  
+Try it in IPython
+
  ---
  
-# Doing all the letters
+# Doing the whole message
  
  ## Bad
  
@@ -168,7 +199,7 @@ for p in plaintext:
  
  ---
  
-# Doing all the letters
+# Doing the whole message
  
  ## Good (but unPythonic)
  
@@ -178,7 +209,7 @@ ciphertext = map(lambda p: caesar_encipher_letter(p, key), plaintext)
  
  ---
  
-# Doing all the letters
+# Doing the whole message
  
  ## Best
  
diff --git a/slides/keyword-break.html b/slides/keyword-break.html

index 08013f3f0f2d5c21a212040f7dbf06d7d88f86fc..ddf82c1cf338e3ec71c9cf923a7c4242cf8d4dc7 100644 (file)
--- a/slides/keyword-break.html
+++ b/slides/keyword-break.html
@@ -115,7 +115,11 @@ for each key:
  
  Repetition of code is a bad smell.
  
-Separate the 'try all keys, keep the best' logic from the 'score this one key' logic.
+Separate out
+
+* enumerate the keys
+* score a key
+* find the key with the best score
  
  ---
  
diff --git a/slides/typingmonkeylarge.jpg b/slides/typingmonkeylarge.jpg

new file mode 100644 (file)

index 0000000..8078671

Binary files /dev/null and b/slides/typingmonkeylarge.jpg differ
diff --git a/slides/word-segmentation.html b/slides/word-segmentation.html

index 35721ab3fea6fc8529a893cb9d98f06ca8eb7b8d..6215255ca3c4825937d0d1177e4a54b64bce6b23 100644 (file)
--- a/slides/word-segmentation.html
+++ b/slides/word-segmentation.html
@@ -129,7 +129,7 @@ Constructor (`__init__`) takes a data file, does all the adding up and taking lo
  ```python
  class Pdist(dict):
      def __init__(self, data=[]):
-        for key, count in data2:
+        for key, count in data:
              ...
          self.total = ...
      def __missing__(self, key):
@@ -177,9 +177,9 @@ To segment a string:
      return the split with highest score
  ```
  
-Indexing pulls out letters. `'sometext'[0]` = 's' ; `'keyword'[3]` = 'e' ; `'keyword'[-1]` = 't'
+Indexing pulls out letters. `'sometext'[0]` = 's' ; `'sometext'[3]` = 'e' ; `'sometext'[-1]` = 't'
  
-Slices pulls out substrings. `'keyword'[1:4]` = 'ome' ; `'keyword'[:3]` = 'som' ; `'keyword'[5:]` = 'ext'
+Slices pull out substrings. `'sometext'[1:4]` = 'ome' ; `'sometext'[:3]` = 'som' ; `'sometext'[5:]` = 'ext'
  
  `range()` will sweep across the string
author	Neil Smith <neil.git@njae.me.uk>
	Wed, 16 Jul 2014 15:28:07 +0000 (16:28 +0100)
committer	Neil Smith <neil.git@njae.me.uk>
	Wed, 16 Jul 2014 15:28:07 +0000 (16:28 +0100)
slides/caesar-break.html		patch \| blob \| history
slides/caesar-encipher.html		patch \| blob \| history
slides/keyword-break.html		patch \| blob \| history
slides/typingmonkeylarge.jpg	[new file with mode: 0644]	patch \| blob
slides/word-segmentation.html		patch \| blob \| history