X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=slides%2Fword-segmentation.html;h=9c3b3092babc6ba5770692c391c193d2d9e39446;hb=7203ac94911556e2b4bf4caab6f5285445faed3b;hp=16fcb0ad8c36c77041e790799046dd580e5128fa;hpb=5c3365f66a322e0295c890429b91578bdf11021b;p=cipher-training.git diff --git a/slides/word-segmentation.html b/slides/word-segmentation.html index 16fcb0a..9c3b309 100644 --- a/slides/word-segmentation.html +++ b/slides/word-segmentation.html @@ -36,6 +36,11 @@ color: #ff6666; text-shadow: 0 0 20px #333; padding: 2px 5px; + } + .indexlink { + position: absolute; + bottom: 1em; + left: 1em; } .float-right { float: right; @@ -54,6 +59,12 @@ --- +layout: true + +.indexlink[[Index](index.html)] + +--- + # The problem Ciphertext is re-split into groups to hide word bounaries. @@ -118,7 +129,7 @@ Constructor (`__init__`) takes a data file, does all the adding up and taking lo ```python class Pdist(dict): def __init__(self, data=[]): - for key, count in data2: + for key, count in data: ... self.total = ... def __missing__(self, key): @@ -138,9 +149,9 @@ def Pwords(words): ```python >>> 'hello' in Pw.keys() >>> Pwords(['hello']) True -4.25147684171819 ->>> 'inigo' in Pw.keys() >>> Pwords(['hello', 'my']) +>>> 'inigo' in Pw >>> Pwords(['hello', 'my']) True -6.995724679281423 ->>> 'blj' in Pw.keys() >>> Pwords(['hello', 'my', 'name']) +>>> 'blj' in Pw >>> Pwords(['hello', 'my', 'name']) False -10.098177451501074 >>> Pw['hello'] >>> Pwords(['hello', 'my', 'name', 'is']) -4.25147684171819 -12.195018236240843 @@ -166,9 +177,9 @@ To segment a string: return the split with highest score ``` -Indexing pulls out letters. `'sometext'[0]` = 's' ; `'keyword'[3]` = 'e' ; `'keyword'[-1]` = 't' +Indexing pulls out letters. `'sometext'[0]` = 's' ; `'sometext'[3]` = 'e' ; `'sometext'[-1]` = 't' -Slices pulls out substrings. `'keyword'[1:4]` = 'ome' ; `'keyword'[:3]` = 'som' ; `'keyword'[5:]` = 'ext' +Slices pull out substrings. 
`'sometext'[1:4]` = 'ome' ; `'sometext'[:3]` = 'som' ; `'sometext'[5:]` = 'ext' `range()` will sweep across the string