X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=slides%2Fword-segmentation.html;h=9c3b3092babc6ba5770692c391c193d2d9e39446;hb=7203ac94911556e2b4bf4caab6f5285445faed3b;hp=35721ab3fea6fc8529a893cb9d98f06ca8eb7b8d;hpb=f0263833868deb709ae23d132a149a3f7d7ab204;p=cipher-training.git diff --git a/slides/word-segmentation.html b/slides/word-segmentation.html index 35721ab..9c3b309 100644 --- a/slides/word-segmentation.html +++ b/slides/word-segmentation.html @@ -129,7 +129,7 @@ Constructor (`__init__`) takes a data file, does all the adding up and taking lo ```python class Pdist(dict): def __init__(self, data=[]): - for key, count in data2: + for key, count in data: ... self.total = ... def __missing__(self, key): @@ -149,9 +149,9 @@ def Pwords(words): ```python >>> 'hello' in Pw.keys() >>> Pwords(['hello']) True -4.25147684171819 ->>> 'inigo' in Pw.keys() >>> Pwords(['hello', 'my']) +>>> 'inigo' in Pw >>> Pwords(['hello', 'my']) True -6.995724679281423 ->>> 'blj' in Pw.keys() >>> Pwords(['hello', 'my', 'name']) +>>> 'blj' in Pw >>> Pwords(['hello', 'my', 'name']) False -10.098177451501074 >>> Pw['hello'] >>> Pwords(['hello', 'my', 'name', 'is']) -4.25147684171819 -12.195018236240843 @@ -177,9 +177,9 @@ To segment a string: return the split with highest score ``` -Indexing pulls out letters. `'sometext'[0]` = 's' ; `'keyword'[3]` = 'e' ; `'keyword'[-1]` = 't' +Indexing pulls out letters. `'sometext'[0]` = 's' ; `'sometext'[3]` = 'e' ; `'sometext'[-1]` = 't' -Slices pulls out substrings. `'keyword'[1:4]` = 'ome' ; `'keyword'[:3]` = 'som' ; `'keyword'[5:]` = 'ext' +Slicing pulls out substrings. `'sometext'[1:4]` = 'ome' ; `'sometext'[:3]` = 'som' ; `'sometext'[5:]` = 'ext' `range()` will sweep across the string