Works with letters, added trimmed Lovecraft
[name-generation.git] / markov / markov-letters.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {
7 "collapsed": true
8 },
9 "outputs": [],
10 "source": [
11 "import re\n",
12 "import string\n",
13 "import collections\n",
14 "import unicodedata\n",
15 "import random"
16 ]
17 },
18 {
19 "cell_type": "code",
20 "execution_count": 2,
21 "metadata": {
22 "collapsed": true
23 },
24 "outputs": [],
25 "source": [
26 "rinden_words_text = \"\"\"Kotun\n",
27 " Tachor\n",
28 " Ilwen\n",
29 " Ennan\n",
30 "Tulor\n",
31 " Palil\n",
32 " Palarn\n",
33 " Firon\n",
34 "Kagarn\n",
35 " Eran\n",
36 " Lamil\n",
37 " Ilan\n",
38 "Chakoch\n",
39 " Yaril\n",
40 " Farmol\n",
41 " Ardon\n",
42 "Krolan\n",
43 " Lamon\n",
44 " Indan\n",
45 " Firil\n",
46 "\"\"\"\n",
47 "\n",
48 "jahga_words_text = \"\"\"Tsumun\n",
49 " Lansu\n",
50 " Jakura\n",
51 " Shunaja\n",
52 "Dankin\n",
53 " Aru-ki\n",
54 " Areni\n",
55 " Takari\n",
56 "Ki-me\n",
57 " Esse-to\n",
58 " Bedare\n",
59 " Shuyare\n",
60 "Romosu\n",
61 " Taiende\n",
62 " Sadiru\n",
63 " Natan-jo\n",
64 "Sa-joru\n",
65 " Sojaru\n",
66 " Na-joka\n",
67 " Heniru\"\"\"\n"
68 ]
69 },
70 {
71 "cell_type": "code",
72 "execution_count": 3,
73 "metadata": {
74 "collapsed": true
75 },
76 "outputs": [],
77 "source": [
78 "cat = ''.join"
79 ]
80 },
81 {
82 "cell_type": "code",
83 "execution_count": 4,
84 "metadata": {
85 "collapsed": false
86 },
87 "outputs": [
88 {
89 "data": {
90 "text/plain": [
91 "['kotun',\n",
92 " 'tachor',\n",
93 " 'ilwen',\n",
94 " 'ennan',\n",
95 " 'tulor',\n",
96 " 'palil',\n",
97 " 'palarn',\n",
98 " 'firon',\n",
99 " 'kagarn',\n",
100 " 'eran',\n",
101 " 'lamil',\n",
102 " 'ilan',\n",
103 " 'chakoch',\n",
104 " 'yaril',\n",
105 " 'farmol',\n",
106 " 'ardon',\n",
107 " 'krolan',\n",
108 " 'lamon',\n",
109 " 'indan',\n",
110 " 'firil']"
111 ]
112 },
113 "execution_count": 4,
114 "metadata": {},
115 "output_type": "execute_result"
116 }
117 ],
118 "source": [
119 "rinden_words = rinden_words_text.lower().split()\n",
120 "rinden_words"
121 ]
122 },
123 {
124 "cell_type": "code",
125 "execution_count": 5,
126 "metadata": {
127 "collapsed": false
128 },
129 "outputs": [
130 {
131 "data": {
132 "text/plain": [
133 "Counter({'a': 17,\n",
134 " 'c': 3,\n",
135 " 'd': 2,\n",
136 " 'e': 3,\n",
137 " 'f': 3,\n",
138 " 'g': 1,\n",
139 " 'h': 3,\n",
140 " 'i': 9,\n",
141 " 'k': 4,\n",
142 " 'l': 13,\n",
143 " 'm': 3,\n",
144 " 'n': 15,\n",
145 " 'o': 9,\n",
146 " 'p': 2,\n",
147 " 'r': 11,\n",
148 " 't': 3,\n",
149 " 'u': 2,\n",
150 " 'w': 1,\n",
151 " 'y': 1})"
152 ]
153 },
154 "execution_count": 5,
155 "metadata": {},
156 "output_type": "execute_result"
157 }
158 ],
159 "source": [
160 "collections.Counter(l for l in rinden_words_text.lower() if l in string.ascii_letters)"
161 ]
162 },
163 {
164 "cell_type": "code",
165 "execution_count": 6,
166 "metadata": {
167 "collapsed": false
168 },
169 "outputs": [],
170 "source": [
def find_counts_of_item(item, counts, tuple_size):
    """Tally every transition found in a single sequence.

    A window of ``tuple_size`` consecutive elements is slid over ``item``.
    Each window that has an element after it records that element as a
    follower. The last ``tuple_size`` elements of ``item`` are recorded with
    ``None`` as their follower, which marks the end of the sequence.

    Args:
        item: A sliceable, indexable sequence (for example a string).
        counts: Mapping from window tuple to ``collections.Counter``. Missing
            keys must be created on access (for example a
            ``collections.defaultdict(collections.Counter)``). It is updated
            in place.
        tuple_size: Number of elements in each window.

    Returns:
        The ``counts`` object that was passed in.
    """
    last_start = len(item) - tuple_size
    for start in range(last_start):
        stop = start + tuple_size
        window = tuple(item[start:stop])
        counts[window][item[stop]] += 1
    # The trailing window is followed by nothing: record the end marker.
    counts[tuple(item[-tuple_size:])][None] += 1
    return counts
176 ]
177 },
178 {
179 "cell_type": "code",
180 "execution_count": 7,
181 "metadata": {
182 "collapsed": false
183 },
184 "outputs": [],
185 "source": [
def find_counts(items, tuple_size=2):
    """Build the start and transition tallies for a collection of sequences.

    Args:
        items: Iterable of sliceable sequences (for example words).
        tuple_size: Number of elements that make up one state. Defaults to 2.

    Returns:
        A ``(starts, counts)`` pair. ``starts`` is a ``collections.Counter``
        keyed by the first ``tuple_size`` elements of each sequence (as a
        tuple). ``counts`` is a ``defaultdict`` of ``Counter`` filled in by
        ``find_counts_of_item`` for every sequence.
    """
    transitions = collections.defaultdict(collections.Counter)
    openings = collections.Counter()
    for sequence in items:
        transitions = find_counts_of_item(sequence, transitions, tuple_size)
        openings.update([tuple(sequence[:tuple_size])])
    return openings, transitions
193 ]
194 },
195 {
196 "cell_type": "code",
197 "execution_count": 8,
198 "metadata": {
199 "collapsed": false,
200 "scrolled": true
201 },
202 "outputs": [
203 {
204 "data": {
205 "text/plain": [
206 "(Counter({('a', 'r'): 1,\n",
207 " ('c', 'h'): 1,\n",
208 " ('e', 'n'): 1,\n",
209 " ('e', 'r'): 1,\n",
210 " ('f', 'a'): 1,\n",
211 " ('f', 'i'): 2,\n",
212 " ('i', 'l'): 2,\n",
213 " ('i', 'n'): 1,\n",
214 " ('k', 'a'): 1,\n",
215 " ('k', 'o'): 1,\n",
216 " ('k', 'r'): 1,\n",
217 " ('l', 'a'): 2,\n",
218 " ('p', 'a'): 2,\n",
219 " ('t', 'a'): 1,\n",
220 " ('t', 'u'): 1,\n",
221 " ('y', 'a'): 1}),\n",
222 " defaultdict(collections.Counter,\n",
223 " {('a', 'c'): Counter({'h': 1}),\n",
224 " ('a', 'g'): Counter({'a': 1}),\n",
225 " ('a', 'k'): Counter({'o': 1}),\n",
226 " ('a', 'l'): Counter({'a': 1, 'i': 1}),\n",
227 " ('a', 'm'): Counter({'i': 1, 'o': 1}),\n",
228 " ('a', 'n'): Counter({None: 5}),\n",
229 " ('a', 'r'): Counter({'d': 1, 'i': 1, 'm': 1, 'n': 2}),\n",
230 " ('c', 'h'): Counter({None: 1, 'a': 1, 'o': 1}),\n",
231 " ('d', 'a'): Counter({'n': 1}),\n",
232 " ('d', 'o'): Counter({'n': 1}),\n",
233 " ('e', 'n'): Counter({None: 1, 'n': 1}),\n",
234 " ('e', 'r'): Counter({'a': 1}),\n",
235 " ('f', 'a'): Counter({'r': 1}),\n",
236 " ('f', 'i'): Counter({'r': 2}),\n",
237 " ('g', 'a'): Counter({'r': 1}),\n",
238 " ('h', 'a'): Counter({'k': 1}),\n",
239 " ('h', 'o'): Counter({'r': 1}),\n",
240 " ('i', 'l'): Counter({'w': 1, None: 4, 'a': 1}),\n",
241 " ('i', 'n'): Counter({'d': 1}),\n",
242 " ('i', 'r'): Counter({'i': 1, 'o': 1}),\n",
243 " ('k', 'a'): Counter({'g': 1}),\n",
244 " ('k', 'o'): Counter({'c': 1, 't': 1}),\n",
245 " ('k', 'r'): Counter({'o': 1}),\n",
246 " ('l', 'a'): Counter({'m': 2, 'n': 2, 'r': 1}),\n",
247 " ('l', 'i'): Counter({'l': 1}),\n",
248 " ('l', 'o'): Counter({'r': 1}),\n",
249 " ('l', 'w'): Counter({'e': 1}),\n",
250 " ('m', 'i'): Counter({'l': 1}),\n",
251 " ('m', 'o'): Counter({'l': 1, 'n': 1}),\n",
252 " ('n', 'a'): Counter({'n': 1}),\n",
253 " ('n', 'd'): Counter({'a': 1}),\n",
254 " ('n', 'n'): Counter({'a': 1}),\n",
255 " ('o', 'c'): Counter({'h': 1}),\n",
256 " ('o', 'l'): Counter({None: 1, 'a': 1}),\n",
257 " ('o', 'n'): Counter({None: 3}),\n",
258 " ('o', 'r'): Counter({None: 2}),\n",
259 " ('o', 't'): Counter({'u': 1}),\n",
260 " ('p', 'a'): Counter({'l': 2}),\n",
261 " ('r', 'a'): Counter({'n': 1}),\n",
262 " ('r', 'd'): Counter({'o': 1}),\n",
263 " ('r', 'i'): Counter({'l': 2}),\n",
264 " ('r', 'm'): Counter({'o': 1}),\n",
265 " ('r', 'n'): Counter({None: 2}),\n",
266 " ('r', 'o'): Counter({'l': 1, 'n': 1}),\n",
267 " ('t', 'a'): Counter({'c': 1}),\n",
268 " ('t', 'u'): Counter({'l': 1, 'n': 1}),\n",
269 " ('u', 'l'): Counter({'o': 1}),\n",
270 " ('u', 'n'): Counter({None: 1}),\n",
271 " ('w', 'e'): Counter({'n': 1}),\n",
272 " ('y', 'a'): Counter({'r': 1})}))"
273 ]
274 },
275 "execution_count": 8,
276 "metadata": {},
277 "output_type": "execute_result"
278 }
279 ],
280 "source": [
281 "rinden_starts, rinden_counts = find_counts(rinden_words)\n",
282 "rinden_starts, rinden_counts"
283 ]
284 },
285 {
286 "cell_type": "code",
287 "execution_count": 9,
288 "metadata": {
289 "collapsed": false,
290 "scrolled": true
291 },
292 "outputs": [
293 {
294 "data": {
295 "text/plain": [
296 "(Counter({('a',): 1,\n",
297 " ('c',): 1,\n",
298 " ('e',): 2,\n",
299 " ('f',): 3,\n",
300 " ('i',): 3,\n",
301 " ('k',): 3,\n",
302 " ('l',): 2,\n",
303 " ('p',): 2,\n",
304 " ('t',): 2,\n",
305 " ('y',): 1}),\n",
306 " defaultdict(collections.Counter,\n",
307 " {('a',): Counter({'c': 1,\n",
308 " 'g': 1,\n",
309 " 'k': 1,\n",
310 " 'l': 2,\n",
311 " 'm': 2,\n",
312 " 'n': 5,\n",
313 " 'r': 5}),\n",
314 " ('c',): Counter({'h': 3}),\n",
315 " ('d',): Counter({'a': 1, 'o': 1}),\n",
316 " ('e',): Counter({'n': 2, 'r': 1}),\n",
317 " ('f',): Counter({'a': 1, 'i': 2}),\n",
318 " ('g',): Counter({'a': 1}),\n",
319 " ('h',): Counter({None: 1, 'a': 1, 'o': 1}),\n",
320 " ('i',): Counter({'l': 6, 'n': 1, 'r': 2}),\n",
321 " ('k',): Counter({'a': 1, 'o': 2, 'r': 1}),\n",
322 " ('l',): Counter({'w': 1, None: 5, 'o': 1, 'a': 5, 'i': 1}),\n",
323 " ('m',): Counter({'i': 1, 'o': 2}),\n",
324 " ('n',): Counter({'d': 1, 'n': 1, None: 12, 'a': 1}),\n",
325 " ('o',): Counter({'c': 1, 'l': 2, 'n': 3, 'r': 2, 't': 1}),\n",
326 " ('p',): Counter({'a': 2}),\n",
327 " ('r',): Counter({'d': 1,\n",
328 " 'i': 2,\n",
329 " 'n': 2,\n",
330 " None: 2,\n",
331 " 'o': 2,\n",
332 " 'm': 1,\n",
333 " 'a': 1}),\n",
334 " ('t',): Counter({'a': 1, 'u': 2}),\n",
335 " ('u',): Counter({'l': 1, 'n': 1}),\n",
336 " ('w',): Counter({'e': 1}),\n",
337 " ('y',): Counter({'a': 1})}))"
338 ]
339 },
340 "execution_count": 9,
341 "metadata": {},
342 "output_type": "execute_result"
343 }
344 ],
345 "source": [
346 "rinden_starts1, rinden_counts1 = find_counts(rinden_words, 1)\n",
347 "rinden_starts1, rinden_counts1"
348 ]
349 },
350 {
351 "cell_type": "code",
352 "execution_count": 10,
353 "metadata": {
354 "collapsed": false,
355 "scrolled": true
356 },
357 "outputs": [
358 {
359 "data": {
360 "text/plain": [
361 "(Counter({('a', 'r'): 2,\n",
362 " ('b', 'e'): 1,\n",
363 " ('d', 'a'): 1,\n",
364 " ('e', 's'): 1,\n",
365 " ('h', 'e'): 1,\n",
366 " ('j', 'a'): 1,\n",
367 " ('k', 'i'): 1,\n",
368 " ('l', 'a'): 1,\n",
369 " ('n', 'a'): 2,\n",
370 " ('r', 'o'): 1,\n",
371 " ('s', 'a'): 2,\n",
372 " ('s', 'h'): 2,\n",
373 " ('s', 'o'): 1,\n",
374 " ('t', 'a'): 2,\n",
375 " ('t', 's'): 1}),\n",
376 " defaultdict(collections.Counter,\n",
377 " {('-', 'j'): Counter({'o': 3}),\n",
378 " ('-', 'k'): Counter({'i': 1}),\n",
379 " ('-', 'm'): Counter({'e': 1}),\n",
380 " ('-', 't'): Counter({'o': 1}),\n",
381 " ('a', '-'): Counter({'j': 2}),\n",
382 " ('a', 'd'): Counter({'i': 1}),\n",
383 " ('a', 'i'): Counter({'e': 1}),\n",
384 " ('a', 'j'): Counter({'a': 1}),\n",
385 " ('a', 'k'): Counter({'a': 1, 'u': 1}),\n",
386 " ('a', 'n'): Counter({'-': 1, 'k': 1, 's': 1}),\n",
387 " ('a', 'r'): Counter({'e': 3, 'i': 1, 'u': 2}),\n",
388 " ('a', 't'): Counter({'a': 1}),\n",
389 " ('b', 'e'): Counter({'d': 1}),\n",
390 " ('d', 'a'): Counter({'n': 1, 'r': 1}),\n",
391 " ('d', 'e'): Counter({None: 1}),\n",
392 " ('d', 'i'): Counter({'r': 1}),\n",
393 " ('e', '-'): Counter({'t': 1}),\n",
394 " ('e', 'd'): Counter({'a': 1}),\n",
395 " ('e', 'n'): Counter({'d': 1, 'i': 2}),\n",
396 " ('e', 's'): Counter({'s': 1}),\n",
397 " ('h', 'e'): Counter({'n': 1}),\n",
398 " ('h', 'u'): Counter({'n': 1, 'y': 1}),\n",
399 " ('i', '-'): Counter({'m': 1}),\n",
400 " ('i', 'e'): Counter({'n': 1}),\n",
401 " ('i', 'n'): Counter({None: 1}),\n",
402 " ('i', 'r'): Counter({'u': 2}),\n",
403 " ('j', 'a'): Counter({None: 1, 'r': 1, 'k': 1}),\n",
404 " ('j', 'o'): Counter({None: 1, 'r': 1, 'k': 1}),\n",
405 " ('k', 'a'): Counter({None: 1, 'r': 1}),\n",
406 " ('k', 'i'): Counter({None: 1, '-': 1, 'n': 1}),\n",
407 " ('k', 'u'): Counter({'r': 1}),\n",
408 " ('l', 'a'): Counter({'n': 1}),\n",
409 " ('m', 'e'): Counter({None: 1}),\n",
410 " ('m', 'o'): Counter({'s': 1}),\n",
411 " ('m', 'u'): Counter({'n': 1}),\n",
412 " ('n', '-'): Counter({'j': 1}),\n",
413 " ('n', 'a'): Counter({'-': 1, 'j': 1, 't': 1}),\n",
414 " ('n', 'd'): Counter({'e': 1}),\n",
415 " ('n', 'i'): Counter({None: 1, 'r': 1}),\n",
416 " ('n', 'k'): Counter({'i': 1}),\n",
417 " ('n', 's'): Counter({'u': 1}),\n",
418 " ('o', 'j'): Counter({'a': 1}),\n",
419 " ('o', 'k'): Counter({'a': 1}),\n",
420 " ('o', 'm'): Counter({'o': 1}),\n",
421 " ('o', 'r'): Counter({'u': 1}),\n",
422 " ('o', 's'): Counter({'u': 1}),\n",
423 " ('r', 'a'): Counter({None: 1}),\n",
424 " ('r', 'e'): Counter({None: 2, 'n': 1}),\n",
425 " ('r', 'i'): Counter({None: 1}),\n",
426 " ('r', 'o'): Counter({'m': 1}),\n",
427 " ('r', 'u'): Counter({None: 4, '-': 1}),\n",
428 " ('s', 'a'): Counter({'-': 1, 'd': 1}),\n",
429 " ('s', 'e'): Counter({'-': 1}),\n",
430 " ('s', 'h'): Counter({'u': 2}),\n",
431 " ('s', 'o'): Counter({'j': 1}),\n",
432 " ('s', 's'): Counter({'e': 1}),\n",
433 " ('s', 'u'): Counter({None: 2, 'm': 1}),\n",
434 " ('t', 'a'): Counter({'i': 1, 'k': 1, 'n': 1}),\n",
435 " ('t', 'o'): Counter({None: 1}),\n",
436 " ('t', 's'): Counter({'u': 1}),\n",
437 " ('u', '-'): Counter({'k': 1}),\n",
438 " ('u', 'm'): Counter({'u': 1}),\n",
439 " ('u', 'n'): Counter({None: 1, 'a': 1}),\n",
440 " ('u', 'r'): Counter({'a': 1}),\n",
441 " ('u', 'y'): Counter({'a': 1}),\n",
442 " ('y', 'a'): Counter({'r': 1})}))"
443 ]
444 },
445 "execution_count": 10,
446 "metadata": {},
447 "output_type": "execute_result"
448 }
449 ],
450 "source": [
451 "jahga_words = jahga_words_text.lower().split()\n",
452 "jahga_starts, jahga_counts = find_counts(jahga_words)\n",
453 "jahga_starts, jahga_counts"
454 ]
455 },
456 {
457 "cell_type": "code",
458 "execution_count": 11,
459 "metadata": {
460 "collapsed": true
461 },
462 "outputs": [],
463 "source": [
def markov_item(starts, counts, max_len=None):
    """Generate one random chain from start and transition tallies.

    A start state is drawn from ``starts`` in proportion to its count. The
    chain is then extended one element at a time by drawing a follower of the
    current state from ``counts``, again in proportion to its count. Drawing
    ``None`` ends the chain.

    Args:
        starts: ``collections.Counter`` of start-state tuples. Sampling uses
            ``Counter.elements()``, so the counts must be non-negative
            integers.
        counts: Mapping from state tuple to a ``Counter`` of followers, with
            ``None`` as the end marker. It is indexed with ``counts[state]``,
            so a ``defaultdict`` will create an entry for any unseen state.
        max_len: Optional cap on the number of elements appended after the
            start state. When truthy, a chain only counts as finished if it
            ended after fewer than ``max_len`` appended elements; otherwise it
            is thrown away and a new one is generated. The function keeps
            retrying until one qualifies. ``None`` or ``0`` means no cap.

    Returns:
        A list holding the start state's elements followed by the generated
        elements.

    Raises:
        IndexError: If ``starts`` or the follower tally of a visited state has
            nothing to draw from (``random.choice`` on an empty list).
    """
    while True:
        current = random.choice(list(starts.elements()))
        chain = list(current)
        steps = 0
        next_item = random.choice(list(counts[current].elements()))
        # Only None marks the end of a chain. Testing truthiness here would
        # also stop on legitimate falsy elements such as 0 or ''.
        while next_item is not None and (not max_len or steps < max_len):
            chain.append(next_item)
            # Slide the state window forward by one element.
            current = current[1:] + (next_item,)
            steps += 1
            next_item = random.choice(list(counts[current].elements()))
        # Without a cap every chain is accepted. With a cap, accept only
        # chains that stopped before hitting it.
        if not max_len or steps < max_len:
            return chain
482 ]
483 },
484 {
485 "cell_type": "code",
486 "execution_count": 12,
487 "metadata": {
488 "collapsed": false
489 },
490 "outputs": [
491 {
492 "data": {
493 "text/plain": [
494 "'ilwennan'"
495 ]
496 },
497 "execution_count": 12,
498 "metadata": {},
499 "output_type": "execute_result"
500 }
501 ],
502 "source": [
503 "cat(markov_item(rinden_starts, rinden_counts, 50))"
504 ]
505 },
506 {
507 "cell_type": "code",
508 "execution_count": 13,
509 "metadata": {
510 "collapsed": false
511 },
512 "outputs": [
513 {
514 "data": {
515 "text/plain": [
516 "'pamotun'"
517 ]
518 },
519 "execution_count": 13,
520 "metadata": {},
521 "output_type": "execute_result"
522 }
523 ],
524 "source": [
525 "cat(markov_item(rinden_starts1, rinden_counts1, 50))"
526 ]
527 },
528 {
529 "cell_type": "code",
530 "execution_count": 14,
531 "metadata": {
532 "collapsed": false
533 },
534 "outputs": [
535 {
536 "data": {
537 "text/plain": [
538 "'natansu ki-me daru aru tankin sadiru arende ki tsu taiende daru soja esse-to arende aru esse-to tsu are daru ari sa-joka sa-jo kin heni takura tsumun dari heniru tsumun ari are ki-me kin hende romosu tanki ki areniru heni ki-me shun arende taka aru heni lan-jo takura na-joru sa-joru aru'"
539 ]
540 },
541 "execution_count": 14,
542 "metadata": {},
543 "output_type": "execute_result"
544 }
545 ],
546 "source": [
547 "\" \".join(cat(markov_item(jahga_starts, jahga_counts, 6)) for _ in range(50))"
548 ]
549 },
550 {
551 "cell_type": "code",
552 "execution_count": 15,
553 "metadata": {
554 "collapsed": false
555 },
556 "outputs": [
557 {
558 "data": {
559 "text/plain": [
560 "'sojaru kin bedaru taieniru sojakarende bedanki-me tsu dansu shuna-joru soja takura bedari jakaru heni sadiru dareni ja esse-to najakare esse-to esse-to lansu natanki-me esse-to aru lansu naja takare taiende areniru sojakare taieni romosu najarende tansu jakare sojaru lansu aru heni esse-to soja nataiende najakura taka lan-jokare ari ja nataiende dan-joru-ki'"
561 ]
562 },
563 "execution_count": 15,
564 "metadata": {},
565 "output_type": "execute_result"
566 }
567 ],
568 "source": [
569 "\" \".join(cat(markov_item(jahga_starts, jahga_counts, 12)) for _ in range(50))"
570 ]
571 },
572 {
573 "cell_type": "code",
574 "execution_count": 16,
575 "metadata": {
576 "collapsed": false
577 },
578 "outputs": [
579 {
580 "data": {
581 "text/plain": [
582 "'hende ja shun romosu sadiru-ki-me shuyare tan-joka tsumunaja ari bedansu are takura tan-jokareniru-kin aru nataieni sa-joka tsu na-joru romosumun areniru takura shunajakura soja heniru ja sadiru-kin shunajareniru romosu nataka areni esse-to are dansu tankin sadiru takura takura are na-joka ari dare jare heni sojaka esse-to dan-jo tsumun esse-to ja hende'"
583 ]
584 },
585 "execution_count": 16,
586 "metadata": {},
587 "output_type": "execute_result"
588 }
589 ],
590 "source": [
591 "\" \".join(cat(markov_item(jahga_starts, jahga_counts)) for _ in range(50))"
592 ]
593 },
594 {
595 "cell_type": "code",
596 "execution_count": 17,
597 "metadata": {
598 "collapsed": false,
599 "scrolled": true
600 },
601 "outputs": [
602 {
603 "data": {
604 "text/plain": [
605 "['chor',\n",
606 " 'firilwen',\n",
607 " 'il',\n",
608 " 'tachor',\n",
609 " 'kochor',\n",
610 " 'palan',\n",
611 " 'chor',\n",
612 " 'kochor',\n",
613 " 'chakochakoch',\n",
614 " 'indan',\n",
615 " 'yaril',\n",
616 " 'ardon',\n",
617 " 'ennan',\n",
618 " 'ardon',\n",
619 " 'lan',\n",
620 " 'kotun',\n",
621 " 'tun',\n",
622 " 'indan',\n",
623 " 'ardon',\n",
624 " 'kagardon',\n",
625 " 'palan',\n",
626 " 'tulor',\n",
627 " 'yarn',\n",
628 " 'firolarmol',\n",
629 " 'tun',\n",
630 " 'faril',\n",
631 " 'aril',\n",
632 " 'armolan',\n",
633 " 'palan',\n",
634 " 'tulor',\n",
635 " 'il',\n",
636 " 'ch',\n",
637 " 'tach',\n",
638 " 'palil',\n",
639 " 'kagarn',\n",
640 " 'il',\n",
641 " 'il',\n",
642 " 'palil',\n",
643 " 'kagarn',\n",
644 " 'ennan',\n",
645 " 'ch',\n",
646 " 'il',\n",
647 " 'firol',\n",
648 " 'farmol',\n",
649 " 'kotulor',\n",
650 " 'lan',\n",
651 " 'tach',\n",
652 " 'kotun',\n",
653 " 'kron',\n",
654 " 'indan']"
655 ]
656 },
657 "execution_count": 17,
658 "metadata": {},
659 "output_type": "execute_result"
660 }
661 ],
662 "source": [
663 "[cat(markov_item(rinden_starts, rinden_counts, 50)) for _ in range(50)]"
664 ]
665 },
666 {
667 "cell_type": "code",
668 "execution_count": 18,
669 "metadata": {
670 "collapsed": false
671 },
672 "outputs": [
673 {
674 "data": {
675 "text/html": [
676 "<p>palil kochor tachakochor kagardon ardon kagardon chor eran lan kochor palan palan lan tun larmon tun firon kochor indan firilan il indan tun kotulor eran ch eran palil firil lan tun firil kochakoch kotun ennan firil eran kagarn palan aril il lan kagarn indan il ilamil firil farn ardon farn arn kotun armolan firol indan firil kotulor il tach ch yardon lan koch kotun indan armol fardon indan tachor fardon ennan yardon lamon tach firilan palardon kotulor firilamon chor palil kagarmol tulor kagarn il arn kagarmon firil palamil il il il farn ilan eran eran arn palil palil kron il</p>"
677 ],
678 "text/plain": [
679 "<IPython.core.display.HTML object>"
680 ]
681 },
682 "metadata": {},
683 "output_type": "display_data"
684 }
685 ],
686 "source": [
687 "from IPython.core.display import display, HTML\n",
688 "display(HTML('<p>' + \n",
689 " \" \".join(cat(markov_item(rinden_starts, rinden_counts, 15)) for _ in range(100)) + \n",
690 " '</p>'))"
691 ]
692 },
693 {
694 "cell_type": "code",
695 "execution_count": 19,
696 "metadata": {
697 "collapsed": false
698 },
699 "outputs": [
700 {
701 "data": {
702 "text/html": [
703 "<p>najakura lan-joru najakaru sadiru-ki-me tsumunaja tsu ki-me jakura takura shuyare shuna-jo areni shun lansu esse-to sadiru bedare dan-jo ja ari romosu heniru tsu sadiru heniru bedanki-me sa-jo aru-kin takura natan-joru-kin sadiru ari hende sadiru-ki-me tanki ja tsu jaru shuyare aru bedankin tsumunatan-joru romosumun na-joka romosu ja hende bedansu shuyaru sa-jo ki tsumun sa-jo ki heni sojaru jaru-kin takura lanki-me sojari nataka sa-joka areni are heni naja soja sadiru romosu hende sojaka taiende shun ki-me tsu heni aru shunatan-jokaru ari romosu sadiru darende na-joru shuyaru bedari esse-to aru shuyaru-ki tansu esse-to na-jokari esse-to are kin aru heni dan-joru sojakura nataiende natan-jo</p>"
704 ],
705 "text/plain": [
706 "<IPython.core.display.HTML object>"
707 ]
708 },
709 "metadata": {},
710 "output_type": "display_data"
711 }
712 ],
713 "source": [
714 "from IPython.core.display import display, HTML\n",
715 "display(HTML('<p>' + \n",
716 " \" \".join(cat(markov_item(jahga_starts, jahga_counts, 15)) for _ in range(100)) + \n",
717 " '</p>'))"
718 ]
719 },
720 {
721 "cell_type": "code",
722 "execution_count": 20,
723 "metadata": {
724 "collapsed": false
725 },
726 "outputs": [
727 {
728 "data": {
729 "text/html": [
730 "<p>tun lon paroch tan in enn ilwenar fin irdandor il ern fil tulann famil tul farilotunndolan endan lamon filanagardon chor pamol in l karnn lar kol chon par fililamoch fil ch lilalil far pan lilakaril ilakordolwernn fan kan ilwerdotularol enn erororilaror paril tul ilamon filarn tul pandan ilon l tulan an korirol kranachar kan er il pachol pann famolon chamor pal fil en ilagann kolwern kamil tulan konnan il kor fin amoramil irnn er arilon ilwerormilarn tardach fililwer pakotun ern paril kon krn tular far ilardan l al il yachoril fach kamotarmochan kolanndon korn fil par tunalirdal anndon pamililwen l</p>"
731 ],
732 "text/plain": [
733 "<IPython.core.display.HTML object>"
734 ]
735 },
736 "metadata": {},
737 "output_type": "display_data"
738 }
739 ],
740 "source": [
741 "from IPython.core.display import display, HTML\n",
742 "display(HTML('<p>' + \n",
743 " \" \".join(cat(markov_item(rinden_starts1, rinden_counts1, 15)) for _ in range(100)) + \n",
744 " '</p>'))"
745 ]
746 },
747 {
748 "cell_type": "code",
749 "execution_count": 21,
750 "metadata": {
751 "collapsed": false,
752 "scrolled": true
753 },
754 "outputs": [
755 {
756 "data": {
757 "text/plain": [
758 "(defaultdict(collections.Counter,\n",
759 " {('a', 'c'): Counter({'h': 1}),\n",
760 " ('a', 'g'): Counter({'a': 1}),\n",
761 " ('a', 'k'): Counter({'o': 1}),\n",
762 " ('a', 'l'): Counter({'a': 1, 'i': 1}),\n",
763 " ('a', 'm'): Counter({'i': 1, 'o': 1}),\n",
764 " ('a', 'n'): Counter({None: 5}),\n",
765 " ('a', 'r'): Counter({'d': 1, 'i': 1, 'm': 1, 'n': 2}),\n",
766 " ('c', 'h'): Counter({None: 1, 'a': 1, 'o': 1}),\n",
767 " ('d', 'a'): Counter({'n': 1}),\n",
768 " ('d', 'o'): Counter({'n': 1}),\n",
769 " ('e', 'n'): Counter({None: 1, 'n': 1}),\n",
770 " ('e', 'r'): Counter({'a': 1}),\n",
771 " ('f', 'a'): Counter({'r': 1}),\n",
772 " ('f', 'i'): Counter({'r': 2}),\n",
773 " ('g', 'a'): Counter({'r': 1}),\n",
774 " ('h', 'a'): Counter({'k': 1}),\n",
775 " ('h', 'o'): Counter({'r': 1}),\n",
776 " ('i', 'l'): Counter({'w': 1, None: 4, 'a': 1}),\n",
777 " ('i', 'n'): Counter({'d': 1}),\n",
778 " ('i', 'r'): Counter({'i': 1, 'o': 1}),\n",
779 " ('k', 'a'): Counter({'g': 1}),\n",
780 " ('k', 'o'): Counter({'c': 1, 't': 1}),\n",
781 " ('k', 'r'): Counter({'o': 1}),\n",
782 " ('l', 'a'): Counter({'m': 2, 'n': 2, 'r': 1}),\n",
783 " ('l', 'i'): Counter({'l': 1}),\n",
784 " ('l', 'o'): Counter({'r': 1}),\n",
785 " ('l', 'w'): Counter({'e': 1}),\n",
786 " ('m', 'i'): Counter({'l': 1}),\n",
787 " ('m', 'o'): Counter({'l': 1, 'n': 1}),\n",
788 " ('n', 'a'): Counter({'n': 1}),\n",
789 " ('n', 'd'): Counter({'a': 1}),\n",
790 " ('n', 'n'): Counter({'a': 1}),\n",
791 " ('o', 'c'): Counter({'h': 1}),\n",
792 " ('o', 'l'): Counter({None: 1, 'a': 1}),\n",
793 " ('o', 'n'): Counter({None: 3}),\n",
794 " ('o', 'r'): Counter({None: 2}),\n",
795 " ('o', 't'): Counter({'u': 1}),\n",
796 " ('p', 'a'): Counter({'l': 2}),\n",
797 " ('r', 'a'): Counter({'n': 1}),\n",
798 " ('r', 'd'): Counter({'o': 1}),\n",
799 " ('r', 'i'): Counter({'l': 2}),\n",
800 " ('r', 'm'): Counter({'o': 1}),\n",
801 " ('r', 'n'): Counter({None: 2}),\n",
802 " ('r', 'o'): Counter({'l': 1, 'n': 1}),\n",
803 " ('t', 'a'): Counter({'c': 1}),\n",
804 " ('t', 'u'): Counter({'l': 1, 'n': 1}),\n",
805 " ('u', 'l'): Counter({'o': 1}),\n",
806 " ('u', 'n'): Counter({None: 1}),\n",
807 " ('w', 'e'): Counter({'n': 1}),\n",
808 " ('y', 'a'): Counter({'r': 1})}),\n",
809 " defaultdict(collections.Counter,\n",
810 " {('a',): Counter({'c': 1,\n",
811 " 'g': 1,\n",
812 " 'k': 1,\n",
813 " 'l': 2,\n",
814 " 'm': 2,\n",
815 " 'n': 5,\n",
816 " 'r': 5}),\n",
817 " ('c',): Counter({'h': 3}),\n",
818 " ('d',): Counter({'a': 1, 'o': 1}),\n",
819 " ('e',): Counter({'n': 2, 'r': 1}),\n",
820 " ('f',): Counter({'a': 1, 'i': 2}),\n",
821 " ('g',): Counter({'a': 1}),\n",
822 " ('h',): Counter({None: 1, 'a': 1, 'o': 1}),\n",
823 " ('i',): Counter({'l': 6, 'n': 1, 'r': 2}),\n",
824 " ('k',): Counter({'a': 1, 'o': 2, 'r': 1}),\n",
825 " ('l',): Counter({'w': 1, None: 5, 'o': 1, 'a': 5, 'i': 1}),\n",
826 " ('m',): Counter({'i': 1, 'o': 2}),\n",
827 " ('n',): Counter({'d': 1, 'n': 1, None: 12, 'a': 1}),\n",
828 " ('o',): Counter({'c': 1, 'l': 2, 'n': 3, 'r': 2, 't': 1}),\n",
829 " ('p',): Counter({'a': 2}),\n",
830 " ('r',): Counter({'d': 1,\n",
831 " 'i': 2,\n",
832 " 'n': 2,\n",
833 " None: 2,\n",
834 " 'o': 2,\n",
835 " 'm': 1,\n",
836 " 'a': 1}),\n",
837 " ('t',): Counter({'a': 1, 'u': 2}),\n",
838 " ('u',): Counter({'l': 1, 'n': 1}),\n",
839 " ('w',): Counter({'e': 1}),\n",
840 " ('y',): Counter({'a': 1})}))"
841 ]
842 },
843 "execution_count": 21,
844 "metadata": {},
845 "output_type": "execute_result"
846 }
847 ],
848 "source": [
849 "rinden_counts, rinden_counts1"
850 ]
851 },
852 {
853 "cell_type": "code",
854 "execution_count": 26,
855 "metadata": {
856 "collapsed": true
857 },
858 "outputs": [],
859 "source": [
def scale_merge(left_starts, left_start_scale, left_counts, left_count_scale,
                right_starts, right_start_scale, right_counts, right_count_scale):
    """Combine two models into one, weighting each side by a scale factor.

    Every count from the left model is multiplied by its left scale and every
    count from the right model by its right scale. Entries present in both
    models are summed.

    Args:
        left_starts: Mapping of start-state tuple to count for the left model.
        left_start_scale: Multiplier applied to ``left_starts`` counts.
        left_counts: Mapping of state tuple to a mapping of follower to count
            for the left model.
        left_count_scale: Multiplier applied to ``left_counts`` counts.
        right_starts: Start-state counts for the right model.
        right_start_scale: Multiplier applied to ``right_starts`` counts.
        right_counts: Transition counts for the right model.
        right_count_scale: Multiplier applied to ``right_counts`` counts.

    Returns:
        A ``(starts, counts)`` pair: a ``collections.Counter`` of merged start
        counts and a ``defaultdict`` of ``Counter`` of merged transition
        counts. The inputs are not modified.

    Note:
        Non-integer scales give non-integer counts, which
        ``Counter.elements()`` cannot expand. Keep the scales integral if the
        result is sampled that way.
    """
    starts = collections.Counter()
    counts = collections.defaultdict(collections.Counter)

    start_sides = ((left_starts, left_start_scale),
                   (right_starts, right_start_scale))
    for side, scale in start_sides:
        for state, n in side.items():
            starts[state] += n * scale

    count_sides = ((left_counts, left_count_scale),
                   (right_counts, right_count_scale))
    for side, scale in count_sides:
        for state, followers in side.items():
            for follower, n in followers.items():
                # Accumulate rather than assign, so a transition present in
                # both models keeps the contribution from each side.
                counts[state][follower] += n * scale

    return starts, counts
879 ]
880 },
881 {
882 "cell_type": "code",
883 "execution_count": 23,
884 "metadata": {
885 "collapsed": false
886 },
887 "outputs": [
888 {
889 "data": {
890 "text/plain": [
891 "(20, 85)"
892 ]
893 },
894 "execution_count": 23,
895 "metadata": {},
896 "output_type": "execute_result"
897 }
898 ],
899 "source": [
900 "sum(rinden_starts.values()), sum(sum(c.values()) for c in rinden_counts.values())"
901 ]
902 },
903 {
904 "cell_type": "code",
905 "execution_count": 24,
906 "metadata": {
907 "collapsed": false
908 },
909 "outputs": [
910 {
911 "data": {
912 "text/plain": [
913 "(20, 105)"
914 ]
915 },
916 "execution_count": 24,
917 "metadata": {},
918 "output_type": "execute_result"
919 }
920 ],
921 "source": [
922 "sum(jahga_starts.values()), sum(sum(c.values()) for c in jahga_counts.values())"
923 ]
924 },
925 {
926 "cell_type": "code",
927 "execution_count": 28,
928 "metadata": {
929 "collapsed": false
930 },
931 "outputs": [],
932 "source": [
933 "rj_starts, rj_counts = scale_merge(rinden_starts, 5, rinden_counts, 5, jahga_starts, 1, jahga_counts, 1)"
934 ]
935 },
936 {
937 "cell_type": "code",
938 "execution_count": 29,
939 "metadata": {
940 "collapsed": false
941 },
942 "outputs": [
943 {
944 "data": {
945 "text/html": [
946 "<p>sadiru kochor lamolamil jarmol tachakotunan farmon lan are kagaru-ki farn yarn tachoru era ennan aril ilaril ennansu tun are firilweniril kagarn jarn firilwen armosumunan koch tulor palarmosu era krol palilamilamol kotulor firon tachakotun kochoru larn arn tun tulor tunajakotunan ennan endarilwennan en arn tach bedan endardon bedan indarn palamolamon ilamil aru palilwen tun tulor eniril lamosu eran firil sadirilwendan sadiron armon na-joru arn ennan farmol eran dan palil eran in ch in ilamon palamon ilamilare firilweni arn palilwenirol faru ilamon chor tulor palamon inde aru lamon aru tsumun palamil ennan eran kotun shunajakotulor tulor in tach tunan indarn tulor ilamon</p>"
947 ],
948 "text/plain": [
949 "<IPython.core.display.HTML object>"
950 ]
951 },
952 "metadata": {},
953 "output_type": "display_data"
954 }
955 ],
956 "source": [
957 "from IPython.core.display import display, HTML\n",
958 "display(HTML('<p>' + \n",
959 " \" \".join(cat(markov_item(rj_starts, rj_counts, 15)) for _ in range(100)) + \n",
960 " '</p>'))"
961 ]
962 },
963 {
964 "cell_type": "code",
965 "execution_count": null,
966 "metadata": {
967 "collapsed": true
968 },
969 "outputs": [],
970 "source": []
971 }
972 ],
973 "metadata": {
974 "kernelspec": {
975 "display_name": "Python 3",
976 "language": "python",
977 "name": "python3"
978 },
979 "language_info": {
980 "codemirror_mode": {
981 "name": "ipython",
982 "version": 3
983 },
984 "file_extension": ".py",
985 "mimetype": "text/x-python",
986 "name": "python",
987 "nbconvert_exporter": "python",
988 "pygments_lexer": "ipython3",
989 "version": "3.4.3+"
990 }
991 },
992 "nbformat": 4,
993 "nbformat_minor": 0
994 }