Removing files from data analysis directory
[ou-summer-of-code-2017.git] / monotone-substrings / single_substring.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Monotone substrings\n",
8 "\n",
9 "Given a list of numbers, find the length of longest increasing or decreasing substring in the list.\n",
10 "\n",
11 "For instance, the sequence\n",
12 " 10, 1, 2, 3, 4, 5, 5, 5, 6, 4, 3, 5, 6\n",
13 "contains these increasing or decreasing substrings:\n",
14 "* 10, 1\n",
15 "* 1, 2, 3, 4, 5\n",
16 "* 5, 6\n",
17 "* 6, 4, 3\n",
18 "* 3, 5, 6\n",
19 "\n",
20 "As an extension, allow the substring to contain runs of identical numbers, each of which is included in the length of the longest substring.\n",
21 "\n",
22 "If identical numbers are allowed, the above sequence contains substrings:\n",
23 "* 10, 1\n",
24 "* 1, 2, 3, 4, 5, 5, 5, 6\n",
25 "* 6, 4, 3\n",
26 "* 3, 5, 6\n",
27 "\n",
28 "The list is given as a single line of comma-separated integers. "
29 ]
30 },
31 {
32 "cell_type": "code",
33 "execution_count": 1,
34 "metadata": {
35 "collapsed": false
36 },
37 "outputs": [],
38 "source": [
39 "def longest_monotone(seq, allow_same=False, debug=False):\n",
40 " \"\"\"Find the longest monotonic substring. If allow_same is True, \n",
41 " instead find the longest non-decreasing or non-increasing substring\"\"\"\n",
42 " longest_length = 0\n",
43 " current_length = 1\n",
44 " current_same = 0\n",
45 " current_increasing = True\n",
46 " for (last, this) in zip(seq, seq[1:]):\n",
47 " if this == last:\n",
48 " if allow_same:\n",
49 " current_length += 1\n",
50 " current_same += 1\n",
51 " else:\n",
52 " current_length = 1\n",
53 " elif this > last:\n",
54 " if current_increasing:\n",
55 " current_length += 1\n",
56 " else:\n",
57 " current_length = 2 + current_same\n",
58 " current_increasing = True\n",
59 " current_same = 0\n",
60 " elif this < last:\n",
61 " if not current_increasing:\n",
62 " current_length += 1\n",
63 " else:\n",
64 " current_length = 2 + current_same\n",
65 " current_increasing = False\n",
66 " current_same = 0\n",
67 " if current_length > longest_length:\n",
68 " longest_length = current_length\n",
69 " if debug:\n",
70 " if last < this: cmp = '<'\n",
71 " if last > this: cmp = '>'\n",
72 " if last == this: cmp = '='\n",
73 " print('{:4} {}\\t{}\\t{}\\t{}'.format(current_length, current_increasing, last, cmp, this))\n",
74 " return longest_length "
75 ]
76 },
77 {
78 "cell_type": "code",
79 "execution_count": 2,
80 "metadata": {
81 "collapsed": false
82 },
83 "outputs": [
84 {
85 "name": "stdout",
86 "output_type": "stream",
87 "text": [
88 " 2 True\t1\t<\t2\n",
89 " 3 True\t2\t<\t3\n",
90 " 4 True\t3\t<\t4\n",
91 " 5 True\t4\t<\t5\n",
92 " 2 False\t5\t>\t4\n",
93 " 2 True\t4\t<\t5\n",
94 " 3 True\t5\t<\t6\n"
95 ]
96 },
97 {
98 "data": {
99 "text/plain": [
100 "5"
101 ]
102 },
103 "execution_count": 2,
104 "metadata": {},
105 "output_type": "execute_result"
106 }
107 ],
108 "source": [
109 "longest_monotone([1,2,3,4,5,4,5,6], debug=True)"
110 ]
111 },
112 {
113 "cell_type": "code",
114 "execution_count": 3,
115 "metadata": {
116 "collapsed": false
117 },
118 "outputs": [
119 {
120 "name": "stdout",
121 "output_type": "stream",
122 "text": [
123 " 2 False\t10\t>\t1\n",
124 " 2 True\t1\t<\t2\n",
125 " 3 True\t2\t<\t3\n",
126 " 4 True\t3\t<\t4\n",
127 " 5 True\t4\t<\t5\n",
128 " 2 False\t5\t>\t4\n",
129 " 2 True\t4\t<\t5\n",
130 " 3 True\t5\t<\t6\n"
131 ]
132 },
133 {
134 "data": {
135 "text/plain": [
136 "5"
137 ]
138 },
139 "execution_count": 3,
140 "metadata": {},
141 "output_type": "execute_result"
142 }
143 ],
144 "source": [
145 "longest_monotone([10,1,2,3,4,5,4,5,6], debug=True)"
146 ]
147 },
148 {
149 "cell_type": "code",
150 "execution_count": 8,
151 "metadata": {
152 "collapsed": false
153 },
154 "outputs": [
155 {
156 "name": "stdout",
157 "output_type": "stream",
158 "text": [
159 " 2 False\t10\t>\t1\n",
160 " 2 True\t1\t<\t2\n",
161 " 3 True\t2\t<\t3\n",
162 " 4 True\t3\t<\t4\n",
163 " 5 True\t4\t<\t5\n",
164 " 1 True\t5\t=\t5\n",
165 " 1 True\t5\t=\t5\n",
166 " 2 False\t5\t>\t4\n",
167 " 3 False\t4\t>\t3\n",
168 " 2 True\t3\t<\t5\n",
169 " 3 True\t5\t<\t6\n"
170 ]
171 },
172 {
173 "data": {
174 "text/plain": [
175 "5"
176 ]
177 },
178 "execution_count": 8,
179 "metadata": {},
180 "output_type": "execute_result"
181 }
182 ],
183 "source": [
184 "longest_monotone([10,1,2,3,4,5,5,5,4,3,5,6], debug=True)"
185 ]
186 },
187 {
188 "cell_type": "code",
189 "execution_count": 9,
190 "metadata": {
191 "collapsed": false
192 },
193 "outputs": [
194 {
195 "name": "stdout",
196 "output_type": "stream",
197 "text": [
198 " 2 False\t10\t>\t1\n",
199 " 2 True\t1\t<\t2\n",
200 " 3 True\t2\t<\t3\n",
201 " 4 True\t3\t<\t4\n",
202 " 5 True\t4\t<\t5\n",
203 " 1 True\t5\t=\t5\n",
204 " 1 True\t5\t=\t5\n",
205 " 2 True\t5\t<\t6\n",
206 " 2 False\t6\t>\t4\n",
207 " 3 False\t4\t>\t3\n",
208 " 2 True\t3\t<\t5\n",
209 " 3 True\t5\t<\t6\n"
210 ]
211 },
212 {
213 "data": {
214 "text/plain": [
215 "5"
216 ]
217 },
218 "execution_count": 9,
219 "metadata": {},
220 "output_type": "execute_result"
221 }
222 ],
223 "source": [
224 "longest_monotone([10,1,2,3,4,5,5,5,6,4,3,5,6], debug=True)"
225 ]
226 },
227 {
228 "cell_type": "code",
229 "execution_count": 7,
230 "metadata": {
231 "collapsed": false
232 },
233 "outputs": [
234 {
235 "name": "stdout",
236 "output_type": "stream",
237 "text": [
238 " 2 False\t10\t>\t1\n",
239 " 2 True\t1\t<\t2\n",
240 " 3 True\t2\t<\t3\n",
241 " 4 True\t3\t<\t4\n",
242 " 5 True\t4\t<\t5\n",
243 " 6 True\t5\t=\t5\n",
244 " 7 True\t5\t=\t5\n",
245 " 8 True\t5\t<\t6\n",
246 " 2 False\t6\t>\t4\n",
247 " 3 False\t4\t>\t3\n",
248 " 2 True\t3\t<\t5\n",
249 " 3 True\t5\t<\t6\n"
250 ]
251 },
252 {
253 "data": {
254 "text/plain": [
255 "8"
256 ]
257 },
258 "execution_count": 7,
259 "metadata": {},
260 "output_type": "execute_result"
261 }
262 ],
263 "source": [
264 "longest_monotone([10,1,2,3,4,5,5,5,6,4,3,5,6], allow_same=True, debug=True)"
265 ]
266 },
267 {
268 "cell_type": "code",
269 "execution_count": 6,
270 "metadata": {
271 "collapsed": false
272 },
273 "outputs": [
274 {
275 "name": "stdout",
276 "output_type": "stream",
277 "text": [
278 " 2 False\t10\t>\t1\n",
279 " 2 True\t1\t<\t2\n",
280 " 3 True\t2\t<\t5\n",
281 " 4 True\t5\t=\t5\n",
282 " 5 True\t5\t=\t5\n",
283 " 4 False\t5\t>\t4\n",
284 " 5 False\t4\t>\t3\n",
285 " 6 False\t3\t>\t2\n",
286 " 7 False\t2\t>\t1\n",
287 " 2 True\t1\t<\t5\n",
288 " 3 True\t5\t<\t6\n"
289 ]
290 },
291 {
292 "data": {
293 "text/plain": [
294 "7"
295 ]
296 },
297 "execution_count": 6,
298 "metadata": {},
299 "output_type": "execute_result"
300 }
301 ],
302 "source": [
303 "longest_monotone([10,1,2,5,5,5,4,3,2,1,5,6], allow_same=True, debug=True)"
304 ]
305 },
306 {
307 "cell_type": "code",
308 "execution_count": 7,
309 "metadata": {
310 "collapsed": false
311 },
312 "outputs": [
313 {
314 "name": "stdout",
315 "output_type": "stream",
316 "text": [
317 " 2 False\t10\t>\t1\n",
318 " 2 True\t1\t<\t2\n",
319 " 3 True\t2\t<\t5\n",
320 " 4 True\t5\t=\t5\n",
321 " 5 True\t5\t=\t5\n",
322 " 4 False\t5\t>\t4\n",
323 " 5 False\t4\t>\t3\n",
324 " 6 False\t3\t>\t2\n",
325 " 2 True\t2\t<\t5\n",
326 " 3 True\t5\t<\t6\n",
327 " 4 True\t6\t=\t6\n",
328 " 5 True\t6\t=\t6\n",
329 " 6 True\t6\t=\t6\n",
330 " 7 True\t6\t=\t6\n",
331 " 8 True\t6\t=\t6\n",
332 " 9 True\t6\t=\t6\n",
333 " 8 False\t6\t>\t2\n"
334 ]
335 },
336 {
337 "data": {
338 "text/plain": [
339 "9"
340 ]
341 },
342 "execution_count": 7,
343 "metadata": {},
344 "output_type": "execute_result"
345 }
346 ],
347 "source": [
348 "longest_monotone([10,1,2,5,5,5,4,3,2,5,6,6,6,6,6,6,6,2], allow_same=True, debug=True)"
349 ]
350 },
351 {
352 "cell_type": "code",
353 "execution_count": 8,
354 "metadata": {
355 "collapsed": false
356 },
357 "outputs": [
358 {
359 "name": "stdout",
360 "output_type": "stream",
361 "text": [
362 " 2 False\t10\t>\t1\n",
363 " 2 True\t1\t<\t2\n",
364 " 3 True\t2\t<\t5\n",
365 " 1 True\t5\t=\t5\n",
366 " 1 True\t5\t=\t5\n",
367 " 2 False\t5\t>\t4\n",
368 " 3 False\t4\t>\t3\n",
369 " 4 False\t3\t>\t2\n",
370 " 2 True\t2\t<\t5\n",
371 " 3 True\t5\t<\t6\n",
372 " 1 True\t6\t=\t6\n",
373 " 1 True\t6\t=\t6\n",
374 " 1 True\t6\t=\t6\n",
375 " 1 True\t6\t=\t6\n",
376 " 1 True\t6\t=\t6\n",
377 " 1 True\t6\t=\t6\n",
378 " 2 False\t6\t>\t2\n"
379 ]
380 },
381 {
382 "data": {
383 "text/plain": [
384 "4"
385 ]
386 },
387 "execution_count": 8,
388 "metadata": {},
389 "output_type": "execute_result"
390 }
391 ],
392 "source": [
393 "longest_monotone([10,1,2,5,5,5,4,3,2,5,6,6,6,6,6,6,6,2], allow_same=False, debug=True)"
394 ]
395 },
396 {
397 "cell_type": "code",
398 "execution_count": null,
399 "metadata": {
400 "collapsed": false
401 },
402 "outputs": [],
403 "source": [
404 "# for i in range(1):\n",
405 "# with open('sequence{:03}.txt'.format(i)) as f:\n",
406 "# sseq = f.read()\n",
407 "# seq = [int(s) for s in sseq.split(',')]\n",
408 "# s, l = longest_monotone(seq, debug=True)\n",
409 "# print(i, s, l)"
410 ]
411 },
412 {
413 "cell_type": "code",
414 "execution_count": 10,
415 "metadata": {
416 "collapsed": false
417 },
418 "outputs": [
419 {
420 "name": "stdout",
421 "output_type": "stream",
422 "text": [
423 "0 24\n",
424 "1 34\n",
425 "2 23\n",
426 "3 23\n",
427 "4 22\n",
428 "5 27\n",
429 "6 27\n",
430 "7 24\n",
431 "8 22\n",
432 "9 21\n"
433 ]
434 }
435 ],
436 "source": [
437 "for i in range(10):\n",
438 " with open('sequence{:03}.txt'.format(i)) as f:\n",
439 " sseq = f.read()\n",
440 " seq = [int(s) for s in sseq.split(',')]\n",
441 " s = longest_monotone(seq)\n",
442 " print(i, s)"
443 ]
444 },
445 {
446 "cell_type": "code",
447 "execution_count": 106,
448 "metadata": {
449 "collapsed": false
450 },
451 "outputs": [
452 {
453 "name": "stdout",
454 "output_type": "stream",
455 "text": [
456 "0 29\n",
457 "1 34\n",
458 "2 25\n",
459 "3 30\n",
460 "4 29\n",
461 "5 27\n",
462 "6 27\n",
463 "7 28\n",
464 "8 23\n",
465 "9 26\n"
466 ]
467 }
468 ],
469 "source": [
470 "for i in range(10):\n",
471 " with open('sequence{:03}.txt'.format(i)) as f:\n",
472 " sseq = f.read()\n",
473 " seq = [int(s) for s in sseq.split(',')]\n",
474 " s = longest_monotone(seq, allow_same=True)\n",
475 " print(i, s)"
476 ]
477 },
478 {
479 "cell_type": "code",
480 "execution_count": null,
481 "metadata": {
482 "collapsed": true
483 },
484 "outputs": [],
485 "source": []
486 }
487 ],
488 "metadata": {
489 "kernelspec": {
490 "display_name": "Python 3",
491 "language": "python",
492 "name": "python3"
493 },
494 "language_info": {
495 "codemirror_mode": {
496 "name": "ipython",
497 "version": 3
498 },
499 "file_extension": ".py",
500 "mimetype": "text/x-python",
501 "name": "python",
502 "nbconvert_exporter": "python",
503 "pygments_lexer": "ipython3",
504 "version": "3.5.2"
505 }
506 },
507 "nbformat": 4,
508 "nbformat_minor": 0
509 }