Removed M269 files
[ou-summer-of-code-2017.git] / monotone-substrings / single_substring_itertools.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Monotone substrings\n",
8 "\n",
9 "Given a list of numbers, find the length of longest increasing or decreasing substring in the list.\n",
10 "\n",
11 "For instance, the sequence\n",
12 " 10, 1, 2, 3, 4, 5, 5, 5, 6, 4, 3, 5, 6\n",
13 "contains these increasing or decreasing substrings:\n",
14 "* 10, 1\n",
15 "* 1, 2, 3, 4, 5\n",
16 "* 5, 6\n",
17 "* 6, 4, 3\n",
18 "* 3, 5, 6\n",
19 "\n",
20 "As an extension, allow the substring to contain runs of identical numbers, each of which is included in the length of the longest substring.\n",
21 "\n",
22 "If identical numbers are allowed, the above sequence contains substrings:\n",
23 "* 10, 1\n",
24 "* 1, 2, 3, 4, 5, 5, 5, 6\n",
25 "* 6, 4, 3\n",
26 "* 3, 5, 6\n",
27 "\n",
28 "The list is given as a single line of comma-separated integers. "
29 ]
30 },
31 {
32 "cell_type": "code",
33 "execution_count": 1,
34 "metadata": {
35 "collapsed": true
36 },
37 "outputs": [],
38 "source": [
39 "import itertools"
40 ]
41 },
42 {
43 "cell_type": "code",
44 "execution_count": 2,
45 "metadata": {
46 "collapsed": true
47 },
48 "outputs": [],
49 "source": [
50 "def cmp(x, y):\n",
51 " if x == y:\n",
52 " return 0\n",
53 " elif x > y:\n",
54 " return -1\n",
55 " else:\n",
56 " return 1"
57 ]
58 },
59 {
60 "cell_type": "code",
61 "execution_count": 3,
62 "metadata": {
63 "collapsed": false
64 },
65 "outputs": [],
66 "source": [
67 "def longest_monotone(seq, allow_same=False, debug=False):\n",
68 " \"\"\"Find the longest monotonic substring. If allow_same is True, \n",
69 " instead find the longest non-decreasing or non-increasing substring\"\"\"\n",
70 " cmps = [cmp(x, y) for x, y in zip(seq, seq[1:])]\n",
71 " groups = [(k, len(list(g)) + 1) for k, g in itertools.groupby(cmps)]\n",
72 " if allow_same:\n",
73 " groups = merged(groups)\n",
74 " if debug:\n",
75 " return max(t[1] for t in groups), groups\n",
76 " else:\n",
77 " return max(t[1] for t in groups)"
78 ]
79 },
80 {
81 "cell_type": "code",
82 "execution_count": 4,
83 "metadata": {
84 "collapsed": true
85 },
86 "outputs": [],
87 "source": [
88 "def merged(gs):\n",
89 " padded = gs + [(0, 0)]\n",
90 " mgs = []\n",
91 " prev = (0, 1)\n",
92 " for curr, nxt in zip(padded, padded[1:]):\n",
93 " if prev[0] == 0:\n",
94 " prev = (curr[0], prev[1] + curr[1] - 1)\n",
95 " elif curr[0] == 0:\n",
96 " prev = (prev[0], prev[1] + curr[1] - 1)\n",
97 " if prev[0] != nxt[0]:\n",
98 " mgs += [prev]\n",
99 " prev = (0, curr[1])\n",
100 " elif prev[0] == curr[0]:\n",
101 " prev = (prev[0], prev[1] + curr[1] - 1)\n",
102 " else:\n",
103 " mgs += [prev]\n",
104 " prev = curr\n",
105 " mgs += [prev]\n",
106 " return mgs "
107 ]
108 },
109 {
110 "cell_type": "code",
111 "execution_count": 5,
112 "metadata": {
113 "collapsed": false
114 },
115 "outputs": [
116 {
117 "data": {
118 "text/plain": [
119 "5"
120 ]
121 },
122 "execution_count": 5,
123 "metadata": {},
124 "output_type": "execute_result"
125 }
126 ],
127 "source": [
128 "longest_monotone([1,2,3,4,5,4,5,6])"
129 ]
130 },
131 {
132 "cell_type": "code",
133 "execution_count": 6,
134 "metadata": {
135 "collapsed": false
136 },
137 "outputs": [
138 {
139 "data": {
140 "text/plain": [
141 "(5, [(1, 5), (-1, 2), (1, 3)])"
142 ]
143 },
144 "execution_count": 6,
145 "metadata": {},
146 "output_type": "execute_result"
147 }
148 ],
149 "source": [
150 "longest_monotone([1,2,3,4,5,4,5,6], debug=True)"
151 ]
152 },
153 {
154 "cell_type": "code",
155 "execution_count": 7,
156 "metadata": {
157 "collapsed": false
158 },
159 "outputs": [
160 {
161 "data": {
162 "text/plain": [
163 "5"
164 ]
165 },
166 "execution_count": 7,
167 "metadata": {},
168 "output_type": "execute_result"
169 }
170 ],
171 "source": [
172 "longest_monotone([10,1,2,3,4,5,4,5,6])"
173 ]
174 },
175 {
176 "cell_type": "code",
177 "execution_count": 8,
178 "metadata": {
179 "collapsed": false
180 },
181 "outputs": [
182 {
183 "data": {
184 "text/plain": [
185 "5"
186 ]
187 },
188 "execution_count": 8,
189 "metadata": {},
190 "output_type": "execute_result"
191 }
192 ],
193 "source": [
194 "longest_monotone([10,1,2,3,4,5,5,5,4,3,5,6])"
195 ]
196 },
197 {
198 "cell_type": "code",
199 "execution_count": 9,
200 "metadata": {
201 "collapsed": false
202 },
203 "outputs": [
204 {
205 "data": {
206 "text/plain": [
207 "7"
208 ]
209 },
210 "execution_count": 9,
211 "metadata": {},
212 "output_type": "execute_result"
213 }
214 ],
215 "source": [
216 "longest_monotone([10,1,2,3,4,5,5,5,4,3,5,6], allow_same=True)"
217 ]
218 },
219 {
220 "cell_type": "code",
221 "execution_count": 10,
222 "metadata": {
223 "collapsed": false
224 },
225 "outputs": [
226 {
227 "data": {
228 "text/plain": [
229 "5"
230 ]
231 },
232 "execution_count": 10,
233 "metadata": {},
234 "output_type": "execute_result"
235 }
236 ],
237 "source": [
238 "longest_monotone([10,1,2,5,5,5,4,3,2,1,5,6])"
239 ]
240 },
241 {
242 "cell_type": "code",
243 "execution_count": 11,
244 "metadata": {
245 "collapsed": false
246 },
247 "outputs": [
248 {
249 "data": {
250 "text/plain": [
251 "7"
252 ]
253 },
254 "execution_count": 11,
255 "metadata": {},
256 "output_type": "execute_result"
257 }
258 ],
259 "source": [
260 "longest_monotone([10,1,2,5,5,5,4,3,2,1,5,6], allow_same=True)"
261 ]
262 },
263 {
264 "cell_type": "code",
265 "execution_count": 12,
266 "metadata": {
267 "collapsed": false
268 },
269 "outputs": [
270 {
271 "data": {
272 "text/plain": [
273 "7"
274 ]
275 },
276 "execution_count": 12,
277 "metadata": {},
278 "output_type": "execute_result"
279 }
280 ],
281 "source": [
282 "longest_monotone([10,1,2,5,5,5,4,3,2,5,6,6,6,6,6,6,6,2])"
283 ]
284 },
285 {
286 "cell_type": "code",
287 "execution_count": 13,
288 "metadata": {
289 "collapsed": false
290 },
291 "outputs": [
292 {
293 "data": {
294 "text/plain": [
295 "9"
296 ]
297 },
298 "execution_count": 13,
299 "metadata": {},
300 "output_type": "execute_result"
301 }
302 ],
303 "source": [
304 "longest_monotone([10,1,2,5,5,5,4,3,2,5,6,6,6,6,6,6,6,2], allow_same=True)"
305 ]
306 },
307 {
308 "cell_type": "code",
309 "execution_count": 14,
310 "metadata": {
311 "collapsed": false
312 },
313 "outputs": [
314 {
315 "data": {
316 "text/plain": [
317 "7"
318 ]
319 },
320 "execution_count": 14,
321 "metadata": {},
322 "output_type": "execute_result"
323 }
324 ],
325 "source": [
326 "longest_monotone([10,1,2,5,5,5,6,7,7,8,8,3,2,5,6,6,6,6,6,6,6,2], allow_same=False)"
327 ]
328 },
329 {
330 "cell_type": "code",
331 "execution_count": 15,
332 "metadata": {
333 "collapsed": false
334 },
335 "outputs": [
336 {
337 "data": {
338 "text/plain": [
339 "10"
340 ]
341 },
342 "execution_count": 15,
343 "metadata": {},
344 "output_type": "execute_result"
345 }
346 ],
347 "source": [
348 "longest_monotone([10,1,2,5,5,5,6,7,7,8,8,3,2,5,6,6,6,6,6,6,6,2], allow_same=True)"
349 ]
350 },
351 {
352 "cell_type": "code",
353 "execution_count": 16,
354 "metadata": {
355 "collapsed": false
356 },
357 "outputs": [
358 {
359 "name": "stdout",
360 "output_type": "stream",
361 "text": [
362 "0 24\n"
363 ]
364 }
365 ],
366 "source": [
367 "for i in range(1):\n",
368 " with open('sequence{:03}.txt'.format(i)) as f:\n",
369 " sseq = f.read()\n",
370 " seq = [int(s) for s in sseq.split(',')]\n",
371 " s = longest_monotone(seq)\n",
372 " print(i, s)"
373 ]
374 },
375 {
376 "cell_type": "code",
377 "execution_count": 17,
378 "metadata": {
379 "collapsed": false
380 },
381 "outputs": [
382 {
383 "name": "stdout",
384 "output_type": "stream",
385 "text": [
386 "0 24\n",
387 "1 34\n",
388 "2 23\n",
389 "3 23\n",
390 "4 22\n",
391 "5 27\n",
392 "6 27\n",
393 "7 24\n",
394 "8 22\n",
395 "9 21\n"
396 ]
397 }
398 ],
399 "source": [
400 "for i in range(10):\n",
401 " with open('sequence{:03}.txt'.format(i)) as f:\n",
402 " sseq = f.read()\n",
403 " seq = [int(s) for s in sseq.split(',')]\n",
404 " s = longest_monotone(seq)\n",
405 " print(i, s)"
406 ]
407 },
408 {
409 "cell_type": "code",
410 "execution_count": 18,
411 "metadata": {
412 "collapsed": false
413 },
414 "outputs": [
415 {
416 "name": "stdout",
417 "output_type": "stream",
418 "text": [
419 "0 29\n",
420 "1 34\n",
421 "2 25\n",
422 "3 30\n",
423 "4 29\n",
424 "5 27\n",
425 "6 27\n",
426 "7 28\n",
427 "8 23\n",
428 "9 26\n"
429 ]
430 }
431 ],
432 "source": [
433 "for i in range(10):\n",
434 " with open('sequence{:03}.txt'.format(i)) as f:\n",
435 " sseq = f.read()\n",
436 " seq = [int(s) for s in sseq.split(',')]\n",
437 " s = longest_monotone(seq, allow_same=True)\n",
438 " print(i, s)"
439 ]
440 },
441 {
442 "cell_type": "code",
443 "execution_count": null,
444 "metadata": {
445 "collapsed": true
446 },
447 "outputs": [],
448 "source": []
449 }
450 ],
451 "metadata": {
452 "kernelspec": {
453 "display_name": "Python 3",
454 "language": "python",
455 "name": "python3"
456 },
457 "language_info": {
458 "codemirror_mode": {
459 "name": "ipython",
460 "version": 3
461 },
462 "file_extension": ".py",
463 "mimetype": "text/x-python",
464 "name": "python",
465 "nbconvert_exporter": "python",
466 "pygments_lexer": "ipython3",
467 "version": "3.5.2"
468 }
469 },
470 "nbformat": 4,
471 "nbformat_minor": 0
472 }