Removing files from data analysis directory
[ou-summer-of-code-2017.git] / monotone-substrings / single_substring_itertools.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Monotone substrings\n",
8 "\n",
9 "Given a list of numbers, find the length of longest increasing or decreasing substring in the list.\n",
10 "\n",
11 "For instance, the sequence\n",
12 " 10, 1, 2, 3, 4, 5, 5, 5, 6, 4, 3, 5, 6\n",
13 "contains these increasing or decreasing substrings:\n",
14 "* 10, 1\n",
15 "* 1, 2, 3, 4, 5\n",
16 "* 5, 6\n",
17 "* 6, 4, 3\n",
18 "* 3, 5, 6\n",
19 "\n",
20 "As an extension, allow the substring to contain runs of identical numbers, each of which is included in the length of the longest substring.\n",
21 "\n",
22 "If identical numbers are allowed, the above sequence contains substrings:\n",
23 "* 10, 1\n",
24 "* 1, 2, 3, 4, 5, 5, 5, 6\n",
25 "* 6, 4, 3\n",
26 "* 3, 5, 6\n",
27 "\n",
28 "The list is given as a single line of comma-separated integers. "
29 ]
30 },
31 {
32 "cell_type": "code",
33 "execution_count": 1,
34 "metadata": {
35 "collapsed": true
36 },
37 "outputs": [],
38 "source": [
39 "import itertools"
40 ]
41 },
42 {
43 "cell_type": "code",
44 "execution_count": 2,
45 "metadata": {
46 "collapsed": true
47 },
48 "outputs": [],
49 "source": [
50 "def cmp(x, y):\n",
51 " if x == y:\n",
52 " return 0\n",
53 " elif x > y:\n",
54 " return -1\n",
55 " else:\n",
56 " return 1"
57 ]
58 },
59 {
60 "cell_type": "code",
61 "execution_count": 3,
62 "metadata": {
63 "collapsed": false
64 },
65 "outputs": [],
66 "source": [
67 "def longest_monotone(seq, allow_same=False, debug=False):\n",
68 " \"\"\"Find the longest monotonic substring. If allow_same is True, \n",
69 " instead find the longest non-decreasing or non-increasing substring\"\"\"\n",
70 " cmps = [cmp(x, y) for x, y in zip(seq, seq[1:])]\n",
71 " groups = [(k, len(list(g)) + 1) for k, g in itertools.groupby(cmps)]\n",
72 " if allow_same:\n",
73 " groups = merged(groups)\n",
74 " if debug:\n",
75 " return max(t[1] for t in groups), groups\n",
76 " else:\n",
77 " return max(t[1] for t in groups)"
78 ]
79 },
80 {
81 "cell_type": "code",
82 "execution_count": 4,
83 "metadata": {
84 "collapsed": true
85 },
86 "outputs": [],
87 "source": [
88 "def merged(gs):\n",
89 " padded = gs + [(0, 0)]\n",
90 " mgs = []\n",
91 " prev = (0, 1)\n",
92 " for curr, nxt in zip(padded, padded[1:]):\n",
93 " if prev[0] == 0:\n",
94 " prev = (curr[0], prev[1] + curr[1] - 1)\n",
95 " elif curr[0] == 0:\n",
96 " prev = (prev[0], prev[1] + curr[1] - 1)\n",
97 " if prev[0] != nxt[0]:\n",
98 " mgs += [prev]\n",
99 " prev = (0, curr[1])\n",
100 " elif prev[0] == curr[0]:\n",
101 " prev = (prev[0], prev[1] + curr[1] - 1)\n",
102 " else:\n",
103 " mgs += [prev]\n",
104 " prev = curr\n",
105 " mgs += [prev]\n",
106 " return mgs "
107 ]
108 },
109 {
110 "cell_type": "code",
111 "execution_count": 5,
112 "metadata": {
113 "collapsed": false
114 },
115 "outputs": [
116 {
117 "data": {
118 "text/plain": [
119 "5"
120 ]
121 },
122 "execution_count": 5,
123 "metadata": {},
124 "output_type": "execute_result"
125 }
126 ],
127 "source": [
128 "longest_monotone([1,2,3,4,5,4,5,6])"
129 ]
130 },
131 {
132 "cell_type": "code",
133 "execution_count": 6,
134 "metadata": {
135 "collapsed": false
136 },
137 "outputs": [
138 {
139 "data": {
140 "text/plain": [
141 "(5, [(1, 5), (-1, 2), (1, 3)])"
142 ]
143 },
144 "execution_count": 6,
145 "metadata": {},
146 "output_type": "execute_result"
147 }
148 ],
149 "source": [
150 "longest_monotone([1,2,3,4,5,4,5,6], debug=True)"
151 ]
152 },
153 {
154 "cell_type": "code",
155 "execution_count": 7,
156 "metadata": {
157 "collapsed": false
158 },
159 "outputs": [
160 {
161 "data": {
162 "text/plain": [
163 "5"
164 ]
165 },
166 "execution_count": 7,
167 "metadata": {},
168 "output_type": "execute_result"
169 }
170 ],
171 "source": [
172 "longest_monotone([10,1,2,3,4,5,4,5,6])"
173 ]
174 },
175 {
176 "cell_type": "code",
177 "execution_count": 8,
178 "metadata": {
179 "collapsed": false
180 },
181 "outputs": [
182 {
183 "data": {
184 "text/plain": [
185 "5"
186 ]
187 },
188 "execution_count": 8,
189 "metadata": {},
190 "output_type": "execute_result"
191 }
192 ],
193 "source": [
194 "longest_monotone([10,1,2,3,4,5,5,5,4,3,5,6])"
195 ]
196 },
197 {
198 "cell_type": "code",
199 "execution_count": 9,
200 "metadata": {
201 "collapsed": false
202 },
203 "outputs": [
204 {
205 "data": {
206 "text/plain": [
207 "7"
208 ]
209 },
210 "execution_count": 9,
211 "metadata": {},
212 "output_type": "execute_result"
213 }
214 ],
215 "source": [
216 "longest_monotone([10,1,2,3,4,5,5,5,4,3,5,6], allow_same=True)"
217 ]
218 },
219 {
220 "cell_type": "code",
221 "execution_count": 10,
222 "metadata": {
223 "collapsed": false
224 },
225 "outputs": [
226 {
227 "data": {
228 "text/plain": [
229 "5"
230 ]
231 },
232 "execution_count": 10,
233 "metadata": {},
234 "output_type": "execute_result"
235 }
236 ],
237 "source": [
238 "longest_monotone([10,1,2,5,5,5,4,3,2,1,5,6])"
239 ]
240 },
241 {
242 "cell_type": "code",
243 "execution_count": 11,
244 "metadata": {
245 "collapsed": false
246 },
247 "outputs": [
248 {
249 "data": {
250 "text/plain": [
251 "7"
252 ]
253 },
254 "execution_count": 11,
255 "metadata": {},
256 "output_type": "execute_result"
257 }
258 ],
259 "source": [
260 "longest_monotone([10,1,2,5,5,5,4,3,2,1,5,6], allow_same=True)"
261 ]
262 },
263 {
264 "cell_type": "code",
265 "execution_count": 12,
266 "metadata": {
267 "collapsed": false
268 },
269 "outputs": [
270 {
271 "data": {
272 "text/plain": [
273 "7"
274 ]
275 },
276 "execution_count": 12,
277 "metadata": {},
278 "output_type": "execute_result"
279 }
280 ],
281 "source": [
282 "longest_monotone([10,1,2,5,5,5,4,3,2,5,6,6,6,6,6,6,6,2])"
283 ]
284 },
285 {
286 "cell_type": "code",
287 "execution_count": 13,
288 "metadata": {
289 "collapsed": false
290 },
291 "outputs": [
292 {
293 "data": {
294 "text/plain": [
295 "9"
296 ]
297 },
298 "execution_count": 13,
299 "metadata": {},
300 "output_type": "execute_result"
301 }
302 ],
303 "source": [
304 "longest_monotone([10,1,2,5,5,5,4,3,2,5,6,6,6,6,6,6,6,2], allow_same=True)"
305 ]
306 },
307 {
308 "cell_type": "code",
309 "execution_count": 14,
310 "metadata": {
311 "collapsed": false
312 },
313 "outputs": [
314 {
315 "data": {
316 "text/plain": [
317 "7"
318 ]
319 },
320 "execution_count": 14,
321 "metadata": {},
322 "output_type": "execute_result"
323 }
324 ],
325 "source": [
326 "longest_monotone([10,1,2,5,5,5,6,7,7,8,8,3,2,5,6,6,6,6,6,6,6,2], allow_same=False)"
327 ]
328 },
329 {
330 "cell_type": "code",
331 "execution_count": 15,
332 "metadata": {
333 "collapsed": false
334 },
335 "outputs": [
336 {
337 "data": {
338 "text/plain": [
339 "10"
340 ]
341 },
342 "execution_count": 15,
343 "metadata": {},
344 "output_type": "execute_result"
345 }
346 ],
347 "source": [
348 "longest_monotone([10,1,2,5,5,5,6,7,7,8,8,3,2,5,6,6,6,6,6,6,6,2], allow_same=True)"
349 ]
350 },
351 {
352 "cell_type": "code",
353 "execution_count": 16,
354 "metadata": {
355 "collapsed": false
356 },
357 "outputs": [
358 {
359 "name": "stdout",
360 "output_type": "stream",
361 "text": [
362 "0 24\n"
363 ]
364 }
365 ],
366 "source": [
367 "for i in range(1):\n",
368 " with open('sequence{:03}.txt'.format(i)) as f:\n",
369 " sseq = f.read()\n",
370 " seq = [int(s) for s in sseq.split(',')]\n",
371 " s = longest_monotone(seq)\n",
372 " print(i, s)"
373 ]
374 },
375 {
376 "cell_type": "code",
377 "execution_count": 17,
378 "metadata": {
379 "collapsed": false
380 },
381 "outputs": [
382 {
383 "name": "stdout",
384 "output_type": "stream",
385 "text": [
386 "0 24\n",
387 "1 34\n",
388 "2 23\n",
389 "3 23\n",
390 "4 22\n",
391 "5 27\n",
392 "6 27\n",
393 "7 24\n",
394 "8 22\n",
395 "9 21\n"
396 ]
397 }
398 ],
399 "source": [
400 "for i in range(10):\n",
401 " with open('sequence{:03}.txt'.format(i)) as f:\n",
402 " sseq = f.read()\n",
403 " seq = [int(s) for s in sseq.split(',')]\n",
404 " s = longest_monotone(seq)\n",
405 " print(i, s)"
406 ]
407 },
408 {
409 "cell_type": "code",
410 "execution_count": 18,
411 "metadata": {
412 "collapsed": false
413 },
414 "outputs": [
415 {
416 "name": "stdout",
417 "output_type": "stream",
418 "text": [
419 "0 29\n",
420 "1 34\n",
421 "2 25\n",
422 "3 30\n",
423 "4 29\n",
424 "5 27\n",
425 "6 27\n",
426 "7 28\n",
427 "8 23\n",
428 "9 26\n"
429 ]
430 }
431 ],
432 "source": [
433 "for i in range(10):\n",
434 " with open('sequence{:03}.txt'.format(i)) as f:\n",
435 " sseq = f.read()\n",
436 " seq = [int(s) for s in sseq.split(',')]\n",
437 " s = longest_monotone(seq, allow_same=True)\n",
438 " print(i, s)"
439 ]
440 },
441 {
442 "cell_type": "code",
443 "execution_count": null,
444 "metadata": {
445 "collapsed": true
446 },
447 "outputs": [],
448 "source": []
449 }
450 ],
451 "metadata": {
452 "kernelspec": {
453 "display_name": "Python 3",
454 "language": "python",
455 "name": "python3"
456 },
457 "language_info": {
458 "codemirror_mode": {
459 "name": "ipython",
460 "version": 3
461 },
462 "file_extension": ".py",
463 "mimetype": "text/x-python",
464 "name": "python",
465 "nbconvert_exporter": "python",
466 "pygments_lexer": "ipython3",
467 "version": "3.5.2"
468 }
469 },
470 "nbformat": 4,
471 "nbformat_minor": 0
472 }