Added some 'find duplicate letters' examples
[cas-master-teacher-training.git] / duplicate-letters.ipynb
1 {
2 "metadata": {
3 "name": "",
4 "signature": "sha256:19128e67a9754262fffc5d0706d52d873b5cd867f34b15dc0cb1987a03c4dd42"
5 },
6 "nbformat": 3,
7 "nbformat_minor": 0,
8 "worksheets": [
9 {
10 "cells": [
11 {
12 "cell_type": "markdown",
13 "metadata": {},
14 "source": [
15 "You need to write a procedure that returns all duplicate characters from a given String.\n",
16 "\n",
17 "For example if String is \"Java\" then program should return [\"a\"].\n",
18 "\n",
19 "* \"Java\" -> ['a']\n",
20 "* \"String\" -> []\n",
21 "* \"Computer science\" -> [\"c\", \"e\"]\n",
22 "* \"happy\" -> ['p']\n",
23 "\n",
24 "Bonus points if your program is robust and handle different kinds of input e.g. String without duplicate, null or empty String etc. \n",
25 "\n",
26 "Bonus points if you also write unit tests for normal and edge cases."
27 ]
28 },
29 {
30 "cell_type": "code",
31 "collapsed": false,
32 "input": [
33 "def duplicates_via_dict(word):\n",
34 " counts = {}\n",
35 " for letter in word:\n",
36 " if letter in counts:\n",
37 " counts[letter] += 1\n",
38 " else:\n",
39 " counts[letter] = 1\n",
40 " duplicates = []\n",
41 " for letter in counts:\n",
42 " if counts[letter] > 1:\n",
43 " duplicates += [letter]\n",
44 " return duplicates"
45 ],
46 "language": "python",
47 "metadata": {},
48 "outputs": [],
49 "prompt_number": 4
50 },
51 {
52 "cell_type": "code",
53 "collapsed": false,
54 "input": [
55 "duplicates_via_dict('occurrences')"
56 ],
57 "language": "python",
58 "metadata": {},
59 "outputs": [
60 {
61 "metadata": {},
62 "output_type": "pyout",
63 "prompt_number": 11,
64 "text": [
65 "['r', 'e', 'c']"
66 ]
67 }
68 ],
69 "prompt_number": 11
70 },
71 {
72 "cell_type": "code",
73 "collapsed": false,
74 "input": [
75 "def duplicates_via_counts(word):\n",
76 " def count_letter(word, letter):\n",
77 " count = 0\n",
78 " for l in word:\n",
79 " if l == letter:\n",
80 " count += 1\n",
81 " return count\n",
82 " counts = {}\n",
83 " for letter in word:\n",
84 " counts[letter] = count_letter(word, letter)\n",
85 " duplicates = []\n",
86 " for letter in counts:\n",
87 " if counts[letter] > 1:\n",
88 " duplicates += [letter]\n",
89 " return duplicates"
90 ],
91 "language": "python",
92 "metadata": {},
93 "outputs": [],
94 "prompt_number": 8
95 },
96 {
97 "cell_type": "code",
98 "collapsed": false,
99 "input": [
100 "duplicates_via_counts('occurrences')"
101 ],
102 "language": "python",
103 "metadata": {},
104 "outputs": [
105 {
106 "metadata": {},
107 "output_type": "pyout",
108 "prompt_number": 12,
109 "text": [
110 "['r', 'e', 'c']"
111 ]
112 }
113 ],
114 "prompt_number": 12
115 },
116 {
117 "cell_type": "code",
118 "collapsed": false,
119 "input": [
120 "def duplicates_via_comprehension(word):\n",
121 " counts = {letter: len([l for l in word if l == letter]) for letter in word}\n",
122 " return [letter for letter in counts if counts[letter] > 1]"
123 ],
124 "language": "python",
125 "metadata": {},
126 "outputs": [],
127 "prompt_number": 64
128 },
129 {
130 "cell_type": "code",
131 "collapsed": false,
132 "input": [
133 "duplicates_via_comprehension('occurrences')"
134 ],
135 "language": "python",
136 "metadata": {},
137 "outputs": [
138 {
139 "metadata": {},
140 "output_type": "pyout",
141 "prompt_number": 65,
142 "text": [
143 "['r', 'e', 'c']"
144 ]
145 }
146 ],
147 "prompt_number": 65
148 },
149 {
150 "cell_type": "code",
151 "collapsed": false,
152 "input": [
153 "def duplicates_via_recursion(word):\n",
154 " return duplicates_via_recursion_helper(word, [])\n",
155 "\n",
156 "def duplicates_via_recursion_helper(word, duplicates_so_far):\n",
157 " if word:\n",
158 " letter = word[0]\n",
159 " if letter in word[1:] and letter not in duplicates_so_far:\n",
160 " return duplicates_via_recursion_helper(word[1:], [letter] + duplicates_so_far)\n",
161 " else:\n",
162 " return duplicates_via_recursion_helper(word[1:], duplicates_so_far)\n",
163 " else:\n",
164 " return duplicates_so_far"
165 ],
166 "language": "python",
167 "metadata": {},
168 "outputs": [],
169 "prompt_number": 25
170 },
171 {
172 "cell_type": "code",
173 "collapsed": false,
174 "input": [
175 "duplicates_via_recursion('occurrences')"
176 ],
177 "language": "python",
178 "metadata": {},
179 "outputs": [
180 {
181 "metadata": {},
182 "output_type": "pyout",
183 "prompt_number": 26,
184 "text": [
185 "['e', 'r', 'c']"
186 ]
187 }
188 ],
189 "prompt_number": 26
190 },
191 {
192 "cell_type": "code",
193 "collapsed": false,
194 "input": [
195 "def duplicates_via_sorting(word):\n",
196 " duplicates = set()\n",
197 " sorted_word = sorted(word)\n",
198 " for i in range(len(sorted_word) - 1):\n",
199 " if sorted_word[i] == sorted_word[i+1]:\n",
200 " duplicates.add(sorted_word[i])\n",
201 " return list(duplicates)"
202 ],
203 "language": "python",
204 "metadata": {},
205 "outputs": [],
206 "prompt_number": 29
207 },
208 {
209 "cell_type": "code",
210 "collapsed": false,
211 "input": [
212 "duplicates_via_sorting('occurrences')"
213 ],
214 "language": "python",
215 "metadata": {},
216 "outputs": [
217 {
218 "metadata": {},
219 "output_type": "pyout",
220 "prompt_number": 30,
221 "text": [
222 "['r', 'e', 'c']"
223 ]
224 }
225 ],
226 "prompt_number": 30
227 },
228 {
229 "cell_type": "code",
230 "collapsed": false,
231 "input": [
232 "def duplicates_via_runs(word):\n",
233 " duplicates = []\n",
234 " sorted_word = sorted(word)\n",
235 " i = 0\n",
236 " found_duplicates = False\n",
237 " for i in range(len(sorted_word) - 1):\n",
238 " if sorted_word[i] == sorted_word[i+1]:\n",
239 " if not found_duplicates:\n",
240 " duplicates += [sorted_word[i]]\n",
241 " found_duplicates = True\n",
242 " else:\n",
243 " found_duplicates = False\n",
244 " return duplicates"
245 ],
246 "language": "python",
247 "metadata": {},
248 "outputs": [],
249 "prompt_number": 55
250 },
251 {
252 "cell_type": "code",
253 "collapsed": false,
254 "input": [
255 "duplicates_via_runs('occurrences')"
256 ],
257 "language": "python",
258 "metadata": {},
259 "outputs": [
260 {
261 "metadata": {},
262 "output_type": "pyout",
263 "prompt_number": 56,
264 "text": [
265 "['c', 'e', 'r']"
266 ]
267 }
268 ],
269 "prompt_number": 56
270 },
271 {
272 "cell_type": "code",
273 "collapsed": false,
274 "input": [
275 "def duplicates_via_segments(word):\n",
276 " segments = []\n",
277 " sorted_word = sorted(word)\n",
278 " segment_start = 0\n",
279 " for i in range(len(sorted_word) - 1):\n",
280 " if sorted_word[i] != sorted_word[i+1]:\n",
281 " segments += [sorted_word[segment_start:i+1]]\n",
282 " segment_start = i + 1\n",
283 " return [segment[0] for segment in segments if len(segment) > 1]"
284 ],
285 "language": "python",
286 "metadata": {},
287 "outputs": [],
288 "prompt_number": 49
289 },
290 {
291 "cell_type": "code",
292 "collapsed": false,
293 "input": [
294 "duplicates_via_segments('occurrences')"
295 ],
296 "language": "python",
297 "metadata": {},
298 "outputs": [
299 {
300 "metadata": {},
301 "output_type": "pyout",
302 "prompt_number": 50,
303 "text": [
304 "['c', 'e', 'r']"
305 ]
306 }
307 ],
308 "prompt_number": 50
309 },
310 {
311 "cell_type": "code",
312 "collapsed": false,
313 "input": [
314 "def duplicates_via_buckets(word):\n",
315 " buckets = []\n",
316 " for letter in word:\n",
317 " found_bucket = False\n",
318 " for bucket in buckets:\n",
319 " if bucket[0] == letter:\n",
320 " bucket += [letter]\n",
321 " found_bucket = True\n",
322 " if not found_bucket:\n",
323 " buckets += [[letter]]\n",
324 " duplicates = []\n",
325 " for bucket in buckets:\n",
326 " if len(bucket) > 1:\n",
327 " duplicates += bucket[0]\n",
328 " return duplicates"
329 ],
330 "language": "python",
331 "metadata": {},
332 "outputs": [],
333 "prompt_number": 43
334 },
335 {
336 "cell_type": "code",
337 "collapsed": false,
338 "input": [
339 "duplicates_via_buckets('occurrences')"
340 ],
341 "language": "python",
342 "metadata": {},
343 "outputs": [
344 {
345 "metadata": {},
346 "output_type": "pyout",
347 "prompt_number": 44,
348 "text": [
349 "['c', 'r', 'e']"
350 ]
351 }
352 ],
353 "prompt_number": 44
354 },
355 {
356 "cell_type": "code",
357 "collapsed": false,
358 "input": [
359 "def duplicates_via_set(word):\n",
360 " letters = set(word)\n",
361 " duplicates = list(word)\n",
362 " for letter in letters:\n",
363 " duplicates.remove(letter)\n",
364 " return list(set(duplicates))"
365 ],
366 "language": "python",
367 "metadata": {},
368 "outputs": [],
369 "prompt_number": 31
370 },
371 {
372 "cell_type": "code",
373 "collapsed": false,
374 "input": [
375 "duplicates_via_set('occurrences')"
376 ],
377 "language": "python",
378 "metadata": {},
379 "outputs": [
380 {
381 "metadata": {},
382 "output_type": "pyout",
383 "prompt_number": 32,
384 "text": [
385 "['r', 'e', 'c']"
386 ]
387 }
388 ],
389 "prompt_number": 32
390 },
391 {
392 "cell_type": "code",
393 "collapsed": false,
394 "input": [
395 "import collections\n",
396 "\n",
397 "def duplicates_via_counter(word):\n",
398 " counts = collections.Counter(word)\n",
399 " return [letter for letter, count in counts.items() \n",
400 " if count > 1]"
401 ],
402 "language": "python",
403 "metadata": {},
404 "outputs": [],
405 "prompt_number": 33
406 },
407 {
408 "cell_type": "code",
409 "collapsed": false,
410 "input": [
411 "duplicates_via_counter('occurrences')"
412 ],
413 "language": "python",
414 "metadata": {},
415 "outputs": [
416 {
417 "metadata": {},
418 "output_type": "pyout",
419 "prompt_number": 34,
420 "text": [
421 "['r', 'e', 'c']"
422 ]
423 }
424 ],
425 "prompt_number": 34
426 },
427 {
428 "cell_type": "code",
429 "collapsed": false,
430 "input": [
431 "import itertools\n",
432 "\n",
433 "def duplicates_via_groupby(word):\n",
434 " return [k for k, g in itertools.groupby(sorted(word)) \n",
435 " if len(list(g)) > 1]"
436 ],
437 "language": "python",
438 "metadata": {},
439 "outputs": [],
440 "prompt_number": 41
441 },
442 {
443 "cell_type": "code",
444 "collapsed": false,
445 "input": [
446 "duplicates_via_groupby('occurrences')"
447 ],
448 "language": "python",
449 "metadata": {},
450 "outputs": [
451 {
452 "metadata": {},
453 "output_type": "pyout",
454 "prompt_number": 42,
455 "text": [
456 "['c', 'e', 'r']"
457 ]
458 }
459 ],
460 "prompt_number": 42
461 },
462 {
463 "cell_type": "code",
464 "collapsed": false,
465 "input": [
466 "import functools\n",
467 "\n",
468 "def duplicates_via_reduce(word):\n",
469 " def merge_counts(counts1, counts2):\n",
470 " counts = {l: c for l, c in counts1.items()}\n",
471 " for l, c in counts2.items():\n",
472 " if l in counts:\n",
473 " counts[l] += c\n",
474 " else:\n",
475 " counts[l] = c\n",
476 " return counts\n",
477 " \n",
478 " counts = [{letter: 1} for letter in word]\n",
479 " return [letter for letter, count in functools.reduce(merge_counts, counts).items() if count > 1]"
480 ],
481 "language": "python",
482 "metadata": {},
483 "outputs": [],
484 "prompt_number": 62
485 },
486 {
487 "cell_type": "code",
488 "collapsed": false,
489 "input": [
490 "duplicates_via_reduce('occurrences')"
491 ],
492 "language": "python",
493 "metadata": {},
494 "outputs": [
495 {
496 "metadata": {},
497 "output_type": "pyout",
498 "prompt_number": 63,
499 "text": [
500 "['r', 'e', 'c']"
501 ]
502 }
503 ],
504 "prompt_number": 63
505 },
506 {
507 "cell_type": "code",
508 "collapsed": false,
509 "input": [],
510 "language": "python",
511 "metadata": {},
512 "outputs": []
513 }
514 ],
515 "metadata": {}
516 }
517 ]
518 }