Added some 'find duplicate letters' examples
authorNeil Smith <neil.git@njae.me.uk>
Fri, 6 Feb 2015 22:33:58 +0000 (22:33 +0000)
committerNeil Smith <neil.git@njae.me.uk>
Fri, 6 Feb 2015 22:33:58 +0000 (22:33 +0000)
duplicate-letters.ipynb [new file with mode: 0644]

diff --git a/duplicate-letters.ipynb b/duplicate-letters.ipynb
new file mode 100644 (file)
index 0000000..0b15536
--- /dev/null
@@ -0,0 +1,518 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:19128e67a9754262fffc5d0706d52d873b5cd867f34b15dc0cb1987a03c4dd42"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "You need to write a procedure that returns all duplicate characters from a given String.\n",
+      "\n",
+      "For example if String is \"Java\" then program should return [\"a\"].\n",
+      "\n",
+      "* \"Java\" -> ['a']\n",
+      "* \"String\" -> []\n",
+      "* \"Computer science\" -> [\"c\", \"e\"]\n",
+      "* \"happy\" -> ['p']\n",
+      "\n",
+      "Bonus points if your program is robust and handle different kinds of input e.g. String without duplicate, null or empty String etc. \n",
+      "\n",
+      "Bonus points if you also write unit tests for normal and edge cases."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_dict(word):\n",
+      "    counts = {}\n",
+      "    for letter in word:\n",
+      "        if letter in counts:\n",
+      "            counts[letter] += 1\n",
+      "        else:\n",
+      "            counts[letter] = 1\n",
+      "    duplicates = []\n",
+      "    for letter in counts:\n",
+      "        if counts[letter] > 1:\n",
+      "            duplicates += [letter]\n",
+      "    return duplicates"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_dict('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_counts(word):\n",
+      "    def count_letter(word, letter):\n",
+      "        count = 0\n",
+      "        for l in word:\n",
+      "            if l == letter:\n",
+      "                count += 1\n",
+      "        return count\n",
+      "    counts = {}\n",
+      "    for letter in word:\n",
+      "        counts[letter] = count_letter(word, letter)\n",
+      "    duplicates = []\n",
+      "    for letter in counts:\n",
+      "        if counts[letter] > 1:\n",
+      "            duplicates += [letter]\n",
+      "    return duplicates"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_counts('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 12,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_comprehension(word):\n",
+      "    counts = {letter: len([l for l in word if l == letter]) for letter in word}\n",
+      "    return [letter for letter in counts if counts[letter] > 1]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 64
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_comprehension('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 65,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 65
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_recursion(word):\n",
+      "    return duplicates_via_recursion_helper(word, [])\n",
+      "\n",
+      "def duplicates_via_recursion_helper(word, duplicates_so_far):\n",
+      "    if word:\n",
+      "        letter = word[0]\n",
+      "        if letter in word[1:] and letter not in duplicates_so_far:\n",
+      "            return duplicates_via_recursion_helper(word[1:], [letter] + duplicates_so_far)\n",
+      "        else:\n",
+      "            return duplicates_via_recursion_helper(word[1:], duplicates_so_far)\n",
+      "    else:\n",
+      "        return duplicates_so_far"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 25
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_recursion('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 26,
+       "text": [
+        "['e', 'r', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 26
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_sorting(word):\n",
+      "    duplicates = set()\n",
+      "    sorted_word = sorted(word)\n",
+      "    for i in range(len(sorted_word) - 1):\n",
+      "        if sorted_word[i] == sorted_word[i+1]:\n",
+      "            duplicates.add(sorted_word[i])\n",
+      "    return list(duplicates)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 29
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_sorting('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 30,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 30
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_runs(word):\n",
+      "    duplicates = []\n",
+      "    sorted_word = sorted(word)\n",
+      "    i = 0\n",
+      "    found_duplicates = False\n",
+      "    for i in range(len(sorted_word) - 1):\n",
+      "        if sorted_word[i] == sorted_word[i+1]:\n",
+      "            if not found_duplicates:\n",
+      "                duplicates += [sorted_word[i]]\n",
+      "                found_duplicates = True\n",
+      "        else:\n",
+      "            found_duplicates = False\n",
+      "    return duplicates"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 55
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_runs('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 56,
+       "text": [
+        "['c', 'e', 'r']"
+       ]
+      }
+     ],
+     "prompt_number": 56
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_segments(word):\n",
+      "    segments = []\n",
+      "    sorted_word = sorted(word)\n",
+      "    segment_start = 0\n",
+      "    for i in range(len(sorted_word) - 1):\n",
+      "        if sorted_word[i] != sorted_word[i+1]:\n",
+      "            segments += [sorted_word[segment_start:i+1]]\n",
+      "            segment_start = i + 1\n",
+      "    return [segment[0] for segment in segments if len(segment) > 1]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 49
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_segments('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 50,
+       "text": [
+        "['c', 'e', 'r']"
+       ]
+      }
+     ],
+     "prompt_number": 50
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_buckets(word):\n",
+      "    buckets = []\n",
+      "    for letter in word:\n",
+      "        found_bucket = False\n",
+      "        for bucket in buckets:\n",
+      "            if bucket[0] == letter:\n",
+      "                bucket += [letter]\n",
+      "                found_bucket = True\n",
+      "        if not found_bucket:\n",
+      "            buckets += [[letter]]\n",
+      "    duplicates = []\n",
+      "    for bucket in buckets:\n",
+      "        if len(bucket) > 1:\n",
+      "            duplicates += bucket[0]\n",
+      "    return duplicates"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 43
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_buckets('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 44,
+       "text": [
+        "['c', 'r', 'e']"
+       ]
+      }
+     ],
+     "prompt_number": 44
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def duplicates_via_set(word):\n",
+      "    letters = set(word)\n",
+      "    duplicates = list(word)\n",
+      "    for letter in letters:\n",
+      "        duplicates.remove(letter)\n",
+      "    return list(set(duplicates))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 31
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_set('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 32,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 32
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import collections\n",
+      "\n",
+      "def duplicates_via_counter(word):\n",
+      "    counts = collections.Counter(word)\n",
+      "    return [letter for letter, count in counts.items() \n",
+      "            if count > 1]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 33
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_counter('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 34,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 34
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import itertools\n",
+      "\n",
+      "def duplicates_via_groupby(word):\n",
+      "    return [k for k, g in itertools.groupby(sorted(word)) \n",
+      "            if len(list(g)) > 1]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 41
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_groupby('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 42,
+       "text": [
+        "['c', 'e', 'r']"
+       ]
+      }
+     ],
+     "prompt_number": 42
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import functools\n",
+      "\n",
+      "def duplicates_via_reduce(word):\n",
+      "    def merge_counts(counts1, counts2):\n",
+      "        counts = {l: c for l, c in counts1.items()}\n",
+      "        for l, c in counts2.items():\n",
+      "            if l in counts:\n",
+      "                counts[l] += c\n",
+      "            else:\n",
+      "                counts[l] = c\n",
+      "        return counts\n",
+      "    \n",
+      "    counts = [{letter: 1} for letter in word]\n",
+      "    return [letter for letter, count in functools.reduce(merge_counts, counts).items() if count > 1]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 62
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "duplicates_via_reduce('occurrences')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 63,
+       "text": [
+        "['r', 'e', 'c']"
+       ]
+      }
+     ],
+     "prompt_number": 63
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file