--- /dev/null
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:19128e67a9754262fffc5d0706d52d873b5cd867f34b15dc0cb1987a03c4dd42"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You need to write a procedure that returns all duplicate characters from a given String.\n",
+ "\n",
+ "For example if String is \"Java\" then program should return [\"a\"].\n",
+ "\n",
+ "* \"Java\" -> ['a']\n",
+ "* \"String\" -> []\n",
+ "* \"Computer science\" -> [\"c\", \"e\"]\n",
+ "* \"happy\" -> ['p']\n",
+ "\n",
+ "Bonus points if your program is robust and handle different kinds of input e.g. String without duplicate, null or empty String etc. \n",
+ "\n",
+ "Bonus points if you also write unit tests for normal and edge cases."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_dict(word):\n",
+ " counts = {}\n",
+ " for letter in word:\n",
+ " if letter in counts:\n",
+ " counts[letter] += 1\n",
+ " else:\n",
+ " counts[letter] = 1\n",
+ " duplicates = []\n",
+ " for letter in counts:\n",
+ " if counts[letter] > 1:\n",
+ " duplicates += [letter]\n",
+ " return duplicates"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_dict('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 11,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 11
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_counts(word):\n",
+ " def count_letter(word, letter):\n",
+ " count = 0\n",
+ " for l in word:\n",
+ " if l == letter:\n",
+ " count += 1\n",
+ " return count\n",
+ " counts = {}\n",
+ " for letter in word:\n",
+ " counts[letter] = count_letter(word, letter)\n",
+ " duplicates = []\n",
+ " for letter in counts:\n",
+ " if counts[letter] > 1:\n",
+ " duplicates += [letter]\n",
+ " return duplicates"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_counts('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 12,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 12
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_comprehension(word):\n",
+ " counts = {letter: len([l for l in word if l == letter]) for letter in word}\n",
+ " return [letter for letter in counts if counts[letter] > 1]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 64
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_comprehension('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 65,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 65
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_recursion(word):\n",
+ " return duplicates_via_recursion_helper(word, [])\n",
+ "\n",
+ "def duplicates_via_recursion_helper(word, duplicates_so_far):\n",
+ " if word:\n",
+ " letter = word[0]\n",
+ " if letter in word[1:] and letter not in duplicates_so_far:\n",
+ " return duplicates_via_recursion_helper(word[1:], [letter] + duplicates_so_far)\n",
+ " else:\n",
+ " return duplicates_via_recursion_helper(word[1:], duplicates_so_far)\n",
+ " else:\n",
+ " return duplicates_so_far"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 25
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_recursion('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 26,
+ "text": [
+ "['e', 'r', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 26
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_sorting(word):\n",
+ " duplicates = set()\n",
+ " sorted_word = sorted(word)\n",
+ " for i in range(len(sorted_word) - 1):\n",
+ " if sorted_word[i] == sorted_word[i+1]:\n",
+ " duplicates.add(sorted_word[i])\n",
+ " return list(duplicates)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 29
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_sorting('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 30,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 30
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_runs(word):\n",
+ " duplicates = []\n",
+ " sorted_word = sorted(word)\n",
+ " i = 0\n",
+ " found_duplicates = False\n",
+ " for i in range(len(sorted_word) - 1):\n",
+ " if sorted_word[i] == sorted_word[i+1]:\n",
+ " if not found_duplicates:\n",
+ " duplicates += [sorted_word[i]]\n",
+ " found_duplicates = True\n",
+ " else:\n",
+ " found_duplicates = False\n",
+ " return duplicates"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 55
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_runs('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 56,
+ "text": [
+ "['c', 'e', 'r']"
+ ]
+ }
+ ],
+ "prompt_number": 56
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_segments(word):\n",
+ " segments = []\n",
+ " sorted_word = sorted(word)\n",
+ " segment_start = 0\n",
+ " for i in range(len(sorted_word) - 1):\n",
+ " if sorted_word[i] != sorted_word[i+1]:\n",
+ " segments += [sorted_word[segment_start:i+1]]\n",
+ " segment_start = i + 1\n",
+ " return [segment[0] for segment in segments if len(segment) > 1]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 49
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_segments('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 50,
+ "text": [
+ "['c', 'e', 'r']"
+ ]
+ }
+ ],
+ "prompt_number": 50
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_buckets(word):\n",
+ " buckets = []\n",
+ " for letter in word:\n",
+ " found_bucket = False\n",
+ " for bucket in buckets:\n",
+ " if bucket[0] == letter:\n",
+ " bucket += [letter]\n",
+ " found_bucket = True\n",
+ " if not found_bucket:\n",
+ " buckets += [[letter]]\n",
+ " duplicates = []\n",
+ " for bucket in buckets:\n",
+ " if len(bucket) > 1:\n",
+ " duplicates += bucket[0]\n",
+ " return duplicates"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 43
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_buckets('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 44,
+ "text": [
+ "['c', 'r', 'e']"
+ ]
+ }
+ ],
+ "prompt_number": 44
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def duplicates_via_set(word):\n",
+ " letters = set(word)\n",
+ " duplicates = list(word)\n",
+ " for letter in letters:\n",
+ " duplicates.remove(letter)\n",
+ " return list(set(duplicates))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 31
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_set('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 32,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 32
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import collections\n",
+ "\n",
+ "def duplicates_via_counter(word):\n",
+ " counts = collections.Counter(word)\n",
+ " return [letter for letter, count in counts.items() \n",
+ " if count > 1]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 33
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_counter('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 34,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 34
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import itertools\n",
+ "\n",
+ "def duplicates_via_groupby(word):\n",
+ " return [k for k, g in itertools.groupby(sorted(word)) \n",
+ " if len(list(g)) > 1]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 41
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_groupby('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 42,
+ "text": [
+ "['c', 'e', 'r']"
+ ]
+ }
+ ],
+ "prompt_number": 42
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import functools\n",
+ "\n",
+ "def duplicates_via_reduce(word):\n",
+ " def merge_counts(counts1, counts2):\n",
+ " counts = {l: c for l, c in counts1.items()}\n",
+ " for l, c in counts2.items():\n",
+ " if l in counts:\n",
+ " counts[l] += c\n",
+ " else:\n",
+ " counts[l] = c\n",
+ " return counts\n",
+ " \n",
+ " counts = [{letter: 1} for letter in word]\n",
+ " return [letter for letter, count in functools.reduce(merge_counts, counts).items() if count > 1]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 62
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "duplicates_via_reduce('occurrences')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 63,
+ "text": [
+ "['r', 'e', 'c']"
+ ]
+ }
+ ],
+ "prompt_number": 63
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file