From 7b76523f486d4a6cabe9e2251e87868f4e6f88cb Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Fri, 6 Feb 2015 22:33:58 +0000 Subject: [PATCH] Added some 'find duplicate letters' examples --- duplicate-letters.ipynb | 518 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 518 insertions(+) create mode 100644 duplicate-letters.ipynb diff --git a/duplicate-letters.ipynb b/duplicate-letters.ipynb new file mode 100644 index 0000000..0b15536 --- /dev/null +++ b/duplicate-letters.ipynb @@ -0,0 +1,518 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:19128e67a9754262fffc5d0706d52d873b5cd867f34b15dc0cb1987a03c4dd42" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You need to write a procedure that returns all duplicate characters from a given String.\n", + "\n", + "For example if String is \"Java\" then program should return [\"a\"].\n", + "\n", + "* \"Java\" -> ['a']\n", + "* \"String\" -> []\n", + "* \"Computer science\" -> [\"c\", \"e\"]\n", + "* \"happy\" -> ['p']\n", + "\n", + "Bonus points if your program is robust and handle different kinds of input e.g. String without duplicate, null or empty String etc. \n", + "\n", + "Bonus points if you also write unit tests for normal and edge cases." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_dict(word):\n", + " counts = {}\n", + " for letter in word:\n", + " if letter in counts:\n", + " counts[letter] += 1\n", + " else:\n", + " counts[letter] = 1\n", + " duplicates = []\n", + " for letter in counts:\n", + " if counts[letter] > 1:\n", + " duplicates += [letter]\n", + " return duplicates" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_dict('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 11, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 11 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_counts(word):\n", + " def count_letter(word, letter):\n", + " count = 0\n", + " for l in word:\n", + " if l == letter:\n", + " count += 1\n", + " return count\n", + " counts = {}\n", + " for letter in word:\n", + " counts[letter] = count_letter(word, letter)\n", + " duplicates = []\n", + " for letter in counts:\n", + " if counts[letter] > 1:\n", + " duplicates += [letter]\n", + " return duplicates" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 8 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_counts('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 12, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 12 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_comprehension(word):\n", + " counts = {letter: len([l for l in word if l == letter]) for letter in word}\n", + " return [letter for letter in counts if counts[letter] > 1]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 64 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_comprehension('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 65, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 65 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_recursion(word):\n", + " return duplicates_via_recursion_helper(word, [])\n", + "\n", + "def duplicates_via_recursion_helper(word, duplicates_so_far):\n", + " if word:\n", + " letter = word[0]\n", + " if letter in word[1:] and letter not in duplicates_so_far:\n", + " return duplicates_via_recursion_helper(word[1:], [letter] + duplicates_so_far)\n", + " else:\n", + " return duplicates_via_recursion_helper(word[1:], duplicates_so_far)\n", + " else:\n", + " return duplicates_so_far" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 25 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_recursion('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 26, + "text": [ + "['e', 'r', 'c']" + ] + } + ], + "prompt_number": 26 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_sorting(word):\n", + " duplicates = set()\n", + " sorted_word = sorted(word)\n", + " for i in range(len(sorted_word) - 1):\n", + " if sorted_word[i] == sorted_word[i+1]:\n", + " duplicates.add(sorted_word[i])\n", + " return list(duplicates)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 29 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_sorting('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 30, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 30 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_runs(word):\n", + " duplicates = []\n", + " sorted_word = sorted(word)\n", + " i = 0\n", + " found_duplicates = False\n", + " for i in range(len(sorted_word) - 1):\n", + " if sorted_word[i] == sorted_word[i+1]:\n", + " if not found_duplicates:\n", + " duplicates += [sorted_word[i]]\n", + " found_duplicates = True\n", + " else:\n", + " found_duplicates = False\n", + " return duplicates" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 55 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_runs('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 56, + "text": [ + "['c', 'e', 'r']" + ] + } + ], + "prompt_number": 56 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_segments(word):\n", + " segments = []\n", + " sorted_word = sorted(word)\n", + " segment_start = 0\n", + " for i in range(len(sorted_word) - 1):\n", + " if sorted_word[i] != sorted_word[i+1]:\n", + " segments += [sorted_word[segment_start:i+1]]\n", + " segment_start = i + 1\n", + " return [segment[0] for segment in segments if len(segment) > 1]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 49 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_segments('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 50, + "text": [ + "['c', 'e', 'r']" + ] + } + ], + "prompt_number": 50 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_buckets(word):\n", + " buckets = []\n", + " for letter in word:\n", + " found_bucket = False\n", + " for bucket in buckets:\n", + " if bucket[0] == letter:\n", + " bucket += [letter]\n", + " found_bucket = True\n", + " if not found_bucket:\n", + " buckets += [[letter]]\n", + " duplicates = []\n", + " for bucket in buckets:\n", + " if len(bucket) > 1:\n", + " duplicates += bucket[0]\n", + " return duplicates" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 43 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_buckets('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 44, + "text": [ + "['c', 'r', 'e']" + ] + } + ], + "prompt_number": 44 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def duplicates_via_set(word):\n", + " letters = set(word)\n", + " duplicates = list(word)\n", + " for letter in letters:\n", + " duplicates.remove(letter)\n", + " return list(set(duplicates))" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 31 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_set('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 32, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 32 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import collections\n", + "\n", + "def duplicates_via_counter(word):\n", + " counts = collections.Counter(word)\n", + " return [letter for letter, count in counts.items() \n", + " if count > 1]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 33 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_counter('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 34, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 34 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import itertools\n", + "\n", + "def duplicates_via_groupby(word):\n", + " return [k for k, g in itertools.groupby(sorted(word)) \n", + " if len(list(g)) > 1]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 41 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_groupby('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 42, + "text": [ + "['c', 'e', 'r']" + ] + } + ], + "prompt_number": 42 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import functools\n", + "\n", + "def duplicates_via_reduce(word):\n", + " def merge_counts(counts1, counts2):\n", + " counts = {l: c for l, c in counts1.items()}\n", + " for l, c in counts2.items():\n", + " if l in counts:\n", + " counts[l] += c\n", + " else:\n", + " counts[l] = c\n", + " return counts\n", + " \n", + " counts = [{letter: 1} for letter in word]\n", + " return [letter for letter, count in functools.reduce(merge_counts, counts).items() if count > 1]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 62 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "duplicates_via_reduce('occurrences')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 63, + "text": [ + "['r', 'e', 'c']" + ] + } + ], + "prompt_number": 63 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file -- 2.34.1