},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
"[(0, ''), (1, 'a'), (2, 'aa'), (3, 'aab'), (4, 'aabc'), (5, 'aabcc')]"
]
},
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"metadata": {
"scrolled": true
},
" (5, 5): False}"
]
},
- "execution_count": 6,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "aabcc dbbca aadbbbaccc\n",
+ "aabcc dbbca aadbbcbcac\n",
"aa 0 0 ! ! ! True\n",
"s2 0 1 ! d a False\n",
"s2 0 2 ! b a False\n",
"xx 1 2 a b d False\n",
"xx 1 3 a b b False\n",
"xx 1 4 a c b False\n",
- "xx 1 5 a a b False\n",
+ "xx 1 5 a a c False\n",
"s1 2 0 a ! a True\n",
"s2 2 1 a d d True\n",
"s2 2 2 a b b True\n",
"s2 2 3 a b b True\n",
- "xx 2 4 a c b False\n",
- "xx 2 5 a a a False\n",
+ "s2 2 4 a c c True\n",
+ "xx 2 5 a a b False\n",
"s1 3 0 b ! d False\n",
"s1 3 1 b d b True\n",
"s2 3 2 b b b True\n",
"s1 3 2 b b b True\n",
- "s2 3 3 b b b True\n",
- "s1 3 3 b b b True\n",
- "xx 3 4 b c a False\n",
+ "xx 3 3 b b c False\n",
+ "s1 3 4 b c b True\n",
"xx 3 5 b a c False\n",
"s1 4 0 c ! b False\n",
"xx 4 1 c d b False\n",
- "xx 4 2 c b b False\n",
- "xx 4 3 c b a False\n",
- "xx 4 4 c c c False\n",
- "xx 4 5 c a c False\n",
+ "s1 4 2 c b c True\n",
+ "s2 4 3 c b b True\n",
+ "s2 4 4 c c c True\n",
+ "s1 4 4 c c c True\n",
+ "s2 4 5 c a a True\n",
"s1 5 0 c ! b False\n",
- "xx 5 1 c d b False\n",
- "xx 5 2 c b a False\n",
- "xx 5 3 c b c False\n",
- "xx 5 4 c c c False\n",
- "xx 5 5 c a c False\n",
+ "xx 5 1 c d c False\n",
+ "xx 5 2 c b b False\n",
+ "s1 5 3 c b c True\n",
+ "xx 5 4 c c a False\n",
+ "s1 5 5 c a c True\n",
"T . . . . .\n",
"T . . . . .\n",
- "T T T T . .\n",
- ". T T T . .\n",
- ". . . . . .\n",
- ". . . . . .\n"
+ "T T T T T .\n",
+ ". T T . T .\n",
+ ". . T T T T\n",
+ ". . . T . T\n"
]
},
{
" (2, 1): (2, 0, 'd', 's2'),\n",
" (2, 2): (2, 1, 'b', 's2'),\n",
" (2, 3): (2, 2, 'b', 's2'),\n",
+ " (2, 4): (2, 3, 'c', 's2'),\n",
" (3, 1): (2, 1, 'b', 's1'),\n",
" (3, 2): (2, 2, 'b', 's1'),\n",
- " (3, 3): (2, 3, 'b', 's1')}"
+ " (3, 4): (2, 4, 'b', 's1'),\n",
+ " (4, 2): (3, 2, 'c', 's1'),\n",
+ " (4, 3): (4, 2, 'b', 's2'),\n",
+ " (4, 4): (3, 4, 'c', 's1'),\n",
+ " (4, 5): (4, 4, 'a', 's2'),\n",
+ " (5, 3): (4, 3, 'c', 's1'),\n",
+ " (5, 5): (4, 5, 'c', 's1')}"
]
},
- "execution_count": 10,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "s3 = s3f\n",
+ "s3 = s3t\n",
"\n",
"print(s1, s2, s3)\n",
"\n",
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
"148"
]
},
- "execution_count": 26,
+ "execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 2,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 3,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 4,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 5,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 6,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 35,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def is_subseq_rows(seq1, seq2, return_backpointers=False, debug=False):\n",
+ " \"\"\"Return true if seq1 is a subsequence of seq2.\n",
+ " If return_backpointers, also return the set of backpointers to\n",
+ " reconstruct the subsequence\"\"\"\n",
+ " \n",
+ " # dp_table[i, j] is True if first i characters of seq1 can\n",
+ " # be found in the first j characters of seq2\n",
+ " \n",
+ " backpointers = {}\n",
+ " \n",
+ " for i in range(len(seq1)+1):\n",
+ " row = [False] * (len(seq2)+1)\n",
+ " for j in range(i, len(seq2)+1):\n",
+ " if i == 0:\n",
+ " row[j] = True\n",
+ " if debug: print('aa', i, j, '!', '!', row[j])\n",
+ " elif j == 0:\n",
+ " row[j] = False\n",
+ " if debug: print('zz', i, j, '!', '!', row[j])\n",
+ " else:\n",
+ " # extend by character from s2\n",
+ " if row[j-1]:\n",
+ " row[j] = True\n",
+ " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+ " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], row[j]) \n",
+ " # extend by character from s1\n",
+ " if previous_row[j-1] and seq1[i-1] == seq2[j-1]:\n",
+ " row[j] = True\n",
+ " backpointers[i, j] = (i-1, j-1, seq1[i-1], 'seq1') \n",
+ " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], row[j])\n",
+ " if not row[j]:\n",
+ " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], row[j])\n",
+ " previous_row = row\n",
+ " \n",
+ " if return_backpointers:\n",
+ " retval = [row[-1]]\n",
+ " if return_backpointers:\n",
+ " retval += [backpointers]\n",
+ " return tuple(retval)\n",
+ " else:\n",
+ " return row[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"22"
]
},
- "execution_count": 54,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "22"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sum(1 for s in bills\n",
+ " if s != 0\n",
+ " if is_subseq_rows(bills[0], bills[s]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 loop, best of 3: 8.02 s per loop\n"
+ ]
+ }
+ ],
"source": [
"%%timeit\n",
"sum(1 for s in bills\n",
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 loop, best of 3: 3.04 s per loop\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "sum(1 for s in bills\n",
+ " if s != 0\n",
+ " if is_subseq_rows(bills[0], bills[s]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
"metadata": {
"scrolled": true
},
" 146]"
]
},
- "execution_count": 36,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
" return dp_table[len(seq1), len(seq2)]"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def is_interleave_rows(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
+ " \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
+ " If return_backpointers, also return the set of backpointers to\n",
+ " reconstruct the interleaving.\n",
+ " \n",
+ " This version doesn't build the whole table, just keeps the current and previous rows\"\"\"\n",
+ " \n",
+ " # dp_table[i, j] is True if first i+j characters of seq is made up of \n",
+ " # an interleaving of the first i characters of seq1 and the \n",
+ " # first j characters of seq2\n",
+ " \n",
+ " if len(seq1) + len(seq2) != len(seq3):\n",
+ " if return_backpointers:\n",
+ " retval = [False]\n",
+ " if return_backpointers:\n",
+ " retval += [{}]\n",
+ " return tuple(retval)\n",
+ " else:\n",
+ " return False\n",
+ " \n",
+ "\n",
+ " backpointers = {}\n",
+ "\n",
+ " for i in range(len(seq1)+1):\n",
+ " row = [False] * (len(seq2)+1)\n",
+ " for j in range(len(seq2)+1):\n",
+ " if i == 0 and j == 0:\n",
+ " row[j] = True\n",
+ " if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n",
+ " elif i == 0:\n",
+ " # extend by character from seq2\n",
+ " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+ " row[j] = True\n",
+ " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+ " if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n",
+ " elif j == 0:\n",
+ " # extend by character from seq1\n",
+ " if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+ " row[j] = True\n",
+ " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
+ " if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n",
+ " else:\n",
+ " # extend by character from seq2\n",
+ " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+ " row[j] = True\n",
+ " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+ " if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j]) \n",
+ " # extend by character from seq1\n",
+ " if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+ " row[j] = True\n",
+ " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1') \n",
+ " if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+ " if not row[j]:\n",
+ " if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+ " previous_row = row\n",
+ "\n",
+ " if return_backpointers:\n",
+ " retval = [row[-1]]\n",
+ " if return_backpointers:\n",
+ " retval += [backpointers]\n",
+ " return tuple(retval)\n",
+ " else:\n",
+ " return row[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def is_interleave_rows2(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
+ " \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
+ " If return_backpointers, also return the set of backpointers to\n",
+ " reconstruct the interleaving.\n",
+ " \n",
+ " This version doesn't keep the whole table, just the current and previous\n",
+ " rows. It also builds the current row as it goes along, rather than\n",
+ " building the whole row and updating elements as required.\"\"\"\n",
+ " \n",
+ " # dp_table[i, j] is True if first i+j characters of seq is made up of \n",
+ " # an interleaving of the first i characters of seq1 and the \n",
+ " # first j characters of seq2\n",
+ " \n",
+ " if len(seq1) + len(seq2) != len(seq3):\n",
+ " if return_backpointers:\n",
+ " retval = [False]\n",
+ " if return_backpointers:\n",
+ " retval += [{}]\n",
+ " return tuple(retval)\n",
+ " else:\n",
+ " return False\n",
+ " \n",
+ "\n",
+ " backpointers = {}\n",
+ "\n",
+ " for i in range(len(seq1)+1):\n",
+ " row = []\n",
+ " for j in range(len(seq2)+1):\n",
+ " if i == 0 and j == 0:\n",
+ " row += [True]\n",
+ " if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n",
+ " elif i == 0:\n",
+ " # extend by character from seq2\n",
+ " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+ " row += [True]\n",
+ " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+ " else:\n",
+ " row += [False]\n",
+ " if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n",
+ " elif j == 0:\n",
+ " # extend by character from seq1\n",
+ " if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+ " row += [True]\n",
+ " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
+ " else:\n",
+ " row += [False]\n",
+ " if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n",
+ " else:\n",
+ " # extend by character from seq2\n",
+ " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+ " row += [True]\n",
+ " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+ " if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j]) \n",
+ " # extend by character from seq1\n",
+ " elif previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+ " row += [True]\n",
+ " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1') \n",
+ " if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+ " else:\n",
+ " row += [False]\n",
+ " if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+ " previous_row = row\n",
+ "\n",
+ " if return_backpointers:\n",
+ " retval = [row[-1]]\n",
+ " if return_backpointers:\n",
+ " retval += [backpointers]\n",
+ " return tuple(retval)\n",
+ " else:\n",
+ " return row[-1]"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 41,
" if is_interleave(bills[0], bills[1], bills[s])]"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[30]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[s for s in bills\n",
+ " if is_interleave_rows(bills[0], bills[1], bills[s])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[30]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[s for s in bills\n",
+ " if is_interleave_rows2(bills[0], bills[1], bills[s])]"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 45,
" if is_interleave(bills[0], bills[1], bills[s])]"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 loop, best of 3: 1.06 s per loop\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "[s for s in bills\n",
+ " if is_interleave_rows(bills[0], bills[1], bills[s])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 loop, best of 3: 1.56 s per loop\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "[s for s in bills\n",
+ " if is_interleave_rows2(bills[0], bills[1], bills[s])]"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 71,
" if is_interleave_recursive(bills[0], bills[1], bills[s])]"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "# Sense solution\n",
+ "Took \n",
+ "* 22.9 seconds to load file\n",
+ "* 14906.8 seconds to find subsequences (4.14 hours; 4 hours, 8 minutes, 26.8 seconds)\n",
+ "* 41726.9 seconds to check all interleavings (11.59 hours; 11 hours, 35 minutes, 26.9 seconds)\n",
+ "\n",
+ "Total of 15 hours 44 minutes 16.6 seconds."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(4.140777777777777, 4, 8, 26.799999999999272)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rtime = 14906.8\n",
+ "(rtime / 3600,\n",
+ " int(rtime / 3600), \n",
+ " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+ " rtime - int(rtime / 60) * 60\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(11.590805555555557, 11, 35, 26.900000000001455)"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rtime = 41726.9 \n",
+ "(rtime / 3600,\n",
+ " int(rtime / 3600), \n",
+ " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+ " rtime - int(rtime / 60) * 60\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(15.737944444444445, 15, 44, 16.599999999998545)"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rtime = 22.9 + 14906.8 + 41726.9 \n",
+ "(rtime / 3600,\n",
+ " int(rtime / 3600), \n",
+ " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+ " rtime - int(rtime / 60) * 60\n",
+ ")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,