Initial working master
authorNeil Smith <neil.git@njae.me.uk>
Fri, 4 Mar 2016 14:12:00 +0000 (14:12 +0000)
committerNeil Smith <neil.git@njae.me.uk>
Fri, 4 Mar 2016 14:12:00 +0000 (14:12 +0000)
.gitignore [new file with mode: 0644]
generate-random-plots.ipynb [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..0546add
--- /dev/null
@@ -0,0 +1,45 @@
+*.py[cod]
+
+# C extensions
+*.so
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+bin
+var
+sdist
+develop-eggs
+.installed.cfg
+lib
+lib64
+__pycache__
+
+# Installer logs
+pip-log.txt
+
+# Unit test / coverage reports
+.coverage
+.tox
+nosetests.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# IPython
+.ipynb*
+
+# Sublime text
+*.sublime-workspace
+
+# Logs
+*.log
diff --git a/generate-random-plots.ipynb b/generate-random-plots.ipynb
new file mode 100644 (file)
index 0000000..82a7f23
--- /dev/null
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generate random movie plot elements\n",
+    "\n",
+    "Data from [IMDB](http://www.imdb.com/interfaces), held at [ftp://ftp.fu-berlin.de/pub/misc/movies/database/](ftp://ftp.fu-berlin.de/pub/misc/movies/database/), in the `keywords.list.gz` file.\n",
+    "\n",
+    "See also the [Abulafia generator](http://www.random-generator.com/index.php?title=Plot_Keyword_Oracle), the [Story-games thread](http://story-games.com/forums/discussion/3502/new-toy-imdb-plot-keywords), and [an earlier version of the list](http://www.logrus.com/~moose/page1/files/cleanplots.txt)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Get the file\n",
+    "!wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/keywords.list.gz\n",
+    "\n",
+    "# -f overwrites an existing keywords.list, so re-running this cell refreshes the\n",
+    "# data instead of failing and leaving keywords.list.gz behind.\n",
+    "!gunzip -f keywords.list.gz"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "generate-random-plots.ipynb  keywords.list\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import collections\n",
+    "import re\n",
+    "import random\n",
+    "import itertools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def read_keywords_from_list(line):\n",
+    "    \"\"\"Expand one line of the 'keywords in use' table into a flat list.\n",
+    "\n",
+    "    The line holds entries of the form ``keyword (count)`` separated by runs\n",
+    "    of tabs. Each keyword is repeated ``count`` times in the result; a falsy\n",
+    "    line yields an empty list.\n",
+    "    \"\"\"\n",
+    "    keywords = []\n",
+    "    if line:\n",
+    "        for entry in re.split('\\t+', line):\n",
+    "            keyword, bracketed_count = entry.split()\n",
+    "            # Drop the first and last characters (the parentheses) before converting.\n",
+    "            keywords.extend([keyword] * int(bracketed_count[1:-1]))\n",
+    "    return keywords"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['blue-pantyhose', 'blue-party', 'blue-party', 'blue-pearl']"
+      ]
+     },
+     "execution_count": 75,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "read_keywords_from_list('blue-pantyhose (1)\t\tblue-party (2)\tblue-pearl (1)')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def read_keywords_from_title(line):\n",
+    "    \"\"\"Return the keyword attached to one title line, as a list.\n",
+    "\n",
+    "    A title line has the form ``title<TABs>keyword``; the field after the first\n",
+    "    run of tabs is returned in a one-element list. An empty line, or a line\n",
+    "    with no tab-separated second field, yields ``[]`` rather than raising\n",
+    "    IndexError.\n",
+    "    \"\"\"\n",
+    "    if not line:\n",
+    "        return []\n",
+    "    fields = re.split('\\t+', line)\n",
+    "    # Slicing gives [] when the line has no second field, [keyword] otherwise.\n",
+    "    return fields[1:2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['beer-drinking']"
+      ]
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "read_keywords_from_title('Been Down So Long It Looks Like Up to Me (1971)\t\tbeer-drinking')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "skipping underlines ========\n",
+      "ending list 5: Submission Rules\n",
+      "skipping underlines =============\n",
+      "skipping underlines ==================\n",
+      "skipping underlines ===============\n",
+      "starting list keywords in use:\n",
+      "ending list 5: Submission Rules\n",
+      "skipping underlines ===================\n",
+      "skipping underlines ==============\n",
+      "skipping underlines ======================\n",
+      "skipping underlines =================================================\n",
+      "pre-title 8: THE KEYWORDS LIST\n",
+      "starting title ====================\n"
+     ]
+    }
+   ],
+   "source": [
+    "keywords_from_list = collections.Counter()\n",
+    "keywords_from_title = collections.Counter()\n",
+    "reading_state = None\n",
+    "\n",
+    "# Small state machine over the IMDB dump: reading_state names the section the\n",
+    "# current line belongs to (None means the line carries no keyword data).\n",
+    "# The with-block closes the file; iterating it avoids loading every line at once.\n",
+    "with open('keywords.list', encoding='latin-1') as keywords_file:\n",
+    "    for raw_line in keywords_file:\n",
+    "        line = raw_line.strip()\n",
+    "\n",
+    "        if line.startswith('keywords in use'):\n",
+    "            reading_state = 'keywords_from_list'\n",
+    "            print('starting list', line)\n",
+    "            continue\n",
+    "        if line.startswith('5: Submission Rules'):\n",
+    "            # This heading marks the end of the keyword-count table.\n",
+    "            reading_state = None\n",
+    "            print('ending list', line)\n",
+    "            continue\n",
+    "        if reading_state == 'pre_from_title':\n",
+    "            # The line right after the '8: THE KEYWORDS LIST' heading must be its\n",
+    "            # '===' underline; anything else means this was not the real heading.\n",
+    "            if line.startswith('==='):\n",
+    "                reading_state = 'keywords_from_title'\n",
+    "                print('starting title', line)\n",
+    "            else:\n",
+    "                reading_state = None\n",
+    "                print('not start of titles', line)\n",
+    "            continue\n",
+    "        if line.startswith('8: THE KEYWORDS LIST'):\n",
+    "            reading_state = 'pre_from_title'\n",
+    "            print('pre-title', line)\n",
+    "            continue\n",
+    "        if line.startswith('==='):\n",
+    "            # Any other underline starts a section that is not parsed.\n",
+    "            reading_state = None\n",
+    "            print('skipping underlines', line)\n",
+    "            continue\n",
+    "\n",
+    "        if reading_state == 'keywords_from_list':\n",
+    "            keywords_from_list.update(read_keywords_from_list(line))\n",
+    "        elif reading_state == 'keywords_from_title':\n",
+    "            keywords_from_title.update(read_keywords_from_title(line))\n",
+    "\n",
+    "# Total number of keyword uses counted from the 'keywords in use' table.\n",
+    "sum_keywords = sum(keywords_from_list.values())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('sex', 83531),\n",
+       " ('hardcore', 69247),\n",
+       " ('character-name-in-title', 44747),\n",
+       " ('independent-film', 37932)]"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "keywords_from_list.most_common(4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('sex', 83531),\n",
+       " ('hardcore', 69247),\n",
+       " ('character-name-in-title', 44747),\n",
+       " ('independent-film', 37932)]"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "keywords_from_title.most_common(4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def pick_random(items, count):\n",
+    "    \"\"\"Pick one element of the Counter `items`, weighted by its counts.\n",
+    "\n",
+    "    `count` is expected to be the total of all counts in `items`, i.e. the\n",
+    "    number of values produced by `items.elements()`. Raises IndexError when\n",
+    "    `count` is 0 or the chosen index lies beyond the available elements.\n",
+    "    \"\"\"\n",
+    "    if count == 0:\n",
+    "        raise IndexError('cannot pick from an empty collection')\n",
+    "    # randrange excludes `count` itself; randint(0, count) could return an index\n",
+    "    # one past the last element and make the lookup below fail.\n",
+    "    i = random.randrange(count)\n",
+    "    return list(itertools.islice(items.elements(), i, i+1))[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "mother-daughter-relationship\n",
+      "oral-sex\n",
+      "cyberpunk\n",
+      "main-character-shot\n",
+      "medical-gloves\n",
+      "tv-show\n",
+      "female-nudity\n",
+      "conundrum\n",
+      "los-angeles-california\n",
+      "elevator\n",
+      "yorkshireman\n",
+      "paper-money\n",
+      "unfaithful-husband\n",
+      "brain-teaser\n",
+      "birthday\n",
+      "ranch\n",
+      "festival\n",
+      "world-war-two\n",
+      "reference-to-wikipedia\n",
+      "sundance\n"
+     ]
+    }
+   ],
+   "source": [
+    "for _ in range(20):\n",
+    "    print(pick_random(keywords_from_list, sum_keywords))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.3+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}