+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Generate random movie plot elements\n",
+ "\n",
+ "Data from [IMDB](http://www.imdb.com/interfaces), held at [ftp://ftp.fu-berlin.de/pub/misc/movies/database/](ftp://ftp.fu-berlin.de/pub/misc/movies/database/), in the `keywords.list.gz` file.\n",
+ "\n",
+ "See also the [Abulafia generator](http://www.random-generator.com/index.php?title=Plot_Keyword_Oracle), a [Story-games thread](http://story-games.com/forums/discussion/3502/new-toy-imdb-plot-keywords), and [an earlier version of the list](http://www.logrus.com/~moose/page1/files/cleanplots.txt)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Fetch and unpack the IMDB keywords dump.\n",
+ "# Both steps are skipped when keywords.list is already present, so re-running\n",
+ "# the notebook neither downloads the archive again nor makes gunzip stop on\n",
+ "# an output file that already exists.\n",
+ "![ -f keywords.list ] || wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/keywords.list.gz\n",
+ "\n",
+ "![ -f keywords.list ] || gunzip keywords.list.gz"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "generate-random-plots.ipynb keywords.list\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the working directory to check that keywords.list is there.\n",
+ "!ls"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import collections\n",
+ "import re\n",
+ "import random\n",
+ "import itertools"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def read_keywords_from_list(line):\n",
+ "    \"\"\"Expand one row of the keyword summary table into repeated keywords.\n",
+ "\n",
+ "    A row holds entries of the form ``keyword (count)`` separated by runs of\n",
+ "    tabs. Every keyword is returned ``count`` times, in row order. An empty\n",
+ "    row yields an empty list.\n",
+ "    \"\"\"\n",
+ "    expanded = []\n",
+ "    if line:\n",
+ "        for entry in re.split('\\t+', line):\n",
+ "            keyword, bracketed_count = entry.split()\n",
+ "            # Drop the first and last character (the parentheses in ``(count)``).\n",
+ "            expanded.extend([keyword] * int(bracketed_count[1:-1]))\n",
+ "    return expanded"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['blue-pantyhose', 'blue-party', 'blue-party', 'blue-pearl']"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sanity check: the number in parentheses sets how often each keyword is repeated.\n",
+ "read_keywords_from_list('blue-pantyhose (1)\t\tblue-party (2)\tblue-pearl (1)')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def read_keywords_from_title(line):\n",
+ "    \"\"\"Return the keyword of a title row as a one-item list.\n",
+ "\n",
+ "    The row is split on runs of tabs and the second field is taken as the\n",
+ "    keyword. An empty row yields an empty list.\n",
+ "    \"\"\"\n",
+ "    if not line:\n",
+ "        return []\n",
+ "    fields = re.split('\\t+', line)\n",
+ "    return [fields[1]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['beer-drinking']"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sanity check: only the field after the tab run is kept, not the title.\n",
+ "read_keywords_from_title('Been Down So Long It Looks Like Up to Me (1971)\t\tbeer-drinking')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "skipping underlines ========\n",
+ "ending list 5: Submission Rules\n",
+ "skipping underlines =============\n",
+ "skipping underlines ==================\n",
+ "skipping underlines ===============\n",
+ "starting list keywords in use:\n",
+ "ending list 5: Submission Rules\n",
+ "skipping underlines ===================\n",
+ "skipping underlines ==============\n",
+ "skipping underlines ======================\n",
+ "skipping underlines =================================================\n",
+ "pre-title 8: THE KEYWORDS LIST\n",
+ "starting title ====================\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Parse keywords.list with a small state machine. reading_state is one of:\n",
+ "#   None                  - lines are ignored\n",
+ "#   'keywords_from_list'  - rows of the 'keywords in use' summary table\n",
+ "#   'pre_from_title'      - just saw the section 8 heading, expecting its underline\n",
+ "#   'keywords_from_title' - per-title rows of section 8\n",
+ "keywords_from_list = collections.Counter()\n",
+ "keywords_from_title = collections.Counter()\n",
+ "reading_state = None\n",
+ "\n",
+ "# The context manager closes the file once parsing is done, and iterating the\n",
+ "# handle reads one line at a time instead of loading the whole file.\n",
+ "with open('keywords.list', encoding='latin-1') as keywords_file:\n",
+ "    for raw_line in keywords_file:\n",
+ "        line = raw_line.strip()\n",
+ "\n",
+ "        if line.startswith('keywords in use'):\n",
+ "            reading_state = 'keywords_from_list'\n",
+ "            print('starting list', line)\n",
+ "            continue\n",
+ "        if line.startswith('5: Submission Rules'):\n",
+ "            # Leave list mode. The assignment has to target reading_state itself;\n",
+ "            # a misspelled name would leave the parser in list mode.\n",
+ "            reading_state = None\n",
+ "            print('ending list', line)\n",
+ "            continue\n",
+ "        if reading_state == 'pre_from_title':\n",
+ "            # The section 8 heading only counts when an underline follows it.\n",
+ "            if line.startswith('==='):\n",
+ "                reading_state = 'keywords_from_title'\n",
+ "                print('starting title', line)\n",
+ "            else:\n",
+ "                reading_state = None\n",
+ "                print('not start of titles', line)\n",
+ "            continue\n",
+ "        if line.startswith('8: THE KEYWORDS LIST'):\n",
+ "            reading_state = 'pre_from_title'\n",
+ "            print('pre-title', line)\n",
+ "            continue\n",
+ "        if line.startswith('==='):\n",
+ "            # Any other underline stops whatever was being read.\n",
+ "            reading_state = None\n",
+ "            print('skipping underlines', line)\n",
+ "            continue\n",
+ "\n",
+ "        if reading_state == 'keywords_from_list':\n",
+ "            keywords_from_list.update(read_keywords_from_list(line))\n",
+ "        elif reading_state == 'keywords_from_title':\n",
+ "            keywords_from_title.update(read_keywords_from_title(line))\n",
+ "\n",
+ "# Total number of keyword occurrences in the list tally.\n",
+ "sum_keywords = sum(keywords_from_list.values())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('sex', 83531),\n",
+ " ('hardcore', 69247),\n",
+ " ('character-name-in-title', 44747),\n",
+ " ('independent-film', 37932)]"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Four most frequent keywords tallied from the 'keywords in use' section.\n",
+ "keywords_from_list.most_common(4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('sex', 83531),\n",
+ " ('hardcore', 69247),\n",
+ " ('character-name-in-title', 44747),\n",
+ " ('independent-film', 37932)]"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Four most frequent keywords tallied from the per-title rows of section 8.\n",
+ "keywords_from_title.most_common(4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def pick_random(items, count):\n",
+ "    \"\"\"Pick one element of a Counter at random, weighted by its count.\n",
+ "\n",
+ "    Args:\n",
+ "        items: a ``collections.Counter`` (anything providing ``elements()``).\n",
+ "        count: total number of elements, i.e. ``sum(items.values())``.\n",
+ "\n",
+ "    Returns:\n",
+ "        The element found at a uniformly chosen position of\n",
+ "        ``items.elements()``.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: if ``count`` is not positive.\n",
+ "        IndexError: if the chosen position lies past the last element\n",
+ "            (``count`` larger than the real total).\n",
+ "    \"\"\"\n",
+ "    # randrange excludes its upper bound. randint(0, count) can return count\n",
+ "    # itself, which is one past the last valid position.\n",
+ "    position = random.randrange(count)\n",
+ "    for element in itertools.islice(items.elements(), position, None):\n",
+ "        return element\n",
+ "    raise IndexError('position {} is past the end of items'.format(position))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "mother-daughter-relationship\n",
+ "oral-sex\n",
+ "cyberpunk\n",
+ "main-character-shot\n",
+ "medical-gloves\n",
+ "tv-show\n",
+ "female-nudity\n",
+ "conundrum\n",
+ "los-angeles-california\n",
+ "elevator\n",
+ "yorkshireman\n",
+ "paper-money\n",
+ "unfaithful-husband\n",
+ "brain-teaser\n",
+ "birthday\n",
+ "ranch\n",
+ "festival\n",
+ "world-war-two\n",
+ "reference-to-wikipedia\n",
+ "sundance\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print twenty keywords drawn from the list tally, one pick_random call per line.\n",
+ "for _ in range(20):\n",
+ " print(pick_random(keywords_from_list, sum_keywords))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.4.3+"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}