{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate random movie plot elements\n", "\n", "Data from [IMDB](http://www.imdb.com/interfaces), held at [ftp://ftp.fu-berlin.de/pub/misc/movies/database/](ftp://ftp.fu-berlin.de/pub/misc/movies/database/), in the `keywords.list.gz` file.\n", "\n", "[Abulafia generator](http://www.random-generator.com/index.php?title=Plot_Keyword_Oracle) and [Story-games thread](http://story-games.com/forums/discussion/3502/new-toy-imdb-plot-keywords), and [an earlier version of the list](http://www.logrus.com/~moose/page1/files/cleanplots.txt)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Get the file\n", "!wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/keywords.list.gz\n", "\n", "!gunzip keywords.list.gz" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "generate-random-plots.ipynb keywords.list\r\n" ] } ], "source": [ "!ls" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import collections\n", "import re\n", "import random\n", "import itertools" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def read_keywords_from_list(line):\n", " if not line:\n", " return []\n", " accumulator = []\n", " for kn in re.split('\\t+', line):\n", " k, n = kn.split()\n", " n = int(n[1:-1])\n", " accumulator += [k] * n\n", " return accumulator" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['blue-pantyhose', 'blue-party', 'blue-party', 'blue-pearl']" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "read_keywords_from_list('blue-pantyhose (1)\t\tblue-party (2)\tblue-pearl (1)')" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def read_keywords_from_title(line):\n", " if line:\n", " return [re.split('\\t+', line)[1]]\n", " else:\n", " return []" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['beer-drinking']" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "read_keywords_from_title('Been Down So Long It Looks Like Up to Me (1971)\t\tbeer-drinking')" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "skipping underlines ========\n", "ending list 5: Submission Rules\n", "skipping underlines =============\n", "skipping underlines ==================\n", "skipping underlines ===============\n", "starting list keywords in use:\n", "ending list 5: Submission Rules\n", "skipping underlines ===================\n", "skipping underlines ==============\n", "skipping underlines ======================\n", "skipping underlines =================================================\n", "pre-title 8: THE KEYWORDS LIST\n", "starting title ====================\n" ] } ], "source": [ "keywords_from_list = collections.Counter()\n", "keywords_from_title = collections.Counter()\n", "reading_state = None\n", "for line in open('keywords.list', encoding='latin-1').readlines():\n", " \n", " if line.strip().startswith('keywords in use'):\n", " reading_state = 'keywords_from_list'\n", " print('starting list', line.strip())\n", " continue\n", " if line.strip().startswith('5: Submission Rules'):\n", " readings_state = None\n", " print('ending list', line.strip())\n", " continue\n", " if reading_state == 'pre_from_title':\n", " if line.strip().startswith('==='):\n", " reading_state = 'keywords_from_title'\n", " print('starting title', line.strip())\n", " else:\n", " reading_state = None\n", " print('not start of titles', line.strip())\n", " continue\n", " if line.strip().startswith('8: THE KEYWORDS LIST'):\n", " reading_state = 'pre_from_title'\n", " print('pre-title', line.strip())\n", " continue\n", " if line.strip().startswith('==='):\n", " reading_state = None\n", " print('skipping underlines', line.strip())\n", " continue\n", " \n", " if reading_state == 'keywords_from_list':\n", " keywords_from_list.update(read_keywords_from_list(line.strip()))\n", " elif reading_state == 'keywords_from_title':\n", " keywords_from_title.update(read_keywords_from_title(line.strip()))\n", "\n", "sum_keywords = sum(keywords_from_list.values())" ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('sex', 83531),\n", " ('hardcore', 69247),\n", " ('character-name-in-title', 44747),\n", " ('independent-film', 37932)]" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "keywords_from_list.most_common(4)" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('sex', 83531),\n", " ('hardcore', 69247),\n", " ('character-name-in-title', 44747),\n", " ('independent-film', 37932)]" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "keywords_from_title.most_common(4)" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def pick_random(items, count):\n", " i = random.randint(0, count)\n", " return list(itertools.islice(items.elements(), i, i+1))[0]" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mother-daughter-relationship\n", "oral-sex\n", "cyberpunk\n", "main-character-shot\n", "medical-gloves\n", "tv-show\n", "female-nudity\n", "conundrum\n", "los-angeles-california\n", "elevator\n", "yorkshireman\n", "paper-money\n", "unfaithful-husband\n", "brain-teaser\n", "birthday\n", "ranch\n", "festival\n", "world-war-two\n", "reference-to-wikipedia\n", "sundance\n" ] } ], "source": [ "for _ in range(20):\n", " print(pick_random(keywords_from_list, sum_keywords))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3+" } }, "nbformat": 4, "nbformat_minor": 0 }