4 "cell_type": "markdown",
7 "# Generate random movie plot elements\n",
9 "Data from [IMDB](http://www.imdb.com/interfaces), held at [ftp://ftp.fu-berlin.de/pub/misc/movies/database/](ftp://ftp.fu-berlin.de/pub/misc/movies/database/), in the `keywords.list.gz` file.\n",
11 "See also the [Abulafia generator](http://www.random-generator.com/index.php?title=Plot_Keyword_Oracle), the [Story-games thread](http://story-games.com/forums/discussion/3502/new-toy-imdb-plot-keywords), and [an earlier version of the list](http://www.logrus.com/~moose/page1/files/cleanplots.txt)."
23 "!wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/keywords.list.gz\n",
25 "!gunzip keywords.list.gz"
30 "execution_count": 72,
37 "output_type": "stream",
39 "generate-random-plots.ipynb keywords.list\r\n"
49 "execution_count": 73,
55 "import collections\n",
63 "execution_count": 74,
def read_keywords_from_list(line):
    """Expand one row of the "keywords in use" table into a flat keyword list.

    A row holds entries of the form ``keyword (count)`` separated by one or
    more tab characters. Each keyword is repeated ``count`` times so the
    result can be passed straight to ``collections.Counter.update``.

    Args:
        line: One row of the table as a string.

    Returns:
        A list containing every keyword repeated ``count`` times, in the
        order the entries appear in the row. Blank rows and empty fields
        contribute nothing.

    Raises:
        ValueError: If a non-empty entry does not split into exactly two
            whitespace-separated parts, or the count is not an integer
            wrapped in one character on each side (e.g. ``(12)``).
    """
    accumulator = []
    for entry in re.split('\t+', line):
        if not entry.strip():
            # A blank row, or a row with leading/trailing tabs, yields empty
            # fields; unpacking ''.split() into two names would raise.
            continue
        keyword, count = entry.split()
        # The count is written as "(N)": drop the surrounding parentheses.
        accumulator += [keyword] * int(count[1:-1])
    return accumulator
82 "execution_count": 75,
90 "['blue-pantyhose', 'blue-party', 'blue-party', 'blue-pearl']"
93 "execution_count": 75,
95 "output_type": "execute_result"
99 "read_keywords_from_list('blue-pantyhose (1)\t\tblue-party (2)\tblue-pearl (1)')"
104 "execution_count": 76,
def read_keywords_from_title(line):
    """Extract the keyword from one ``title<TAB(s)>keyword`` row.

    Args:
        line: One row of the per-title section: a title, one or more tab
            characters, then a keyword.

    Returns:
        A one-element list holding the second tab-separated field, or an
        empty list when the row has no second field (for example a blank
        line). A list is returned so the result can be passed directly to
        ``collections.Counter.update``.
    """
    fields = re.split('\t+', line)
    if len(fields) < 2:
        # No tab-separated keyword on this row; indexing [1] would raise.
        return []
    return [fields[1]]
119 "execution_count": 77,
130 "execution_count": 77,
132 "output_type": "execute_result"
136 "read_keywords_from_title('Been Down So Long It Looks Like Up to Me (1971)\t\tbeer-drinking')"
141 "execution_count": 78,
148 "output_type": "stream",
150 "skipping underlines ========\n",
151 "ending list 5: Submission Rules\n",
152 "skipping underlines =============\n",
153 "skipping underlines ==================\n",
154 "skipping underlines ===============\n",
155 "starting list keywords in use:\n",
156 "ending list 5: Submission Rules\n",
157 "skipping underlines ===================\n",
158 "skipping underlines ==============\n",
159 "skipping underlines ======================\n",
160 "skipping underlines =================================================\n",
161 "pre-title 8: THE KEYWORDS LIST\n",
162 "starting title ====================\n"
# Tally keyword frequencies from the two data sections of `keywords.list`:
#   * the "keywords in use" table  -> keywords_from_list
#   * the per-title keyword rows   -> keywords_from_title
# `reading_state` tracks which section (if any) the current line belongs to:
#   None                   not inside a data section
#   'keywords_from_list'   inside the "keywords in use" table
#   'pre_from_title'       just saw the "8: THE KEYWORDS LIST" heading and
#                          expects its "====" underline next
#   'keywords_from_title'  inside the per-title rows
#
# NOTE(review): every marker line is handled by exactly one branch and is
# never parsed as data, matching the recorded output of this cell (each
# marker is reported once). Confirm against the original control flow.
keywords_from_list = collections.Counter()
keywords_from_title = collections.Counter()
reading_state = None

# `with` guarantees the file is closed; iterating the handle streams the file
# instead of loading every line into memory first.
with open('keywords.list', encoding='latin-1') as keywords_file:
    for raw_line in keywords_file:
        line = raw_line.strip()

        if line.startswith('keywords in use'):
            reading_state = 'keywords_from_list'
            print('starting list', line)
        elif line.startswith('5: Submission Rules'):
            # Previously assigned to a misspelled name, so the state was
            # never actually reset here.
            reading_state = None
            print('ending list', line)
        elif reading_state == 'pre_from_title':
            # Only an underline directly after the heading starts the rows.
            if line.startswith('==='):
                reading_state = 'keywords_from_title'
                print('starting title', line)
            else:
                reading_state = None
                print('not start of titles', line)
        elif line.startswith('8: THE KEYWORDS LIST'):
            reading_state = 'pre_from_title'
            print('pre-title', line)
        elif line.startswith('==='):
            # Any other underline belongs to a section heading: stop reading.
            reading_state = None
            print('skipping underlines', line)
        elif reading_state == 'keywords_from_list':
            keywords_from_list.update(read_keywords_from_list(line))
        elif reading_state == 'keywords_from_title':
            keywords_from_title.update(read_keywords_from_title(line))

# Total number of keyword occurrences counted from the "keywords in use" table.
sum_keywords = sum(keywords_from_list.values())
207 "execution_count": 79,
215 "[('sex', 83531),\n",
216 " ('hardcore', 69247),\n",
217 " ('character-name-in-title', 44747),\n",
218 " ('independent-film', 37932)]"
221 "execution_count": 79,
223 "output_type": "execute_result"
227 "keywords_from_list.most_common(4)"
232 "execution_count": 80,
240 "[('sex', 83531),\n",
241 " ('hardcore', 69247),\n",
242 " ('character-name-in-title', 44747),\n",
243 " ('independent-film', 37932)]"
246 "execution_count": 80,
248 "output_type": "execute_result"
252 "keywords_from_title.most_common(4)"
257 "execution_count": 81,
def pick_random(items, count):
    """Draw one element from a counter, weighted by multiplicity.

    A position is chosen uniformly among the ``count`` elements that
    ``items.elements()`` yields, and the element at that position is returned.

    Args:
        items: An object with an ``elements()`` method that yields each item
            repeated according to its count (e.g. ``collections.Counter``).
        count: Total number of elements ``items.elements()`` yields, i.e.
            ``sum(items.values())`` for a Counter with positive counts.

    Returns:
        The element found at the randomly chosen position.

    Raises:
        ValueError: If ``count`` is not positive (empty range).
        IndexError: If ``count`` exceeds the number of elements available.
    """
    # randrange(count) yields 0 .. count-1. randint(0, count) also returned
    # `count` itself, one past the last element, which made the slice empty.
    i = random.randrange(count)
    return list(itertools.islice(items.elements(), i, i + 1))[0]
270 "execution_count": 85,
277 "output_type": "stream",
279 "mother-daughter-relationship\n",
282 "main-character-shot\n",
287 "los-angeles-california\n",
291 "unfaithful-husband\n",
297 "reference-to-wikipedia\n",
# Print twenty keywords drawn from the "keywords in use" counts.
for _draw in range(20):
    keyword = pick_random(keywords_from_list, sum_keywords)
    print(keyword)
309 "execution_count": null,
319 "display_name": "Python 3",
320 "language": "python",
328 "file_extension": ".py",
329 "mimetype": "text/x-python",
331 "nbconvert_exporter": "python",
332 "pygments_lexer": "ipython3",