Problem 1 done
[ou-summer-of-code-2017.git] / 01-ticket-prices / ticket-pricing-generator.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 45,
6 "metadata": {
7 "collapsed": true
8 },
9 "outputs": [],
10 "source": [
11 "import random\n",
12 "import re\n",
13 "import uuid"
14 ]
15 },
16 {
17 "cell_type": "code",
18 "execution_count": 19,
19 "metadata": {
20 "collapsed": true
21 },
22 "outputs": [],
23 "source": [
24 "def cap_after_hyphen(string):\n",
25 " return re.sub(r'\\-(?P<first>[a-z])', \n",
26 " lambda m: '-' + m.group('first').upper(), \n",
27 " string) "
28 ]
29 },
30 {
31 "cell_type": "code",
32 "execution_count": 20,
33 "metadata": {},
34 "outputs": [
35 {
36 "data": {
37 "text/plain": [
38 "['Aalborg',\n",
39 " 'Nullarbor',\n",
40 " 'Morgantown',\n",
41 " 'Estacada',\n",
42 " 'Almaty',\n",
43 " 'Tambon-Pa-Fa',\n",
44 " 'Lemnos',\n",
45 " 'Sofia',\n",
46 " 'Puente-Laguna-Garzonkuala-Penyu',\n",
47 " 'Hajnowski',\n",
48 " 'Karlukovo',\n",
49 " 'Mamula',\n",
50 " 'Stonington-Island',\n",
51 " 'Grimsey-Island',\n",
52 " 'Uzupis',\n",
53 " 'Sukhumi',\n",
54 " 'Ijsseloog',\n",
55 " 'Giessenmestia',\n",
56 " 'Nordkapp',\n",
57 " 'Gorongosa',\n",
58 " 'Brorfelde',\n",
59 " 'Parowan',\n",
60 " 'Tubakuba',\n",
61 " 'Geoje-Si',\n",
62 " 'Mora',\n",
63 " 'Holmegaard',\n",
64 " 'Jayuya',\n",
65 " 'Zhangye-Shi',\n",
66 " 'Luoyang']"
67 ]
68 },
69 "execution_count": 20,
70 "metadata": {},
71 "output_type": "execute_result"
72 }
73 ],
74 "source": [
75 "destination_names = ('Aalborg Nullarbor MORGANTOWN ESTACADA ALMATY TAMBON-PA-FA Lemnos Sofia Puente-Laguna-Garzon' +\n",
76 "'KUALA-PENYU HAJNOWSKI KARLUKOVO Mamula STONINGTON-ISLAND Grimsey-Island Uzupis SUKHUMI IJsseloog GIESSEN' +\n",
77 "'MESTIA NORDKAPP GORONGOSA Brorfelde PAROWAN Tubakuba GEOJE-SI MORA HOLMEGAARD JAYUYA ZHANGYE-SHI LUOYANG')\n",
78 "destination_names = destination_names.lower().split()\n",
79 "destination_names = [n[0].upper() + n[1:] for n in destination_names]\n",
80 "destination_names = [cap_after_hyphen(n) for n in destination_names]\n",
81 "\n",
82 "destination_names"
83 ]
84 },
85 {
86 "cell_type": "code",
87 "execution_count": 39,
88 "metadata": {},
89 "outputs": [
90 {
91 "data": {
92 "text/plain": [
93 "{'Aalborg': 1,\n",
94 " 'Almaty': 1,\n",
95 " 'Brorfelde': 1,\n",
96 " 'Estacada': 1,\n",
97 " 'Geoje-Si': 1,\n",
98 " 'Giessenmestia': 1,\n",
99 " 'Gorongosa': 1,\n",
100 " 'Grimsey-Island': 1,\n",
101 " 'Hajnowski': 1,\n",
102 " 'Holmegaard': 1,\n",
103 " 'Ijsseloog': 1,\n",
104 " 'Jayuya': 1,\n",
105 " 'Karlukovo': 1,\n",
106 " 'Lemnos': 1,\n",
107 " 'Luoyang': 1,\n",
108 " 'Mamula': 1,\n",
109 " 'Mora': 1,\n",
110 " 'Morgantown': 1,\n",
111 " 'Nordkapp': 1,\n",
112 " 'Nullarbor': 1,\n",
113 " 'Parowan': 1,\n",
114 " 'Puente-Laguna-Garzonkuala-Penyu': 1,\n",
115 " 'Sofia': 1,\n",
116 " 'Stonington-Island': 1,\n",
117 " 'Sukhumi': 1,\n",
118 " 'Tambon-Pa-Fa': 1,\n",
119 " 'Tubakuba': 1,\n",
120 " 'Uzupis': 1,\n",
121 " 'Zhangye-Shi': 1}"
122 ]
123 },
124 "execution_count": 39,
125 "metadata": {},
126 "output_type": "execute_result"
127 }
128 ],
129 "source": [
130 "destination_scores = {n: 1 for n in destination_names}\n",
131 "destination_scores"
132 ]
133 },
134 {
135 "cell_type": "code",
136 "execution_count": 40,
137 "metadata": {},
138 "outputs": [
139 {
140 "data": {
141 "text/plain": [
142 "{'Aalborg': 1,\n",
143 " 'Almaty': 2.0,\n",
144 " 'Brorfelde': 0.9,\n",
145 " 'Estacada': 0.4,\n",
146 " 'Geoje-Si': 1,\n",
147 " 'Giessenmestia': 1,\n",
148 " 'Gorongosa': 1,\n",
149 " 'Grimsey-Island': 1,\n",
150 " 'Hajnowski': 1,\n",
151 " 'Holmegaard': 1,\n",
152 " 'Ijsseloog': 1,\n",
153 " 'Jayuya': 0.6,\n",
154 " 'Karlukovo': 2.2,\n",
155 " 'Lemnos': 1,\n",
156 " 'Luoyang': 1,\n",
157 " 'Mamula': 1,\n",
158 " 'Mora': 1,\n",
159 " 'Morgantown': 2.9,\n",
160 " 'Nordkapp': 1.5,\n",
161 " 'Nullarbor': 2.2,\n",
162 " 'Parowan': 1,\n",
163 " 'Puente-Laguna-Garzonkuala-Penyu': 0.4,\n",
164 " 'Sofia': 1,\n",
165 " 'Stonington-Island': 1,\n",
166 " 'Sukhumi': 1,\n",
167 " 'Tambon-Pa-Fa': 1,\n",
168 " 'Tubakuba': 1,\n",
169 " 'Uzupis': 0.9,\n",
170 " 'Zhangye-Shi': 1}"
171 ]
172 },
173 "execution_count": 40,
174 "metadata": {},
175 "output_type": "execute_result"
176 }
177 ],
178 "source": [
179 "adjust = random.sample(destination_names, 10)\n",
180 "inc_names = adjust[:5]\n",
181 "dec_names = adjust[5:]\n",
182 "for n in inc_names:\n",
183 " destination_scores[n] = float(random.randint(11, 30)) / 10\n",
184 "for n in dec_names:\n",
185 " destination_scores[n] = float(random.randint(3, 9)) / 10 \n",
186 "destination_scores"
187 ]
188 },
189 {
190 "cell_type": "code",
191 "execution_count": 42,
192 "metadata": {
193 "collapsed": true
194 },
195 "outputs": [],
196 "source": [
197 "destination_scores = {'Aalborg': 1, 'Almaty': 2.0, 'Brorfelde': 0.9, 'Estacada': 0.4, 'Geoje-Si': 1,\n",
198 " 'Giessenmestia': 1, 'Gorongosa': 1, 'Grimsey-Island': 1, 'Hajnowski': 1, 'Holmegaard': 1, 'Ijsseloog': 1,\n",
199 " 'Jayuya': 0.6, 'Karlukovo': 2.2, 'Lemnos': 1, 'Luoyang': 1, 'Mamula': 1, 'Mora': 1, 'Morgantown': 2.9,\n",
200 " 'Nordkapp': 1.5, 'Nullarbor': 2.2, 'Parowan': 1, 'Puente-Laguna-Garzonkuala-Penyu': 0.4, 'Sofia': 1,\n",
201 " 'Stonington-Island': 1, 'Sukhumi': 1, 'Tambon-Pa-Fa': 1, 'Tubakuba': 1, 'Uzupis': 0.9, 'Zhangye-Shi': 1}"
202 ]
203 },
204 {
205 "cell_type": "code",
206 "execution_count": 66,
207 "metadata": {
208 "collapsed": true
209 },
210 "outputs": [],
211 "source": [
212 "destination_score_exceptions = {'Almaty': 2.0, 'Brorfelde': 0.9, 'Estacada': 0.4, 'Jayuya': 0.6, 'Karlukovo': 2.2, 'Morgantown': 2.9,\n",
213 " 'Nordkapp': 1.5, 'Nullarbor': 2.2, 'Puente-Laguna-Garzonkuala-Penyu': 0.4, 'Uzupis': 0.9}"
214 ]
215 },
216 {
217 "cell_type": "code",
218 "execution_count": 44,
219 "metadata": {},
220 "outputs": [
221 {
222 "name": "stdout",
223 "output_type": "stream",
224 "text": [
225 "| Destination | Score |\n",
226 "|-------------|-------|\n",
227 "| Almaty | 2.0 |\n",
228 "| Brorfelde | 0.9 |\n",
229 "| Estacada | 0.4 |\n",
230 "| Jayuya | 0.6 |\n",
231 "| Karlukovo | 2.2 |\n",
232 "| Morgantown | 2.9 |\n",
233 "| Nordkapp | 1.5 |\n",
234 "| Nullarbor | 2.2 |\n",
235 "| Puente-Laguna-Garzonkuala-Penyu | 0.4 |\n",
236 "| Uzupis | 0.9 |\n"
237 ]
238 }
239 ],
240 "source": [
241 "print('| Destination | Score |')\n",
242 "print('|-------------|-------|')\n",
243 "for n in sorted(destination_scores):\n",
244 "    if destination_scores[n] != 1:\n",
245 "        print('|', n, '|', destination_scores[n], '|')"
246 ]
247 },
248 {
249 "cell_type": "code",
250 "execution_count": 48,
251 "metadata": {
252 "collapsed": true
253 },
254 "outputs": [],
255 "source": [
256 "def make_holiday(name=None):\n",
257 "    if not name:\n",
258 "        name = random.choice(destination_names)\n",
259 "    days = random.choice([3, 7, 7, 7, 10, 14, 14, 14, 14, 21, 21])\n",
260 "    price = 0\n",
261 "    for _ in range(4):\n",
262 "        price += random.randint(100, 500)\n",
263 "    hol_id = str(uuid.uuid4())\n",
264 "    return hol_id, price, name, days"
265 ]
266 },
267 {
268 "cell_type": "code",
269 "execution_count": 54,
270 "metadata": {},
271 "outputs": [
272 {
273 "data": {
274 "text/plain": [
275 "('4615dad7-f5ab-4c9b-8028-b3e5c676ff24', 1046, 'Almaty', 14)"
276 ]
277 },
278 "execution_count": 54,
279 "metadata": {},
280 "output_type": "execute_result"
281 }
282 ],
283 "source": [
284 "make_holiday()"
285 ]
286 },
287 {
288 "cell_type": "code",
289 "execution_count": 59,
290 "metadata": {},
291 "outputs": [
292 {
293 "data": {
294 "text/plain": [
295 "[('3c425201-5538-4a3f-bfd7-1219137c9d63', 1147, 'Aalborg', 21),\n",
296 " ('5913e357-dc18-49c5-850d-aa0172041cc4', 1398, 'Nullarbor', 3),\n",
297 " ('bc966859-49ad-4c26-b36e-94251b0a29b5', 1064, 'Morgantown', 7),\n",
298 " ('ab1d0a54-41bc-4351-beb8-f6e5fd4f07f0', 1722, 'Estacada', 14),\n",
299 " ('b5efba4e-373d-4a15-b048-8fcb3169c441', 1132, 'Almaty', 7),\n",
300 " ('bd1ff2b9-5b97-4a13-9d8a-601faea40e9c', 1085, 'Tambon-Pa-Fa', 14),\n",
301 " ('5b9df004-a8b7-4c28-a27e-21e24cd566b5', 1264, 'Lemnos', 7),\n",
302 " ('5951de60-9e98-471f-a917-74e3c8a3457a', 938, 'Sofia', 14),\n",
303 " ('c6a4cad7-cd9a-4e05-bd16-92305c933269',\n",
304 " 922,\n",
305 " 'Puente-Laguna-Garzonkuala-Penyu',\n",
306 " 14),\n",
307 " ('9527ee0b-4bcd-4a1f-95fd-b3a627eacf02', 1071, 'Hajnowski', 14),\n",
308 " ('82b6adfb-f09d-420f-bbf9-ea90450d21f6', 734, 'Karlukovo', 3),\n",
309 " ('7b08192c-5e30-45dc-bb31-2da61861c0ac', 1692, 'Mamula', 21),\n",
310 " ('5abbe3e4-7d2e-404c-9ce2-27083f95c140', 1277, 'Stonington-Island', 7),\n",
311 " ('b3f517ca-2a07-427d-8531-81227468b611', 1280, 'Grimsey-Island', 3),\n",
312 " ('f1fa90d2-0f58-480f-80c0-0fddb845b71a', 1225, 'Uzupis', 7),\n",
313 " ('330e4f58-31d2-474f-8e23-654224689a18', 1016, 'Sukhumi', 14),\n",
314 " ('a6837ba1-42aa-488d-b39a-5b49422ce165', 966, 'Ijsseloog', 21),\n",
315 " ('a7e4161a-0435-4915-961e-8c204e527a4b', 1465, 'Giessenmestia', 21),\n",
316 " ('633be2e4-a719-496e-9987-d5ea8b6ab428', 1256, 'Nordkapp', 3),\n",
317 " ('bd83d3e3-6134-4511-918c-a94338ffb0c8', 1508, 'Gorongosa', 7),\n",
318 " ('05ed4593-3582-4fde-8958-a63274507344', 1099, 'Brorfelde', 7),\n",
319 " ('b8361d0a-7f23-4491-9450-c28917807a68', 1364, 'Parowan', 7),\n",
320 " ('dc7f49c2-672e-4abc-a58a-4e8b45e786d7', 1491, 'Tubakuba', 14),\n",
321 " ('b709a89e-e5c5-4a60-97ca-55ad350fa7f1', 1373, 'Geoje-Si', 7),\n",
322 " ('18888ae2-e410-4430-87af-1d07ba9b25bb', 1317, 'Mora', 3),\n",
323 " ('b08ebc25-7d02-4973-8e28-fa3b75ed4e29', 1483, 'Holmegaard', 14),\n",
324 " ('d3519dde-9a15-4982-b1ed-1dd6dbecc2c1', 653, 'Jayuya', 21),\n",
325 " ('fa882848-1787-424a-90c3-8593bb91f174', 1027, 'Zhangye-Shi', 10),\n",
326 " ('21d2fcc8-f528-467d-b0ba-a6748933a4a8', 1261, 'Luoyang', 14)]"
327 ]
328 },
329 "execution_count": 59,
330 "metadata": {},
331 "output_type": "execute_result"
332 }
333 ],
334 "source": [
335 "holidays = []\n",
336 "for n in destination_names:\n",
337 " holidays += [make_holiday(n)]\n",
338 "holidays"
339 ]
340 },
341 {
342 "cell_type": "code",
343 "execution_count": 60,
344 "metadata": {
345 "collapsed": true
346 },
347 "outputs": [],
348 "source": [
349 "for _ in range(95):\n",
350 " holidays += [make_holiday()]"
351 ]
352 },
353 {
354 "cell_type": "code",
355 "execution_count": 62,
356 "metadata": {},
357 "outputs": [],
358 "source": [
359 "random.shuffle(holidays)\n",
360 "with open('01-holidays.txt', 'w') as f:\n",
361 "    for hid, price, name, days in holidays:\n",
362 "        f.write('{} {} {} {}\\n'.format(hid[-10:], price, name, days))"
363 ]
364 },
365 {
366 "cell_type": "code",
367 "execution_count": 63,
368 "metadata": {},
369 "outputs": [
370 {
371 "data": {
372 "text/plain": [
373 "[['5f12ce1b86', '1192', 'Sukhumi', '14'],\n",
374 " ['0279c8a91b', '1008', 'Estacada', '14'],\n",
375 " ['1faea40e9c', '1085', 'Tambon-Pa-Fa', '14']]"
376 ]
377 },
378 "execution_count": 63,
379 "metadata": {},
380 "output_type": "execute_result"
381 }
382 ],
383 "source": [
384 "with open('01-holidays.txt') as f:\n",
385 " hols = [h.split() for h in f.readlines()]\n",
386 "hols[:3]"
387 ]
388 },
389 {
390 "cell_type": "code",
391 "execution_count": 64,
392 "metadata": {
393 "collapsed": true
394 },
395 "outputs": [],
396 "source": [
397 "assert len(set(h[0] for h in hols)) == len(hols)"
398 ]
399 },
400 {
401 "cell_type": "code",
402 "execution_count": 78,
403 "metadata": {},
404 "outputs": [
405 {
406 "data": {
407 "text/plain": [
408 "[('6138068a-3c16-4f7f-8dc6-a1199608abc5', 1209, 'Nordkapp', 21),\n",
409 " ('eafb15a4-1839-4fb5-be9b-d8389018bd07', 1052, 'Estacada', 21),\n",
410 " ('ab659b71-347d-45dc-9416-34def36ffc7d', 1514, 'Giessenmestia', 21),\n",
411 " ('bb8d5034-49ce-483d-ab6c-d16caf2584a5', 724, 'Stonington-Island', 14),\n",
412 " ('7476b214-8b55-47f6-833e-dfa487c4270a', 782, 'Geoje-Si', 14),\n",
413 " ('f2ffe169-7cc5-42e1-9b46-8cdb61bb906d', 769, 'Morgantown', 3),\n",
414 " ('2e6a6d6d-6c08-4021-b99f-05202c898b5f', 1184, 'Morgantown', 21)]"
415 ]
416 },
417 "execution_count": 78,
418 "metadata": {},
419 "output_type": "execute_result"
420 }
421 ],
422 "source": [
423 "example_holidays = []\n",
424 "names = random.sample(sorted(destination_score_exceptions), 2) + random.sample(destination_names, 4)\n",
425 "names += [names[-1]]\n",
426 "example_holidays = [make_holiday(n) for n in names]\n",
427 "example_holidays"
428 ]
429 },
430 {
431 "cell_type": "code",
432 "execution_count": 79,
433 "metadata": {},
434 "outputs": [
435 {
436 "data": {
437 "text/plain": [
438 "[('f2ffe169-7cc5-42e1-9b46-8cdb61bb906d', 769, 'Morgantown', 3),\n",
439 " ('2e6a6d6d-6c08-4021-b99f-05202c898b5f', 1184, 'Morgantown', 21),\n",
440 " ('ab659b71-347d-45dc-9416-34def36ffc7d', 1514, 'Giessenmestia', 21),\n",
441 " ('eafb15a4-1839-4fb5-be9b-d8389018bd07', 1052, 'Estacada', 21),\n",
442 " ('7476b214-8b55-47f6-833e-dfa487c4270a', 782, 'Geoje-Si', 14),\n",
443 " ('bb8d5034-49ce-483d-ab6c-d16caf2584a5', 724, 'Stonington-Island', 14),\n",
444 " ('6138068a-3c16-4f7f-8dc6-a1199608abc5', 1209, 'Nordkapp', 21)]"
445 ]
446 },
447 "execution_count": 79,
448 "metadata": {},
449 "output_type": "execute_result"
450 }
451 ],
452 "source": [
453 "random.shuffle(example_holidays)\n",
454 "example_holidays"
455 ]
456 },
457 {
458 "cell_type": "code",
459 "execution_count": 82,
460 "metadata": {
461 "collapsed": true
462 },
463 "outputs": [],
464 "source": [
465 "example_holidays = [('f2ffe169-7cc5-42e1-9b46-8cdb61bb906d', 769, 'Morgantown', 3),\n",
466 " ('2e6a6d6d-6c08-4021-b99f-05202c898b5f', 1284, 'Morgantown', 21),\n",
467 " ('ab659b71-347d-45dc-9416-34def36ffc7d', 1514, 'Giessenmestia', 21),\n",
468 " ('eafb15a4-1839-4fb5-be9b-d8389018bd07', 1052, 'Estacada', 21),\n",
469 " ('7476b214-8b55-47f6-833e-dfa487c4270a', 782, 'Geoje-Si', 14),\n",
470 " ('bb8d5034-49ce-483d-ab6c-d16caf2584a5', 724, 'Stonington-Island', 14),\n",
471 " ('6138068a-3c16-4f7f-8dc6-a1199608abc5', 1209, 'Nordkapp', 21)]"
472 ]
473 },
474 {
475 "cell_type": "code",
476 "execution_count": 83,
477 "metadata": {},
478 "outputs": [
479 {
480 "name": "stdout",
481 "output_type": "stream",
482 "text": [
483 "db61bb906d 769 Morgantown 3\n",
484 "202c898b5f 1284 Morgantown 21\n",
485 "def36ffc7d 1514 Giessenmestia 21\n",
486 "389018bd07 1052 Estacada 21\n",
487 "a487c4270a 782 Geoje-Si 14\n",
488 "6caf2584a5 724 Stonington-Island 14\n",
489 "199608abc5 1209 Nordkapp 21\n"
490 ]
491 }
492 ],
493 "source": [
494 "for hid, price, name, days in example_holidays:\n",
495 " print('{} {} {} {}'.format(hid[-10:], price, name, days))"
496 ]
497 },
498 {
499 "cell_type": "code",
500 "execution_count": null,
501 "metadata": {
502 "collapsed": true
503 },
504 "outputs": [],
505 "source": []
506 }
507 ],
508 "metadata": {
509 "kernelspec": {
510 "display_name": "Python 3",
511 "language": "python",
512 "name": "python3"
513 },
514 "language_info": {
515 "codemirror_mode": {
516 "name": "ipython",
517 "version": 3
518 },
519 "file_extension": ".py",
520 "mimetype": "text/x-python",
521 "name": "python",
522 "nbconvert_exporter": "python",
523 "pygments_lexer": "ipython3",
524 "version": "3.5.2+"
525 }
526 },
527 "nbformat": 4,
528 "nbformat_minor": 2
529 }