X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;ds=sidebyside;f=beatles-vs-stones-gather-data.ipynb;h=05b77a0e81f4ae0b21d7041de35237790c0341c9;hb=d9e7bac3496d434d65e3c1358918e4df3b247433;hp=a902935c59d59193144e34405faaaec8cde59644;hpb=a14e843063a70273cd4f92acd5378c92bda7d8d9;p=battle-of-the-bands.git

diff --git a/beatles-vs-stones-gather-data.ipynb b/beatles-vs-stones-gather-data.ipynb
index a902935..05b77a0 100644
--- a/beatles-vs-stones-gather-data.ipynb
+++ b/beatles-vs-stones-gather-data.ipynb
@@ -12,9 +12,15 @@
     "\n",
     "I'm also on a bit of a Beatles jag, so I've also done the analysis for Beatles songs.\n",
     "\n",
-    "http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0115255#s5\n",
-    "https://www.discogs.com/developers/#page:database,header:database-artist-releases\n",
-    "http://data.discogs.com/\n",
+    "### Some data sources\n",
+    "\n",
+    "* http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0115255#s5\n",
+    "* https://www.discogs.com/developers/#page:database,header:database-artist-releases\n",
+    "* http://data.discogs.com/\n",
+    "\n",
+    "* https://labrosa.ee.columbia.edu/millionsong/\n",
+    "\n",
+    "* https://twitter.com/kcimc/status/893855561590157312?s=09 and https://drive.google.com/file/d/0B9tyIRZ76JCdN3NtaVpPU3c4QWs/view (stored locally in the [1m.pkl](1m.pkl) folder)\n",
     "\n",
     "\n",
     "## Contents\n",
@@ -98,9 +104,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# try:\n",
@@ -112,9 +116,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Create a database and a collections within it.\n",
@@ -136,9 +138,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -188,9 +188,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def get_artists(artist_name):\n",
@@ -211,9 +209,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -238,9 +234,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -272,9 +266,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def get_albums(artist_id):\n",
@@ -292,9 +284,7 @@
   {
    "cell_type": "code",
    "execution_count": 41,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -315,9 +305,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -338,9 +326,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -658,7 +644,6 @@
    "cell_type": "code",
    "execution_count": 11,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -1091,9 +1076,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def get_tracks(album_id):\n",
@@ -1117,7 +1100,6 @@
    "cell_type": "code",
    "execution_count": 45,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -1151,7 +1133,6 @@
    "cell_type": "code",
    "execution_count": 17,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -1185,7 +1166,6 @@
    "cell_type": "code",
    "execution_count": 18,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -1766,9 +1746,7 @@
   {
    "cell_type": "code",
    "execution_count": 49,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1830,9 +1808,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1865,7 +1841,6 @@
    "cell_type": "code",
    "execution_count": 16,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -2601,9 +2576,7 @@
   {
    "cell_type": "code",
    "execution_count": 30,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "a_type, a_token = get_spotify_auth_token()\n",
@@ -2616,7 +2589,6 @@
    "cell_type": "code",
    "execution_count": 37,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -4562,7 +4534,6 @@
    "cell_type": "code",
    "execution_count": 33,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -6199,9 +6170,7 @@
   {
    "cell_type": "code",
    "execution_count": 38,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6231,9 +6200,7 @@
   {
    "cell_type": "code",
    "execution_count": 35,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6263,9 +6230,7 @@
   {
    "cell_type": "code",
    "execution_count": 31,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6299,9 +6264,7 @@
   {
    "cell_type": "code",
    "execution_count": 39,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def genius_artist_search(artist_name, per_page=20):\n",
@@ -6322,9 +6285,7 @@
   {
    "cell_type": "code",
    "execution_count": 40,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6346,9 +6307,7 @@
   {
    "cell_type": "code",
    "execution_count": 41,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6377,9 +6336,7 @@
   {
    "cell_type": "code",
    "execution_count": 42,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def genius_song_search(artist_id):\n",
@@ -6406,9 +6363,7 @@
   {
    "cell_type": "code",
    "execution_count": 43,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6429,9 +6384,7 @@
   {
    "cell_type": "code",
    "execution_count": 44,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6452,9 +6405,7 @@
   {
    "cell_type": "code",
    "execution_count": 45,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -6496,7 +6447,6 @@
    "cell_type": "code",
    "execution_count": 46,
    "metadata": {
-    "collapsed": false,
     "scrolled": false
    },
    "outputs": [
@@ -7415,9 +7365,7 @@
   {
    "cell_type": "code",
    "execution_count": 47,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def genius_lyrics(song_url):\n",
@@ -7437,9 +7385,7 @@
   {
    "cell_type": "code",
    "execution_count": 48,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -7461,9 +7407,7 @@
   {
    "cell_type": "code",
    "execution_count": 49,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -7491,9 +7435,7 @@
   {
    "cell_type": "code",
    "execution_count": 50,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -7514,7 +7456,6 @@
    "cell_type": "code",
    "execution_count": 51,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -7570,9 +7511,7 @@
   {
    "cell_type": "code",
    "execution_count": 53,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -7620,9 +7559,7 @@
   {
    "cell_type": "code",
    "execution_count": 55,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -7646,7 +7583,6 @@
    "cell_type": "code",
    "execution_count": 56,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -7724,9 +7660,7 @@
   {
    "cell_type": "code",
    "execution_count": 57,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8304,9 +8238,7 @@
   {
    "cell_type": "code",
    "execution_count": 58,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8326,9 +8258,7 @@
   {
    "cell_type": "code",
    "execution_count": 59,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8357,9 +8287,7 @@
   {
    "cell_type": "code",
    "execution_count": 52,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8390,9 +8318,7 @@
   {
    "cell_type": "code",
    "execution_count": 79,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8425,9 +8351,7 @@
   {
    "cell_type": "code",
    "execution_count": 80,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "for t in tracks.find({}, ['ctitle', 'duration_ms']):\n",
@@ -8453,7 +8377,6 @@
    "cell_type": "code",
    "execution_count": 81,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [],
@@ -8474,9 +8397,7 @@
   {
    "cell_type": "code",
    "execution_count": 82,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8496,9 +8417,7 @@
   {
    "cell_type": "code",
    "execution_count": 83,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8710,9 +8629,7 @@
   {
    "cell_type": "code",
    "execution_count": 84,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -8940,9 +8857,7 @@
   {
    "cell_type": "code",
    "execution_count": 85,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9080,9 +8995,7 @@
   {
    "cell_type": "code",
    "execution_count": 86,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9112,9 +9025,7 @@
   {
    "cell_type": "code",
    "execution_count": 88,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9151,9 +9062,7 @@
   {
    "cell_type": "code",
    "execution_count": 89,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9195,7 +9104,6 @@
    "cell_type": "code",
    "execution_count": 90,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -9383,7 +9291,6 @@
    "cell_type": "code",
    "execution_count": 91,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -9567,9 +9474,7 @@
   {
    "cell_type": "code",
    "execution_count": 92,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9600,9 +9505,7 @@
   {
    "cell_type": "code",
    "execution_count": 93,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9633,9 +9536,7 @@
   {
    "cell_type": "code",
    "execution_count": 95,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9676,9 +9577,7 @@
   {
    "cell_type": "code",
    "execution_count": 97,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -9859,9 +9758,7 @@
   {
    "cell_type": "code",
    "execution_count": 471,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -10060,9 +9957,7 @@
   {
    "cell_type": "code",
    "execution_count": 98,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -10250,7 +10145,6 @@
    "cell_type": "code",
    "execution_count": 99,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -10451,9 +10345,7 @@
   {
    "cell_type": "code",
    "execution_count": 100,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -10647,9 +10539,7 @@
   {
    "cell_type": "code",
    "execution_count": 101,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -10841,9 +10731,7 @@
   {
    "cell_type": "code",
    "execution_count": 102,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -10943,9 +10831,7 @@
   {
    "cell_type": "code",
    "execution_count": 103,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -11139,9 +11025,7 @@
   {
    "cell_type": "code",
    "execution_count": 104,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -11326,9 +11210,7 @@
   {
    "cell_type": "code",
    "execution_count": 105,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -11358,7 +11240,6 @@
    "cell_type": "code",
    "execution_count": 106,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -11890,9 +11771,7 @@
   {
    "cell_type": "code",
    "execution_count": 107,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -12098,9 +11977,7 @@
   {
    "cell_type": "code",
    "execution_count": 108,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -12303,9 +12180,7 @@
   {
    "cell_type": "code",
    "execution_count": 109,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -12337,7 +12212,6 @@
    "cell_type": "code",
    "execution_count": 110,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [],
@@ -12363,7 +12237,6 @@
    "cell_type": "code",
    "execution_count": 111,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -12416,7 +12289,6 @@
    "cell_type": "code",
    "execution_count": 112,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -12749,9 +12621,7 @@
   {
    "cell_type": "code",
    "execution_count": 113,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -12782,9 +12652,7 @@
   {
    "cell_type": "code",
    "execution_count": 114,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -12955,9 +12823,7 @@
   {
    "cell_type": "code",
    "execution_count": 115,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -12986,9 +12852,7 @@
   {
    "cell_type": "code",
    "execution_count": 116,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -13139,9 +13003,7 @@
   {
    "cell_type": "code",
    "execution_count": 117,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -13180,7 +13042,6 @@
    "cell_type": "code",
    "execution_count": 118,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -13233,7 +13094,6 @@
    "cell_type": "code",
    "execution_count": 119,
    "metadata": {
-    "collapsed": false,
     "scrolled": true
    },
    "outputs": [
@@ -13334,9 +13194,7 @@
   {
    "cell_type": "code",
    "execution_count": 120,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -13365,9 +13223,7 @@
   {
    "cell_type": "code",
    "execution_count": 121,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -13405,9 +13261,7 @@
   {
    "cell_type": "code",
    "execution_count": 122,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "pipeline = [\n",
@@ -13422,9 +13276,7 @@
   {
    "cell_type": "code",
    "execution_count": 123,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -13455,9 +13307,7 @@
   {
    "cell_type": "code",
    "execution_count": 124,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -13543,5 +13393,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }