X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;ds=sidebyside;f=beatles-vs-stones-gather-data.ipynb;h=05b77a0e81f4ae0b21d7041de35237790c0341c9;hb=d9e7bac3496d434d65e3c1358918e4df3b247433;hp=a902935c59d59193144e34405faaaec8cde59644;hpb=a14e843063a70273cd4f92acd5378c92bda7d8d9;p=battle-of-the-bands.git diff --git a/beatles-vs-stones-gather-data.ipynb b/beatles-vs-stones-gather-data.ipynb index a902935..05b77a0 100644 --- a/beatles-vs-stones-gather-data.ipynb +++ b/beatles-vs-stones-gather-data.ipynb @@ -12,9 +12,15 @@ "\n", "I'm also on a bit of a Beatles jag, so I've also done the analysis for Beatles songs.\n", "\n", - "http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0115255#s5\n", - "https://www.discogs.com/developers/#page:database,header:database-artist-releases\n", - "http://data.discogs.com/\n", + "### Some data sources\n", + "\n", + "* http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0115255#s5\n", + "* https://www.discogs.com/developers/#page:database,header:database-artist-releases\n", + "* http://data.discogs.com/\n", + "\n", + "* https://labrosa.ee.columbia.edu/millionsong/\n", + "\n", + "* https://twitter.com/kcimc/status/893855561590157312?s=09 and https://drive.google.com/file/d/0B9tyIRZ76JCdN3NtaVpPU3c4QWs/view (stored locally in the [1m.pkl](1m.pkl) folder)\n", "\n", "\n", "## Contents\n", @@ -98,9 +104,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "# try:\n", @@ -112,9 +116,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "# Create a database and a collections within it.\n", @@ -136,9 +138,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -188,9 +188,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def get_artists(artist_name):\n", @@ -211,9 +209,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -238,9 +234,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -272,9 +266,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def get_albums(artist_id):\n", @@ -292,9 +284,7 @@ { "cell_type": "code", "execution_count": 41, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -315,9 +305,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -338,9 +326,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -658,7 +644,6 @@ "cell_type": "code", "execution_count": 11, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -1091,9 +1076,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def get_tracks(album_id):\n", @@ -1117,7 +1100,6 @@ "cell_type": "code", "execution_count": 45, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -1151,7 +1133,6 @@ "cell_type": "code", "execution_count": 17, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -1185,7 +1166,6 @@ "cell_type": "code", "execution_count": 18, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -1766,9 +1746,7 @@ { "cell_type": "code", "execution_count": 49, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -1830,9 +1808,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -1865,7 +1841,6 @@ "cell_type": "code", "execution_count": 16, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -2601,9 +2576,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "a_type, a_token = get_spotify_auth_token()\n", @@ -2616,7 +2589,6 @@ "cell_type": "code", "execution_count": 37, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -4562,7 +4534,6 @@ "cell_type": "code", "execution_count": 33, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -6199,9 +6170,7 @@ { "cell_type": "code", "execution_count": 38, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6231,9 +6200,7 @@ { "cell_type": "code", "execution_count": 35, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6263,9 +6230,7 @@ { "cell_type": "code", "execution_count": 31, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6299,9 +6264,7 @@ { "cell_type": "code", "execution_count": 39, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def genius_artist_search(artist_name, per_page=20):\n", @@ -6322,9 +6285,7 @@ { "cell_type": "code", "execution_count": 40, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6346,9 +6307,7 @@ { "cell_type": "code", "execution_count": 41, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6377,9 +6336,7 @@ { "cell_type": "code", "execution_count": 42, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def genius_song_search(artist_id):\n", @@ -6406,9 +6363,7 @@ { "cell_type": "code", "execution_count": 43, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6429,9 +6384,7 @@ { "cell_type": "code", "execution_count": 44, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6452,9 +6405,7 @@ { "cell_type": "code", "execution_count": 45, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6496,7 +6447,6 @@ "cell_type": "code", "execution_count": 46, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [ @@ -7415,9 +7365,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def genius_lyrics(song_url):\n", @@ -7437,9 +7385,7 @@ { "cell_type": "code", "execution_count": 48, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -7461,9 +7407,7 @@ { "cell_type": "code", "execution_count": 49, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -7491,9 +7435,7 @@ { "cell_type": "code", "execution_count": 50, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -7514,7 +7456,6 @@ "cell_type": "code", "execution_count": 51, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -7570,9 +7511,7 @@ { "cell_type": "code", "execution_count": 53, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -7620,9 +7559,7 @@ { "cell_type": "code", "execution_count": 55, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -7646,7 +7583,6 @@ "cell_type": "code", "execution_count": 56, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -7724,9 +7660,7 @@ { "cell_type": "code", "execution_count": 57, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8304,9 +8238,7 @@ { "cell_type": "code", "execution_count": 58, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8326,9 +8258,7 @@ { "cell_type": "code", "execution_count": 59, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8357,9 +8287,7 @@ { "cell_type": "code", "execution_count": 52, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8390,9 +8318,7 @@ { "cell_type": "code", "execution_count": 79, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8425,9 +8351,7 @@ { "cell_type": "code", "execution_count": 80, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "for t in tracks.find({}, ['ctitle', 'duration_ms']):\n", @@ -8453,7 +8377,6 @@ "cell_type": "code", "execution_count": 81, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -8474,9 +8397,7 @@ { "cell_type": "code", "execution_count": 82, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8496,9 +8417,7 @@ { "cell_type": "code", "execution_count": 83, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8710,9 +8629,7 @@ { "cell_type": "code", "execution_count": 84, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -8940,9 +8857,7 @@ { "cell_type": "code", "execution_count": 85, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9080,9 +8995,7 @@ { "cell_type": "code", "execution_count": 86, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9112,9 +9025,7 @@ { "cell_type": "code", "execution_count": 88, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9151,9 +9062,7 @@ { "cell_type": "code", "execution_count": 89, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9195,7 +9104,6 @@ "cell_type": "code", "execution_count": 90, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -9383,7 +9291,6 @@ "cell_type": "code", "execution_count": 91, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -9567,9 +9474,7 @@ { "cell_type": "code", "execution_count": 92, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9600,9 +9505,7 @@ { "cell_type": "code", "execution_count": 93, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9633,9 +9536,7 @@ { "cell_type": "code", "execution_count": 95, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9676,9 +9577,7 @@ { "cell_type": "code", "execution_count": 97, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -9859,9 +9758,7 @@ { "cell_type": "code", "execution_count": 471, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -10060,9 +9957,7 @@ { "cell_type": "code", "execution_count": 98, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -10250,7 +10145,6 @@ "cell_type": "code", "execution_count": 99, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -10451,9 +10345,7 @@ { "cell_type": "code", "execution_count": 100, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -10647,9 +10539,7 @@ { "cell_type": "code", "execution_count": 101, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -10841,9 +10731,7 @@ { "cell_type": "code", "execution_count": 102, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -10943,9 +10831,7 @@ { "cell_type": "code", "execution_count": 103, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -11139,9 +11025,7 @@ { "cell_type": "code", "execution_count": 104, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -11326,9 +11210,7 @@ { "cell_type": "code", "execution_count": 105, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -11358,7 +11240,6 @@ "cell_type": "code", "execution_count": 106, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -11890,9 +11771,7 @@ { "cell_type": "code", "execution_count": 107, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -12098,9 +11977,7 @@ { "cell_type": "code", "execution_count": 108, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -12303,9 +12180,7 @@ { "cell_type": "code", "execution_count": 109, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -12337,7 +12212,6 @@ "cell_type": "code", "execution_count": 110, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -12363,7 +12237,6 @@ "cell_type": "code", "execution_count": 111, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -12416,7 +12289,6 @@ "cell_type": "code", "execution_count": 112, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -12749,9 +12621,7 @@ { "cell_type": "code", "execution_count": 113, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -12782,9 +12652,7 @@ { "cell_type": "code", "execution_count": 114, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -12955,9 +12823,7 @@ { "cell_type": "code", "execution_count": 115, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -12986,9 +12852,7 @@ { "cell_type": "code", "execution_count": 116, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -13139,9 +13003,7 @@ { "cell_type": "code", "execution_count": 117, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -13180,7 +13042,6 @@ "cell_type": "code", "execution_count": 118, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -13233,7 +13094,6 @@ "cell_type": "code", "execution_count": 119, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -13334,9 +13194,7 @@ { "cell_type": "code", "execution_count": 120, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -13365,9 +13223,7 @@ { "cell_type": "code", "execution_count": 121, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -13405,9 +13261,7 @@ { "cell_type": "code", "execution_count": 122, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "pipeline = [\n", @@ -13422,9 +13276,7 @@ { "cell_type": "code", "execution_count": 123, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -13455,9 +13307,7 @@ { "cell_type": "code", "execution_count": 124, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -13543,5 +13393,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }