{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combine word lists\n",
    "\n",
    "Reads every `.txt` / `*list` file in the working directory, passes each line through `sanitise`, and writes the union of the results to `combined.txt`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# A kernel has no __file__, so the working directory stands in for the\n",
    "# notebook's location. Its parent goes on sys.path so that\n",
    "# `support.utilities` can be imported (assumes the `support` package\n",
    "# lives in that parent directory).\n",
    "currentdir = os.getcwd()\n",
    "parentdir = os.path.dirname(currentdir)\n",
    "if parentdir not in sys.path:  # keep re-runs from stacking duplicates\n",
    "    sys.path.insert(0, parentdir)\n",
    "\n",
    "from support.utilities import sanitise\n",
    "\n",
    "# File the combined word list is written to.\n",
    "OUTPUT_FILE = 'combined.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "279585"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "many_words = set()\n",
    "\n",
    "for filename in sorted(os.listdir()):\n",
    "    # Skip this notebook's own output so a re-run does not feed stale\n",
    "    # results back in, and skip anything that is not a regular file.\n",
    "    if filename == OUTPUT_FILE or not os.path.isfile(filename):\n",
    "        continue\n",
    "    if filename.endswith(('.txt', 'list')):\n",
    "        # `with` closes the handle as soon as the file has been consumed.\n",
    "        with open(filename) as word_file:\n",
    "            many_words.update(sanitise(w) for w in word_file)\n",
    "\n",
    "len(many_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Written in sorted order so the file is identical from run to run\n",
    "# (set iteration order is not stable between interpreter sessions).\n",
    "with open(OUTPUT_FILE, 'w') as f:\n",
    "    for w in sorted(many_words):\n",
    "        f.write(w + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}