37de917608fb63b9f730fcf04cfc8c86035e7e58
[cipher-training.git] / make-cracking-dictionary.py
"""Build ``words.txt`` from the system word lists.

Reads three word lists from ``/usr/share/dict``, merges them, passes every
entry through ``language_models.sanitise`` and writes the distinct results
to ``words.txt`` in the current directory, one per line, ordered by length
and then alphabetically.
"""
import language_models


def _read_lines(path):
    """Return the set of lines in the text file at *path*.

    Lines are returned exactly as read (including any trailing newline).
    The file is opened in a ``with`` block so the handle is closed as soon
    as the lines are collected, instead of being left for the garbage
    collector.
    """
    with open(path, 'r') as word_file:
        return set(word_file.readlines())


american = _read_lines('/usr/share/dict/american-english')
british = _read_lines('/usr/share/dict/british-english')
cracklib = set(_read_lines('/usr/share/dict/cracklib-small'))

# Union of the raw entries from all three lists; duplicates collapse here.
words = american | british | cracklib

# NOTE(review): sanitise() is defined in language_models and is not visible
# here; it is presumably what strips the trailing newline and normalises
# each entry -- confirm against that module.
sanitised_words = {language_models.sanitise(w) for w in words}

# Drop the empty string, if any entry sanitised to it, so it is not written
# out as a blank line.
sanitised_words.discard('')

# Shortest words first; words of equal length in lexicographic order.
# Entries are newline-separated with no newline after the last one.
with open('words.txt', 'w') as f:
    f.write('\n'.join(sorted(sanitised_words, key=lambda w: (len(w), w))))