Challenges 6 and 7
[cipher-tools.git] / cipherbreak.py
import string
import collections
import norms
import logging
from itertools import zip_longest, cycle, permutations
from segment import segment, Pwords
from multiprocessing import Pool
from math import log10

import matplotlib.pyplot as plt

from cipher import *

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

english_trigram_counts = collections.defaultdict(int)
with open('count_3l.txt', 'r') as f:
    for line in f:
        (trigram, count) = line.split("\t")
        english_trigram_counts[trigram] = int(count)
normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
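
# Each count_*l.txt file is assumed to hold one tab-separated
# "<ngram>\t<count>" record per line, as the parsing above implies.
# norms.normalise (defined in norms.py, not shown here) is assumed to rescale
# the raw counts onto a common scale so they can be compared against the
# counts taken from a candidate decryption.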


with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

transpositions = collections.defaultdict(list)
for word in keywords:
    transpositions[transpositions_of(word)] += [word]
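
# transpositions now maps each distinct column ordering to all the dictionary
# words that produce it, so one trial in column_transposition_break_mp below
# covers every keyword sharing that ordering.  (transpositions_of comes from
# cipher.py and is assumed to return the keyword's column permutation as a
# tuple, which is what makes it usable as a dictionary key here.)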

def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
            'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
            '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    #counts = collections.defaultdict(int)
    #for c in text:
    #    counts[c] += 1
    #return counts
    return collections.Counter(c for c in text)
letter_frequencies = frequencies

def bigram_likelihood(bigram, bf, lf):
    return bf[bigram] / (lf[bigram[0]] * lf[bigram[1]])
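
# bigram_likelihood compares how often a bigram actually occurs (bf) with how
# often it would occur if its two letters appeared independently (the product
# of the single-letter frequencies in lf):
#
#     likelihood(b) = P(b) / (P(b[0]) * P(b[1]))
#
# transposition_break_exhaustive below sums log10 of this ratio over all the
# bigrams of a candidate decryption, so orderings that recreate common English
# letter pairs score highest.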


def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.080345432737...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.11189290326...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.08293968842...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
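
# Example use (illustrative; assumes the ciphertext is held in a string c):
#
# shift, fit = caesar_break(c)
# print(caesar_decipher(sanitise(c), shift))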

def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
            'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
            'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
            'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
            'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
            'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.0598745365924...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.1066453448861...)
    """
    best_keyword = ''
    best_wrap_alphabet = True
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.106645344886...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_worker, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

def keyword_break_worker(message, keyword, wrap_alphabet, metric, target_counts,
                         message_frequency_scaling):
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
            'aek') # doctest: +ELLIPSIS
    (6, 0.092599933059...)
    """
    best_key = 0
    best_fit = float("inf")
    ngram_length = len(next(iter(target_counts.keys())))
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), ngram_length)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
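
# Example use (illustrative; assumes a ciphertext string c).  The n-gram
# length is taken from the keys of target_counts, so passing
# target_counts=normalised_english_trigram_counts would score candidate
# decryptions by trigrams instead of bigrams:
#
# key, fit = scytale_break(c)
# print(scytale_decipher(c, key))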


def column_transposition_break_mp(message,
                                  translist=transpositions,
                                  metric=norms.euclidean_distance,
                                  target_counts=normalised_english_bigram_counts,
                                  message_frequency_scaling=norms.normalise,
                                  chunksize=500):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis

    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
            "It is a truth universally acknowledged, that a single man in \
            possession of a good fortune, must be in want of a wife. However \
            little known the feelings or views of such a man may be on his \
            first entering a neighbourhood, this truth is so well fixed in the \
            minds of the surrounding families, that he is considered the \
            rightful property of some one or other of their daughters."), \
            'encipher'), \
        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
    (((2, 0, 5, 3, 1, 4, 6), False), 0.0628106372...)
    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
            "It is a truth universally acknowledged, that a single man in \
            possession of a good fortune, must be in want of a wife. However \
            little known the feelings or views of such a man may be on his \
            first entering a neighbourhood, this truth is so well fixed in the \
            minds of the surrounding families, that he is considered the \
            rightful property of some one or other of their daughters."), \
            'encipher'), \
        translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
                   (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
                   (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
        target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
    (((2, 0, 5, 3, 1, 4, 6), False), 0.0592259560...)
    """
    ngram_length = len(next(iter(target_counts.keys())))
    with Pool() as pool:
        helper_args = [(message, trans, columnwise, metric, target_counts, ngram_length,
                        message_frequency_scaling)
                       for trans in translist.keys() for columnwise in [True, False]]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])
column_transposition_break = column_transposition_break_mp
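
# Example use (illustrative; assumes the ciphertext is held in a string c).
# The module-level transpositions dictionary is the default translist, so a
# dictionary-driven break is just:
#
# (trans, columnwise), fit = column_transposition_break(c)
# print(column_transposition_decipher(c, trans, columnwise=columnwise))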

def column_transposition_break_worker(message, transposition, columnwise, metric, target_counts,
                                      ngram_length, message_frequency_scaling):
    plaintext = column_transposition_decipher(message, transposition, columnwise=columnwise)
    counts = message_frequency_scaling(frequencies(
        ngrams(sanitise(plaintext), ngram_length)))
    fit = metric(target_counts, counts)
    logger.debug('Column transposition break attempt using key {0} '
                 'gives fit of {1} and decrypt starting: {2}'.format(
                     transposition, fit,
                     sanitise(plaintext)[:50]))
    return (transposition, columnwise), fit


def transposition_break_exhaustive(message):
    """Breaks a column transposition cipher by trying every permutation of
    every key length (up to 20) that divides the message length, scoring
    candidate decryptions by their bigram log-likelihood.
    """
    best_transposition = ''
    best_columnwise = False
    best_pw = -float('inf')
    for keylength in range(1, 21):
        if len(message) % keylength == 0:
            for transposition in permutations(range(keylength)):
                for columnwise in [True, False]:
                    plaintext = column_transposition_decipher(message,
                        transposition, columnwise=columnwise)
                    # pw = Pwords(segment(plaintext))
                    pw = sum([log10(bigram_likelihood(b,
                                                      normalised_english_bigram_counts,
                                                      normalised_english_counts))
                              for b in ngrams(plaintext, 2)])
                    logger.debug('Column transposition break attempt using key {0} {1} '
                                 'gives fit of {2} and decrypt starting: {3}'.format(
                                     transposition, columnwise, pw,
                                     sanitise(plaintext)[:50]))
                    if pw > best_pw:
                        best_transposition = transposition
                        best_columnwise = columnwise
                        best_pw = pw
    return (best_transposition, best_columnwise), best_pw
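
# Example use (illustrative; only practical for short keys, since every
# permutation of each candidate key length is tried):
#
# (trans, columnwise), score = transposition_break_exhaustive(c)
# print(column_transposition_decipher(c, trans, columnwise=columnwise))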


def vigenere_keyword_break(message,
                           wordlist=keywords,
                           metric=norms.euclidean_distance,
                           target_counts=normalised_english_counts,
                           message_frequency_scaling=norms.normalise):
    """Breaks a vigenere cipher using a dictionary and
    frequency analysis

    >>> vigenere_keyword_break(vigenere_encipher(sanitise('this is a test ' \
          'message for the vigenere decipherment'), 'cat'), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    ('cat', 0.15965224935...)
    """
    best_keyword = ''
    best_fit = float("inf")
    for keyword in wordlist:
        plaintext = vigenere_decipher(message, keyword)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Vigenere break attempt using key {0} '
                     'gives fit of {1} and decrypt starting: {2}'.format(
                         keyword, fit,
                         sanitise(plaintext)[:50]))
        if fit < best_fit:
            best_fit = fit
            best_keyword = keyword
    logger.info('Vigenere break best fit with key {0} gives fit '
                'of {1} and decrypt starting: {2}'.format(best_keyword,
                    best_fit, sanitise(
                        vigenere_decipher(message, best_keyword))[:50]))
    return best_keyword, best_fit

def vigenere_keyword_break_mp(message,
                              wordlist=keywords,
                              metric=norms.euclidean_distance,
                              target_counts=normalised_english_counts,
                              message_frequency_scaling=norms.normalise,
                              chunksize=500):
    """Breaks a vigenere cipher using a dictionary and
    frequency analysis

    >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
          'message for the vigenere decipherment'), 'cat'), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    ('cat', 0.159652249358...)
    """
    with Pool() as pool:
        helper_args = [(message, word, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(vigenere_keyword_break_worker, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

def vigenere_keyword_break_worker(message, keyword, metric, target_counts,
                                  message_frequency_scaling):
    plaintext = vigenere_decipher(message, keyword)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Vigenere keyword break attempt using key {0} gives fit of '
                 '{1} and decrypt starting: {2}'.format(keyword,
                     fit, sanitise(plaintext)[:50]))
    return keyword, fit


def vigenere_frequency_break(message,
                             metric=norms.euclidean_distance,
                             target_counts=normalised_english_counts,
                             message_frequency_scaling=norms.normalise):
    """Breaks a Vigenere cipher with frequency analysis

    >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
            "afternoon when he left his jacket hanging on the easel in the " \
            "attic."), 'florence')) # doctest: +ELLIPSIS
    ('florence', 0.077657073...)
    """
    best_fit = float("inf")
    best_key = ''
    sanitised_message = sanitise(message)
    for trial_length in range(1, 20):
        splits = every_nth(sanitised_message, trial_length)
        key = ''.join([chr(caesar_break(s, target_counts=target_counts)[0] + ord('a')) for s in splits])
        plaintext = vigenere_decipher(sanitised_message, key)
        counts = message_frequency_scaling(frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Vigenere key length of {0} ({1}) gives fit of {2}'.
                     format(trial_length, key, fit))
        if fit < best_fit:
            best_fit = fit
            best_key = key
    logger.info('Vigenere break best fit with key {0} gives fit '
                'of {1} and decrypt starting: {2}'.format(best_key,
                    best_fit, sanitise(
                        vigenere_decipher(message, best_key))[:50]))
    return best_key, best_fit
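
# Example use (illustrative; assumes c holds a Vigenere ciphertext with an
# unknown keyword).  Each trial key length n splits the message into n
# single-alphabet columns via every_nth, and caesar_break recovers one key
# letter per column:
#
# key, fit = vigenere_frequency_break(c)
# print(vigenere_decipher(sanitise(c), key))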

def beaufort_frequency_break(message,
                             metric=norms.euclidean_distance,
                             target_counts=normalised_english_counts,
                             message_frequency_scaling=norms.normalise):
    """Breaks a Beaufort cipher with frequency analysis

    >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
            "run. She is ready and so am I. I stole Daniel's pocketbook this " \
            "afternoon when he left his jacket hanging on the easel in the " \
            "attic."), 'florence')) # doctest: +ELLIPSIS
    ('florence', 0.077657073...)
    """
    best_fit = float("inf")
    best_key = ''
    sanitised_message = sanitise(message)
    for trial_length in range(1, 20):
        splits = every_nth(sanitised_message, trial_length)
        key = ''.join([chr(caesar_break(s, target_counts=target_counts)[0] + ord('a')) for s in splits])
        plaintext = beaufort_decipher(sanitised_message, key)
        counts = message_frequency_scaling(frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Beaufort key length of {0} ({1}) gives fit of {2}'.
                     format(trial_length, key, fit))
        if fit < best_fit:
            best_fit = fit
            best_key = key
    logger.info('Beaufort break best fit with key {0} gives fit '
                'of {1} and decrypt starting: {2}'.format(best_key,
                    best_fit, sanitise(
                        beaufort_decipher(message, best_key))[:50]))
    return best_key, best_fit


def plot_frequency_histogram(freqs, sort_key=None):
    x = range(len(freqs.keys()))
    y = [freqs[l] for l in sorted(freqs.keys(), key=sort_key)]
    f = plt.figure()
    ax = f.add_axes([0.1, 0.1, 0.9, 0.9])
    ax.bar(x, y, align='center')
    ax.set_xticks(x)
    ax.set_xticklabels(sorted(freqs.keys(), key=sort_key))
    f.show()
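
# Example use (illustrative; assumes a ciphertext string c):
#
# plot_frequency_histogram(frequencies(sanitise(c)))
#
# sort_key, if given, controls the order of the letters along the x-axis.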


if __name__ == "__main__":
    import doctest
    doctest.testmod()