X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=fdff17fc4e7c0c811253ef295c02d9791e7ec157;hb=62bbe4277e9676b9255ef98a33ba2ad3dbc0c7ed;hp=f271120ccefcc7a687edd63964653547199c404c;hpb=a89d923f5a5e225bacfcbec82f01f6cc236a6ac2;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index f271120..fdff17f 100644 --- a/cipher.py +++ b/cipher.py @@ -43,6 +43,14 @@ for a in range(26): c = (a * b) % 26 modular_division_table[b][c] = a +def letters(text): + """Remove all non-alphabetic characters from a text + >>> letters('The Quick') + 'TheQuick' + >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG') + 'TheQuickBROWNfoxjumpedoverthelazyDOG' + """ + return ''.join([c for c in text if c in string.ascii_letters]) def sanitise(text): """Remove all non-alphabetic characters and convert the text to lowercase @@ -52,8 +60,9 @@ def sanitise(text): >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG') 'thequickbrownfoxjumpedoverthelazydog' """ - sanitised = [c.lower() for c in text if c in string.ascii_letters] - return ''.join(sanitised) + # sanitised = [c.lower() for c in text if c in string.ascii_letters] + # return ''.join(sanitised) + return letters(text).lower() def ngrams(text, n): """Returns all n-grams of a text @@ -120,11 +129,14 @@ def frequencies(text): ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)] + >>> frequencies('abcdefabcdef')['x'] + 0 """ - counts = collections.defaultdict(int) - for c in text: - counts[c] += 1 - return counts + #counts = collections.defaultdict(int) + #for c in text: + # counts[c] += 1 + #return counts + return collections.Counter(c for c in text) letter_frequencies = frequencies def deduplicate(text): @@ -516,7 +528,9 @@ def keyword_break_mp(message, helper_args = [(message, word, wrap, metric, target_counts, message_frequency_scaling) for word in wordlist for wrap in range(3)] - breaks = pool.starmap(keyword_break_one, helper_args, chunksize) # Gotcha: the helper function here needs to be defined at the top level (limitation of Pool.starmap) + # Gotcha: the helper function here needs to be defined at the top level + # (limitation of Pool.starmap) + breaks = pool.starmap(keyword_break_one, helper_args, chunksize) return min(breaks, key=lambda k: k[1]) def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,