3125a9436dbad1d8ec7738f7b347aeef8389fa30
4 from itertools
import zip_longest
# join a list of letters into a string
9 # join a list of words into a string, separated by spaces
12 # join a list of lines, separated by newline
def pos(letter):
    """Return the position of a letter in the alphabet (0-25)

    Upper- and lower-case ASCII letters give the same position.

    >>> pos('a')
    0
    >>> pos('Z')
    25

    Raises ValueError if letter is not a single ASCII letter.
    """
    # `x in string.ascii_lowercase` is a substring test, so '' and 'ab'
    # would pass it and make ord() raise TypeError; insist on one character
    # so every bad string ends in the ValueError below.
    if len(letter) == 1:
        if letter in string.ascii_lowercase:
            return ord(letter) - ord('a')
        if letter in string.ascii_uppercase:
            return ord(letter) - ord('A')
    raise ValueError(
        'pos requires input of {} to be an ascii letter'.format(letter))
25 """Return the letter in the given position in the alphabet (mod 26)"""
26 return chr(number
% 26 + ord('a'))
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    The text is cut into rows of n characters with chunks(); each returned
    string is one column of those rows.  fillvalue is passed both to
    chunks() and to zip_longest().

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    rows = chunks(text, n, fillvalue)
    # Transposing the rows turns "every n characters" into columns.
    columns = zip_longest(*rows, fillvalue=fillvalue)
    return [''.join(column) for column in columns]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    Reads the strings in split_text column by column: first the 0th
    character of each string, then the 1st of each, and so on.  Strings
    that are too short contribute nothing to the later columns.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    columns = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(column) for column in columns)
def chunks(text, n, fillvalue=None):
    """Split a text into chunks of n characters

    If fillvalue is given (and non-empty), its first character is used to
    pad the final chunk up to n characters; otherwise the final chunk may
    be shorter than n.

    >>> chunks('abcdefghi', 3)
    ['abc', 'def', 'ghi']
    >>> chunks('abcdefghi', 4)
    ['abcd', 'efgh', 'i']
    >>> chunks('abcdefghi', 4, fillvalue='!')
    ['abcd', 'efgh', 'i!!!']
    """
    # Only index into fillvalue when there is one: the default is None.
    if fillvalue:
        padding = fillvalue[0] * (n - len(text) % n)
    else:
        padding = ''
    # Build the padded text once instead of once per chunk.
    padded = text + padding
    # Step over the *unpadded* length so that padding never adds a chunk.
    return [padded[i:i + n] for i in range(0, len(text), n)]
def transpose(items, transposition):
    """Moves items around according to the given transposition

    Position p of the result holds items[transposition[p]], so the result
    has one entry per element of transposition.

    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
    [13, 12, 14, 11, 15, 10]
    """
    return [items[source] for source in transposition]
def untranspose(items, transposition):
    """Undoes a transpose

    Position transposition[p] of the result holds items[p].  The result
    has one entry per element of transposition; any position that the
    transposition never names is left as ''.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    restored = [''] * len(transposition)
    for position, target in enumerate(transposition):
        restored[target] = items[position]
    return restored
def deduplicate(text):
    """Return the distinct items of text as a list, in order of first
    appearance

    >>> deduplicate('abracadabra')
    ['a', 'b', 'r', 'c', 'd']
    """
    # The keys of an OrderedDict are unique and keep insertion order.
    first_seen = collections.OrderedDict.fromkeys(text)
    return list(first_seen)
def letters(text):
    """Remove all non-alphabetic characters from a text

    Only the ASCII letters a-z and A-Z are kept; case is left unchanged.

    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    kept = [c for c in text if c in string.ascii_letters]
    return ''.join(kept)
# Special characters for conversion, such as smart quotes.
unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'})


def unaccent(text):
    """Remove all accents from letters.
    It does this by converting the unicode string to decomposed compatibility
    form, dropping all the combining accents, then re-encoding the bytes.

    The characters in unaccent_specials are replaced first.  Any character
    that is still not ASCII after decomposition is dropped.

    >>> unaccent('hello')
    'hello'
    >>> unaccent('HELLO')
    'HELLO'
    >>> unaccent('héllo')
    'hello'
    >>> unaccent('héllö')
    'hello'
    >>> unaccent('HÉLLÖ')
    'HELLO'
    """
    translated_text = text.translate(unaccent_specials)
    # NFKD splits an accented letter into its base letter plus combining
    # marks; the marks are not ASCII, so encoding with 'ignore' drops them.
    decomposed = unicodedata.normalize('NFKD', translated_text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    # Decode again so that callers get a str back, not bytes.
    return ascii_bytes.decode('ascii')
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    Accents are stripped with unaccent() before the non-letters are
    removed with letters(), so accented letters survive as plain ones.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    >>> sanitise('HÉLLÖ')
    'hello'
    """
    unaccented = unaccent(text)
    return letters(unaccented).lower()
def index_of_coincidence(text):
    """Return the index of coincidence of the letters in text, scaled by 26

    The text is passed through sanitise() and only the result is counted.
    With N letters in total and n_l occurrences of letter l, the value is

        26 * sum(n_l * (n_l - 1)) / (N * (N - 1))

    so that uniformly random letters score about 1.0.  Returns 0.0 when
    fewer than two letters remain, where the ratio is undefined.
    """
    stext = sanitise(text)
    total = len(stext)
    if total < 2:
        # No pair of letters can be drawn; avoid dividing by zero.
        return 0.0
    counts = collections.Counter(stext)
    # n * (n - 1) ordered pairs of equal letters; note the parentheses
    # (n * n - 1 is a different quantity).
    coincidences = sum(counts[l] * (counts[l] - 1)
                       for l in string.ascii_lowercase)
    # Both factors use the sanitised length: non-letters in the raw text
    # must not inflate the number of possible pairs.
    denom = total * (total - 1) / len(string.ascii_lowercase)
    return coincidences / denom
def frequencies(text):
    """Count the number of occurrences of each character in text

    Returns a collections.Counter, so looking up a character that does
    not occur gives 0 rather than raising KeyError.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    return collections.Counter(text)
194 if __name__
== "__main__":