7a61ae709a55d2115118991bc78f860caf1ad66b
import collections
import string
import unicodedata
from itertools import zip_longest
# join a list of letters into a string
cat = ''.join

# join a list of words into a string, separated by spaces
wcat = ' '.join

# join a list of lines, separated by newline
lcat = '\n'.join
def pos(letter):
    """Return the position of a letter in the alphabet (0-25)

    Accepts a single ASCII letter in either case; raises ValueError for
    anything else (digits, punctuation, accented characters, empty string).
    """
    if letter in string.ascii_lowercase:
        return ord(letter) - ord('a')
    elif letter in string.ascii_uppercase:
        return ord(letter) - ord('A')
    else:
        raise ValueError('pos requires input of {} to be an ascii letter'.format(letter))
def unpos(number):
    """Return the letter in the given position in the alphabet (mod 26)

    Always returns a lowercase letter; any integer is accepted because the
    position is reduced modulo 26 first.
    """
    return chr(number % 26 + ord('a'))
def pad(message_len, group_len, fillvalue):
    """Return the padding needed to extend a message to a multiple of group_len
    in length.

    fillvalue can be a function or a literal value. If a function, it is called
    once for each padded character. Use this with fillvalue=random_english_letter
    to pad a message with random letters.
    """
    padding_length = group_len - message_len % group_len
    # A message already at a multiple of group_len needs no padding.
    if padding_length == group_len:
        padding_length = 0
    padding = ''
    if callable(fillvalue):
        # Call once per padded character so e.g. random fills vary.
        for i in range(padding_length):
            padding += fillvalue()
    else:
        padding += fillvalue * padding_length
    return padding
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Split into rows of n characters, then read off the columns.
    split_text = chunks(text, n, fillvalue)
    return [cat(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Transpose the columns back into rows; '' fill drops ragged ends.
    return ''.join(''.join(l)
                   for l in zip_longest(*split_text, fillvalue=''))
def chunks(text, n, fillvalue=None):
    """Split a text into chunks of n characters

    If fillvalue is given, the final chunk is padded out to length n with it
    (a literal value or a zero-argument function, per pad()).

    >>> chunks('abcdefghi', 3)
    ['abc', 'def', 'ghi']
    >>> chunks('abcdefghi', 4)
    ['abcd', 'efgh', 'i']
    >>> chunks('abcdefghi', 4, fillvalue='!')
    ['abcd', 'efgh', 'i!!!']
    """
    # Guard: pad() cannot multiply None; no fillvalue means no padding.
    if fillvalue is None:
        padding = ''
    else:
        padding = pad(len(text), n, fillvalue)
    padded_text = text + padding
    # range over len(text) still yields ceil(len(text)/n) chunks, since
    # padding is always shorter than one group.
    return [padded_text[i:i + n] for i in range(0, len(text), n)]
def transpose(items, transposition):
    """Moves items around according to the given transposition

    transposition is a sequence of source indices: output position p receives
    items[transposition[p]].

    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
    [13, 12, 14, 11, 15, 10]
    """
    transposed = [''] * len(transposition)
    for p, t in enumerate(transposition):
        transposed[p] = items[t]
    return transposed
def untranspose(items, transposition):
    """Undoes a transpose

    Inverse of transpose(): output position transposition[p] receives
    items[p].

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = [''] * len(transposition)
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping first-seen
    order (OrderedDict.fromkeys preserves insertion order of keys)."""
    return list(collections.OrderedDict.fromkeys(text))
def letters(text):
    """Remove all non-alphabetic characters from a text

    Only ASCII letters survive; case is preserved.

    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    return ''.join([c for c in text if c in string.ascii_letters])
# Special characters for conversion, such as smart quotes.
# Maps each special character to its plain-ASCII equivalent for str.translate.
unaccent_specials = str.maketrans({'’': "'", '“': '"', '”': '"'})
def unaccent(text):
    """Remove all accents from letters.
    It does this by converting the unicode string to decomposed compatability
    form, dropping all the combining accents, then re-encoding the bytes.

    >>> unaccent('hello')
    'hello'
    >>> unaccent('HELLO')
    'HELLO'
    >>> unaccent('héllo')
    'hello'
    >>> unaccent('héllö')
    'hello'
    >>> unaccent('HÉLLÖ')
    'HELLO'
    """
    # First normalise smart quotes etc. that NFKD cannot decompose.
    translated_text = text.translate(unaccent_specials)
    # NFKD splits accented letters into base letter + combining mark;
    # the ascii encode with 'ignore' drops the marks, and decode
    # returns a str (without it the function would return bytes).
    return unicodedata.normalize('NFKD', translated_text).\
        encode('ascii', 'ignore').\
        decode('utf-8')
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    Accents are stripped first so accented letters survive as plain letters.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    >>> sanitise('HÉLLÖ')
    'hello'
    """
    return letters(unaccent(text)).lower()
def index_of_coincidence(text):
    """Return the index of coincidence of text.

    The text is sanitised (lowercased, non-letters removed) first. Uses the
    standard normalised IC: sum over letters of f*(f-1), divided by
    N*(N-1)/26 where N is the sanitised length, so random text scores
    about 1.0 and natural English noticeably higher.
    """
    stext = sanitise(text)
    counts = collections.Counter(stext)
    # Fix: denominator must use the sanitised length (counts is over stext;
    # the raw text length would include spaces/punctuation and skew the score).
    denom = len(stext) * (len(stext) - 1) / 26
    # Guard: texts of length 0 or 1 have no letter pairs.
    if denom == 0:
        return 0.0
    # Fix: f*(f-1) needs parentheses; f*f - 1 was a precedence bug.
    return sum(counts[l] * (counts[l] - 1)
               for l in string.ascii_lowercase) / denom
def frequencies(text):
    """Count the number of occurrences of each character in text

    Returns a collections.Counter, so absent characters count as 0.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # Counter consumes any iterable of hashable items directly;
    # the wrapping generator (c for c in text) was redundant.
    return collections.Counter(text)
214 if __name__
== "__main__":