ca984a30f96e4b09545eb09b277a5b0dc188be28
3 from itertools
import zip_longest
5 # join a list of letters into a string
8 # join a list of words into a string, separated by spaces
11 # join a list of lines, separated by newline
15 """Return the position of a letter in the alphabet (0-25)"""
16 if letter
in string
.ascii_lowercase
:
17 return ord(letter
) - ord('a')
18 elif letter
in string
.ascii_uppercase
:
19 return ord(letter
) - ord('A')
21 raise ValueError('pos requires input of {} to be an ascii letter'.format(letter
))
24 """Return the letter in the given position in the alphabet (mod 26)"""
25 return chr(number
% 26 + ord('a'))
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    text is first cut into consecutive chunks of n characters (see chunks);
    the chunks are then read column-wise, so column i collects characters
    i, i+n, i+2n, ... of the text.

    Parameters:
        text: the string to split.
        n: the number of interleaved strings to produce.
        fillvalue: used to pad the final chunk, and any short column, so
            that every result has the same length. The default '' leaves
            the shorter results unpadded.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    split_text = chunks(text, n, fillvalue)
    # Transpose rows (chunks) into columns; zip_longest copes with a short
    # final chunk when no padding was applied.
    return [cat(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    The strings are read column-wise: the first character of each string,
    then the second character of each, and so on. Strings that are shorter
    than the longest one simply contribute nothing once exhausted.

    Parameters:
        split_text: a sequence of strings, as produced by every_nth.

    Returns:
        The recombined text as a single string.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # fillvalue='' makes exhausted strings vanish from the joined output.
    return ''.join(''.join(column)
                   for column in zip_longest(*split_text, fillvalue=''))
def chunks(text, n, fillvalue=None):
    """Split a text into chunks of n characters

    Parameters:
        text: the string to split.
        n: the chunk length.
        fillvalue: if given (and non-empty), its first character is used to
            pad the final chunk up to n characters. With the default None
            the final chunk is left short.

    Returns:
        A list of strings, in order; empty if text is empty.

    >>> chunks('abcdefghi', 3)
    ['abc', 'def', 'ghi']
    >>> chunks('abcdefghi', 4)
    ['abcd', 'efgh', 'i']
    >>> chunks('abcdefghi', 4, fillvalue='!')
    ['abcd', 'efgh', 'i!!!']
    """
    if fillvalue:
        # -len(text) % n is exactly the shortfall of the last chunk
        # (0 when the text already divides evenly).
        padding = fillvalue[0] * (-len(text) % n)
    else:
        padding = ''
    padded = text + padding
    # Step over the original length so padding never creates an extra chunk.
    return [padded[i:i + n] for i in range(0, len(text), n)]
def transpose(items, transposition):
    """Moves items around according to the given transposition

    Position p of the result holds items[transposition[p]].

    Parameters:
        items: an indexable sequence.
        transposition: a sequence of indices into items.

    Returns:
        A new list with one element per entry of transposition.

    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
    [13, 12, 14, 11, 15, 10]
    """
    return [items[t] for t in transposition]
def untranspose(items, transposition):
    """Undoes a transpose

    Position transposition[p] of the result holds items[p], which reverses
    the movement performed by transpose with the same transposition.

    Parameters:
        items: an indexable sequence, at least as long as transposition.
        transposition: a sequence of target indices, each less than
            len(transposition).

    Returns:
        A new list of len(transposition) elements. Slots that no entry of
        transposition targets are left as ''.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = [''] * len(transposition)
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed
def deduplicate(text):
    """Return the distinct items of text as a list, in first-seen order.

    Items must be hashable.
    """
    seen = set()
    unique = []
    for item in text:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
108 """Remove all non-alphabetic characters from a text
109 >>> letters('The Quick')
111 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
112 'TheQuickBROWNfoxjumpedoverthelazyDOG'
114 return ''.join([c
for c
in text
if c
in string
.ascii_letters
])
# Translation table for characters that accent-stripping alone does not
# handle: typographic ("smart") quotes are mapped to their plain ASCII forms.
unaccent_specials = str.maketrans({
    "’": "'",   # right single quotation mark
    '“': '"',   # left double quotation mark
    '”': '"',   # right double quotation mark
})
120 """Remove all accents from letters.
121 It does this by converting the unicode string to decomposed compatibility
122 form, dropping all the combining accents, then re-encoding the bytes.
124 >>> unaccent('hello')
126 >>> unaccent('HELLO')
128 >>> unaccent('héllo')
130 >>> unaccent('héllö')
132 >>> unaccent('HÉLLÖ')
135 translated_text
= text
.translate(unaccent_specials
)
136 return unicodedata
.normalize('NFKD', translated_text
).\
137 encode('ascii', 'ignore').\
141 """Remove all non-alphabetic characters and convert the text to lowercase
143 >>> sanitise('The Quick')
145 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
146 'thequickbrownfoxjumpedoverthelazydog'
147 >>> sanitise('HÉLLÖ')
150 return letters(unaccent(text
)).lower()
def index_of_coincidence(text):
    """Return the index of coincidence of text, normalised for a 26-letter
    alphabet.

    The text is sanitised first, so only its letters (lowercased) count.
    With N letters in total and c_l occurrences of letter l, the result is

        26 * sum(c_l * (c_l - 1)) / (N * (N - 1))

    which is about 1.0 for uniformly random letters and higher for
    natural-language text.

    Parameters:
        text: the string to analyse.

    Returns:
        The index of coincidence as a float; 0.0 when the text contains
        fewer than two letters (no pair of letters can be drawn).
    """
    stext = sanitise(text)
    length = len(stext)
    if length < 2:
        # No pairs exist; avoid dividing by zero.
        return 0.0
    counts = collections.Counter(stext)
    # Number of ordered pairs of identical letters.
    coincidences = sum(counts[l] * (counts[l] - 1)
                       for l in string.ascii_lowercase)
    # Both factors of the denominator use the sanitised length, so that
    # punctuation and spaces in the raw text do not skew the result.
    return 26 * coincidences / (length * (length - 1))
def frequencies(text):
    """Count the number of occurrences of each character in text

    Parameters:
        text: an iterable of hashable items, normally a string.

    Returns:
        A collections.Counter mapping each item to its count; items that
        never occur report a count of 0.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # iter() guarantees the argument is counted element by element, even if
    # a mapping is passed (Counter would otherwise treat it as ready-made
    # counts).
    return collections.Counter(iter(text))
193 if __name__
== "__main__":