d1961a8a4e534e4cda82eee6249603dc5b4999de
# join a list of letters into a string
7 # join a list of words into a string, separated by spaces
10 # join a list of lines, separated by newline
14 """Return the position of a letter in the alphabet (0-25)"""
15 if letter
in string
.ascii_lowercase
:
16 return ord(letter
) - ord('a')
17 elif letter
in string
.ascii_uppercase
:
18 return ord(letter
) - ord('A')
23 """Return the letter in the given position in the alphabet (mod 26)"""
24 return chr(number
% 26 + ord('a'))
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Cut the text into rows of n characters, then read it off by columns:
    # column i collects the ith character of every row.
    rows = chunks(text, n, fillvalue)
    columns = zip_longest(*rows, fillvalue=fillvalue)
    return [''.join(column) for column in columns]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Read the strings in lockstep, one character from each per step;
    # shorter strings contribute nothing ('' fill) once exhausted.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(group) for group in interleaved)
def chunks(text, n, fillvalue=None):
    """Split a text into chunks of n characters

    If fillvalue is given, the final chunk is padded with its first
    character so that every chunk has exactly n characters.

    >>> chunks('abcdefghi', 3)
    ['abc', 'def', 'ghi']
    >>> chunks('abcdefghi', 4)
    ['abcd', 'efgh', 'i']
    >>> chunks('abcdefghi', 4, fillvalue='!')
    ['abcd', 'efgh', 'i!!!']
    """
    if fillvalue:
        # Bug fix: `-len(text) % n` is 0 when the text divides evenly, so no
        # all-padding chunk is appended (the old `n - len(text) % n` produced
        # n padding characters — a whole spurious chunk — in that case).
        padding = fillvalue[0] * (-len(text) % n)
    else:
        padding = ''
    return [(text + padding)[i:i + n] for i in range(0, len(text), n)]
def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
    [13, 12, 14, 11, 15, 10]
    """
    # Position p of the result holds the item at position transposition[p].
    return [items[source] for source in transposition]
def untranspose(items, transposition):
    """Undoes the effect of transpose() with the same transposition.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    # Inverse scatter: the item now at position p originally lived at
    # position transposition[p], so send it back there.
    restored = [''] * len(transposition)
    for current_pos, original_pos in enumerate(transposition):
        restored[original_pos] = items[current_pos]
    return restored
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each element, in order of first appearance."""
    # OrderedDict.fromkeys drops duplicates while preserving insertion order.
    first_seen = collections.OrderedDict.fromkeys(text)
    return list(first_seen)
def letters(text):
    """Remove all non-alphabetic characters from a text

    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    # Keep only ASCII letters (both cases); digits, punctuation and
    # whitespace are discarded.
    kept = [character for character in text
            if character in string.ascii_letters]
    return ''.join(kept)
# Special characters for conversion, such as smart quotes.
# Translation table mapping curly quotes to their plain ASCII equivalents;
# these are not handled by Unicode decomposition, so unaccent() applies
# this table before normalising.
unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'})
def unaccent(text):
    """Remove all accents from letters.
    It does this by converting the unicode string to decomposed compatibility
    form, dropping all the combining accents, then re-encoding the bytes.

    >>> unaccent('hello')
    'hello'
    >>> unaccent('HELLO')
    'HELLO'
    >>> unaccent('héllo')
    'hello'
    >>> unaccent('héllö')
    'hello'
    >>> unaccent('HÉLLÖ')
    'HELLO'
    """
    # First map smart quotes etc. to ASCII, since NFKD cannot decompose them.
    despecialed = text.translate(unaccent_specials)
    # NFKD splits accented letters into base letter + combining mark; the
    # ascii encode with 'ignore' then drops the combining marks.
    decomposed = unicodedata.normalize('NFKD', despecialed)
    return decomposed.encode('ascii', 'ignore').decode('utf-8')
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    >>> sanitise('HÉLLÖ')
    'hello'
    """
    # Strip accents first so accented letters survive the letters() filter.
    without_accents = unaccent(text)
    return letters(without_accents).lower()
def index_of_coincidence(text):
    """Return the index of coincidence of the sanitised text: the chance
    that two characters drawn without replacement are equal, scaled by the
    26-letter alphabet size (uniformly random text scores about 1.0).

    Raises ZeroDivisionError if text contains fewer than two letters.
    """
    stext = sanitise(text)
    counts = collections.Counter(stext)
    # Bug fix: the denominator previously used len(text) — the raw length,
    # including spaces and punctuation — alongside len(stext), skewing the
    # statistic for any unsanitised input. Both factors must use the
    # sanitised length.
    denominator = len(stext) * (len(stext) - 1) / 26
    numerator = sum(counts[l] * (counts[l] - 1)
                    for l in string.ascii_lowercase)
    return numerator / denominator
# Index keywords by their transposition pattern, so every keyword that
# yields the same transposition can be retrieved together.
# NOTE(review): `keywords` and `transpositions_of` are defined elsewhere in
# the file (not visible here); `transpositions_of(word)` presumably returns
# a hashable pattern usable as a dict key — confirm against its definition.
transpositions = collections.defaultdict(list)
for word in keywords:
    transpositions[transpositions_of(word)] += [word]
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
            'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
            '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # Counter consumes the string directly; the previous
    # `Counter(c for c in text)` wrapped it in a redundant generator.
    return collections.Counter(text)