Many tests done, still more to come.
[szyfrow.git] / szyfrow / railfence.py
1 import math
2 from enum import Enum
3 from itertools import starmap, zip_longest
4 from szyfrow.support.utilities import *
5 from szyfrow.support.language_models import *
6
7
8 def railfence_encipher(message, height, fillvalue=''):
9 """Railfence cipher.
10 Works by splitting the text into sections, then reading across them to
11 generate the rows in the cipher. The rows are then combined to form the
12 ciphertext.
13
14 Example: the plaintext "hellotherefriends", with a height of four, written
15 out in the railfence as
16 h h i
17 etere*
18 lorfns
19 l e d
20 (with the * showing the one character to finish the last section).
21 Each 'section' is two columns, but unfolded. In the example, the first
22 section is 'hellot'.
23
24 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!')
25 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!'
26 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!')
27 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!'
28 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!')
29 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!'
30 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!')
31 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!'
32 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3)
33 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece'
34 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5)
35 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp'
36 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7)
37 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic'
38 """
39 sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue)
40 n_sections = len(sections)
41 # Add the top row
42 rows = [cat([s[0] for s in sections])]
43 # process the middle rows of the grid
44 for r in range(1, height-1):
45 rows += [cat([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])]
46 # process the bottom row
47 rows += [cat([s[height - 1:height] for s in sections])]
48 # rows += [wcat([s[height - 1] for s in sections])]
49 return cat(rows)
50
51 def railfence_decipher(message, height, fillvalue=''):
52 """Railfence decipher.
53 Works by reconstructing the grid used to generate the ciphertext, then
54 unfolding the sections so the text can be concatenated together.
55
56 Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first
57 work out that the second row has a character missing, find the rows of the
58 grid, then split the section into its two columns.
59
60 'hhieterelorfnsled' is split into
61 h h i
62 etere
63 lorfns
64 l e d
65 (spaces added for clarity), which is stored in 'rows'. This is then split
66 into 'down_rows' and 'up_rows':
67
68 down_rows:
69 hhi
70 eee
71 lrn
72 led
73
74 up_rows:
75 tr
76 ofs
77
78 These are then zipped together (after the up_rows are reversed) to recover
79 the plaintext.
80
81 Most of the procedure is about finding the correct lengths for each row then
82 splitting the ciphertext into those rows.
83
84 >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!')
85 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
86 >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!')
87 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
88 >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!')
89 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
90 >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!')
91 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
92 >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3)
93 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
94 >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5)
95 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
96 >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7)
97 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
98 """
99 # find the number and size of the sections, including how many characters
100 # are missing for a full grid
101 n_sections = math.ceil(len(message) / ((height - 1) * 2))
102 padding_to_add = n_sections * (height - 1) * 2 - len(message)
103 # row_lengths are for the both up rows and down rows
104 row_lengths = [n_sections] * (height - 1) * 2
105 for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1):
106 row_lengths[i] -= 1
107 # folded_rows are the combined row lengths in the middle of the railfence
108 folded_row_lengths = [row_lengths[0]]
109 for i in range(1, height-1):
110 folded_row_lengths += [row_lengths[i] + row_lengths[-i]]
111 folded_row_lengths += [row_lengths[height - 1]]
112 # find the rows that form the railfence grid
113 rows = []
114 row_start = 0
115 for i in folded_row_lengths:
116 rows += [message[row_start:row_start + i]]
117 row_start += i
118 # split the rows into the 'down_rows' (those that form the first column of
119 # a section) and the 'up_rows' (those that ofrm the second column of a
120 # section).
121 down_rows = [rows[0]]
122 up_rows = []
123 for i in range(1, height-1):
124 down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])]
125 up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])]
126 down_rows += [rows[-1]]
127 up_rows.reverse()
128 return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r)
129
130
131 def railfence_break(message, max_key_length=20,
132 fitness=Pbigrams, chunksize=500):
133 """Breaks a railfence cipher using a range of lengths and
134 n-gram frequency analysis
135
136 >>> railfence_break(railfence_encipher(sanitise( \
137 "It is a truth universally acknowledged, that a single man in \
138 possession of a good fortune, must be in want of a wife. However \
139 little known the feelings or views of such a man may be on his \
140 first entering a neighbourhood, this truth is so well fixed in \
141 the minds of the surrounding families, that he is considered the \
142 rightful property of some one or other of their daughters."), \
143 7)) # doctest: +ELLIPSIS
144 (7, -709.46467226...)
145 >>> railfence_break(railfence_encipher(sanitise( \
146 "It is a truth universally acknowledged, that a single man in \
147 possession of a good fortune, must be in want of a wife. However \
148 little known the feelings or views of such a man may be on his \
149 first entering a neighbourhood, this truth is so well fixed in \
150 the minds of the surrounding families, that he is considered the \
151 rightful property of some one or other of their daughters."), \
152 7), \
153 fitness=Ptrigrams) # doctest: +ELLIPSIS
154 (7, -997.0129085...)
155 """
156 def worker(message, height, fitness):
157 plaintext = railfence_decipher(message, height)
158 fit = fitness(plaintext)
159 return height, fit
160
161 sanitised_message = sanitise(message)
162 results = starmap(worker, [(sanitised_message, i, fitness)
163 for i in range(2, max_key_length+1)])
164 return max(results, key=lambda k: k[1])