Started on documentation
[szyfrow.git] / szyfrow / railfence.py
1 """[Railfence transposition cipher](https://en.wikipedia.org/wiki/Rail_fence_cipher).
2
3 Works by splitting the text into sections, then reading across them to
4 generate the rows in the cipher. The rows are then combined to form the
5 ciphertext.
6
7 Example: the plaintext "hellotherefriends", with a height of four, written
8 out in the railfence as
9 h h i
10 etere*
11 lorfns
12 l e d
13 (with the * showing the one character to finish the last section).
14 Each 'section' is two columns, but unfolded. In the example, the first
15 section is 'hellot'.
16 """
17 import math
18 from enum import Enum
19 from itertools import starmap, zip_longest
20 from szyfrow.support.utilities import *
21 from szyfrow.support.language_models import *
22
23
24 def railfence_encipher(message, height, fillvalue=''):
25 """Railfence cipher.
26 Works by splitting the text into sections, then reading across them to
27 generate the rows in the cipher. The rows are then combined to form the
28 ciphertext.
29
30 Example: the plaintext "hellotherefriends", with a height of four, written
31 out in the railfence as
32 h h i
33 etere*
34 lorfns
35 l e d
36 (with the * showing the one character to finish the last section).
37 Each 'section' is two columns, but unfolded. In the example, the first
38 section is 'hellot'.
39
40 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 2, fillvalue='!')
41 'hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!'
42 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3, fillvalue='!')
43 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!'
44 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5, fillvalue='!')
45 'hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!'
46 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 10, fillvalue='!')
47 'hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!'
48 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 3)
49 'horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece'
50 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 5)
51 'hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp'
52 >>> railfence_encipher('hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers', 7)
53 'haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic'
54 """
55 sections = chunks(message, (height - 1) * 2, fillvalue=fillvalue)
56 n_sections = len(sections)
57 # Add the top row
58 rows = [cat([s[0] for s in sections])]
59 # process the middle rows of the grid
60 for r in range(1, height-1):
61 rows += [cat([s[r:r+1] + s[height*2-r-2:height*2-r-1] for s in sections])]
62 # process the bottom row
63 rows += [cat([s[height - 1:height] for s in sections])]
64 # rows += [wcat([s[height - 1] for s in sections])]
65 return cat(rows)
66
67 def railfence_decipher(message, height, fillvalue=''):
68 """Railfence decipher.
69 Works by reconstructing the grid used to generate the ciphertext, then
70 unfolding the sections so the text can be concatenated together.
71
72 Example: given the ciphertext 'hhieterelorfnsled' and a height of 4, first
73 work out that the second row has a character missing, find the rows of the
74 grid, then split the section into its two columns.
75
76 'hhieterelorfnsled' is split into
77 h h i
78 etere
79 lorfns
80 l e d
81 (spaces added for clarity), which is stored in 'rows'. This is then split
82 into 'down_rows' and 'up_rows':
83
84 down_rows:
85 hhi
86 eee
87 lrn
88 led
89
90 up_rows:
91 tr
92 ofs
93
94 These are then zipped together (after the up_rows are reversed) to recover
95 the plaintext.
96
97 Most of the procedure is about finding the correct lengths for each row then
98 splitting the ciphertext into those rows.
99
100 >>> railfence_decipher('hlohraateerishsslnpeefetotsigaleccpeselteevsmhatetiiaogicotxfretnrifneihr!', 2).strip('!')
101 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
102 >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihr!!lhateihsnefttiaece!', 3).strip('!')
103 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
104 >>> railfence_decipher('hresleogcseeemhetaocofrnrner!!lhateihsnefttiaece!!ltvsatiigitxetifih!!oarspeslp!', 5).strip('!')
105 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
106 >>> railfence_decipher('hepisehagitnr!!lernesge!!lmtocerh!!otiletap!!tseaorii!!hassfolc!!evtitffe!!rahsetec!!eixn!', 10).strip('!')
107 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
108 >>> railfence_decipher('horaersslpeeosglcpselteevsmhatetiiaogicotxfretnrifneihrlhateihsnefttiaece', 3)
109 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
110 >>> railfence_decipher('hresleogcseeemhetaocofrnrnerlhateihsnefttiaeceltvsatiigitxetifihoarspeslp', 5)
111 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
112 >>> railfence_decipher('haspolsevsetgifrifrlatihnettaeelemtiocxernhorersleesgcptehaiaottneihesfic', 7)
113 'hellothereavastmeheartiesthisisalongpieceoftextfortestingrailfenceciphers'
114 """
115 # find the number and size of the sections, including how many characters
116 # are missing for a full grid
117 n_sections = math.ceil(len(message) / ((height - 1) * 2))
118 padding_to_add = n_sections * (height - 1) * 2 - len(message)
119 # row_lengths are for the both up rows and down rows
120 row_lengths = [n_sections] * (height - 1) * 2
121 for i in range((height - 1) * 2 - 1, (height - 1) * 2 - (padding_to_add + 1), -1):
122 row_lengths[i] -= 1
123 # folded_rows are the combined row lengths in the middle of the railfence
124 folded_row_lengths = [row_lengths[0]]
125 for i in range(1, height-1):
126 folded_row_lengths += [row_lengths[i] + row_lengths[-i]]
127 folded_row_lengths += [row_lengths[height - 1]]
128 # find the rows that form the railfence grid
129 rows = []
130 row_start = 0
131 for i in folded_row_lengths:
132 rows += [message[row_start:row_start + i]]
133 row_start += i
134 # split the rows into the 'down_rows' (those that form the first column of
135 # a section) and the 'up_rows' (those that ofrm the second column of a
136 # section).
137 down_rows = [rows[0]]
138 up_rows = []
139 for i in range(1, height-1):
140 down_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 0])]
141 up_rows += [cat([c for n, c in enumerate(rows[i]) if n % 2 == 1])]
142 down_rows += [rows[-1]]
143 up_rows.reverse()
144 return cat(c for r in zip_longest(*(down_rows + up_rows), fillvalue='') for c in r)
145
146
147 def railfence_break(message, max_key_length=20,
148 fitness=Pbigrams, chunksize=500):
149 """Breaks a railfence cipher using a range of lengths and
150 n-gram frequency analysis
151
152 >>> railfence_break(railfence_encipher(sanitise( \
153 "It is a truth universally acknowledged, that a single man in \
154 possession of a good fortune, must be in want of a wife. However \
155 little known the feelings or views of such a man may be on his \
156 first entering a neighbourhood, this truth is so well fixed in \
157 the minds of the surrounding families, that he is considered the \
158 rightful property of some one or other of their daughters."), \
159 7)) # doctest: +ELLIPSIS
160 (7, -709.46467226...)
161 >>> railfence_break(railfence_encipher(sanitise( \
162 "It is a truth universally acknowledged, that a single man in \
163 possession of a good fortune, must be in want of a wife. However \
164 little known the feelings or views of such a man may be on his \
165 first entering a neighbourhood, this truth is so well fixed in \
166 the minds of the surrounding families, that he is considered the \
167 rightful property of some one or other of their daughters."), \
168 7), \
169 fitness=Ptrigrams) # doctest: +ELLIPSIS
170 (7, -997.0129085...)
171 """
172 def worker(message, height, fitness):
173 plaintext = railfence_decipher(message, height)
174 fit = fitness(plaintext)
175 return height, fit
176
177 sanitised_message = sanitise(message)
178 results = starmap(worker, [(sanitised_message, i, fitness)
179 for i in range(2, max_key_length+1)])
180 return max(results, key=lambda k: k[1])