From: Neil Smith Date: Fri, 31 Jan 2014 15:32:30 +0000 (+0000) Subject: Various changes X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=833724c23cc4f479a185cdd8dae89ce35964cf7c;p=cipher-tools.git Various changes --- diff --git a/cipher.py b/cipher.py index 3e42da1..5f39a78 100644 --- a/cipher.py +++ b/cipher.py @@ -11,32 +11,12 @@ logger.setLevel(logging.WARNING) #logger.setLevel(logging.DEBUG) -modular_division_table = [[0]*26 for x in range(26)] +modular_division_table = [[0]*26 for _ in range(26)] for a in range(26): for b in range(26): c = (a * b) % 26 modular_division_table[b][c] = a -def letters(text): - """Remove all non-alphabetic characters from a text - >>> letters('The Quick') - 'TheQuick' - >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG') - 'TheQuickBROWNfoxjumpedoverthelazyDOG' - """ - return ''.join([c for c in text if c in string.ascii_letters]) - -def sanitise(text): - """Remove all non-alphabetic characters and convert the text to lowercase - - >>> sanitise('The Quick') - 'thequick' - >>> sanitise('The Quick BROWN fox jumped! over... 
the (9lazy) DOG') - 'thequickbrownfoxjumpedoverthelazydog' - """ - # sanitised = [c.lower() for c in text if c in string.ascii_letters] - # return ''.join(sanitised) - return letters(text).lower() def ngrams(text, n): """Returns all n-grams of a text diff --git a/find_best_caesar_break_parameters.py b/find_best_caesar_break_parameters.py index 88de1dc..edab90f 100644 --- a/find_best_caesar_break_parameters.py +++ b/find_best_caesar_break_parameters.py @@ -38,12 +38,12 @@ def eval_all(): itertools.product(metrics, scalings, message_lengths))) def eval_one_parameter_set(metric, scaling, message_length): - for i in range(trials): + for _ in range(trials): sample_start = random.randint(0, corpus_length - message_length) sample = corpus[sample_start:(sample_start + message_length)] key = random.randint(1, 25) sample_ciphertext = caesar_encipher(sample, key) - (found_key, score) = caesar_break(sample_ciphertext, + found_key, _ = caesar_break(sample_ciphertext, metric=metric['func'], target_counts=scaling['corpus_frequency'], message_frequency_scaling=scaling['scaling']) diff --git a/language_models.py b/language_models.py index 5626edb..8c98a2e 100644 --- a/language_models.py +++ b/language_models.py @@ -70,7 +70,9 @@ def unaccent(text): >>> unaccent('HÉLLÖ') 'HELLO' """ - return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8') + return unicodedata.normalize('NFKD', text).\ + encode('ascii', 'ignore').\ + decode('utf-8') def sanitise(text): """Remove all non-alphabetic characters and convert the text to lowercase diff --git a/make-cracking-dictionary.py b/make-cracking-dictionary.py index 2c94ff2..f2ba6cb 100644 --- a/make-cracking-dictionary.py +++ b/make-cracking-dictionary.py @@ -1,4 +1,4 @@ -import cipher +import language_models american = set(open('/usr/share/dict/american-english', 'r').readlines()) british = set(open('/usr/share/dict/british-english', 'r').readlines()) @@ -9,19 +9,9 @@ words = american | british | cracklib 
sanitised_words = set() for w in words: - sanitised_words.add(cipher.sanitise(w)) + sanitised_words.add(language_models.sanitise(w)) sanitised_words.discard('') with open('words.txt', 'w') as f: f.write('\n'.join(sorted(sanitised_words, key=lambda w: (len(w), w)))) - #for w in sanitised_words: - #f.write('{0}\n'.format(w)) - - - - - - - - diff --git a/segment.py b/segment.py index bd15e00..dd0b2a8 100644 --- a/segment.py +++ b/segment.py @@ -1,4 +1,3 @@ -# import re, string, random, glob, operator, heapq import string import collections from math import log10 diff --git a/war-and-peace.txt b/war-and-peace.txt index 0af8a94..3179049 100644 --- a/war-and-peace.txt +++ b/war-and-peace.txt @@ -1,4 +1,33 @@ -WAR AND PEACE +The Project Gutenberg EBook of War and Peace, by Leo Tolstoy + +This eBook is for the use of anyone anywhere at no cost and with almost +no restrictions whatsoever. You may copy it, give it away or re-use it +under the terms of the Project Gutenberg License included with this +eBook or online at www.gutenberg.org + + +Title: War and Peace + +Author: Leo Tolstoy + +Translators: Louise and Aylmer Maude + +Posting Date: January 10, 2009 [EBook #2600] + +Last Updated: March 15, 2013 + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK WAR AND PEACE *** + +An Anonymous Volunteer, and David Widger + + + + + +WAR AND PEACE By Leo Tolstoy/Tolstoi @@ -2998,7 +3027,7 @@ CHAPTER XIV After receiving her visitors, the countess was so tired that she gave orders to admit no more, but the porter was told to be sure to invite to dinner all who came "to congratulate." The countess wished to have a -tete-a-tete talk with the friend of her childhood, Princess Anna +tête-à-tête talk with the friend of her childhood, Princess Anna Mikhaylovna, whom she had not seen properly since she returned from Petersburg. Anna Mikhaylovna, with her tear-worn but pleasant face, drew her chair nearer to that of the countess. @@ -9309,14 +9338,14 @@ executing of the plan. 
"I am not jesting," Bilibin went on. "Nothing is truer or sadder. These gentlemen ride onto the bridge alone and wave white handkerchiefs; they assure the officer on duty that they, the marshals, are on their way to -negotiate with Prince Auersperg. He lets them enter the tete-de-pont. * +negotiate with Prince Auersperg. He lets them enter the tête-de-pont. * They spin him a thousand gasconades, saying that the war is over, that the Emperor Francis is arranging a meeting with Bonaparte, that they desire to see Prince Auersperg, and so on. The officer sends for Auersperg; these gentlemen embrace the officers, crack jokes, sit on the cannon, and meanwhile a French battalion gets to the bridge unobserved, flings the bags of incendiary material into the water, and approaches -the tete-de-pont. At length appears the lieutenant general, our dear +the tête-de-pont. At length appears the lieutenant general, our dear Prince Auersperg von Mautern himself. 'Dearest foe! Flower of the Austrian army, hero of the Turkish wars Hostilities are ended, we can shake one another's hand.... The Emperor Napoleon burns with impatience @@ -21254,7 +21283,7 @@ and Princess Mary's helpless attempts to protect them were their customary long-established relations on the matter. "Mais, ma bonne amie," said Prince Andrew, "vous devriez au contraire -m'etre reconnaissante de ce que j'explique a Pierre votre intimite avec +m'être reconnaissante de ce que j'explique a Pierre votre intimité avec ce jeune homme." * @@ -23422,7 +23451,7 @@ of nothing. As he had done on their first meeting at Kochubey's, Speranski produced a strong impression on Prince Andrew on the Wednesday, when he received -him tete-a-tete at his own house and talked to him long and +him tête-à-tête at his own house and talked to him long and confidentially. 
To Bolkonski so many people appeared contemptible and insignificant @@ -30091,7 +30120,7 @@ princess told the count that she would be delighted, and only begged him to stay longer at Anna Semenovna's, and he departed. Despite the uneasy glances thrown at her by Princess Mary--who wished to -have a tete-a-tete with Natasha--Mademoiselle Bourienne remained in the +have a tête-à-tête with Natasha--Mademoiselle Bourienne remained in the room and persistently talked about Moscow amusements and theaters. Natasha felt offended by the hesitation she had noticed in the anteroom, by her father's nervousness, and by the unnatural manner of the princess @@ -32254,7 +32283,7 @@ honor I can't allow anyone to use." Pierre glanced at him with amazement, unable to understand what he wanted. -"Though it was tete-a-tete," Anatole continued, "still I can't..." +"Though it was tête-à-tête," Anatole continued, "still I can't..." "Is it satisfaction you want?" said Pierre ironically. @@ -41615,7 +41644,7 @@ Preis geben," * said one of them. * "The war must be extended widely. I cannot sufficiently commend that view." -"Oh, ja," said the other, "der Zweck ist nur den Feind zu schwachen, so +"Oh, ja," said the other, "der Zweck ist nur den Feind zu schwächen, so kann man gewiss nicht den Verlust der Privat-Personen in Achtung nehmen." * @@ -64619,3 +64648,360 @@ the present case it is similarly necessary to renounce a freedom that does not exist, and to recognize a dependence of which we are not conscious. + + + + +End of the Project Gutenberg EBook of War and Peace, by Leo Tolstoy + +*** END OF THIS PROJECT GUTENBERG EBOOK WAR AND PEACE *** + +***** This file should be named 2600-8.txt or 2600-8.zip ***** This and +all associated files of various formats will be found in: +http://www.gutenberg.org/2/6/0/2600/ + +An Anonymous Volunteer, and David Widger + + +Updated editions will replace the previous one--the old editions will be +renamed. 
+ +Creating the works from public domain print editions means that no one +owns a United States copyright in these works, so the Foundation (and +you!) can copy and distribute it in the United States without permission +and without paying copyright royalties. Special rules, set forth in the +General Terms of Use part of this license, apply to copying and +distributing Project Gutenberg-tm electronic works to protect the +PROJECT GUTENBERG-tm concept and trademark. Project Gutenberg is a +registered trademark, and may not be used if you charge for the eBooks, +unless you receive specific permission. If you do not charge anything +for copies of this eBook, complying with the rules is very easy. You +may use this eBook for nearly any purpose such as creation of derivative +works, reports, performances and research. They may be modified and +printed and given away--you may do practically ANYTHING with public +domain eBooks. Redistribution is subject to the trademark license, +especially commercial redistribution. + +*** START: FULL LICENSE *** + +THE FULL PROJECT GUTENBERG LICENSE PLEASE READ THIS BEFORE YOU +DISTRIBUTE OR USE THIS WORK + +To protect the Project Gutenberg-tm mission of promoting the free +distribution of electronic works, by using or distributing this work (or +any other work associated in any way with the phrase "Project +Gutenberg"), you agree to comply with all the terms of the Full Project +Gutenberg-tm License (available with this file or online at +http://gutenberg.org/license). + + +Section 1. General Terms of Use and Redistributing Project Gutenberg-tm +electronic works + +1.A. By reading or using any part of this Project Gutenberg-tm +electronic work, you indicate that you have read, understand, agree to +and accept all the terms of this license and intellectual property +(trademark/copyright) agreement. 
If you do not agree to abide by all +the terms of this agreement, you must cease using and return or destroy +all copies of Project Gutenberg-tm electronic works in your possession. +If you paid a fee for obtaining a copy of or access to a Project +Gutenberg-tm electronic work and you do not agree to be bound by the +terms of this agreement, you may obtain a refund from the person or +entity to whom you paid the fee as set forth in paragraph 1.E.8. + +1.B. "Project Gutenberg" is a registered trademark. It may only be +used on or associated in any way with an electronic work by people who +agree to be bound by the terms of this agreement. There are a few +things that you can do with most Project Gutenberg-tm electronic works +even without complying with the full terms of this agreement. See +paragraph 1.C below. There are a lot of things you can do with Project +Gutenberg-tm electronic works if you follow the terms of this agreement +and help preserve free future access to Project Gutenberg-tm electronic +works. See paragraph 1.E below. + +1.C. The Project Gutenberg Literary Archive Foundation ("the +Foundation" or PGLAF), owns a compilation copyright in the collection of +Project Gutenberg-tm electronic works. Nearly all the individual works +in the collection are in the public domain in the United States. If an +individual work is in the public domain in the United States and you are +located in the United States, we do not claim a right to prevent you +from copying, distributing, performing, displaying or creating +derivative works based on the work as long as all references to Project +Gutenberg are removed. Of course, we hope that you will support the +Project Gutenberg-tm mission of promoting free access to electronic +works by freely sharing Project Gutenberg-tm works in compliance with +the terms of this agreement for keeping the Project Gutenberg-tm name +associated with the work. 
You can easily comply with the terms of this +agreement by keeping this work in the same format with its attached full +Project Gutenberg-tm License when you share it without charge with +others. + +1.D. The copyright laws of the place where you are located also govern +what you can do with this work. Copyright laws in most countries are in +a constant state of change. If you are outside the United States, check +the laws of your country in addition to the terms of this agreement +before downloading, copying, displaying, performing, distributing or +creating derivative works based on this work or any other Project +Gutenberg-tm work. The Foundation makes no representations concerning +the copyright status of any work in any country outside the United +States. + +1.E. Unless you have removed all references to Project Gutenberg: + +1.E.1. The following sentence, with active links to, or other immediate +access to, the full Project Gutenberg-tm License must appear prominently +whenever any copy of a Project Gutenberg-tm work (any work on which the +phrase "Project Gutenberg" appears, or with which the phrase "Project +Gutenberg" is associated) is accessed, displayed, performed, viewed, +copied or distributed: + +This eBook is for the use of anyone anywhere at no cost and with almost +no restrictions whatsoever. You may copy it, give it away or re-use it +under the terms of the Project Gutenberg License included with this +eBook or online at www.gutenberg.org + +1.E.2. If an individual Project Gutenberg-tm electronic work is derived +from the public domain (does not contain a notice indicating that it is +posted with permission of the copyright holder), the work can be copied +and distributed to anyone in the United States without paying any fees +or charges. 
If you are redistributing or providing access to a work +with the phrase "Project Gutenberg" associated with or appearing on the +work, you must comply either with the requirements of paragraphs 1.E.1 +through 1.E.7 or obtain permission for the use of the work and the +Project Gutenberg-tm trademark as set forth in paragraphs 1.E.8 or +1.E.9. + +1.E.3. If an individual Project Gutenberg-tm electronic work is posted +with the permission of the copyright holder, your use and distribution +must comply with both paragraphs 1.E.1 through 1.E.7 and any additional +terms imposed by the copyright holder. Additional terms will be linked +to the Project Gutenberg-tm License for all works posted with the +permission of the copyright holder found at the beginning of this work. + +1.E.4. Do not unlink or detach or remove the full Project Gutenberg-tm +License terms from this work, or any files containing a part of this +work or any other work associated with Project Gutenberg-tm. + +1.E.5. Do not copy, display, perform, distribute or redistribute this +electronic work, or any part of this electronic work, without +prominently displaying the sentence set forth in paragraph 1.E.1 with +active links or immediate access to the full terms of the Project +Gutenberg-tm License. + +1.E.6. You may convert to and distribute this work in any binary, +compressed, marked up, nonproprietary or proprietary form, including any +word processing or hypertext form. However, if you provide access to or +distribute copies of a Project Gutenberg-tm work in a format other than +"Plain Vanilla ASCII" or other format used in the official version +posted on the official Project Gutenberg-tm web site +(www.gutenberg.org), you must, at no additional cost, fee or expense to +the user, provide a copy, a means of exporting a copy, or a means of +obtaining a copy upon request, of the work in its original "Plain +Vanilla ASCII" or other form. 
Any alternate format must include the +full Project Gutenberg-tm License as specified in paragraph 1.E.1. + +1.E.7. Do not charge a fee for access to, viewing, displaying, +performing, copying or distributing any Project Gutenberg-tm works +unless you comply with paragraph 1.E.8 or 1.E.9. + +1.E.8. You may charge a reasonable fee for copies of or providing +access to or distributing Project Gutenberg-tm electronic works provided +that + +- You pay a royalty fee of 20% of the gross profits you derive from the +use of Project Gutenberg-tm works calculated using the method you +already use to calculate your applicable taxes. The fee is owed to the +owner of the Project Gutenberg-tm trademark, but he has agreed to donate +royalties under this paragraph to the Project Gutenberg Literary Archive +Foundation. Royalty payments must be paid within 60 days following each +date on which you prepare (or are legally required to prepare) your +periodic tax returns. Royalty payments should be clearly marked as such +and sent to the Project Gutenberg Literary Archive Foundation at the +address specified in Section 4, "Information about donations to the +Project Gutenberg Literary Archive Foundation." + +- You provide a full refund of any money paid by a user who notifies you +in writing (or by e-mail) within 30 days of receipt that s/he does not +agree to the terms of the full Project Gutenberg-tm License. You must +require such a user to return or destroy all copies of the works +possessed in a physical medium and discontinue all use of and all access +to other copies of Project Gutenberg-tm works. + +- You provide, in accordance with paragraph 1.F.3, a full refund of any +money paid for a work or a replacement copy, if a defect in the +electronic work is discovered and reported to you within 90 days of +receipt of the work. + +- You comply with all other terms of this agreement for free +distribution of Project Gutenberg-tm works. + +1.E.9. 
If you wish to charge a fee or distribute a Project Gutenberg-tm +electronic work or group of works on different terms than are set forth +in this agreement, you must obtain permission in writing from both the +Project Gutenberg Literary Archive Foundation and Michael Hart, the +owner of the Project Gutenberg-tm trademark. Contact the Foundation as +set forth in Section 3 below. + +1.F. + +1.F.1. Project Gutenberg volunteers and employees expend considerable +effort to identify, do copyright research on, transcribe and proofread +public domain works in creating the Project Gutenberg-tm collection. +Despite these efforts, Project Gutenberg-tm electronic works, and the +medium on which they may be stored, may contain "Defects," such as, but +not limited to, incomplete, inaccurate or corrupt data, transcription +errors, a copyright or other intellectual property infringement, a +defective or damaged disk or other medium, a computer virus, or computer +codes that damage or cannot be read by your equipment. + +1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the "Right +of Replacement or Refund" described in paragraph 1.F.3, the Project +Gutenberg Literary Archive Foundation, the owner of the Project +Gutenberg-tm trademark, and any other party distributing a Project +Gutenberg-tm electronic work under this agreement, disclaim all +liability to you for damages, costs and expenses, including legal fees. +YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT LIABILITY, +BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE PROVIDED IN +PARAGRAPH F3. YOU AGREE THAT THE FOUNDATION, THE TRADEMARK OWNER, AND +ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE LIABLE TO YOU FOR +ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR INCIDENTAL DAMAGES +EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH DAMAGE. + +1.F.3. 
LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a +defect in this electronic work within 90 days of receiving it, you can +receive a refund of the money (if any) you paid for it by sending a +written explanation to the person you received the work from. If you +received the work on a physical medium, you must return the medium with +your written explanation. The person or entity that provided you with +the defective work may elect to provide a replacement copy in lieu of a +refund. If you received the work electronically, the person or entity +providing it to you may choose to give you a second opportunity to +receive the work electronically in lieu of a refund. If the second copy +is also defective, you may demand a refund in writing without further +opportunities to fix the problem. + +1.F.4. Except for the limited right of replacement or refund set forth +in paragraph 1.F.3, this work is provided to you 'AS-IS' WITH NO OTHER +WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PURPOSE. + +1.F.5. Some states do not allow disclaimers of certain implied +warranties or the exclusion or limitation of certain types of damages. +If any disclaimer or limitation set forth in this agreement violates the +law of the state applicable to this agreement, the agreement shall be +interpreted to make the maximum disclaimer or limitation permitted by +the applicable state law. The invalidity or unenforceability of any +provision of this agreement shall not void the remaining provisions. + +1.F.6.
INDEMNITY - You agree to indemnify and hold the Foundation, the +trademark owner, any agent or employee of the Foundation, anyone +providing copies of Project Gutenberg-tm electronic works in accordance +with this agreement, and any volunteers associated with the production, +promotion and distribution of Project Gutenberg-tm electronic works, +harmless from all liability, costs and expenses, including legal fees, +that arise directly or indirectly from any of the following which you do +or cause to occur: (a) distribution of this or any Project Gutenberg-tm +work, (b) alteration, modification, or additions or deletions to any +Project Gutenberg-tm work, and (c) any Defect you cause. + + +Section 2. Information about the Mission of Project Gutenberg-tm + +Project Gutenberg-tm is synonymous with the free distribution of +electronic works in formats readable by the widest variety of computers +including obsolete, old, middle-aged and new computers. It exists +because of the efforts of hundreds of volunteers and donations from +people in all walks of life. + +Volunteers and financial support to provide volunteers with the +assistance they need, are critical to reaching Project Gutenberg-tm's +goals and ensuring that the Project Gutenberg-tm collection will remain +freely available for generations to come. In 2001, the Project +Gutenberg Literary Archive Foundation was created to provide a secure +and permanent future for Project Gutenberg-tm and future generations. To +learn more about the Project Gutenberg Literary Archive Foundation and +how your efforts and donations can help, see Sections 3 and 4 and the +Foundation web page at http://www.pglaf.org. + + +Section 3. Information about the Project Gutenberg Literary Archive +Foundation + +The Project Gutenberg Literary Archive Foundation is a non profit +501(c)(3) educational corporation organized under the laws of the state +of Mississippi and granted tax exempt status by the Internal Revenue +Service. 
The Foundation's EIN or federal tax identification number is +64-6221541. Its 501(c)(3) letter is posted at +http://pglaf.org/fundraising. Contributions to the Project Gutenberg +Literary Archive Foundation are tax deductible to the full extent +permitted by U.S. federal laws and your state's laws. + +The Foundation's principal office is located at 4557 Melan Dr. S. +Fairbanks, AK, 99712., but its volunteers and employees are scattered +throughout numerous locations. Its business office is located at 809 +North 1500 West, Salt Lake City, UT 84116, (801) 596-1887, email +business@pglaf.org. Email contact links and up to date contact +information can be found at the Foundation's web site and official page +at http://pglaf.org + +For additional contact information: Dr. Gregory B. Newby Chief Executive +and Director gbnewby@pglaf.org + + +Section 4. Information about Donations to the Project Gutenberg +Literary Archive Foundation + +Project Gutenberg-tm depends upon and cannot survive without wide spread +public support and donations to carry out its mission of increasing the +number of public domain and licensed works that can be freely +distributed in machine readable form accessible by the widest array of +equipment including outdated equipment. Many small donations ($1 to +$5,000) are particularly important to maintaining tax exempt status with +the IRS. + +The Foundation is committed to complying with the laws regulating +charities and charitable donations in all 50 states of the United +States. Compliance requirements are not uniform and it takes a +considerable effort, much paperwork and many fees to meet and keep up +with these requirements. We do not solicit donations in locations where +we have not received written confirmation of compliance. 
To SEND +DONATIONS or determine the status of compliance for any particular state +visit http://pglaf.org + +While we cannot and do not solicit contributions from states where we +have not met the solicitation requirements, we know of no prohibition +against accepting unsolicited donations from donors in such states who +approach us with offers to donate. + +International donations are gratefully accepted, but we cannot make any +statements concerning tax treatment of donations received from outside +the United States. U.S. laws alone swamp our small staff. + +Please check the Project Gutenberg Web pages for current donation +methods and addresses. Donations are accepted in a number of other ways +including checks, online payments and credit card donations. To donate, +please visit: http://pglaf.org/donate + + +Section 5. General Information About Project Gutenberg-tm electronic +works. + +Professor Michael S. Hart is the originator of the Project Gutenberg-tm +concept of a library of electronic works that could be freely shared +with anyone. For thirty years, he produced and distributed Project +Gutenberg-tm eBooks with only a loose network of volunteer support. + + +Project Gutenberg-tm eBooks are often created from several printed +editions, all of which are confirmed as Public Domain in the U.S. unless +a copyright notice is included. Thus, we do not necessarily keep eBooks +in compliance with any particular paper edition. + +Most people start at our Web site which has the main PG search facility: + +http://www.gutenberg.org + +This Web site includes information about Project Gutenberg-tm, including +how to make donations to the Project Gutenberg Literary Archive +Foundation, how to help produce our new eBooks, and how to subscribe to +our email newsletter to hear about new eBooks.