26
Cryptograms A Tour of Code

Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

Embed Size (px)

Citation preview

Page 1: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

Cryptograms

A Tour of Code

Page 2: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

Initial stuff

package cryptograms

import java.io.File
import Code._

object Cryptograms {

/** A single word; used both for dictionary words and for coded words. */
type Word = String
/** The letter pattern of a word, as produced by `patternOf` (e.g. "ABCADB"). */
type Pattern = String
/** Maps a pattern to the list of words having that pattern. */
type PatternMap = Map[String, List[Word]]

/** The 26 uppercase letters A through Z, in order. */
val alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

2

Page 3: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

3

Page 4: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// readWordFile
/**
 * Returns a list of UPPERCASED words from a file
 * that has one word per line.
 *
 * The file name is the relative path "unranked-words.txt". Any exception
 * raised while opening or reading the file propagates to the caller; the
 * underlying source is closed in either case.
 *
 * @return every line of the file, uppercased, in file order
 */
def readWordFile: List[Word] = {
  val file = "unranked-words.txt"
  val stream = scala.io.Source.fromFile(file)
  // getLines() is lazy, so the lines are forced into a List before the
  // source is closed; `finally` guarantees the close even if reading fails.
  try stream.getLines().toList.map(_.toUpperCase)
  finally stream.close()
}

4

Page 5: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// makePatternMap
/**
 * Indexes words by their pattern.
 *
 * @param words the words to index
 * @return a map from each pattern to the list of words having that pattern
 */
def makePatternMap(words: List[Word]): PatternMap =
  words groupBy patternOf

5

Page 6: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// patternOf
/**
 * Computes the letter pattern of a word: the first distinct letter becomes
 * 'A', the second distinct letter 'B', and so on, while characters that are
 * not letters are copied unchanged.
 * For example, "people" -> "ABCADB"
 *
 * @param word the word to convert
 * @return the pattern, with the same length as `word`
 */
def patternOf(word: Word): Pattern = {
  val start = (Map.empty[Char, Char], new StringBuilder)
  val (_, out) = word.foldLeft(start) { case ((assigned, acc), ch) =>
    if (!ch.isLetter) (assigned, acc += ch)
    else assigned.get(ch) match {
      case Some(patternLetter) => (assigned, acc += patternLetter)
      case None =>
        // The next unused pattern letter is determined by how many
        // distinct letters have been assigned so far.
        val patternLetter = ('A' + assigned.size).toChar
        (assigned + (ch -> patternLetter), acc += patternLetter)
    }
  }
  out.toString
}

6

Page 7: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// patternOf #2
/**
 * Returns the pattern of a word (e.g. "PEOPLE" -> "ABCADB"), computed with a
 * tail-recursive helper that threads the unused part of `alphabet`, the
 * letter-to-pattern map and the pattern built so far.
 *
 * Characters that are not letters are copied unchanged, matching `patternOf`.
 *
 * @param word the word to convert
 * @return the pattern, with the same length as `word`
 * @throws NoSuchElementException if `word` has more distinct letters than
 *         `alphabet` has characters
 */
def patternOf2(word: Word): Pattern = {
  @scala.annotation.tailrec
  def pat2(word: Word, alpha: String, map: Map[Char, Char], pat: Pattern): Pattern =
    if (word.isEmpty) pat
    // Non-letters (e.g. the apostrophe in DON'T) pass through and do not
    // consume a pattern letter.
    else if (!word.head.isLetter) pat2(word.tail, alpha, map, pat + word.head)
    else if (map contains word.head) pat2(word.tail, alpha, map, pat + map(word.head))
    else pat2(word.tail, alpha.tail, map + (word.head -> alpha.head), pat + alpha.head)

  pat2(word, alphabet, Map[Char, Char](), "")
}

7

Page 8: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// patternOf #3
/**
 * Returns the pattern of a word (e.g. "PEOPLE" -> "ABCADB") in two steps:
 * first build the complete letter-to-pattern map recursively, then translate
 * the word through that map.
 *
 * Characters that are not letters are copied unchanged, matching `patternOf`.
 *
 * @param word the word to convert
 * @return the pattern, with the same length as `word`
 * @throws NoSuchElementException if `word` has more distinct letters than
 *         `alphabet` has characters
 */
def patternOf3(word: Word): Pattern = {
  @scala.annotation.tailrec
  def buildMap(word: Word, alpha: String, map: Map[Char, Char]): Map[Char, Char] =
    if (word.isEmpty) map
    // Skip non-letters and letters that already have a pattern letter.
    else if (!word.head.isLetter || map.contains(word.head)) buildMap(word.tail, alpha, map)
    else buildMap(word.tail, alpha.tail, map + (word.head -> alpha.head))

  val map = buildMap(word, alphabet, Map[Char, Char]())
  // Non-letters were never put in the map, so they fall back to themselves.
  for (ch <- word) yield map.getOrElse(ch, ch)
}

8

Page 9: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

9

Page 10: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// extractWords
/**
 * Returns a Set of words, uppercased, from a given message.
 *
 * The message is uppercased and split on every run of characters other than
 * 'A'-'Z' and the apostrophe. Empty tokens are discarded, so an empty message
 * or one that begins with punctuation never yields "" as a word.
 *
 * @param message the text to split
 * @return the distinct non-empty words of the message
 */
def extractWords(message: String): Set[Word] =
  message.toUpperCase
    .split("[^A-Z']+")
    // split keeps a leading "" when the text starts with a separator, and
    // returns Array("") for empty input; neither is a word.
    .filter(_.nonEmpty)
    .toSet

10

Page 11: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

11

Page 12: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// getSetOfPatterns
/**
 * Collects the distinct patterns of the given words.
 *
 * @param words the words whose patterns are wanted
 * @return the set containing `patternOf(word)` for every word
 */
def getSetOfPatterns(words: Set[Word]): Set[Pattern] =
  words.map(patternOf)

12

Page 13: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

13

Page 14: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// makeCustomizedPatternMap
/**
 * Restricts the full pattern map to the patterns that occur in the message.
 *
 * @param patterns the patterns to keep
 * @param bigMap   the pattern map to restrict
 * @return the entries of `bigMap` whose key is in `patterns`
 */
def makeCustomizedPatternMap(patterns: Set[Pattern], bigMap: PatternMap) =
  bigMap.filter { case (pattern, _) => patterns(pattern) }

14

Page 15: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

15

Page 16: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// sortPatternsByFrequency
/**
 * Orders the patterns of a pattern map by how many words fit each pattern,
 * fewest first.
 *
 * @param map a map from patterns to the words having that pattern
 * @return the map's keys, sorted by ascending length of their word lists
 */
def sortPatternsByFrequency(map: PatternMap) =
  map.toList
    .sortBy { case (_, words) => words.length }
    .map { case (pattern, _) => pattern }

16

Page 17: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

17

Page 18: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

sortCodeWordsByPatternFrequency

/**
 * The pattern list is sorted least-to-most frequent; sort the
 * codeWords in the same order.
 *
 * Code words whose pattern does not appear in `patternList` are dropped.
 * Code words sharing a pattern keep their relative order from `codeWords`.
 *
 * @param codeWords   the coded words to order
 * @param patternList patterns in the desired order
 * @return the code words, arranged by the position of their pattern in
 *         `patternList`
 */
def sortCodeWordsByPatternFrequency(codeWords: List[Word],
                                    patternList: List[Pattern]) = {
  // Compute each word's pattern once, rather than once per
  // (pattern, word) pair.
  val wordsByPattern = codeWords.groupBy(patternOf)
  patternList.flatMap(pattern => wordsByPattern.getOrElse(pattern, Nil))
}

18

Page 19: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

19

Page 20: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// findBestCode, I and II
/**
 * Recursive search to find the best code for a message.
 *
 * The first code word is matched against every real word that has the same
 * pattern and is still possible under `code`. For each such word the code is
 * extended and the search continues on the remaining code words. The result
 * with the highest `wordsMatched` wins, and on a tie the earlier result is
 * kept. A code word that has no usable real word is skipped.
 *
 * @param codeWords All remaining code words, rarest pattern first.
 * @param smallMap
 *   A map of patterns in the cryptogram to possible real words.
 * @param code The code to be extended into a more complete code.
 * @return the most complete code found, or `code` itself when nothing
 *         further can be matched
 */
def findBestCode(codeWords: List[Word],
                 smallMap: PatternMap,
                 code: Code): Code =
  codeWords match {
    case Nil => code                      // bottom of recursion
    case _ if smallMap.isEmpty => code    // out of things to try
    case codeWord :: remaining =>
      val pattern = patternOf(codeWord)
      // A single map lookup replaces building a key list and then
      // looking the pattern up a second time.
      smallMap.get(pattern) match {
        case None =>                      // can't match codeWord
          println(s"Can't match $pattern")
          findBestCode(remaining, smallMap, code)
        case Some(realWordsWithSamePattern) =>
          val wordsMatchingCodeWord =
            shrinkList(codeWord, code, realWordsWithSamePattern)
          if (wordsMatchingCodeWord.isEmpty) findBestCode(remaining, smallMap, code)
          else
            wordsMatchingCodeWord.foldLeft(code) { (bestCode, realWord) =>
              val nextCode =
                findBestCode(remaining, smallMap, code.extend(realWord, codeWord))
              // Strict comparison: an equally good later result does not
              // replace the current best.
              if (nextCode.wordsMatched > bestCode.wordsMatched) nextCode else bestCode
            }
      }
  }

21

Page 22: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

shrinkList and code.extend

/**
 * Keeps only the candidate words that could stand for the given code word,
 * taking the given code as correct.
 *
 * @param codeWord the coded word being matched
 * @param code     the code assumed so far
 * @param words    candidate real words
 * @return the words for which `isPossibleWord(codeWord, word, code)` holds,
 *         in their original order
 */
def shrinkList(codeWord: Word, code: Code, words: List[Word]) =
  for (candidate <- words if isPossibleWord(codeWord, candidate, code))
    yield candidate

/**
 * Builds a new code from this one plus the letter pairs of one more
 * plain word / code word match.
 *
 * Letters are paired by position; if the two strings differ in length the
 * extra characters of the longer one are ignored.
 *
 * @param plain the plaintext word
 * @param code  the coded word it corresponds to
 * @return a new Code whose `wordsMatched` is one more than this code's
 */
def extend(plain: String, code: String): Code = {
  val letterPairs = plain.zip(code).toMap
  val result = new Code(encoder ++ letterPairs)
  result.wordsMatched = wordsMatched + 1
  result
}

22

Page 23: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// discoverCode
/**
 * Makes a best guess at the substitution code behind a cryptogram.
 *
 * The dictionary is indexed by word pattern, narrowed to the patterns that
 * occur in the message, and the coded words are ordered so that the ones
 * whose pattern has the fewest candidate words come first. That ordered
 * list drives the search for the best code.
 *
 * @param codedMessage the cryptogram text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList,
    sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}

23

Page 24: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

// Code.encode
/**
 * Translates a plaintext word character by character through this code's
 * encoder; a character with no entry in the encoder becomes '_'.
 *
 * @param plainWord the text to encode
 * @return the encoded text, one output character per input character
 */
def encode(plainWord: String) =
  plainWord.map(ch => encoder.getOrElse(ch, '_'))

24

Page 25: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

Sample results

Good:
I DON'T KNOW HALF OF YOU HALF AS WELL AS I SHOULD LIKE; AND I LIKE LESS THAN HALF OF YOU HALF AS WELL AS YOU DESERVE.
U BXA'C VAXI QHJZ XZ DXS QHJZ HM IYJJ HM U MQXSJB JUVY; HAB U JUVY JYMM CQHA QHJZ XZ DXS QHJZ HM IYJJ HM DXS BYMYPNY.
I DON'T KNOW HALF OF YOU HALF AS WELL AS I SHOULD LIKE; AND I LIKE LESS THAN HALF OF YOU HALF AS WELL AS YOU DESERVE.
17 characters correctly decoded.

Medium:
YOU CAN FOOL ALL OF THE PEOPLE SOME OF THE TIME, AND SOME OF THE PEOPLE ALL THE TIME, BUT YOU CANNOT FOOL ALL THE PEOPLE
TMN PZY IMMU ZUU MI HOL WLMWUL KMJL MI HOL HEJL, ZYA KMJL MI HOL WLMWUL ZUU HOL HEJL, GNH TMN PZYYMH IMMU ZUU HOL WLMWUL
YOU NAR COOL ALL OC WOE PEOPLE SOME OC WOE WEME, ART SOME OC WOE PEOPLE ALL WOE WEME, GUW YOU NARROW COOL ALL WOE PEOPLE
9 characters correctly decoded.

25

Page 26: Cryptograms A Tour of Code. Initial stuff package cryptograms import java.io.File import Code._ object Cryptograms { type Word = String type Pattern =

26

The End