Upload
jeremy-stafford
View
223
Download
5
Embed Size (px)
Citation preview
Cryptograms
A Tour of Code
Initial stuff
package cryptograms
import java.io.File
import scala.annotation.tailrec
import Code._
object Cryptograms {
/** A word, either plaintext or encoded. */
type Word = String

/** The letter pattern of a word, e.g. "ABCADB" for "people". */
type Pattern = String

/** Maps each pattern to the words having that pattern. */
type PatternMap = Map[Pattern, List[Word]]

/** The 26 uppercase letters, in order. */
val alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
3
/**
 * Reads the word list from "unranked-words.txt", which holds one word per line.
 *
 * The file is closed even if reading fails part-way through. Upper-casing
 * uses the root locale so the result does not depend on the JVM's default
 * locale.
 *
 * @return the file's lines, in file order, converted to upper case
 */
def readWordFile: List[Word] = {
  val source = scala.io.Source.fromFile("unranked-words.txt")
  try source.getLines().map(_.toUpperCase(java.util.Locale.ROOT)).toList
  finally source.close()
}
4
/**
 * Indexes words by their letter pattern.
 *
 * @param words the words to index
 * @return a map from each pattern to the list of words having that pattern
 */
def makePatternMap(words: List[Word]): PatternMap =
  words.groupBy(patternOf)
5
/**
 * Computes the letter pattern of a word: the first distinct letter becomes
 * 'A', the second distinct letter 'B', and so on. Characters that are not
 * letters are copied unchanged.
 *
 * For example, "people" -> "ABCADB".
 *
 * @param word the word to analyse
 * @return the pattern, the same length as `word`
 */
def patternOf(word: Word): Pattern = {
  // Distinct letters in order of first appearance, each paired with its label.
  val labels = word.filter(_.isLetter).distinct.zipWithIndex.map {
    case (letter, index) => letter -> ('A' + index).toChar
  }.toMap
  word map (ch => labels.getOrElse(ch, ch))
}
6
/**
 * Recursive variant of `patternOf`: returns the letter pattern of a word,
 * e.g. "people" -> "ABCADB".
 *
 * Characters that are not letters (such as the apostrophe in "DON'T") are
 * copied unchanged and do not consume a pattern letter, so the result agrees
 * with `patternOf`.
 *
 * @param word the word to analyse
 * @return the pattern, the same length as `word`
 * @throws NoSuchElementException if `word` has more distinct letters than
 *                                `alphabet` has characters
 */
def patternOf2(word: Word): Pattern = {
  // rest:  characters still to process
  // alpha: pattern letters not yet handed out
  // map:   letter -> pattern letter assigned so far
  // pat:   pattern built so far
  @tailrec
  def pat2(rest: Word, alpha: String, map: Map[Char, Char], pat: Pattern): Pattern =
    if (rest.isEmpty) pat
    else {
      val ch = rest.head
      if (!ch.isLetter) pat2(rest.tail, alpha, map, pat + ch)
      else if (map contains ch) pat2(rest.tail, alpha, map, pat + map(ch))
      else pat2(rest.tail, alpha.tail, map + (ch -> alpha.head), pat + alpha.head)
    }

  pat2(word, alphabet, Map.empty, "")
}
7
/**
 * Two-pass variant of `patternOf`: first builds the letter -> pattern-letter
 * table recursively, then translates the word with it.
 *
 * Characters that are not letters (such as the apostrophe in "DON'T") get no
 * table entry and are copied unchanged, so the result agrees with `patternOf`.
 *
 * @param word the word to analyse
 * @return the pattern, the same length as `word`
 * @throws NoSuchElementException if `word` has more distinct letters than
 *                                `alphabet` has characters
 */
def patternOf3(word: Word): Pattern = {
  // rest:  characters still to process
  // alpha: pattern letters not yet handed out
  // map:   letter -> pattern letter assigned so far
  @tailrec
  def buildMap(rest: Word, alpha: String, map: Map[Char, Char]): Map[Char, Char] =
    if (rest.isEmpty) map
    else if (!rest.head.isLetter || map.contains(rest.head)) buildMap(rest.tail, alpha, map)
    else buildMap(rest.tail, alpha.tail, map + (rest.head -> alpha.head))

  val map = buildMap(word, alphabet, Map.empty)
  word map (ch => map.getOrElse(ch, ch))
}
8
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
9
/**
 * Extracts the distinct words of a message, upper-cased.
 *
 * A word is a maximal run of the letters A-Z and apostrophes; every other
 * character is a separator. Empty fragments, which `split` produces for an
 * empty message or one that starts with a separator, are discarded, so the
 * result never contains "". Upper-casing uses the root locale so that it
 * always yields the ASCII letters the separator pattern expects.
 *
 * @param message the text to split
 * @return the set of words found; empty if the message contains none
 */
def extractWords(message: String): Set[Word] =
  message
    .toUpperCase(java.util.Locale.ROOT)
    .split("[^A-Z']+")
    .filter(_.nonEmpty)
    .toSet
10
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
11
/**
 * Collects the distinct letter patterns of the given words.
 *
 * @param words the words to analyse
 * @return the set containing `patternOf(word)` for every word
 */
def getSetOfPatterns(words: Set[Word]): Set[Pattern] =
  words.map(patternOf)
12
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
13
/**
 * Narrows the full pattern map to the patterns that occur in the message.
 *
 * @param patterns the patterns to keep
 * @param bigMap   the pattern map to narrow
 * @return the entries of `bigMap` whose key is in `patterns`
 */
def makeCustomizedPatternMap(patterns: Set[Pattern], bigMap: PatternMap) =
  bigMap filter { case (pattern, _) => patterns(pattern) }
14
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
15
/**
 * Orders patterns by how many words fit them, fewest first.
 *
 * @param map the pattern map whose keys are to be ordered
 * @return the keys of `map`, sorted by ascending length of their word list
 */
def sortPatternsByFrequency(map: PatternMap) =
  map.toList
    .sortBy { case (_, words) => words.length }
    .map { case (pattern, _) => pattern }
16
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
17
sortCodeWordsByPatternFrequency
/**
 * Arranges the code words in the order of the given pattern list, which is
 * sorted least-to-most frequent.
 *
 * Each code word's pattern is computed once and the words are grouped by it,
 * instead of recomputing `patternOf` for every (pattern, word) pair. Words
 * sharing a pattern keep their relative order from `codeWords`; words whose
 * pattern is not in `patternList` are left out.
 *
 * @param codeWords   the encoded words to arrange
 * @param patternList the patterns, in the desired order
 * @return the code words, ordered by the position of their pattern in
 *         `patternList`
 */
def sortCodeWordsByPatternFrequency(codeWords: List[Word], patternList: List[Pattern]) = {
  val wordsByPattern = codeWords.groupBy(patternOf)
  patternList flatMap (pattern => wordsByPattern.getOrElse(pattern, Nil))
}
18
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
19
/**
 * Recursive search for the code that matches the most words of a message.
 *
 * The first code word is tried against every real word that has the same
 * pattern and is still possible under `code`; for each such word the code is
 * extended and the search continues with the remaining code words. The
 * result with the highest `wordsMatched` wins, the earliest candidate being
 * kept on ties. A code word with no usable candidate is skipped.
 *
 * @param codeWords All remaining code words, rarest pattern first.
 * @param smallMap  A map of patterns in the cryptogram to possible real words.
 * @param code      The code to be extended into a more complete code.
 * @return the best code found; `code` itself when there is nothing left to try
 */
def findBestCode(codeWords: List[Word], smallMap: PatternMap, code: Code): Code =
  codeWords match {
    case Nil => code // bottom of recursion
    case _ if smallMap.isEmpty => code // out of things to try
    case codeWord :: remaining =>
      val pattern = patternOf(codeWord)
      // Direct map lookup instead of scanning a list of all keys on every call.
      smallMap.get(pattern) match {
        case None => // can't match codeWord
          println(s"Can't match $pattern")
          findBestCode(remaining, smallMap, code)
        case Some(realWordsWithSamePattern) =>
          val wordsMatchingCodeWord = shrinkList(codeWord, code, realWordsWithSamePattern)
          if (wordsMatchingCodeWord.isEmpty) findBestCode(remaining, smallMap, code)
          else
            wordsMatchingCodeWord.foldLeft(code) { (bestCode, realWord) =>
              val nextCode = findBestCode(remaining, smallMap, code.extend(realWord, codeWord))
              // Strict comparison: on a tie the earlier candidate is kept.
              if (nextCode.wordsMatched > bestCode.wordsMatched) nextCode else bestCode
            }
      }
  }
21
shrinkList and code.extend
/**
 * Keeps only the candidate words that `isPossibleWord` accepts for the given
 * code word under the given code, which is taken to be correct.
 *
 * @param codeWord the encoded word being matched
 * @param code     the code assumed so far
 * @param words    the candidate real words
 * @return the candidates that pass the check, in their original order
 */
def shrinkList(codeWord: Word, code: Code, words: List[Word]) =
  for (word <- words if isPossibleWord(codeWord, word, code)) yield word
/**
 * Builds a new code from this one by adding the letter pairs of a
 * plain word and its code word, matched position by position.
 *
 * @param plain the plaintext word
 * @param code  the corresponding encoded word
 * @return a new Code whose encoder is this encoder plus the new pairs and
 *         whose `wordsMatched` is one more than this code's
 */
def extend(plain: String, code: String): Code = {
  val letterPairs = plain zip code
  val result = new Code(encoder ++ letterPairs.toMap)
  result.wordsMatched = wordsMatched + 1
  result
}
22
/**
 * Guesses the substitution code that produced a cryptogram.
 *
 * The word file is indexed by letter pattern, the index is narrowed to the
 * patterns that occur in the message, the message's words are ordered so
 * that words whose pattern has the fewest candidate real words come first,
 * and a search then picks the code that matches the most words.
 *
 * @param codedMessage the encrypted text
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionary = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidates = makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionary)
  val orderedWords =
    sortCodeWordsByPatternFrequency(cipherWords.toList, sortPatternsByFrequency(candidates))
  findBestCode(orderedWords, candidates, new Code).encode(alphabet)
}
23
/**
 * Encodes a plaintext word character by character using `encoder`;
 * a character with no entry in `encoder` becomes '_'.
 *
 * @param plainWord the text to encode
 */
def encode(plainWord: String) =
  plainWord map (ch => encoder.getOrElse(ch, '_'))
24
Sample results
Good:
Plaintext: I DON'T KNOW HALF OF YOU HALF AS WELL AS I SHOULD LIKE; AND I LIKE LESS THAN HALF OF YOU HALF AS WELL AS YOU DESERVE.
Cryptogram: U BXA'C VAXI QHJZ XZ DXS QHJZ HM IYJJ HM U MQXSJB JUVY; HAB U JUVY JYMM CQHA QHJZ XZ DXS QHJZ HM IYJJ HM DXS BYMYPNY.
Decoded: I DON'T KNOW HALF OF YOU HALF AS WELL AS I SHOULD LIKE; AND I LIKE LESS THAN HALF OF YOU HALF AS WELL AS YOU DESERVE.
17 characters correctly decoded.
Medium:
Plaintext: YOU CAN FOOL ALL OF THE PEOPLE SOME OF THE TIME, AND SOME OF THE PEOPLE ALL THE TIME, BUT YOU CANNOT FOOL ALL THE PEOPLE
Cryptogram: TMN PZY IMMU ZUU MI HOL WLMWUL KMJL MI HOL HEJL, ZYA KMJL MI HOL WLMWUL ZUU HOL HEJL, GNH TMN PZYYMH IMMU ZUU HOL WLMWUL
Decoded: YOU NAR COOL ALL OC WOE PEOPLE SOME OC WOE WEME, ART SOME OC WOE PEOPLE ALL WOE WEME, GUW YOU NARROW COOL ALL WOE PEOPLE
9 characters correctly decoded.
25
26
The End