Cryptopals, Set 1

Last updated: Dec 22, 2024.


(ns set1
  (:require [clojure.data.codec.base64 :as b64]))

We’ll write a little utility function to verify answers:

(defn verify
  "Takes a boolean or sequence of booleans and returns emoji checkmarks or crosses"
  [input]
  (let [to-emoji #(if % "✅" "❌")]
    (if (sequential? input)
      (str/join " " (map to-emoji input))
      (to-emoji input))))

Challenge 1

Convert a hex string to its base64 equivalent.

The key thing to understand here are the following equivalencies:

\(6\) hex chars = \(3\) bytes = \(24\) bits = \(4\) base64 chars.

(defn hex->bytes
  "Convert a hex string to a byte array"
  [hex-string]
  (let [hex-pairs (re-seq #".{2}" hex-string)]
    (byte-array (map #(Integer/parseInt % 16) hex-pairs))))

(defn hex->base64
  [input]
  (-> input
      hex->bytes
      b64/encode
      String.))
      
(defn solve-ch1
  []
  (let [CH1-INPUT "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d"
        CH1-ANSWER "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t"]

    (verify (= CH1-ANSWER
               (hex->base64 CH1-INPUT)))))

(solve-ch1) => ✅

Challenge 2

Write a function that takes two equal-length buffers and produces their XOR combination.

(defn bytes->hex
  "Convert a byte array to a hex string"
  [input]
  (apply str (map #(format "%02x" %1) input)))
  
(defn solve-ch2
  []
  (let [CH2-INPUT1 "1c0111001f010100061a024b53535009181c"
        CH2-INPUT2 "686974207468652062756c6c277320657965"
        CH2-ANSWER "746865206b696420646f6e277420706c6179"]

    (verify (= CH2-ANSWER
               (bytes->hex (map bit-xor
                                (hex->bytes CH2-INPUT1)
                                (hex->bytes CH2-INPUT2)))))))

(solve-ch2) => ✅

Challenge 3

The hex encoded string [input] has been XOR’d against a single character. Find the key, decrypt the message.

You can do this by hand. But don’t: write code to do it for you.

How? Devise some method for “scoring” a piece of English plaintext. Character frequency is a good metric. Evaluate each output and choose the one with the best score.

(def CH3-CIPHER
  (hex->bytes
   "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"))

At first I thought of using something like the KL divergence (see this post) to score the plaintext but then A readable ASCII byte is either in the range A-Z, or in the range a-z or a space. A non-readable byte is the negation of that.

(defn non-readable [b]
  (not (or (and (>= b (byte \A)) (<= b (byte \Z)))
           (and (>= b (byte \a)) (<= b (byte \z)))
           (= b (byte \space)))))

The gibberish score:

(defn gibberish-score
  [^bytes text]
  (/ (double (count (filter non-readable text)))
     (count text)))

(defn gibberish-score-str
  [text]
  (byte-array (map byte text)))

(gibberish-score-str “ABCD?”) => 0.2 (gibberish-score-str “@&#$&@#”) => 1.0

(defn xor-trial-keys-scores
  [ciphertext trial-keys]
  
  (for [key trial-keys
        :let [plain (byte-array (map #(bit-xor key %) ciphertext))]]

    {:key (char key)
     :score (gibberish-score plain)
     :plain (String. plain)}))
(defn solve-ch3
  [trial-keys]

  (let [CH3-CIPHER (hex->bytes "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736")]
    (apply min-key :score
         (xor-trial-keys-scores CH3-CIPHER trial-keys))))

(String. (:plain (solve-ch3 (range (byte \A) (byte \Z))))) {:key \X, :score 0.029411764705882353, :plain “Cooking MC’s like a pound of bacon”}

Challenge 4

One of the 60-character strings in this file has been encrypted by single-character XOR. Find it.