Calculating compression gain - File compression in Clojure (1)

less than 1 minute read

Notes

;; text_analysis.clj

;; Input file name used as a convenient default at the REPL. The functions
;; below each take their own `file` parameter, which shadows this var.
;; NOTE(review): the name looks like a Project Gutenberg plain-text download;
;; confirm the file exists in the working directory before calling slurp on it.
(def file "245-0.txt")

(defn words
  "Splits `text` into a vector of whitespace-separated tokens.

  Tokens are maximal runs of non-whitespace characters, so leading,
  trailing and repeated whitespace never produce empty-string tokens.
  Returns an empty vector when `text` is empty or all whitespace."
  [text]
  ;; Splitting on whitespace yields a leading \"\" when the text starts with
  ;; whitespace (and [\"\"] for empty text); matching the tokens directly
  ;; avoids that. `vec` keeps the vector return type and maps nil to [].
  (vec (re-seq #"\S+" text)))

(defn remove-punctions
  "Replaces every run of non-word characters in `text` with a single space.

  Word characters are letters, digits and connector punctuation such as the
  underscore, judged by Unicode rules, so accented letters stay inside
  their words instead of being treated as punctuation.
  Returns the cleaned string; an empty string is returned unchanged."
  [text]
  ;; (?U) turns on Java's UNICODE_CHARACTER_CLASS flag. Without it the
  ;; non-word class is ASCII-only and would split words at every
  ;; non-ASCII letter.
  (clojure.string/replace text #"(?U)\W+" " "))

(defn words-in-file
  "Reads `file` with slurp, strips punctuation with `remove-punctions`,
  and returns the result of `words` on the cleaned text."
  [file]
  (words (remove-punctions (slurp file))))

(defn word-frequencies-in-file
  "Returns a map from each word produced by `words-in-file` for `file`
  to the number of times that word occurs."
  [file]
  (frequencies (words-in-file file)))

(defn compression-gain
  "Computes a gain figure for every entry of `word-frequncies`.

  Each entry is read as a word (its first element) and an occurrence count
  (its last element). The gain is the word's length multiplied by that
  count. Returns a lazy sequence of `(gain word)` lists in input order."
  [word-frequncies]
  (for [entry word-frequncies
        :let [token       (first entry)
              occurrences (last entry)]]
    (list (* (count token) occurrences) token)))


(defn file-compression-gain
  "Returns the `(gain word)` pairs for the words in `file`, ordered from
  highest gain to lowest."
  [file]
  ;; Sort ascending by gain, then reverse, so the biggest gains come first.
  (->> file
       word-frequencies-in-file
       compression-gain
       (sort-by first)
       reverse))

Mastering Regular Expressions

Classic Computer Science Problems in Python

Updated: