Find out why people live longer using Clojure

less than 1 minute read

Notes

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
;; longevity.clj

;; The article
;; https://www.theguardian.com/science/2023/feb/18/100-centenarians-100-tips-for-a-life-well-lived

;; Stop words
;; https://github.com/stopwords-iso/stopwords-en/blob/master/stopwords-en.txt

(require '[clojure.string :as str])

(defn remove-punctuation
  "Returns `string` with every character matched by the regex `\\W`
  (i.e. every non-word character) replaced by a single space.
  The length of the text is preserved; underscores, letters and digits
  are left untouched."
  [string]
  (let [non-word-char #"\W"
        blank         " "]
    (str/replace string non-word-char blank)))

(defn split-to-words
  "Splits `string` into a vector of whitespace-separated tokens.

  Collects maximal runs of non-whitespace characters rather than splitting
  on whitespace, so leading whitespace (common after punctuation has been
  replaced by spaces) never produces an empty \"\" token, and empty or
  all-blank input yields an empty vector instead of [\"\"]."
  [string]
  ;; re-seq returns nil when nothing matches; vec turns that into [] and
  ;; keeps the return type a vector, as callers of the original received.
  (vec (re-seq #"\S+" string)))

(defn split-by-line
  "Splits `string` into a vector of lines.

  Delegates to clojure.string/split-lines, which breaks on both \\n and
  \\r\\n, so a file saved with Windows line endings does not leave a
  trailing carriage return on every line. Trailing empty lines are
  dropped, as before."
  [string]
  (str/split-lines string))

(def stop-words
  "Set containing each line of the file stopwords-en.txt, read once when
  this namespace is loaded."
  (let [raw-list (slurp "stopwords-en.txt")]
    (set (split-by-line raw-list))))

(defn stop-word?
  "Returns true when `word` is a member of the `stop-words` set,
  false otherwise."
  [word]
  (-> stop-words
      (contains? word)))

(defn not-a-stop-word?
  "Logical negation of `stop-word?`: true when `word` is not in the
  stop-word set."
  [word]
  (-> word
      stop-word?
      not))

(defn remove-stop-words
  "Returns a lazy sequence of the items of `words-vector` for which
  `stop-word?` is false, in their original order."
  [words-vector]
  (remove stop-word? words-vector))

(defn sort-by-frequency
  "Comparator predicate for `sort`: `a` and `b` are pairs whose second
  element is a count. Returns true when the count of `a` is strictly
  less than the count of `b`, giving an ascending order by count."
  [a b]
  (< (second a) (second b)))

(def word-frequencies
  "Map from each token of longevity.txt that survives stop-word removal
  to the number of times it occurs. The text is stripped of punctuation
  and lower-cased before being split into tokens."
  (let [article-text (slurp "longevity.txt")
        normalized   (str/lower-case (remove-punctuation article-text))
        tokens       (split-to-words normalized)]
    (frequencies (remove-stop-words tokens))))

;; Entries whose count is greater than 10.
(filter (fn [[_ occurrences]] (> occurrences 10)) word-frequencies)

;; (take 100 (reverse (sort sort-by-frequency word-frequencies)))

;; The 20 entries with the highest counts, largest first.
(take 20 (reverse (sort sort-by-frequency word-frequencies)))

Updated: