Find out why people live longer using Clojure
Notes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
;; longevity.clj
;; The article
;; https://www.theguardian.com/science/2023/feb/18/100-centenarians-100-tips-for-a-life-well-lived
;; Stop words
;; https://github.com/stopwords-iso/stopwords-en/blob/master/stopwords-en.txt
(require '[clojure.string :as str])
(defn remove-punctuation
  "Returns `string` with every non-word character replaced by a single space.

  Each replaced character becomes exactly one space, so runs of punctuation
  turn into runs of spaces (they are not collapsed here).

  The `(?U)` flag switches the regex to Unicode character classes, so
  letters outside ASCII (e.g. accented letters in names) count as word
  characters and are kept instead of being blanked out mid-word."
  [string]
  (str/replace string #"(?U)\W" " "))
(defn split-to-words
  "Returns a vector of the whitespace-separated tokens in `string`.

  Tokens are the maximal runs of non-whitespace characters. Leading or
  trailing whitespace never produces an empty-string token, and an empty
  or all-whitespace input yields an empty vector."
  [string]
  ;; re-seq returns nil when nothing matches; vec turns that into [].
  (vec (re-seq #"\S+" string)))
(defn split-by-line
  "Returns a vector of the lines in `string`.

  Both \\n and \\r\\n are treated as line terminators, so files saved with
  Windows line endings do not leave a stray carriage return at the end of
  each line. Trailing empty lines are dropped."
  [string]
  (str/split-lines string))
;; Set of the lines of stopwords-en.txt (one entry per line of the file).
;; The file is read once, when this form is evaluated.
(def stop-words
  (let [file-contents (slurp "stopwords-en.txt")]
    (set (split-by-line file-contents))))
(defn stop-word?
  "True when `word` is a member of the `stop-words` set, false otherwise."
  [word]
  (-> stop-words
      (contains? word)))
(defn not-a-stop-word?
  "Logical negation of `stop-word?`: true when `word` is not a stop word."
  [word]
  (-> word stop-word? not))
(defn remove-stop-words
  "Returns a lazy sequence of the items of `words-vector` for which
  `stop-word?` is false, in their original order."
  [words-vector]
  (remove stop-word? words-vector))
(defn sort-by-frequency
  "Comparator predicate for use with `sort`.

  `a` and `b` are pairs whose second element is a number (a count);
  returns true when a's count is strictly less than b's, so sorting with
  this function orders pairs by ascending count."
  [a b]
  (->> [a b]
       (map second)
       (apply <)))
;; Map of word -> number of occurrences in longevity.txt.
;; Pipeline: blank out punctuation, lower-case, split into words,
;; drop stop words, then count. Computed once when this form is evaluated.
(def word-frequencies
  (let [article (slurp "longevity.txt")
        words   (-> article
                    remove-punctuation
                    str/lower-case
                    split-to-words)]
    (frequencies (remove-stop-words words))))
;; Entries of word-frequencies whose count is greater than 10 (unsorted).
(filter (fn [entry] (> (second entry) 10)) word-frequencies)

;; The 20 most frequent words, highest count first.
;; For a longer list:
;; (take 100 (reverse (sort sort-by-frequency word-frequencies)))
(take 20 (reverse (sort sort-by-frequency word-frequencies)))