debbugs: Ignore duplicate mails.
authorRicardo Wurmus <rekado@elephly.net>
Thu, 23 Apr 2020 05:44:20 +0000 (07:44 +0200)
committerRicardo Wurmus <rekado@elephly.net>
Thu, 23 Apr 2020 05:44:20 +0000 (07:44 +0200)
Debbugs records mails that are received via different addresses more
than once, so we only add them if the message id is new.

mumi/debbugs.scm

index 913c290cd6e4c029f581cfd668706b1327e926e1..7873c863bb59adfa8506790d1248fd45a85d1502 100644 (file)
@@ -57,7 +57,8 @@ They are only used for the Debbugs web interface.
 This is for received emails.  The raw email is introduced by ^G and
 ends with ^C.
 "
-  (let loop ((mails '())
+  (let loop ((msgids (make-hash-table))
+             (mails '())
              (lines '())
              (type 'init)
              (skip? #f))
@@ -67,50 +68,53 @@ ends with ^C.
                       (string-ref line 0))
             ;; Ctrl-A, autocheck
             (#\soh
-             (loop mails lines 'autocheck #f))
+             (loop msgids mails lines 'autocheck #f))
             ;; Ctrl-B, recips: skip until beginning of email
             (#\stx
-             (loop mails lines 'recips #t))
+             (loop msgids mails lines 'recips #t))
             ;; Ctrl-C, end of record
             (#\etx
-             (loop (if (member type keep)
-                       ;; TODO: This is very ugly.  The first few
-                       ;; lines of the raw messages stored in Debbugs
-                       ;; logs seem to confuse the email parser, so we
-                       ;; try to strip them off.
-                       (let* ((content (string-join
-                                        (drop-while (lambda (line)
-                                                      (or (string-prefix? "From " line)
-                                                          (string-prefix? "Received" line)
-                                                          (string-prefix? "\t" line)
-                                                          (string-prefix? " " line)))
-                                                    (reverse lines)) "\n"))
-                              (mail (catch #t
-                                      (lambda ()
-                                        (parse-email content))
-                                      (lambda args
-                                        (format (current-error-port)
-                                                "failed to process email~%")
-                                        #f))))
-                         (if mail
-                             (cons mail mails)
-                             mails))
-                       mails)
-                   '()
-                   'init #f))
+             (let ((mails*
+                    (if (member type keep)
+                        ;; TODO: This is very ugly.  The first few
+                        ;; lines of the raw messages stored in Debbugs
+                        ;; logs seem to confuse the email parser, so we
+                        ;; try to strip them off.
+                        (let* ((content (string-join
+                                         (drop-while (lambda (line)
+                                                       (or (string-prefix? "From " line)
+                                                           (string-prefix? "Received" line)
+                                                           (string-prefix? "\t" line)
+                                                           (string-prefix? " " line)))
+                                                     (reverse lines)) "\n"))
+                               (mail (catch #t
+                                       (lambda ()
+                                         (parse-email content))
+                                       (lambda args
+                                         (format (current-error-port)
+                                                 "failed to process email~%")
+                                         #f))))
+                          (let ((id (and mail (assoc-ref (email-headers mail) 'message-id))))
+                            (if (and id (not (hash-ref msgids id)))
+                                (begin
+                                  (hash-set! msgids id #t)
+                                  (cons mail mails))
+                                mails)))
+                        mails)))
+               (loop msgids mails* '() 'init #f)))
             ;; Ctrl-E, beginning of email in recips
             (#\enq
-             (loop mails lines 'recips #f))
+             (loop msgids mails lines 'recips #f))
             ;; Ctrl-F, raw HTML snippet: skip
             (#\ack
-             (loop mails lines 'html #t))
+             (loop msgids mails lines 'html #t))
             ;; Ctrl-G, incoming-recv
             (#\bel
-             (loop mails lines 'incoming-recv #f))
+             (loop msgids mails lines 'incoming-recv #f))
             (_
              (if skip?
-                 (loop mails lines type skip?)
-                 (loop mails (cons line lines) type skip?))))))))
+                 (loop msgids mails lines type skip?)
+                 (loop msgids mails (cons line lines) type skip?))))))))
 
 (define* (extract-bug-numbers packages #:key archived?)
   "Open up a Debbugs index file and return the bug numbers for those