summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
author Ricardo Wurmus <rekado@elephly.net> 2020-05-10 08:28:38 +0200
committer Ricardo Wurmus <rekado@elephly.net> 2020-05-10 08:28:38 +0200
commit b6f26c77e1e6756909e47d143ff2a1766454c475 (patch)
tree a7205c654b0a3e56362c809b9b768a62d1fd4a8b /tests
parent 43a931ace99e154124b855b5468ea7b10582bfdb (diff)
xapian: Add phrase-aware tokenizer.
* mumi/xapian.scm (tokenize): New procedure.
(search): Use it instead of string-tokenize.
* tests/xapian.scm: Test it.
Diffstat (limited to 'tests')
-rw-r--r-- tests/xapian.scm 11
1 files changed, 11 insertions, 0 deletions
diff --git a/tests/xapian.scm b/tests/xapian.scm
index 607fdb8..04d1a4d 100644
--- a/tests/xapian.scm
+++ b/tests/xapian.scm
@@ -68,4 +68,15 @@ given by REPLACEMENT."
(time->datestamp 1m)
(time->datestamp today))))
+(define tokenize                ; local alias for the procedure exercised by the tests below
+ (@@ (mumi xapian) tokenize))   ; `@@' resolves the binding inside (mumi xapian) whether or not it is exported
+
+(test-equal "tokenize: keeps phrases intact 1"    ; SRFI-64 argument order: name, expected, test-expr
+ '("subject:\"hello world\"" "how" "are" "you")   ; expected: the quoted phrase stays attached to its prefix as one token
+ (tokenize "subject:\"hello world\" how are you")) ; expression under test goes last so reports label it as the actual value
+
+(test-equal "tokenize: keeps phrases intact 2"           ; SRFI-64 argument order: name, expected, test-expr
+ '("subject:\"hello world\"" "how" "\"are\"" "you")      ; expected: a standalone quoted word keeps its quotes as its own token
+ (tokenize "subject:\"hello world\" how \"are\" you"))   ; expression under test goes last so reports label it as the actual value
+
(test-end "xapian")