From ebdb9de9bac30d0237e83af56cdd965271924754 Mon Sep 17 00:00:00 2001 From: taskylizard <75871323+taskylizard@users.noreply.github.com> Date: Sat, 8 Jun 2024 09:30:46 +0000 Subject: [PATCH] feat(search): custom tokenizer --- .vitepress/constants.ts | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/.vitepress/constants.ts b/.vitepress/constants.ts index b7dec3a79..ff8eeaf6e 100644 --- a/.vitepress/constants.ts +++ b/.vitepress/constants.ts @@ -19,18 +19,54 @@ export const feedback = `Made with export const search: DefaultTheme.Config['search'] = { options: { miniSearch: { + options: { + tokenize: (text) => text.split(/[\n\r #%*,=/:;?[\]{}()&]+/u), // simplified charset: removed [-_.@] and non-english chars (diacritics etc.) + processTerm: (term, fieldName) => { + term = term + .trim() + .toLowerCase() + .replace(/^\.+/, '') + .replace(/\.+$/, '') + const stopWords = [ + 'frontmatter', + '$frontmatter.synopsis', + 'and', + 'about', + 'but', + 'now', + 'the', + 'with', + 'you' + ] + if (term.length < 2 || stopWords.includes(term)) return false + + if (fieldName === 'text') { + const parts = term.split('.') + if (parts.length > 1) { + const newTerms = [term, ...parts] + .filter((t) => t.length >= 2) + .filter((t) => !stopWords.includes(t)) + return newTerms + } + } + return term + } + }, searchOptions: { combineWith: 'AND', - fuzzy: false, + fuzzy: true, // @ts-ignore boostDocument: ( - _, + documentId, term, storedFields: Record ) => { const titles = (storedFields?.titles as string[]) .filter((t) => Boolean(t)) .map((t) => t.toLowerCase()) + // Downrank posts + if (documentId.match(/\/posts/)) return -5 + // Uprate if term appears in titles. Add bonus for higher levels (i.e. lower index) const titleIndex = titles