merge search branch into redesign

This commit is contained in:
taskylizard 2025-07-04 11:18:29 +00:00
parent c7153b6739
commit 5bbdf245ae
No known key found for this signature in database
GPG key ID: 1820131ED1A24120
7 changed files with 1532 additions and 35 deletions

View file

@ -108,6 +108,12 @@ export default defineConfig({
import.meta.url
)
)
},
{
find: /^.*VPLocalSearchBox.vue$/,
replacement: fileURLToPath(
new URL('./theme/components/Search.vue', import.meta.url)
)
}
]
},

View file

@ -1,5 +1,6 @@
import type { DefaultTheme } from 'vitepress'
import consola from 'consola'
import { customTokenize, customTokenProcessor } from './search'
import { transform, transformGuide } from '../../website/transformer'
// @unocss-include
@ -49,44 +50,13 @@ export const search: DefaultTheme.Config['search'] = {
},
miniSearch: {
options: {
tokenize: (text) => text.split(/[\n\r #%*,=/:;?[\]{}()&]+/u), // simplified charset: removed [-_.@] and non-english chars (diacritics etc.)
processTerm: (term, fieldName) => {
// biome-ignore lint/style/noParameterAssign: h
term = term
.trim()
.toLowerCase()
.replace(/^\.+/, '')
.replace(/\.+$/, '')
const stopWords = [
'frontmatter',
'$frontmatter.synopsis',
'and',
'about',
'but',
'now',
'the',
'with',
'you'
]
if (term.length < 2 || stopWords.includes(term)) return false
if (fieldName === 'text') {
const parts = term.split('.')
if (parts.length > 1) {
const newTerms = [term, ...parts]
.filter((t) => t.length >= 2)
.filter((t) => !stopWords.includes(t))
return newTerms
}
}
return term
}
tokenize: customTokenize,
processTerm: customTokenProcessor
},
searchOptions: {
combineWith: 'AND',
fuzzy: true,
// @ts-ignore
boostDocument: (documentId, term, storedFields: Record) => {
console.log(storedFields.titles)
const titles = (storedFields?.titles as string[])
.filter((t) => Boolean(t))
.map((t) => t.toLowerCase())

360
docs/.vitepress/search.ts Normal file
View file

@ -0,0 +1,360 @@
/**
 * Porter-stemmer tables and patterns, hoisted to module scope so they are
 * built once per module load instead of on every token. (The previous
 * version rebuilt ~15 RegExps and a ~170-entry Set on each call — this hook
 * runs once per token over the whole search corpus, so that was a large
 * accidental per-call cost.)
 */
const STEP2_LIST: Record<string, string> = {
  ational: 'ate',
  tional: 'tion',
  enci: 'ence',
  anci: 'ance',
  izer: 'ize',
  bli: 'ble',
  alli: 'al',
  entli: 'ent',
  eli: 'e',
  ousli: 'ous',
  ization: 'ize',
  ation: 'ate',
  ator: 'ate',
  alism: 'al',
  iveness: 'ive',
  fulness: 'ful',
  ousness: 'ous',
  aliti: 'al',
  iviti: 'ive',
  biliti: 'ble',
  logi: 'log'
}

const STEP3_LIST: Record<string, string> = {
  icate: 'ic',
  ative: '',
  alize: 'al',
  iciti: 'ic',
  ical: 'ic',
  ful: '',
  ness: ''
}

// Character-class fragments used to build the "measure" patterns below.
// Note: `y` counts as a vowel here except at word start (handled in stemmer()).
const CONSONANT = '[^aeiou]'
const VOWEL = '[aeiouy]'
const CONSONANTS = '(' + CONSONANT + '[^aeiouy]*)'
const VOWELS = '(' + VOWEL + '[aeiou]*)'

// Porter "measure" tests: m > 0, m == 1, m > 1.
const gt0 = new RegExp('^' + CONSONANTS + '?' + VOWELS + CONSONANTS)
const eq1 = new RegExp(
  '^' + CONSONANTS + '?' + VOWELS + CONSONANTS + VOWELS + '?$'
)
const gt1 = new RegExp('^' + CONSONANTS + '?(' + VOWELS + CONSONANTS + '){2,}')
const vowelInStem = new RegExp('^' + CONSONANTS + '?' + VOWEL)
const consonantLike = new RegExp('^' + CONSONANTS + VOWEL + '[^aeiouwxy]$')

// Suffix patterns (none carries the /g flag, so hoisting is lastIndex-safe).
const sfxLl = /ll$/
const sfxE = /^(.+?)e$/
const sfxY = /^(.+?)y$/
const sfxIon = /^(.+?(s|t))(ion)$/
const sfxEdOrIng = /^(.+?)(ed|ing)$/
const sfxAtOrBlOrIz = /(at|bl|iz)$/
const sfxEED = /^(.+?)eed$/
const sfxS = /^.+?[^s]s$/
const sfxSsesOrIes = /^.+?(ss|i)es$/
const sfxMultiConsonantLike = /([^aeiouylsz])\1$/
const step2 =
  /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/
const step3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/
const step4 =
  /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/

// adapted from these two sources
// https://gist.github.com/sebleier/554280
// https://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list
const STOP_WORDS = new Set([
  'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an',
  'and', 'any', 'are', 'aren', 'as', 'at', 'be', 'because', 'been',
  'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can',
  'cannot', 'com', 'could', 'couldn', 'did', 'didn', 'do', 'does', 'doesn',
  'doing', 'down', 'during', 'each', 'few', 'for', 'from', 'further',
  'had', 'hadn', 'has', 'hasn', 'have', 'haven', 'having', 'he', 'her',
  'here', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'i', 'if',
  'in', 'into', 'is', 'isn', 'it', 'its', 'itself', 'just', 'let', 'll',
  'me', 'more', 'most', 'mustn', 'my', 'myself', 'no', 'nor', 'not',
  'now', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought',
  'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same',
  'shan', 'she', 'should', 'shouldn', 'so', 'some', 'such', 't', 'than',
  'that', 'the', 'their', 'theirs', 'them', 'themselves', 'then',
  'there', 'these', 'they', 'this', 'those', 'through', 'to', 'too',
  'under', 'until', 'up', 've', 'very', 'was', 'wasn', 'we', 'were',
  'weren', 'what', 'when', 'where', 'which', 'while', 'who', 'whom',
  'why', 'will', 'with', 'won', 'would', 'wouldn', 'you', 'your',
  'yours', 'yourself', 'yourselves'
])

/**
 * Porter stemmer: reduces an English word to its stem,
 * e.g. "running" -> "run", "caresses" -> "caress", "ization" -> "ize".
 *
 * @param value - word to stem (any case; lowercased internally)
 * @returns the stemmed word; words shorter than 3 chars are returned as-is
 */
function stemmer(value: string): string {
  let result = String(value).toLowerCase()

  // Exit early: too short to stem.
  if (result.length < 3) {
    return result
  }

  // Uppercase an initial `y` so the vowel classes above never match it.
  let firstCharacterWasLowerCaseY = false
  if (result.codePointAt(0) === 121 /* lowercase y */) {
    firstCharacterWasLowerCaseY = true
    result = 'Y' + result.slice(1)
  }

  // Step 1a: plurals — "sses"/"ies" drop two characters, a trailing "s" one.
  if (sfxSsesOrIes.test(result)) {
    result = result.slice(0, -2)
  } else if (sfxS.test(result)) {
    result = result.slice(0, -1)
  }

  let match: RegExpExecArray | null

  // Step 1b: "eed" / "ed" / "ing" endings.
  if ((match = sfxEED.exec(result))) {
    if (gt0.test(match[1])) {
      result = result.slice(0, -1)
    }
  } else if ((match = sfxEdOrIng.exec(result)) && vowelInStem.test(match[1])) {
    result = match[1]
    if (sfxAtOrBlOrIz.test(result)) {
      // Append `e` (e.g. "conflat(ed)" -> "conflate").
      result += 'e'
    } else if (sfxMultiConsonantLike.test(result)) {
      // Collapse a doubled final consonant (e.g. "hopp" -> "hop").
      result = result.slice(0, -1)
    } else if (consonantLike.test(result)) {
      // CVC with short final consonant: append `e` (e.g. "fil" -> "file").
      result += 'e'
    }
  }

  // Step 1c: terminal "y" -> "i" when the stem contains a vowel.
  if ((match = sfxY.exec(result)) && vowelInStem.test(match[1])) {
    result = match[1] + 'i'
  }

  // Step 2: map double suffixes to single ones (e.g. "ization" -> "ize").
  if ((match = step2.exec(result)) && gt0.test(match[1])) {
    result = match[1] + STEP2_LIST[match[2]]
  }

  // Step 3: "icate", "ative", "alize", ... endings.
  if ((match = step3.exec(result)) && gt0.test(match[1])) {
    result = match[1] + STEP3_LIST[match[2]]
  }

  // Step 4: strip residual suffixes when the measure is > 1.
  if ((match = step4.exec(result))) {
    if (gt1.test(match[1])) {
      result = match[1]
    }
  } else if ((match = sfxIon.exec(result)) && gt1.test(match[1])) {
    result = match[1]
  }

  // Step 5: drop a final "e" on long stems; collapse a final "ll".
  if (
    (match = sfxE.exec(result)) &&
    (gt1.test(match[1]) ||
      (eq1.test(match[1]) && !consonantLike.test(match[1])))
  ) {
    result = match[1]
  }
  if (sfxLl.test(result) && gt1.test(result)) {
    result = result.slice(0, -1)
  }

  // Restore the initial `y`.
  if (firstCharacterWasLowerCaseY) {
    result = 'y' + result.slice(1)
  }
  return result
}

/**
 * MiniSearch `processTerm` hook.
 *
 * Strips dots and lowercases the token (so "V.R" and "vr" index identically),
 * drops English stop words, and Porter-stems everything else.
 *
 * @param token - raw token produced by the tokenizer
 * @returns the stemmed term, or `null` to exclude the term from the index
 */
export const customTokenProcessor = (token: string): string | null => {
  const normalizedToken = token.replace(/\./g, '').toLowerCase()
  return STOP_WORDS.has(normalizedToken) ? null : stemmer(normalizedToken)
}
// This regular expression matches any Unicode space or punctuation character.
// Copied from https://github.com/lucaong/minisearch
// which adapted it from https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7BZ%7D%5Cp%7BP%7D&abb=on&c=on&esc=on
// Hoisted to module scope so this (very large) pattern is created once,
// not on every call.
const SPACE_OR_PUNCTUATION =
  /[\n\r -#%-*,-/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u

// A dot directly between two letters, e.g. "V.R" -> "VR".
const DOT_BETWEEN_LETTERS = /([A-Za-z])\.([A-Za-z])/g

// A lower-to-upper case boundary, e.g. "xManager" -> "x Manager".
const INNER_CAPITAL = /([a-z])([A-Z])/g

/**
 * MiniSearch `tokenize` hook.
 *
 * Removes dots between letters (so "V.R" tokenizes like "VR"), splits on any
 * Unicode space or punctuation (same as the MiniSearch default tokenizer, but
 * with empty strings filtered out), and additionally emits the camelCase
 * parts of each token so e.g. "xManager" also matches a query for "x Manager".
 *
 * @param text - document or query text to tokenize
 * @returns the tokens, possibly followed by their camelCase sub-words
 */
export const customTokenize = (text: string): string[] => {
  // Pre-process: drop dots joining letters before splitting.
  const preprocessedText = text.replace(DOT_BETWEEN_LETTERS, '$1$2')

  // Split on space/punctuation and drop the empty strings split() produces
  // at the text boundaries.
  const tokens = preprocessedText.split(SPACE_OR_PUNCTUATION).filter(Boolean)

  // Keep each original token and append its camelCase components, if any.
  const expandedTokens: string[] = []
  for (const token of tokens) {
    expandedTokens.push(token)
    const splitOnCapitals = token.replace(INNER_CAPITAL, '$1 $2')
    if (splitOnCapitals !== token) {
      expandedTokens.push(...splitOnCapitals.split(' ').filter(Boolean))
    }
  }
  return expandedTokens
}

50
docs/.vitepress/vue-shim.d.ts vendored Normal file
View file

@ -0,0 +1,50 @@
/**
* Copyright (c) 2025 taskylizard. Apache License 2.0.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Compile-time globals replaced at build time.
// NOTE(review): these names mirror VitePress-internal `define` constants —
// confirm against the VitePress version in use if they stop resolving.
declare const __VP_HASH_MAP__: Record<string, string>
declare const __VP_LOCAL_SEARCH__: boolean
declare const __ALGOLIA__: boolean
declare const __CARBON__: boolean
declare const __VUE_PROD_DEVTOOLS__: boolean
declare const __ASSETS_DIR__: string
// Allow importing `.vue` single-file components from TypeScript.
declare module '*.vue' {
  import type { DefineComponent } from 'vue'
  const component: DefineComponent
  export default component
}
// Virtual module exposing the site's resolved config data.
declare module '@siteData' {
  import type { SiteData } from 'vitepress'
  const data: SiteData
  export default data
}
// Virtual module resolving to the active theme entry.
declare module '@theme/index' {
  import type { Theme } from 'vitepress'
  const theme: Theme
  export default theme
}
// Virtual module: lazy loaders for the serialized local-search index.
declare module '@localSearchIndex' {
  const data: Record<string, () => Promise<{ default: string }>>
  export default data
}
// mark.js ships this path without type declarations; reuse the package types.
declare module 'mark.js/src/vanilla.js' {
  import type Mark from 'mark.js'
  const mark: typeof Mark
  export default mark
}

View file

@ -26,14 +26,19 @@
"@headlessui/vue": "^1.7.23",
"@resvg/resvg-js": "^2.6.2",
"@vueuse/core": "^13.0.0",
"@vueuse/integrations": "^13.1.0",
"consola": "^3.2.3",
"feed": "^4.2.2",
"itty-fetcher": "^0.9.4",
"mark.js": "^8.11.1",
"markdown-it": "^14.1.0",
"minisearch": "^7.1.2",
"nitro-cors": "^0.7.1",
"nitropack": "^2.11.6",
"nprogress": "^0.2.0",
"pathe": "^2.0.1",
"reka-ui": "^2.3.1",
"stemmer": "^2.0.1",
"unocss": "66.3.2",
"vitepress": "1.6.3",
"vue": "^3.5.17",

View file

@ -13,7 +13,7 @@
"noUnusedLocals": true,
"strictNullChecks": true,
"forceConsistentCasingInFileNames": true,
"types": ["vitepress"]
"types": ["vitepress", "vitepress/client"]
},
"exclude": ["node_modules"],
"include": [

File diff suppressed because it is too large Load diff