diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts
index a72a9582c..acfdbee7c 100644
--- a/docs/.vitepress/config.mts
+++ b/docs/.vitepress/config.mts
@@ -108,6 +108,12 @@ export default defineConfig({
               import.meta.url
             )
           )
+        },
+        {
+          find: /^.*\/VPLocalSearchBox\.vue$/,
+          replacement: fileURLToPath(
+            new URL('./theme/components/Search.vue', import.meta.url)
+          )
         }
       ]
     },
diff --git a/docs/.vitepress/constants.ts b/docs/.vitepress/constants.ts
index 819d6517c..5f7d2d2e0 100644
--- a/docs/.vitepress/constants.ts
+++ b/docs/.vitepress/constants.ts
@@ -1,5 +1,6 @@
 import type { DefaultTheme } from 'vitepress'
 import consola from 'consola'
+import { customTokenize, customTokenProcessor } from './search'
 import { transform, transformGuide } from '../../website/transformer'
 
 // @unocss-include
@@ -49,44 +50,12 @@ export const search: DefaultTheme.Config['search'] = {
     },
     miniSearch: {
       options: {
-        tokenize: (text) => text.split(/[\n\r #%*,=/:;?[\]{}()&]+/u), // simplified charset: removed [-_.@] and non-english chars (diacritics etc.)
-        processTerm: (term, fieldName) => {
-          // biome-ignore lint/style/noParameterAssign: h
-          term = term
-            .trim()
-            .toLowerCase()
-            .replace(/^\.+/, '')
-            .replace(/\.+$/, '')
-          const stopWords = [
-            'frontmatter',
-            '$frontmatter.synopsis',
-            'and',
-            'about',
-            'but',
-            'now',
-            'the',
-            'with',
-            'you'
-          ]
-          if (term.length < 2 || stopWords.includes(term)) return false
-
-          if (fieldName === 'text') {
-            const parts = term.split('.')
-            if (parts.length > 1) {
-              const newTerms = [term, ...parts]
-                .filter((t) => t.length >= 2)
-                .filter((t) => !stopWords.includes(t))
-              return newTerms
-            }
-          }
-          return term
-        }
+        tokenize: customTokenize,
+        processTerm: customTokenProcessor
       },
       searchOptions: {
-        combineWith: 'AND',
-        fuzzy: true,
         // @ts-ignore
         boostDocument: (documentId, term, storedFields: Record<string, any>) => {
           const titles = (storedFields?.titles as string[])
             .filter((t) => Boolean(t))
             .map((t) => t.toLowerCase())
diff --git a/docs/.vitepress/search.ts b/docs/.vitepress/search.ts
new file mode 100644
index 000000000..1a2428303
--- /dev/null
+++ b/docs/.vitepress/search.ts
@@ -0,0 +1,388 @@
+// Tokenizer and term processor handed to MiniSearch from constants.ts.
+// Every table, pattern and set below is built once at module load, because
+// the term processor runs for each token that is indexed or queried.
+
+// Stemmer step 2: suffix -> replacement.
+const step2list: Record<string, string> = {
+  ational: 'ate',
+  tional: 'tion',
+  enci: 'ence',
+  anci: 'ance',
+  izer: 'ize',
+  bli: 'ble',
+  alli: 'al',
+  entli: 'ent',
+  eli: 'e',
+  ousli: 'ous',
+  ization: 'ize',
+  ation: 'ate',
+  ator: 'ate',
+  alism: 'al',
+  iveness: 'ive',
+  fulness: 'ful',
+  ousness: 'ous',
+  aliti: 'al',
+  iviti: 'ive',
+  biliti: 'ble',
+  logi: 'log'
+}
+
+// Stemmer step 3: suffix -> replacement.
+const step3list: Record<string, string> = {
+  icate: 'ic',
+  ative: '',
+  alize: 'al',
+  iciti: 'ic',
+  ical: 'ic',
+  ful: '',
+  ness: ''
+}
+
+const consonant = '[^aeiou]'
+const vowel = '[aeiouy]'
+const consonants = '(' + consonant + '[^aeiouy]*)'
+const vowels = '(' + vowel + '[aeiou]*)'
+
+const gt0 = new RegExp('^' + consonants + '?' + vowels + consonants)
+const eq1 = new RegExp(
+  '^' + consonants + '?' + vowels + consonants + vowels + '?$'
+)
+const gt1 = new RegExp(
+  '^' + consonants + '?(' + vowels + consonants + '){2,}'
+)
+const vowelInStem = new RegExp('^' + consonants + '?' + vowel)
+const consonantLike = new RegExp('^' + consonants + vowel + '[^aeiouwxy]$')
+
+const sfxLl = /ll$/
+const sfxE = /^(.+?)e$/
+const sfxY = /^(.+?)y$/
+const sfxIon = /^(.+?(s|t))(ion)$/
+const sfxEdOrIng = /^(.+?)(ed|ing)$/
+const sfxAtOrBlOrIz = /(at|bl|iz)$/
+const sfxEED = /^(.+?)eed$/
+const sfxS = /^.+?[^s]s$/
+const sfxSsesOrIes = /^.+?(ss|i)es$/
+const sfxMultiConsonantLike = /([^aeiouylsz])\1$/
+const step2 =
+  /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/
+const step3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/
+const step4 =
+  /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/
+
+// adapted from these two sources
+// https://gist.github.com/sebleier/554280
+// https://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list
+const stopWords = new Set([
+  'a',
+  'about',
+  'above',
+  'after',
+  'again',
+  'against',
+  'all',
+  'am',
+  'an',
+  'and',
+  'any',
+  'are',
+  'aren',
+  'as',
+  'at',
+  'be',
+  'because',
+  'been',
+  'before',
+  'being',
+  'below',
+  'between',
+  'both',
+  'but',
+  'by',
+  'can',
+  'cannot',
+  'com',
+  'could',
+  'couldn',
+  'did',
+  'didn',
+  'do',
+  'does',
+  'doesn',
+  'doing',
+  'down',
+  'during',
+  'each',
+  'few',
+  'for',
+  'from',
+  'further',
+  'had',
+  'hadn',
+  'has',
+  'hasn',
+  'have',
+  'haven',
+  'having',
+  'he',
+  'her',
+  'here',
+  'hers',
+  'herself',
+  'him',
+  'himself',
+  'his',
+  'how',
+  'i',
+  'if',
+  'in',
+  'into',
+  'is',
+  'isn',
+  'it',
+  'its',
+  'itself',
+  'just',
+  'let',
+  'll',
+  'me',
+  'more',
+  'most',
+  'mustn',
+  'my',
+  'myself',
+  'no',
+  'nor',
+  'not',
+  'now',
+  'of',
+  'off',
+  'on',
+  'once',
+  'only',
+  'or',
+  'other',
+  'ought',
+  'our',
+  'ours',
+  'ourselves',
+  'out',
+  'over',
+  'own',
+  're',
+  's',
+  'same',
+  'shan',
+  'she',
+  'should',
+  'shouldn',
+  'so',
+  'some',
+  'such',
+  't',
+  'than',
+  'that',
+  'the',
+  'their',
+  'theirs',
+  'them',
+  'themselves',
+  'then',
+  'there',
+  'these',
+  'they',
+  'this',
+  'those',
+  'through',
+  'to',
+  'too',
+  'under',
+  'until',
+  'up',
+  've',
+  'very',
+  'was',
+  'wasn',
+  'we',
+  'were',
+  'weren',
+  'what',
+  'when',
+  'where',
+  'which',
+  'while',
+  'who',
+  'whom',
+  'why',
+  'will',
+  'with',
+  'won',
+  'would',
+  'wouldn',
+  'you',
+  'your',
+  'yours',
+  'yourself',
+  'yourselves'
+])
+
+// This regular expression matches any Unicode space or punctuation character
+// Copied from https://github.com/lucaong/minisearch
+// which adapted from https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7BZ%7D%5Cp%7BP%7D&abb=on&c=on&esc=on
+const SPACE_OR_PUNCTUATION =
+  /[\n\r -#%-*,-/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u
+
+/**
+ * Reduce an English word to its stem by stripping suffixes in fixed steps.
+ *
+ * @param value - Word to stem; it is lowercased first.
+ * @returns The stem. Inputs shorter than three characters are returned
+ *   lowercased and otherwise unchanged.
+ */
+function stemmer(value: string): string {
+  let result = String(value).toLowerCase()
+
+  // Exit early.
+  if (result.length < 3) {
+    return result
+  }
+
+  let firstCharacterWasLowerCaseY = false
+
+  // Detect initial `y`, make sure it never matches.
+  if (
+    result.codePointAt(0) === 121 // Lowercase Y
+  ) {
+    firstCharacterWasLowerCaseY = true
+    result = 'Y' + result.slice(1)
+  }
+
+  // Step 1a.
+  if (sfxSsesOrIes.test(result)) {
+    // Remove last two characters.
+    result = result.slice(0, -2)
+  } else if (sfxS.test(result)) {
+    // Remove last character.
+    result = result.slice(0, -1)
+  }
+
+  let match: RegExpExecArray | null
+
+  // Step 1b.
+  if ((match = sfxEED.exec(result))) {
+    if (gt0.test(match[1])) {
+      // Remove last character.
+      result = result.slice(0, -1)
+    }
+  } else if (
+    (match = sfxEdOrIng.exec(result)) &&
+    vowelInStem.test(match[1])
+  ) {
+    result = match[1]
+
+    if (sfxAtOrBlOrIz.test(result)) {
+      // Append `e`.
+      result += 'e'
+    } else if (sfxMultiConsonantLike.test(result)) {
+      // Remove last character.
+      result = result.slice(0, -1)
+    } else if (consonantLike.test(result)) {
+      // Append `e`.
+      result += 'e'
+    }
+  }
+
+  // Step 1c.
+  if ((match = sfxY.exec(result)) && vowelInStem.test(match[1])) {
+    // Remove suffixing `y` and append `i`.
+    result = match[1] + 'i'
+  }
+
+  // Step 2.
+  if ((match = step2.exec(result)) && gt0.test(match[1])) {
+    result = match[1] + step2list[match[2]]
+  }
+
+  // Step 3.
+  if ((match = step3.exec(result)) && gt0.test(match[1])) {
+    result = match[1] + step3list[match[2]]
+  }
+
+  // Step 4.
+  if ((match = step4.exec(result))) {
+    if (gt1.test(match[1])) {
+      result = match[1]
+    }
+  } else if ((match = sfxIon.exec(result)) && gt1.test(match[1])) {
+    result = match[1]
+  }
+
+  // Step 5.
+  if (
+    (match = sfxE.exec(result)) &&
+    (gt1.test(match[1]) ||
+      (eq1.test(match[1]) && !consonantLike.test(match[1])))
+  ) {
+    result = match[1]
+  }
+
+  if (sfxLl.test(result) && gt1.test(result)) {
+    result = result.slice(0, -1)
+  }
+
+  // Turn initial `Y` back to `y`.
+  if (firstCharacterWasLowerCaseY) {
+    result = 'y' + result.slice(1)
+  }
+
+  return result
+}
+
+/**
+ * MiniSearch `processTerm` hook: normalize a token, drop stop words, stem.
+ *
+ * @param token - A single token produced by the tokenizer.
+ * @returns The stemmed term, or `null` when the token is a stop word or is
+ *   empty once its dots are removed.
+ */
+export const customTokenProcessor = (token: string): string | null => {
+  // Remove dots and normalize case before processing
+  const normalizedToken = token.replace(/\./g, '').toLowerCase()
+
+  if (normalizedToken === '' || stopWords.has(normalizedToken)) {
+    return null
+  }
+
+  return stemmer(normalizedToken)
+}
+
+/**
+ * MiniSearch `tokenize` hook: split text on whitespace and punctuation, then
+ * add the parts of camelCase tokens as extra tokens.
+ *
+ * @param text - Text to tokenize.
+ * @returns Non-empty tokens; a token such as "xManager" is followed by its
+ *   parts "x" and "Manager".
+ */
+export const customTokenize = (text: string): string[] => {
+  // Remove dots between letters so "V.R" becomes "VR". The second letter is
+  // only looked ahead at, so chained acronyms such as "U.S.A" collapse fully
+  const preprocessedText = text.replace(/([A-Za-z])\.(?=[A-Za-z])/g, '$1')
+
+  // Same split as the MiniSearch default tokenizer, minus empty strings
+  const tokens = preprocessedText.split(SPACE_OR_PUNCTUATION).filter(Boolean)
+
+  const expandedTokens: string[] = []
+
+  for (const token of tokens) {
+    expandedTokens.push(token)
+
+    // Insert a space at every lower-to-upper case boundary and also emit the
+    // resulting parts, so that "xManager" matches a search for "x Manager"
+    const splitOnCapitals = token.replace(/([a-z])([A-Z])/g, '$1 $2')
+    if (splitOnCapitals !== token) {
+      expandedTokens.push(...splitOnCapitals.split(' ').filter(Boolean))
+    }
+  }
+
+  return expandedTokens
+}
diff --git a/docs/.vitepress/vue-shim.d.ts b/docs/.vitepress/vue-shim.d.ts
new file mode 100644
index 000000000..77273f5fb
--- /dev/null
+++ b/docs/.vitepress/vue-shim.d.ts
@@ -0,0 +1,50 @@
+/**
+ *  Copyright (c) 2025 taskylizard. Apache License 2.0.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+declare const __VP_HASH_MAP__: Record<string, string>
+declare const __VP_LOCAL_SEARCH__: boolean
+declare const __ALGOLIA__: boolean
+declare const __CARBON__: boolean
+declare const __VUE_PROD_DEVTOOLS__: boolean
+declare const __ASSETS_DIR__: string
+
+declare module '*.vue' {
+  import type { DefineComponent } from 'vue'
+  const component: DefineComponent
+  export default component
+}
+
+declare module '@siteData' {
+  import type { SiteData } from 'vitepress'
+  const data: SiteData
+  export default data
+}
+
+declare module '@theme/index' {
+  import type { Theme } from 'vitepress'
+  const theme: Theme
+  export default theme
+}
+
+declare module '@localSearchIndex' {
+  const data: Record<string, () => Promise<{ default: string }>>
+  export default data
+}
+
+declare module 'mark.js/src/vanilla.js' {
+  import type Mark from 'mark.js'
+  const mark: typeof Mark
+  export default mark
+}
diff --git a/package.json b/package.json
index c5baec1cb..6b4059714 100644
--- a/package.json
+++ b/package.json
@@ -26,14 +26,19 @@
     "@headlessui/vue": "^1.7.23",
     "@resvg/resvg-js": "^2.6.2",
     "@vueuse/core": "^13.0.0",
+    "@vueuse/integrations": "^13.1.0",
     "consola": "^3.2.3",
     "feed": "^4.2.2",
     "itty-fetcher": "^0.9.4",
+    "mark.js": "^8.11.1",
+    "markdown-it": "^14.1.0",
+    "minisearch": "^7.1.2",
     "nitro-cors": "^0.7.1",
     "nitropack": "^2.11.6",
     "nprogress": "^0.2.0",
     "pathe": "^2.0.1",
     "reka-ui": "^2.3.1",
+    "stemmer": "^2.0.1",
     "unocss": "66.3.2",
     "vitepress": "1.6.3",
     "vue": "^3.5.17",
diff --git a/tsconfig.json b/tsconfig.json
index 1a4386d09..988613a32 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -13,7 +13,7 @@
     "noUnusedLocals": true,
     "strictNullChecks": true,
     "forceConsistentCasingInFileNames": true,
-    "types": ["vitepress"]
+    "types": ["vitepress", "vitepress/client"]
   },
   "exclude": ["node_modules"],
   "include": [
diff --git a/website/theme/components/Search.vue b/website/theme/components/Search.vue
new file mode 100644
index 000000000..260ca3496
--- /dev/null
+++ b/website/theme/components/Search.vue
@@ -0,0 +1,1106 @@ + + + + +