merge search branch into redesign

This commit is contained in:
taskylizard 2025-07-04 11:18:29 +00:00
parent c7153b6739
commit 5bbdf245ae
No known key found for this signature in database
GPG key ID: 1820131ED1A24120
7 changed files with 1532 additions and 35 deletions

View file

@ -108,6 +108,12 @@ export default defineConfig({
import.meta.url
)
)
},
{
find: /^.*VPLocalSearchBox.vue$/,
replacement: fileURLToPath(
new URL('./theme/components/Search.vue', import.meta.url)
)
}
]
},

View file

@ -1,5 +1,6 @@
import type { DefaultTheme } from 'vitepress'
import consola from 'consola'
import { customTokenize, customTokenProcessor } from './search'
import { transform, transformGuide } from '../../website/transformer'
// @unocss-include
@ -49,44 +50,13 @@ export const search: DefaultTheme.Config['search'] = {
},
miniSearch: {
options: {
tokenize: (text) => text.split(/[\n\r #%*,=/:;?[\]{}()&]+/u), // simplified charset: removed [-_.@] and non-english chars (diacritics etc.)
processTerm: (term, fieldName) => {
// biome-ignore lint/style/noParameterAssign: h
term = term
.trim()
.toLowerCase()
.replace(/^\.+/, '')
.replace(/\.+$/, '')
const stopWords = [
'frontmatter',
'$frontmatter.synopsis',
'and',
'about',
'but',
'now',
'the',
'with',
'you'
]
if (term.length < 2 || stopWords.includes(term)) return false
if (fieldName === 'text') {
const parts = term.split('.')
if (parts.length > 1) {
const newTerms = [term, ...parts]
.filter((t) => t.length >= 2)
.filter((t) => !stopWords.includes(t))
return newTerms
}
}
return term
}
tokenize: customTokenize,
processTerm: customTokenProcessor
},
searchOptions: {
combineWith: 'AND',
fuzzy: true,
// @ts-ignore
boostDocument: (documentId, term, storedFields: Record) => {
console.log(storedFields.titles)
const titles = (storedFields?.titles as string[])
.filter((t) => Boolean(t))
.map((t) => t.toLowerCase())

360
docs/.vitepress/search.ts Normal file
View file

@ -0,0 +1,360 @@
/**
 * Porter-stemmer tables and patterns, hoisted to module scope so they are
 * built once per module load instead of on every token. (The previous
 * version rebuilt ~15 RegExps and a ~170-entry Set on each call — this hook
 * runs once per token over the whole search corpus, so that was a large
 * accidental per-call cost.)
 */
const STEP2_LIST: Record<string, string> = {
  ational: 'ate',
  tional: 'tion',
  enci: 'ence',
  anci: 'ance',
  izer: 'ize',
  bli: 'ble',
  alli: 'al',
  entli: 'ent',
  eli: 'e',
  ousli: 'ous',
  ization: 'ize',
  ation: 'ate',
  ator: 'ate',
  alism: 'al',
  iveness: 'ive',
  fulness: 'ful',
  ousness: 'ous',
  aliti: 'al',
  iviti: 'ive',
  biliti: 'ble',
  logi: 'log'
}

const STEP3_LIST: Record<string, string> = {
  icate: 'ic',
  ative: '',
  alize: 'al',
  iciti: 'ic',
  ical: 'ic',
  ful: '',
  ness: ''
}

// Character-class fragments used to build the "measure" patterns below.
// Note: `y` counts as a vowel here except at word start (handled in stemmer()).
const CONSONANT = '[^aeiou]'
const VOWEL = '[aeiouy]'
const CONSONANTS = '(' + CONSONANT + '[^aeiouy]*)'
const VOWELS = '(' + VOWEL + '[aeiou]*)'

// Porter "measure" tests: m > 0, m == 1, m > 1.
const gt0 = new RegExp('^' + CONSONANTS + '?' + VOWELS + CONSONANTS)
const eq1 = new RegExp(
  '^' + CONSONANTS + '?' + VOWELS + CONSONANTS + VOWELS + '?$'
)
const gt1 = new RegExp('^' + CONSONANTS + '?(' + VOWELS + CONSONANTS + '){2,}')
const vowelInStem = new RegExp('^' + CONSONANTS + '?' + VOWEL)
const consonantLike = new RegExp('^' + CONSONANTS + VOWEL + '[^aeiouwxy]$')

// Suffix patterns (none carries the /g flag, so hoisting is lastIndex-safe).
const sfxLl = /ll$/
const sfxE = /^(.+?)e$/
const sfxY = /^(.+?)y$/
const sfxIon = /^(.+?(s|t))(ion)$/
const sfxEdOrIng = /^(.+?)(ed|ing)$/
const sfxAtOrBlOrIz = /(at|bl|iz)$/
const sfxEED = /^(.+?)eed$/
const sfxS = /^.+?[^s]s$/
const sfxSsesOrIes = /^.+?(ss|i)es$/
const sfxMultiConsonantLike = /([^aeiouylsz])\1$/
const step2 =
  /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/
const step3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/
const step4 =
  /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/

// adapted from these two sources
// https://gist.github.com/sebleier/554280
// https://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list
const STOP_WORDS = new Set([
  'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an',
  'and', 'any', 'are', 'aren', 'as', 'at', 'be', 'because', 'been',
  'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can',
  'cannot', 'com', 'could', 'couldn', 'did', 'didn', 'do', 'does', 'doesn',
  'doing', 'down', 'during', 'each', 'few', 'for', 'from', 'further',
  'had', 'hadn', 'has', 'hasn', 'have', 'haven', 'having', 'he', 'her',
  'here', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'i', 'if',
  'in', 'into', 'is', 'isn', 'it', 'its', 'itself', 'just', 'let', 'll',
  'me', 'more', 'most', 'mustn', 'my', 'myself', 'no', 'nor', 'not',
  'now', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought',
  'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same',
  'shan', 'she', 'should', 'shouldn', 'so', 'some', 'such', 't', 'than',
  'that', 'the', 'their', 'theirs', 'them', 'themselves', 'then',
  'there', 'these', 'they', 'this', 'those', 'through', 'to', 'too',
  'under', 'until', 'up', 've', 'very', 'was', 'wasn', 'we', 'were',
  'weren', 'what', 'when', 'where', 'which', 'while', 'who', 'whom',
  'why', 'will', 'with', 'won', 'would', 'wouldn', 'you', 'your',
  'yours', 'yourself', 'yourselves'
])

/**
 * Porter stemmer: reduces an English word to its stem,
 * e.g. "running" -> "run", "caresses" -> "caress", "ization" -> "ize".
 *
 * @param value - word to stem (any case; lowercased internally)
 * @returns the stemmed word; words shorter than 3 chars are returned as-is
 */
function stemmer(value: string): string {
  let result = String(value).toLowerCase()

  // Exit early: too short to stem.
  if (result.length < 3) {
    return result
  }

  // Uppercase an initial `y` so the vowel classes above never match it.
  let firstCharacterWasLowerCaseY = false
  if (result.codePointAt(0) === 121 /* lowercase y */) {
    firstCharacterWasLowerCaseY = true
    result = 'Y' + result.slice(1)
  }

  // Step 1a: plurals — "sses"/"ies" drop two characters, a trailing "s" one.
  if (sfxSsesOrIes.test(result)) {
    result = result.slice(0, -2)
  } else if (sfxS.test(result)) {
    result = result.slice(0, -1)
  }

  let match: RegExpExecArray | null

  // Step 1b: "eed" / "ed" / "ing" endings.
  if ((match = sfxEED.exec(result))) {
    if (gt0.test(match[1])) {
      result = result.slice(0, -1)
    }
  } else if ((match = sfxEdOrIng.exec(result)) && vowelInStem.test(match[1])) {
    result = match[1]
    if (sfxAtOrBlOrIz.test(result)) {
      // Append `e` (e.g. "conflat(ed)" -> "conflate").
      result += 'e'
    } else if (sfxMultiConsonantLike.test(result)) {
      // Collapse a doubled final consonant (e.g. "hopp" -> "hop").
      result = result.slice(0, -1)
    } else if (consonantLike.test(result)) {
      // CVC with short final consonant: append `e` (e.g. "fil" -> "file").
      result += 'e'
    }
  }

  // Step 1c: terminal "y" -> "i" when the stem contains a vowel.
  if ((match = sfxY.exec(result)) && vowelInStem.test(match[1])) {
    result = match[1] + 'i'
  }

  // Step 2: map double suffixes to single ones (e.g. "ization" -> "ize").
  if ((match = step2.exec(result)) && gt0.test(match[1])) {
    result = match[1] + STEP2_LIST[match[2]]
  }

  // Step 3: "icate", "ative", "alize", ... endings.
  if ((match = step3.exec(result)) && gt0.test(match[1])) {
    result = match[1] + STEP3_LIST[match[2]]
  }

  // Step 4: strip residual suffixes when the measure is > 1.
  if ((match = step4.exec(result))) {
    if (gt1.test(match[1])) {
      result = match[1]
    }
  } else if ((match = sfxIon.exec(result)) && gt1.test(match[1])) {
    result = match[1]
  }

  // Step 5: drop a final "e" on long stems; collapse a final "ll".
  if (
    (match = sfxE.exec(result)) &&
    (gt1.test(match[1]) ||
      (eq1.test(match[1]) && !consonantLike.test(match[1])))
  ) {
    result = match[1]
  }
  if (sfxLl.test(result) && gt1.test(result)) {
    result = result.slice(0, -1)
  }

  // Restore the initial `y`.
  if (firstCharacterWasLowerCaseY) {
    result = 'y' + result.slice(1)
  }
  return result
}

/**
 * MiniSearch `processTerm` hook.
 *
 * Strips dots and lowercases the token (so "V.R" and "vr" index identically),
 * drops English stop words, and Porter-stems everything else.
 *
 * @param token - raw token produced by the tokenizer
 * @returns the stemmed term, or `null` to exclude the term from the index
 */
export const customTokenProcessor = (token: string): string | null => {
  const normalizedToken = token.replace(/\./g, '').toLowerCase()
  return STOP_WORDS.has(normalizedToken) ? null : stemmer(normalizedToken)
}
// This regular expression matches any Unicode space or punctuation character.
// Copied from https://github.com/lucaong/minisearch
// which adapted it from https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7BZ%7D%5Cp%7BP%7D&abb=on&c=on&esc=on
// Hoisted to module scope so this (very large) pattern is created once,
// not on every call.
const SPACE_OR_PUNCTUATION =
  /[\n\r -#%-*,-/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u

// A dot directly between two letters, e.g. "V.R" -> "VR".
const DOT_BETWEEN_LETTERS = /([A-Za-z])\.([A-Za-z])/g

// A lower-to-upper case boundary, e.g. "xManager" -> "x Manager".
const INNER_CAPITAL = /([a-z])([A-Z])/g

/**
 * MiniSearch `tokenize` hook.
 *
 * Removes dots between letters (so "V.R" tokenizes like "VR"), splits on any
 * Unicode space or punctuation (same as the MiniSearch default tokenizer, but
 * with empty strings filtered out), and additionally emits the camelCase
 * parts of each token so e.g. "xManager" also matches a query for "x Manager".
 *
 * @param text - document or query text to tokenize
 * @returns the tokens, possibly followed by their camelCase sub-words
 */
export const customTokenize = (text: string): string[] => {
  // Pre-process: drop dots joining letters before splitting.
  const preprocessedText = text.replace(DOT_BETWEEN_LETTERS, '$1$2')

  // Split on space/punctuation and drop the empty strings split() produces
  // at the text boundaries.
  const tokens = preprocessedText.split(SPACE_OR_PUNCTUATION).filter(Boolean)

  // Keep each original token and append its camelCase components, if any.
  const expandedTokens: string[] = []
  for (const token of tokens) {
    expandedTokens.push(token)
    const splitOnCapitals = token.replace(INNER_CAPITAL, '$1 $2')
    if (splitOnCapitals !== token) {
      expandedTokens.push(...splitOnCapitals.split(' ').filter(Boolean))
    }
  }
  return expandedTokens
}

50
docs/.vitepress/vue-shim.d.ts vendored Normal file
View file

@ -0,0 +1,50 @@
/**
* Copyright (c) 2025 taskylizard. Apache License 2.0.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Compile-time globals replaced at build time.
// NOTE(review): these names mirror VitePress-internal `define` constants —
// confirm against the VitePress version in use if they stop resolving.
declare const __VP_HASH_MAP__: Record<string, string>
declare const __VP_LOCAL_SEARCH__: boolean
declare const __ALGOLIA__: boolean
declare const __CARBON__: boolean
declare const __VUE_PROD_DEVTOOLS__: boolean
declare const __ASSETS_DIR__: string
// Allow importing `.vue` single-file components from TypeScript.
declare module '*.vue' {
  import type { DefineComponent } from 'vue'
  const component: DefineComponent
  export default component
}
// Virtual module exposing the site's resolved config data.
declare module '@siteData' {
  import type { SiteData } from 'vitepress'
  const data: SiteData
  export default data
}
// Virtual module resolving to the active theme entry.
declare module '@theme/index' {
  import type { Theme } from 'vitepress'
  const theme: Theme
  export default theme
}
// Virtual module: lazy loaders for the serialized local-search index.
declare module '@localSearchIndex' {
  const data: Record<string, () => Promise<{ default: string }>>
  export default data
}
// mark.js ships this path without type declarations; reuse the package types.
declare module 'mark.js/src/vanilla.js' {
  import type Mark from 'mark.js'
  const mark: typeof Mark
  export default mark
}

View file

@ -26,14 +26,19 @@
"@headlessui/vue": "^1.7.23",
"@resvg/resvg-js": "^2.6.2",
"@vueuse/core": "^13.0.0",
"@vueuse/integrations": "^13.1.0",
"consola": "^3.2.3",
"feed": "^4.2.2",
"itty-fetcher": "^0.9.4",
"mark.js": "^8.11.1",
"markdown-it": "^14.1.0",
"minisearch": "^7.1.2",
"nitro-cors": "^0.7.1",
"nitropack": "^2.11.6",
"nprogress": "^0.2.0",
"pathe": "^2.0.1",
"reka-ui": "^2.3.1",
"stemmer": "^2.0.1",
"unocss": "66.3.2",
"vitepress": "1.6.3",
"vue": "^3.5.17",

View file

@ -13,7 +13,7 @@
"noUnusedLocals": true,
"strictNullChecks": true,
"forceConsistentCasingInFileNames": true,
"types": ["vitepress"]
"types": ["vitepress", "vitepress/client"]
},
"exclude": ["node_modules"],
"include": [

File diff suppressed because it is too large Load diff