teidesu-scripts/scripts/infra/navidrome/find-duplicates.ts
2025-05-09 06:46:50 +00:00

100 lines
2.6 KiB
TypeScript

import type { NavidromeSong } from '../../../utils/navidrome.ts'
import { createRequire } from 'node:module'
import { join } from 'node:path'
import kuromoji from 'kuromoji'
import { isKana, toRomaji } from 'wanakana'
import { fetchSongsIter } from '../../../utils/navidrome.ts'
const WHITELIST_KEYS = new Set([
// actual different tracks with the same title
'["sorry about my face","untitled track"]',
'["kooeetekumogeemusu","neko bushou sengoku emaki"]',
'["eve","merufuakutorii"]',
// todo
'["arm","legend of zelda"]',
'["arm","tomorrow heart beat ~ ashita anata ni dokkidoki☆ ~"]',
'["dwat","rotladatormarf"]',
'["fujiwara mari sai","zenbuatashinokawaiino"]',
])
const moji = await new Promise<any>((resolve, reject) => {
kuromoji.builder({
dicPath: join(createRequire(import.meta.url).resolve('kuromoji/'), '../../dict'),
}).build((err, tokenizer) => {
if (err) return reject(err)
resolve(tokenizer)
})
})
function clean(s: string) {
const str = s.toLowerCase()
.replace(/\(Explicit\)/i, '')
.replace(/[!@#$%^&*()_+=[\]{}\\|/,.;':"<>`~-]/g, '')
if (str.match(/[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF]/)) {
// has japanese
const tokens = moji.tokenize(str)
let res = ''
for (const token of tokens) {
if (token.word_type === 'UNKNOWN') {
res += isKana(token.surface_form) ? toRomaji(token.surface_form) : token.surface_form
} else if (token.word_type === 'KNOWN') {
res += `${toRomaji(token.reading)} `
}
}
return res.trimEnd()
}
return str
}
function getSongKey(song: NavidromeSong) {
return JSON.stringify([
clean(song.artist),
clean(song.title),
])
}
const seen = new Map<string, NavidromeSong[]>()
for await (const song of fetchSongsIter({
onChunkProcessed: (page, items) => {
console.log('⌛ fetched chunk %d (%d items)', page, items)
},
})) {
const key = getSongKey(song)
if (WHITELIST_KEYS.has(key)) continue
let arr = seen.get(key)
if (!arr) {
arr = []
seen.set(key, arr)
}
arr.push(song)
}
const keysSorted = Array.from(seen.keys()).sort()
let duplicates = 0
for (const key of keysSorted) {
const arr = seen.get(key)!
if (arr.length === 1) continue
duplicates += 1
console.log()
console.log('found duplicates for %s:', key)
for (const song of arr) {
console.log(' %s - %s (from %s - %s) (at %s)', song.artist, song.title, song.albumArtist, song.album, song.path)
}
}
if (duplicates === 0) {
console.log('✅ no duplicates found')
} else {
console.log('🚨 %d duplicates found', duplicates)
}