import type { NavidromeSong } from '../../../utils/navidrome.ts'
import { createRequire } from 'node:module'
import { join } from 'node:path'
import kuromoji from 'kuromoji'
import { isKana, toRomaji } from 'wanakana'
import { fetchSongsIter } from '../../../utils/navidrome.ts'

/**
 * Minimal structural view of a kuromoji token: only the fields this script
 * reads. Declared locally so the script type-checks without depending on the
 * exact names exported by kuromoji's type declarations.
 */
interface MojiToken {
  word_type: string
  surface_form: string
  reading?: string
}

/** Minimal structural view of the kuromoji tokenizer used by {@link clean}. */
interface MojiTokenizer {
  tokenize: (text: string) => MojiToken[]
}

/**
 * Song keys (as produced by {@link getSongKey}) that are never reported,
 * even when several songs share them.
 */
const WHITELIST_KEYS = new Set([
  // actual different tracks with the same title
  '["sorry about my face","untitled track"]',
  '["kooeetekumogeemusu","neko bushou sengoku emaki"]',
  '["eve","merufuakutorii"]',
  // todo
  '["arm","legend of zelda"]',
  '["arm","tomorrow heart beat ~ ashita anata ni dokkidoki☆ ~"]',
  '["dwat","rotladatormarf"]',
  '["fujiwara mari sai","zenbuatashinokawaiino"]',
])

/**
 * Matches any character from the CJK symbols/punctuation, hiragana, katakana,
 * full-/half-width forms and CJK ideograph ranges. Used to decide whether a
 * string needs to go through the Japanese tokenizer.
 */
const JAPANESE_RE = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF]/

/** ASCII punctuation that is dropped before comparing names. */
const PUNCTUATION_RE = /[!@#$%^&*()_+=[\]{}\\|/,.;':"<>`~-]/g

// kuromoji only offers a callback API, so wrap it in a promise and block module
// evaluation until the dictionary has been loaded.
const moji = await new Promise<MojiTokenizer>((resolve, reject) => {
  // NOTE(review): presumably `resolve('kuromoji/')` points at the package entry
  // file two levels below the package root, which is why `../../dict` is
  // joined onto it -- confirm against the installed package layout.
  const dicPath = join(createRequire(import.meta.url).resolve('kuromoji/'), '../../dict')

  kuromoji.builder({ dicPath }).build((err, tokenizer) => {
    if (err) {
      reject(err)
    } else {
      resolve(tokenizer)
    }
  })
})

/**
 * Normalizes an artist name or track title so that trivially different
 * spellings compare equal.
 *
 * The input is lower-cased, the first `(explicit)` marker is removed and ASCII
 * punctuation is stripped. If the result still contains Japanese characters it
 * is tokenized with kuromoji and romanized token by token.
 *
 * @param s raw artist name or title
 * @returns the normalized string
 */
function clean(s: string): string {
  const str = s.toLowerCase()
    .replace(/\(Explicit\)/i, '')
    .replace(PUNCTUATION_RE, '')

  if (!JAPANESE_RE.test(str)) return str

  let res = ''
  for (const token of moji.tokenize(str)) {
    switch (token.word_type) {
      case 'UNKNOWN':
        // not in the dictionary: romanize only when the surface is pure kana,
        // otherwise keep it verbatim; no separator is appended in either case
        res += isKana(token.surface_form)
          ? toRomaji(token.surface_form)
          : token.surface_form
        break
      case 'KNOWN':
        // dictionary word: use its reading, followed by a separating space
        res += `${toRomaji(token.reading)} `
        break
      // tokens of any other word type contribute nothing
    }
  }

  return res.trimEnd()
}

/**
 * Builds the grouping key for a song: a JSON array holding the normalized
 * artist and the normalized title.
 *
 * @param song song to build the key for
 * @returns the JSON-encoded `[artist, title]` pair
 */
function getSongKey(song: NavidromeSong): string {
  return JSON.stringify([
    clean(song.artist),
    clean(song.title),
  ])
}

// group every non-whitelisted song by its normalized key
const seen = new Map<string, NavidromeSong[]>()

for await (const song of fetchSongsIter({
  onChunkProcessed: (page, items) => {
    console.log('⌛ fetched chunk %d (%d items)', page, items)
  },
})) {
  const key = getSongKey(song)
  if (WHITELIST_KEYS.has(key)) continue

  const group = seen.get(key)
  if (group) {
    group.push(song)
  } else {
    seen.set(key, [song])
  }
}

// report groups in key order so that the output is stable between runs
const keysSorted = Array.from(seen.keys()).sort()

let duplicates = 0
for (const key of keysSorted) {
  const songs = seen.get(key)
  // a key shared by a single song is not a duplicate
  if (!songs || songs.length === 1) continue

  duplicates += 1

  console.log()
  console.log('found duplicates for %s:', key)
  for (const song of songs) {
    console.log(' %s - %s (from %s - %s) (at %s)', song.artist, song.title, song.albumArtist, song.album, song.path)
  }
}

if (duplicates === 0) {
  console.log('✅ no duplicates found')
} else {
  console.log('🚨 %d duplicates found', duplicates)
}