// mirror of https://git.stupid.fish/teidesu/scripts.git
// synced 2025-07-27 18:22:10 +10:00
// 105 lines, 2.7 KiB, TypeScript
import type { NavidromeSong } from '../../utils/navidrome.ts'
|
|
import { createRequire } from 'node:module'
|
|
|
|
import { join } from 'node:path'
|
|
import kuromoji from 'kuromoji'
|
|
import { isKana, toRomaji } from 'wanakana'
|
|
|
|
import { fetchSongs, navidromeFfetch as ffetch } from '../../utils/navidrome.ts'
|
|
|
|
/** Keys of genuinely different tracks that happen to share an artist and title. */
const DISTINCT_TRACK_KEYS = [
  '["sorry about my face","untitled track"]',
  '["kooeetekumogeemusu","neko bushou sengoku emaki"]',
  '["eve","merufuakutorii"]',
]

/** Keys carried over from the original "todo" group (apparently still awaiting review). */
const TODO_KEYS = [
  '["arm","legend of zelda"]',
  '["arm","tomorrow heart beat ~ ashita anata ni dokkidoki☆ ~"]',
  '["dwat","rotladatormarf"]',
  '["fujiwara mari sai","zenbuatashinokawaiino"]',
]

/**
 * Song keys that are never reported as duplicates.
 *
 * Each entry is a JSON-encoded `[artist, title]` pair of already-normalized
 * strings, i.e. exactly the string produced by `getSongKey`.
 */
const WHITELIST_KEYS = new Set([...DISTINCT_TRACK_KEYS, ...TODO_KEYS])
|
|
|
|
/**
 * kuromoji tokenizer, built once while the module loads.
 *
 * kuromoji's builder reports completion through a node-style callback, so it
 * is wrapped in a promise and awaited at the top level. A build error rejects
 * the promise.
 */
const moji = await new Promise<any>((resolve, reject) => {
  // the dictionary directory is addressed relative to wherever `kuromoji/` resolves to
  const kuromojiEntry = createRequire(import.meta.url).resolve('kuromoji/')
  const dicPath = join(kuromojiEntry, '../../dict')

  kuromoji.builder({ dicPath }).build((err, tokenizer) => {
    if (err) {
      reject(err)
      return
    }

    resolve(tokenizer)
  })
})
|
|
|
|
/**
 * Matches a character from any of the ranges used to decide that a string
 * needs Japanese tokenization (U+3000–U+30FF, U+FF00–U+FF9F, U+4E00–U+9FAF,
 * U+3400–U+4DBF).
 */
const JAPANESE_CHARS_RE = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF]/

/** Collapses every run of whitespace into one space and strips both ends. */
function normalizeSpaces(s: string): string {
  return s.replace(/\s+/g, ' ').trim()
}

/**
 * Normalizes an artist name or track title so that differently formatted
 * spellings of the same name compare equal.
 *
 * Steps:
 * 1. lower-case the input;
 * 2. drop the first `(explicit)` marker and all ASCII punctuation;
 * 3. if the text contains Japanese characters, tokenize it with kuromoji and
 *    romanize it: tokens kuromoji marks as KNOWN contribute the romaji of
 *    their reading followed by a space, UNKNOWN tokens contribute their
 *    surface form (romanized when it is pure kana) with no separator;
 * 4. collapse whitespace runs and trim both ends.
 *
 * Step 4 matters because removing `(explicit)` or punctuation leaves stray
 * spaces behind; without it `"Song (Explicit)"` would normalize to `"song "`
 * and never match `"Song"`.
 *
 * @param s raw artist or title string
 * @returns the normalized comparison string
 */
function clean(s: string): string {
  const str = s.toLowerCase()
    .replace(/\(Explicit\)/i, '')
    .replace(/[!@#$%^&*()_+=[\]{}\\|/,.;':"<>`~-]/g, '')

  if (!JAPANESE_CHARS_RE.test(str)) {
    return normalizeSpaces(str)
  }

  // has japanese
  let res = ''
  for (const token of moji.tokenize(str)) {
    if (token.word_type === 'UNKNOWN') {
      res += isKana(token.surface_form) ? toRomaji(token.surface_form) : token.surface_form
    } else if (token.word_type === 'KNOWN') {
      res += `${toRomaji(token.reading)} `
    }
  }

  return normalizeSpaces(res)
}
|
|
|
|
/** Page size requested from `fetchSongs`, and the step by which the fetch offset advances. */
const CHUNK_SIZE = 1_000
|
|
|
|
/**
 * Builds the grouping key for a song: the JSON encoding of its normalized
 * `[artist, title]` pair. Songs whose keys are equal are treated as
 * duplicates of each other.
 */
function getSongKey(song: NavidromeSong) {
  const { artist, title } = song
  const normalized = [artist, title].map(part => clean(part))

  return JSON.stringify(normalized)
}
|
|
|
|
/** Songs grouped by their normalized key (whitelisted keys are never added). */
const seen = new Map<string, NavidromeSong[]>()

// Walk the library one chunk at a time until an empty chunk is returned.
for (let chunkIndex = 0; ; chunkIndex += 1) {
  const page = await fetchSongs(chunkIndex * CHUNK_SIZE, CHUNK_SIZE)
  if (page.length === 0) break

  for (const track of page) {
    const songKey = getSongKey(track)
    if (WHITELIST_KEYS.has(songKey)) continue

    const bucket = seen.get(songKey)
    if (bucket) {
      bucket.push(track)
    } else {
      seen.set(songKey, [track])
    }
  }

  console.log('⌛ fetched chunk %d (%d items)', chunkIndex, page.length)
}
|
|
|
|
// Report groups in sorted key order so the output is stable between runs.
const keysSorted = [...seen.keys()].sort()

/** Number of keys that map to more than one song. */
let duplicates = 0

for (const key of keysSorted) {
  const group = seen.get(key)
  // every stored group holds at least one song; a single song is not a duplicate
  if (group === undefined || group.length === 1) continue

  duplicates += 1

  console.log()
  console.log('found duplicates for %s:', key)
  for (const { artist, title, albumArtist, album, path } of group) {
    console.log(' %s - %s (from %s - %s) (at %s)', artist, title, albumArtist, album, path)
  }
}

if (duplicates > 0) {
  console.log('🚨 %d duplicates found', duplicates)
} else {
  console.log('✅ no duplicates found')
}
|