mirror of
https://git.stupid.fish/teidesu/scripts.git
synced 2025-07-28 02:32:11 +10:00
chore: update public repo
This commit is contained in:
parent
96426d01c1
commit
68a2d17239
5 changed files with 50 additions and 7 deletions
100
scripts/infra/navidrome/find-duplicates.ts
Normal file
100
scripts/infra/navidrome/find-duplicates.ts
Normal file
|
@ -0,0 +1,100 @@
|
|||
import type { NavidromeSong } from '../../../utils/navidrome.ts'
|
||||
import { createRequire } from 'node:module'
|
||||
|
||||
import { join } from 'node:path'
|
||||
import kuromoji from 'kuromoji'
|
||||
import { isKana, toRomaji } from 'wanakana'
|
||||
|
||||
import { fetchSongsIter } from '../../../utils/navidrome.ts'
|
||||
|
||||
/**
 * Song keys that are skipped during duplicate detection.
 *
 * Each entry is an `[artist, title]` pair in its already-normalized form; it is
 * serialized with `JSON.stringify` so it compares equal to the string keys
 * produced by `getSongKey`.
 */
const WHITELIST_KEYS = new Set<string>(
    ([
        // actual different tracks with the same title
        ['sorry about my face', 'untitled track'],
        ['kooeetekumogeemusu', 'neko bushou sengoku emaki'],
        ['eve', 'merufuakutorii'],
        // todo
        ['arm', 'legend of zelda'],
        ['arm', 'tomorrow heart beat ~ ashita anata ni dokkidoki☆ ~'],
        ['dwat', 'rotladatormarf'],
        ['fujiwara mari sai', 'zenbuatashinokawaiino'],
    ] as [string, string][]).map(([artist, title]) => JSON.stringify([artist, title])),
)
|
||||
|
||||
// Build the kuromoji tokenizer once at module load. The callback-style
// `build()` API is wrapped in a promise so it can be awaited at top level.
const moji = await new Promise<any>((resolve, reject) => {
    // the dictionary directory is located relative to wherever the `kuromoji` package resolves to
    const kuromojiEntry = createRequire(import.meta.url).resolve('kuromoji/')
    const builder = kuromoji.builder({
        dicPath: join(kuromojiEntry, '../../dict'),
    })

    builder.build((err, tokenizer) => {
        if (err) {
            reject(err)
            return
        }

        resolve(tokenizer)
    })
})
|
||||
|
||||
/**
 * Normalizes an artist name or track title into a comparison-friendly form.
 *
 * Steps:
 * 1. lowercase the input;
 * 2. drop every `(explicit)` marker;
 * 3. drop ASCII punctuation;
 * 4. collapse runs of ASCII spaces/tabs into one space and strip leading/trailing
 *    spaces, so that removing a marker or punctuation does not leave stray
 *    whitespace behind (e.g. `"Song (Explicit)"` and `"Song"` both become `"song"`);
 * 5. if the result contains Japanese characters, tokenize it with kuromoji and
 *    romanize it with wanakana.
 *
 * @param s raw artist or title string
 * @returns the normalized string
 */
function clean(s: string) {
    const str = s.toLowerCase()
        .replace(/\(explicit\)/g, '')
        .replace(/[!@#$%^&*()_+=[\]{}\\|/,.;':"<>`~-]/g, '')
        // only ASCII whitespace is touched here: the ideographic space (U+3000)
        // is left for the Japanese branch below to handle as before
        .replace(/[ \t]+/g, ' ')
        .replace(/^ | $/g, '')

    const hasJapanese = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF]/.test(str)
    if (!hasJapanese) return str

    let res = ''

    for (const token of moji.tokenize(str)) {
        if (token.word_type === 'UNKNOWN') {
            // tokens not in the dictionary: romanize pure-kana ones, keep the rest verbatim
            res += isKana(token.surface_form) ? toRomaji(token.surface_form) : token.surface_form
        } else if (token.word_type === 'KNOWN') {
            // dictionary tokens: romanize their reading, one word per token
            res += `${toRomaji(token.reading)} `
        }
    }

    return res.trimEnd()
}
|
||||
|
||||
/**
 * Builds the grouping key for a song: a JSON-encoded `[artist, title]` pair
 * where both parts have been passed through `clean`.
 *
 * @param song song to compute the key for
 * @returns the JSON string used to group songs
 */
function getSongKey(song: NavidromeSong) {
    const artist = clean(song.artist)
    const title = clean(song.title)

    return JSON.stringify([artist, title])
}
|
||||
|
||||
// Songs grouped by their key from `getSongKey`; whitelisted keys are never added.
const seen = new Map<string, NavidromeSong[]>()

const songStream = fetchSongsIter({
    onChunkProcessed: (page, items) => {
        console.log('⌛ fetched chunk %d (%d items)', page, items)
    },
})

for await (const song of songStream) {
    const key = getSongKey(song)
    if (WHITELIST_KEYS.has(key)) continue

    const bucket = seen.get(key)
    if (bucket) {
        bucket.push(song)
    } else {
        // first song seen for this key starts a new group
        seen.set(key, [song])
    }
}
|
||||
|
||||
// Keep only the keys that do not have exactly one song, ordered by key
// (plain string comparison, same order as the default `sort()`).
const duplicateGroups = Array.from(seen)
    .filter(([, group]) => group.length !== 1)
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))

for (const [key, group] of duplicateGroups) {
    console.log()
    console.log('found duplicates for %s:', key)
    group.forEach((song) => {
        console.log(' %s - %s (from %s - %s) (at %s)', song.artist, song.title, song.albumArtist, song.album, song.path)
    })
}

// the reported count is the number of duplicated keys, not the number of songs
const duplicates = duplicateGroups.length

if (duplicates !== 0) {
    console.log('🚨 %d duplicates found', duplicates)
} else {
    console.log('✅ no duplicates found')
}
|
Loading…
Add table
Add a link
Reference in a new issue