mirror of
https://git.stupid.fish/teidesu/scripts.git
synced 2025-07-28 02:32:11 +10:00
chore: update public repo
This commit is contained in:
parent
e0109980c0
commit
e7c9507247
25 changed files with 5364 additions and 0 deletions
105
scripts/infra/navidrome-find-duplicates.ts
Normal file
105
scripts/infra/navidrome-find-duplicates.ts
Normal file
|
@ -0,0 +1,105 @@
|
|||
import type { NavidromeSong } from '../../utils/navidrome.ts'
|
||||
import { createRequire } from 'node:module'
|
||||
|
||||
import { join } from 'node:path'
|
||||
import kuromoji from 'kuromoji'
|
||||
import { isKana, toRomaji } from 'wanakana'
|
||||
|
||||
import { fetchSongs, navidromeFfetch as ffetch } from '../../utils/navidrome.ts'
|
||||
|
||||
/**
 * Song keys that are never reported as duplicates.
 *
 * Each entry is a JSON array string of two normalized strings, i.e. the same
 * format that `getSongKey` produces, because entries are compared against its
 * output with `WHITELIST_KEYS.has(key)`.
 */
const WHITELIST_KEYS: ReadonlySet<string> = new Set<string>([
    // actual different tracks with the same title
    '["sorry about my face","untitled track"]',
    '["kooeetekumogeemusu","neko bushou sengoku emaki"]',
    '["eve","merufuakutorii"]',

    // todo
    '["arm","legend of zelda"]',
    '["arm","tomorrow heart beat ~ ashita anata ni dokkidoki☆ ~"]',
    '["dwat","rotladatormarf"]',
    '["fujiwara mari sai","zenbuatashinokawaiino"]',
])
|
||||
|
||||
// Tokenizer built once at startup and used by `clean` to split Japanese text.
// The callback-style kuromoji builder is wrapped in a promise so it can be
// awaited at module top level.
const moji = await new Promise<any>((resolve, reject) => {
    // The dictionary directory is located relative to whatever path
    // `kuromoji/` resolves to from this module.
    const kuromojiEntry = createRequire(import.meta.url).resolve('kuromoji/')
    const dicPath = join(kuromojiEntry, '../../dict')
    const builder = kuromoji.builder({ dicPath })

    builder.build((err, tokenizer) => {
        if (err) {
            reject(err)
            return
        }

        resolve(tokenizer)
    })
})
|
||||
|
||||
/**
 * Normalizes an artist name or track title into the form used for duplicate
 * detection.
 *
 * Steps:
 * 1. lowercase the input;
 * 2. drop the first `(Explicit)` marker and all ASCII punctuation listed in
 *    the character class below;
 * 3. trim surrounding whitespace, so that e.g. `Song (Explicit)` and `Song`
 *    normalize to the same value instead of `"song "` vs `"song"`;
 * 4. if the result contains characters from the Japanese/CJK ranges tested
 *    below, tokenize it with the `moji` tokenizer and rebuild the string from
 *    romanized tokens.
 *
 * @param s raw artist or title string
 * @returns the normalized string
 */
function clean(s: string): string {
    const str = s.toLowerCase()
        .replace(/\(Explicit\)/i, '')
        .replace(/[!@#$%^&*()_+=[\]{}\\|/,.;':"<>`~-]/g, '')
        // stripping the marker/punctuation can leave whitespace at the edges,
        // which would otherwise make otherwise-equal names compare unequal
        .trim()

    const hasJapanese = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF]/.test(str)
    if (!hasJapanese) {
        return str
    }

    const tokens = moji.tokenize(str)

    let res = ''

    for (const token of tokens) {
        if (token.word_type === 'UNKNOWN') {
            // no reading is used for unknown tokens: romanize the surface form
            // when it is pure kana, otherwise keep it as is (no separator added)
            res += isKana(token.surface_form) ? toRomaji(token.surface_form) : token.surface_form
        } else if (token.word_type === 'KNOWN') {
            // known tokens contribute their romanized reading followed by a space
            res += `${toRomaji(token.reading)} `
        }
        // tokens with any other word_type are dropped
    }

    // remove the separator left after the last known token
    return res.trimEnd()
}
|
||||
|
||||
/** Number of songs requested from `fetchSongs` per call. */
const CHUNK_SIZE = 1_000
|
||||
|
||||
/**
 * Builds the grouping key for a song: a JSON array string holding the
 * normalized artist followed by the normalized title.
 *
 * @param song song whose `artist` and `title` are normalized with `clean`
 * @returns the JSON-encoded `[artist, title]` key
 */
function getSongKey(song: NavidromeSong) {
    const { artist, title } = song
    const normalized = [artist, title].map(part => clean(part))

    return JSON.stringify(normalized)
}
|
||||
|
||||
// Songs grouped by their normalized key (see `getSongKey`); whitelisted keys
// are never added.
const seen = new Map<string, NavidromeSong[]>()

// Page through the library CHUNK_SIZE songs at a time until an empty page
// is returned.
for (let chunk = 0; ; chunk++) {
    const offset = chunk * CHUNK_SIZE
    const songs = await fetchSongs(offset, CHUNK_SIZE)
    if (songs.length === 0) break

    for (const song of songs) {
        const key = getSongKey(song)
        if (WHITELIST_KEYS.has(key)) continue

        const bucket = seen.get(key)
        if (bucket) {
            bucket.push(song)
        } else {
            seen.set(key, [song])
        }
    }

    console.log('⌛ fetched chunk %d (%d items)', chunk, songs.length)
}
|
||||
|
||||
// Report every key that maps to more than one song, in sorted key order,
// followed by a one-line summary.
const keysSorted = [...seen.keys()]
keysSorted.sort()

let duplicates = 0

keysSorted.forEach((key) => {
    const group = seen.get(key)
    // a single entry means the song is unique under its normalized key
    if (group === undefined || group.length === 1) return

    duplicates += 1

    console.log()
    console.log('found duplicates for %s:', key)
    group.forEach((song) => {
        console.log(' %s - %s (from %s - %s) (at %s)', song.artist, song.title, song.albumArtist, song.album, song.path)
    })
})

if (duplicates !== 0) {
    console.log('🚨 %d duplicates found', duplicates)
} else {
    console.log('✅ no duplicates found')
}
|
Loading…
Add table
Add a link
Reference in a new issue