chore: update public repo

2026-01-14 08:01:12 +11:00 · 2025-05-09 06:46:50 +00:00 · 2025-05-09 06:46:50 +00:00 · 68a2d17239
commit 68a2d17239
parent 96426d01c1
5 changed files with 50 additions and 7 deletions
--- a/scripts/infra/navidrome/find-broken.ts
+++ b/scripts/infra/navidrome/find-broken.ts
@ -0,0 +1,42 @@
+import { $, ProcessOutput } from 'zx'
+import { fetchSongsIter } from '../../../utils/navidrome.ts'
+import { asyncPool } from '@fuman/utils'
+import { join } from 'path/posix'
+
+// async function checkIfBroken(path: string) {
+//   const r = await $`ffprobe -v error -show_entries stream=codec_type,codec_name,index:stream_tags=title,language -of json ${path}`.json()
+// }
+
+// for await (const song of fetchSongsIter()) {
+
+// }
+
+// Paths of every file that ffmpeg could not decode without complaints.
+const broken: string[] = []
+
+await asyncPool(
+  fetchSongsIter({
+    onChunkProcessed: (page, items) => {
+      console.log(`Processed page ${page} with ${items} items`)
+    },
+  }),
+  async (song) => {
+    // Rewrite the `/music/s3/` prefix to `/mnt/tank/enc/media/music/`
+    // (presumably where the same library is mounted on this machine).
+    const path = join(song.libraryPath, song.path)
+      .replace('/music/s3/', '/mnt/tank/enc/media/music/')
+
+    // Decode the whole file into the null muxer. A rejected command, a
+    // non-zero exit code or any stderr output all count as a failure.
+    let failure: ProcessOutput | undefined
+    try {
+      const result = await $`ffmpeg -v error -i ${path} -f null -`.quiet()
+      const decodedCleanly = result.exitCode === 0 && result.stderr.trim() === ''
+      if (!decodedCleanly) failure = result
+    } catch (err) {
+      // Anything that is not a process result is unexpected, so re-throw it.
+      if (!(err instanceof ProcessOutput)) throw err
+      failure = err
+    }
+
+    if (failure === undefined) return
+
+    console.log('%s - %s (%s) seems broken:', song.artist, song.title, path)
+    console.log(failure.stderr)
+    broken.push(path)
+  },
+  { limit: 8 },
+)
+
+
+// Print a summary and signal failure through the exit code.
+if (broken.length !== 0) {
+  console.log('Found %d broken files:', broken.length)
+  broken.forEach((brokenPath) => {
+    console.log('  %s', brokenPath)
+  })
+  process.exit(1)
+}
--- a/scripts/infra/navidrome/find-duplicates.ts
+++ b/scripts/infra/navidrome/find-duplicates.ts
@ -0,0 +1,100 @@
+import type { NavidromeSong } from '../../../utils/navidrome.ts'
+import { createRequire } from 'node:module'
+
+import { join } from 'node:path'
+import kuromoji from 'kuromoji'
+import { isKana, toRomaji } from 'wanakana'
+
+import { fetchSongsIter } from '../../../utils/navidrome.ts'
+
+// Song keys that are skipped entirely when looking for duplicates.
+// Each entry must match the output of `getSongKey` exactly: a JSON-encoded
+// `[artist, title]` pair in which both strings have been passed through `clean`.
+const WHITELIST_KEYS = new Set([
+  // actual different tracks with the same title
+  '["sorry about my face","untitled track"]',
+  '["kooeetekumogeemusu","neko bushou sengoku emaki"]',
+  '["eve","merufuakutorii"]',
+  // todo
+  '["arm","legend of zelda"]',
+  '["arm","tomorrow heart beat ~ ashita anata ni dokkidoki☆ ~"]',
+  '["dwat","rotladatormarf"]',
+  '["fujiwara mari sai","zenbuatashinokawaiino"]',
+])
+
+// Dictionary directory handed to kuromoji, located relative to wherever the
+// `kuromoji/` specifier resolves from this module.
+const kuromojiDictPath = join(
+  createRequire(import.meta.url).resolve('kuromoji/'),
+  '../../dict',
+)
+
+// Tokenizer used by `clean`. The builder is callback-based, so it is wrapped
+// in a promise and awaited once at module load.
+const moji = await new Promise<any>((resolve, reject) => {
+  const builder = kuromoji.builder({ dicPath: kuromojiDictPath })
+  builder.build((err, tokenizer) => {
+    if (err) {
+      reject(err)
+      return
+    }
+    resolve(tokenizer)
+  })
+})
+
+/**
+ * Normalizes an artist or title string so that differently written variants
+ * of the same name compare equal.
+ *
+ * The input is lowercased, the first "(explicit)" marker is removed and ASCII
+ * punctuation is stripped. If the result still contains Japanese text (CJK
+ * punctuation, hiragana, katakana, full-width forms or kanji) it is tokenized
+ * with kuromoji and converted to romaji: known words contribute their reading
+ * followed by a space, unknown tokens are appended as-is (romanized when they
+ * are pure kana), and tokens of any other word type are dropped.
+ *
+ * @param s raw string to normalize
+ * @returns the normalized string
+ */
+function clean(s: string) {
+  const stripped = s
+    .toLowerCase()
+    .replace(/\(Explicit\)/i, '')
+    .replace(/[!@#$%^&*()_+=[\]{}\\|/,.;':"<>`~-]/g, '')
+
+  const hasJapanese = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFF9F\u4E00-\u9FAF\u3400-\u4DBF]/.test(stripped)
+  if (!hasJapanese) {
+    return stripped
+  }
+
+  const parts: string[] = []
+  for (const token of moji.tokenize(stripped)) {
+    switch (token.word_type) {
+      case 'UNKNOWN': {
+        const surface = token.surface_form
+        parts.push(isKana(surface) ? toRomaji(surface) : surface)
+        break
+      }
+      case 'KNOWN':
+        // the trailing space separates consecutive dictionary words
+        parts.push(`${toRomaji(token.reading)} `)
+        break
+    }
+  }
+
+  return parts.join('').trimEnd()
+}
+
+/**
+ * Builds the key under which a song is grouped when searching for duplicates:
+ * the JSON encoding of its normalized `[artist, title]` pair.
+ *
+ * @param song song to build the key for
+ * @returns JSON string of the cleaned artist and title
+ */
+function getSongKey(song: NavidromeSong) {
+  const artist = clean(song.artist)
+  const title = clean(song.title)
+  return JSON.stringify([artist, title])
+}
+
+// Songs grouped by their normalized artist/title key.
+const seen = new Map<string, NavidromeSong[]>()
+
+const songsIter = fetchSongsIter({
+  onChunkProcessed: (page, items) => {
+    console.log('⌛ fetched chunk %d (%d items)', page, items)
+  },
+})
+
+for await (const song of songsIter) {
+  const key = getSongKey(song)
+  // whitelisted keys never take part in duplicate detection
+  if (WHITELIST_KEYS.has(key)) continue
+
+  const bucket = seen.get(key)
+  if (bucket === undefined) {
+    seen.set(key, [song])
+  } else {
+    bucket.push(song)
+  }
+}
+
+// Keep only the keys shared by several songs, ordered by key
+// (same code-unit ordering as the default string sort).
+const duplicateGroups = [...seen.entries()]
+  .filter(([, songs]) => songs.length !== 1)
+  .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
+
+for (const [key, songs] of duplicateGroups) {
+  console.log()
+  console.log('found duplicates for %s:', key)
+  songs.forEach((song) => {
+    console.log('  %s - %s (from %s - %s) (at %s)', song.artist, song.title, song.albumArtist, song.album, song.path)
+  })
+}
+
+// Number of keys (not individual songs) that have more than one entry.
+const duplicates = duplicateGroups.length
+
+if (duplicates !== 0) {
+  console.log('🚨 %d duplicates found', duplicates)
+} else {
+  console.log('✅ no duplicates found')
+}
--- a/scripts/infra/navidrome/remux-m4a.ts
+++ b/scripts/infra/navidrome/remux-m4a.ts
@ -0,0 +1,66 @@
+import { readFile, rm } from 'node:fs/promises'
+import { join } from 'node:path'
+import { $ } from 'zx'
+import { downloadStream } from '../../../utils/fetch.ts'
+import { getEnv } from '../../../utils/misc.ts'
+import { fetchSongs } from '../../../utils/navidrome.ts'
+import { WebdavClient } from '../../../utils/webdav.ts'
+
+// WebDAV client used to read and write the library files; endpoint and
+// credentials are taken from the environment.
+const webdav = new WebdavClient({
+  baseUrl: getEnv('NAVIDROME_WEBDAV_ENDPOINT'),
+  username: getEnv('NAVIDROME_WEBDAV_USERNAME'),
+  password: getEnv('NAVIDROME_WEBDAV_PASSWORD'),
+})
+
+// Walk the whole library page by page. Every `.m4a` file whose audio stream
+// is actually FLAC is downloaded, remuxed into a `.flac` container with the
+// stream copied as-is, uploaded next to the original, and the original is
+// then deleted.
+const CHUNK_SIZE = 1000
+for (let offset = 0; ; offset += CHUNK_SIZE) {
+  const songs = await fetchSongs(offset, CHUNK_SIZE)
+  if (songs.length === 0) break
+
+  for (const song of songs) {
+    const ext = song.path.split('.').pop()!
+    if (ext !== 'm4a') continue
+
+    console.log('❌ song %s is m4a, remuxing...', song.path)
+    const webdavPath = song.path.replace('/music/s3/', '/')
+    // Swap only the trailing extension: a plain string replace would rewrite
+    // the first ".m4a" found anywhere in the path (e.g. in a directory name).
+    const flacWebdavPath = webdavPath.replace(/\.m4a$/, '.flac')
+    const res = await webdav.get(webdavPath).catch(() => null)
+
+    if (!res) {
+      console.log('  ❌ failed to get %s', webdavPath)
+      continue
+    }
+
+    const tmpfile = join('assets', `${song.id}.m4a`)
+    const remuxed = join('assets', `${song.id}.flac`)
+
+    try {
+      await downloadStream(res.body!, tmpfile)
+      console.log('  - downloaded to %s', tmpfile)
+
+      const probe = await $`ffprobe -v error -show_entries stream=codec_type,codec_name,index:stream_tags=title,language -of json ${tmpfile}`.json()
+      const audioStream = probe.streams.find(stream => stream.codec_type === 'audio')
+      if (!audioStream) {
+        console.log('  ❌ no audio stream found')
+        continue
+      }
+      const codec = audioStream.codec_name
+
+      if (codec !== 'flac') {
+        console.log(`  ❌ audio stream is ${codec}, not flac, skipping`)
+        continue
+      }
+
+      console.log('  - audio stream is flac, remuxing')
+
+      // remux
+      // drop any stale output from an earlier run before ffmpeg writes to it
+      await rm(remuxed, { force: true })
+      try {
+        await $`ffmpeg -i ${tmpfile} -c:a copy ${remuxed}`.quiet(true)
+        console.log('  - remuxed to %s', remuxed)
+
+        // upload the new file first, delete the original only afterwards
+        await webdav.put(flacWebdavPath, await readFile(remuxed))
+        await webdav.delete(webdavPath)
+        console.log('  - uploaded to %s', flacWebdavPath)
+      } finally {
+        // never leave a (possibly partial) remuxed file behind
+        await rm(remuxed, { force: true })
+      }
+    } finally {
+      // runs on success, on `continue` and on errors, so the downloaded
+      // temp file is always cleaned up
+      await rm(tmpfile, { force: true })
+    }
+  }
+}
--- a/scripts/infra/navidrome/stats.ts
+++ b/scripts/infra/navidrome/stats.ts
@ -0,0 +1,18 @@
+import { fetchSongs, fetchSongsIter } from '../../../utils/navidrome.ts'
+
+// Running totals over every song returned by Navidrome.
+// NOTE(review): the output labels imply `size` is in bytes and `duration` in
+// seconds — confirm against the Navidrome API.
+let count = 0
+let totalSize = 0
+let totalDuration = 0
+
+console.log('⌛ fetching songs...')
+
+for await (const song of fetchSongsIter()) {
+  count += 1
+  totalSize += song.size
+  totalDuration += song.duration
+}
+
+console.log('---')
+console.log('total songs: %d', count)
+// `toFixed` returns a string. `%s` prints it verbatim, whereas `%d` would
+// convert it back to a number and drop the trailing zeros.
+console.log('total size: %s GiB', (totalSize / 1024 / 1024 / 1024).toFixed(3))
+console.log('total duration: %s min (%s h)', (totalDuration / 60).toFixed(3), (totalDuration / 60 / 60).toFixed(3))