teidesu-scripts/scripts/media/soundcloud-dl.ts
2025-01-18 04:01:14 +00:00

285 lines
7.7 KiB
TypeScript

import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import { ffetchAddons } from '@fuman/fetch'
import { assert, asyncPool, base64 } from '@fuman/utils'
import { load } from 'cheerio'
import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici'
import { z } from 'zod'
import { $, question } from 'zx'
import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts'
import { generateOpusImageBlob } from '../../utils/opus.ts'
/**
 * HTTP client for the `api-v2.soundcloud.com` host, created by extending `ffetchBase`.
 *
 * The query parameters and headers below are configured on the client itself
 * rather than on individual calls. `getEnv('SOUNDCLOUD_TOKEN')` is evaluated
 * while this module loads, because it sits inside the options literal.
 *
 * NOTE(review): `client_id` and `app_version` are hard-coded — presumably copied
 * from the web player and liable to go stale; confirm before relying on them.
 */
const ffetchApi = ffetchBase.extend({
baseUrl: 'https://api-v2.soundcloud.com',
// @ts-expect-error lol fixme
query: {
client_id: '4BowhSywvkJtklODQDzjNMq9sK9wyDJ4',
app_version: '1736857534',
app_locale: 'en',
},
// NOTE(review): presumably the `rateLimit` options below configure this addon — confirm against @fuman/fetch
addons: [
ffetchAddons.rateLimitHandler(),
],
rateLimit: {
// a response with HTTP status 429 is reported as rate-limited
isRejected(res) {
return res.status === 429
},
// NOTE(review): unit is not visible here — presumably milliseconds (10 s); confirm
defaultWaitTime: 10_000,
},
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
},
})
/**
 * HTTP client for the `soundcloud.com` website, created by extending `ffetchBase`.
 *
 * Sends the value of `getEnv('SOUNDCLOUD_TOKEN')` as the `oauth_token` cookie
 * (evaluated while this module loads) and passes an undici `ProxyAgent` as the
 * `dispatcher` extra.
 */
const ffetchHtml = ffetchBase.extend({
baseUrl: 'https://soundcloud.com',
headers: {
Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`,
},
// NOTE(review): the proxy address is hard-coded to a local port — requests made
// through this client presumably fail when nothing listens there; confirm
extra: {
// @ts-expect-error lol fixme
dispatcher: new ProxyAgent('http://127.0.0.1:7891'),
},
})
/** Swaps the `-large.jpg` artwork suffix for the `-t500x500.jpg` one. */
function toBigArtworkUrl(artworkUrl: string): string {
  return artworkUrl.replace('-large.jpg', '-t500x500.jpg')
}

/** Shape of a single entry in `media.transcodings`. */
const scTranscodingShape = z.object({
  url: z.string(),
  preset: z.string(),
  format: z.object({
    protocol: z.string(),
    mime_type: z.string(),
  }),
  quality: z.string(),
  is_legacy_transcoding: z.boolean(),
})

/** Shape of the `user` object attached to a track. */
const scTrackUserShape = z.object({
  username: z.string(),
  permalink: z.string(),
})

/**
 * Schema for a full track object (`kind: 'track'`).
 *
 * `artwork_url` may be `null`; when present it is rewritten by `toBigArtworkUrl`.
 */
const ScTrack = z.object({
  id: z.number(),
  kind: z.literal('track'),
  title: z.string(),
  duration: z.number(),
  permalink_url: z.string(),
  artwork_url: z.string().transform(toBigArtworkUrl).nullable(),
  media: z.object({
    transcodings: z.array(scTranscodingShape),
  }),
  track_authorization: z.string(),
  user: scTrackUserShape,
})
type ScTrack = z.infer<typeof ScTrack>
/** Reduced track entry that carries only `id` and `kind`. */
const scTrackStubShape = z.object({
  id: z.number(),
  kind: z.literal('track'),
})

/**
 * Schema for a playlist object.
 *
 * Each element of `tracks` is either a full `ScTrack` or the reduced
 * `{ id, kind }` form; the full schema is tried first.
 */
const ScPlaylist = z.object({
  id: z.number(),
  title: z.string(),
  duration: z.number(),
  permalink_url: z.string(),
  genre: z.string(),
  description: z.string().nullable(),
  track_count: z.number(),
  user: z.object({ username: z.string() }),
  tracks: z.array(z.union([ScTrack, scTrackStubShape])),
})
type ScPlaylist = z.infer<typeof ScPlaylist>
/**
 * Extracts the `window.__sc_hydration` payload embedded in a page's inline script.
 *
 * @param html  HTML text of the page
 * @returns  The parsed hydration array; callers look entries up by `hydratable` and read their `data`
 * @throws  If the page has no hydration script, or the payload is not a JSON array
 */
function extractHydrationData(html: string): Array<{ hydratable: string, data: unknown }> {
  const marker = 'window.__sc_hydration = '
  const $ = load(html)
  const source = $('script:contains(window.__sc_hydration = )').html()
  const start = source == null ? -1 : source.indexOf(marker)
  if (source == null || start === -1) {
    throw new Error('hydration data not found in the page')
  }

  // keep only what follows the assignment, then drop surrounding whitespace
  // and the statement's trailing semicolon (if any) so that plain JSON remains
  const json = source.slice(start + marker.length).trim().replace(/;$/, '')
  const parsed: unknown = JSON.parse(json)
  if (!Array.isArray(parsed)) {
    throw new Error('unexpected hydration data format (expected an array)')
  }

  return parsed
}
/**
 * Downloads a track page and parses the track out of its hydration data.
 *
 * @param url  Address of the track page, requested through `ffetchHtml`
 * @returns  The `sound` hydration entry's data, validated against `ScTrack`
 * @throws  `no track found` when the page has no `sound` hydration entry
 */
async function fetchTrackByUrl(url: string) {
  const page = await ffetchHtml(url).text()
  const entry = extractHydrationData(page).find(it => it.hydratable === 'sound')
  if (entry) {
    return ScTrack.parse(entry.data)
  }
  throw new Error('no track found')
}
/**
 * Downloads a playlist page and parses the playlist out of its hydration data.
 *
 * @param url  Address of the playlist page, requested through `ffetchHtml`
 * @returns  The `playlist` hydration entry's data, validated against `ScPlaylist`
 * @throws  `no playlist found` when the page has no `playlist` hydration entry
 */
async function fetchPlaylistByUrl(url: string) {
  const page = await ffetchHtml(url).text()
  const entry = extractHydrationData(page).find(it => it.hydratable === 'playlist')
  if (entry) {
    return ScPlaylist.parse(entry.data)
  }
  throw new Error('no playlist found')
}
/**
 * Requests `/tracks` from the API client for the given ids in a single call.
 *
 * @param trackIds  Track ids; sent comma-joined in the `ids` query parameter
 * @returns  The response body validated as an array of `ScTrack`
 */
async function fetchTracksById(trackIds: number[]) {
  const ids = trackIds.join(',')
  const request = ffetchApi('/tracks', { query: { ids } })
  return request.parsedJson(z.array(ScTrack))
}
/** Picks the transcoding to download: the first `hq` or `opus_0_0` entry, otherwise the last one listed. */
function pickTranscoding(
  transcodings: ScTrack['media']['transcodings'],
): ScTrack['media']['transcodings'][number] | undefined {
  return transcodings.find(t => t.quality === 'hq' || t.preset === 'opus_0_0')
    ?? transcodings[transcodings.length - 1]
}

/** Maps a transcoding mime type (e.g. `audio/mpeg`) to the output file extension. */
function audioExtensionFor(mimeType: string): string {
  const subtype = mimeType.match(/^audio\/(\w+)(?:;|$)/)?.[1]
  if (!subtype) throw new Error(`unsupported mime type: ${mimeType}`)

  // the mime subtype is not always the conventional file extension
  if (subtype === 'mp4') return 'm4a'
  if (subtype === 'mpeg') return 'mp3'
  return subtype
}

/**
 * Downloads a single track with ffmpeg, tagging it with title and artist and,
 * for mp3 / ogg / m4a outputs, embedding the artwork when the track has one.
 *
 * @param track  Track to download
 * @param opts  Download options
 * @throws  If the track has no transcodings, its mime type is not recognised,
 *   or any request / the ffmpeg invocation fails
 */
async function downloadTrack(track: ScTrack, opts: {
  /* download destination (filename without extension) */
  destination: string
}) {
  const transcoding = pickTranscoding(track.media.transcodings)
  if (!transcoding) throw new Error(`track ${track.id} has no transcodings`)

  const ext = audioExtensionFor(transcoding.format.mime_type)
  const filename = `${opts.destination}.${ext}`

  const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
  const artworkBytes = track.artwork_url
    ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer())
    : null

  // the transcoding url only resolves to the actual stream url
  const { url: hlsUrl } = await ffetchApi(transcoding.url, {
    query: {
      track_authorization: track.track_authorization,
    },
  }).parsedJson(z.object({
    url: z.string(),
  }))

  const params: string[] = [
    '-y',
    '-i',
    hlsUrl,
  ]

  try {
    if (artworkBytes) {
      if (ext === 'mp3') {
        await writeFile(artworkPath, artworkBytes)
        params.push(
          '-i',
          artworkPath,
          '-map',
          '1:v:0',
          '-id3v2_version',
          '3',
          '-metadata:s:v',
          'title=Album cover',
          '-metadata:s:v',
          'comment=Cover (front)',
        )
      } else if (ext === 'ogg') {
        // ogg takes the picture as a base64 metadata block, no temporary file needed
        const blob = base64.encode(await generateOpusImageBlob(artworkBytes))
        params.push(
          '-metadata',
          `metadata_block_picture=${blob}`,
        )
      } else if (ext === 'm4a') {
        await writeFile(artworkPath, artworkBytes)
        params.push(
          '-i',
          artworkPath,
          '-map',
          '1',
          '-disposition:v',
          'attached_pic',
        )
      }
    }

    params.push(
      '-map',
      '0:a',
      '-c',
      'copy',
      '-metadata',
      `title=${track.title}`,
      '-metadata',
      `artist=${track.user.username}`,
      filename,
    )

    await $`ffmpeg ${params}`.quiet(true)
  } finally {
    // remove the temporary artwork even when ffmpeg fails; `force` makes this
    // a no-op when the file was never written
    await rm(artworkPath, { force: true })
  }
}
/**
 * Downloads every track of a playlist into
 * `assets/soundcloud-dl/<playlist user> - <playlist title>/`.
 *
 * Tracks that the playlist only lists in reduced form (without `user`) are
 * first fetched by id in chunks of 20. Files are named
 * `<position>. <artist> - <title>`, with the position zero-padded to the width
 * of the largest position in the playlist.
 *
 * @param playlist  Playlist to download
 */
async function downloadPlaylist(playlist: ScPlaylist) {
  const tracks: ScTrack[] = []
  const tracksToFetch = new Set<number>()
  const trackIdToPosition = new Map<number, number>()

  for (let i = 0; i < playlist.tracks.length; i++) {
    const track = playlist.tracks[i]
    trackIdToPosition.set(track.id, i + 1)
    // only full track objects carry `user`
    if ('user' in track) {
      tracks.push(track)
    } else {
      tracksToFetch.add(track.id)
    }
  }

  const spinnies = new Spinnies()

  if (tracksToFetch.size) {
    let remaining = tracksToFetch.size
    spinnies.add('fetching', { text: `fetching ${remaining} tracks` })
    await asyncPool(chunks(Array.from(tracksToFetch), 20), async (ids) => {
      const res = await fetchTracksById(Array.from(ids))
      for (const track of res) {
        tracks.push(track)
      }
      remaining -= ids.length
      spinnies.update('fetching', { text: `fetching ${remaining} tracks` })
    })
    spinnies.succeed('fetching')
  }

  const destDir = join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
  await mkdir(destDir, { recursive: true })

  // positions run from 1 to playlist.tracks.length, so pad to the digit count
  // of that largest position (log10 would be one short at 10, 100, ...)
  const posPadSize = String(playlist.tracks.length).length

  await asyncPool(tracks, async (track) => {
    const position = trackIdToPosition.get(track.id)!
    // artist and title are free-form text, so strip anything that is not
    // valid in a file name before joining it into a path
    const filename = sanitizeFilename(
      `${position.toString().padStart(posPadSize, '0')}. ${track.user.username} - ${track.title}`,
    )
    spinnies.add(`${track.id}`, { text: filename })
    await downloadTrack(track, {
      destination: join(destDir, filename),
    })
    spinnies.remove(`${track.id}`)
  }, { limit: 8 })

  console.log('done')
  spinnies.stopAll()
}
// Entry point: takes the url from the first CLI argument or prompts for it,
// then downloads either a whole playlist (`/<user>/sets/...`) or a single track.
const url = (process.argv[2] ?? await question('url > ')).trim()

if (!url.startsWith('https://soundcloud.com/')) {
  console.error('url must start with https://soundcloud.com/')
  process.exit(1)
}

// the user segment is matched as "anything but a slash" so that permalinks
// containing underscores are still recognised as playlists
if (/^https:\/\/soundcloud\.com\/[^/]+\/sets\//i.test(url)) {
  await downloadPlaylist(await fetchPlaylistByUrl(url))
} else {
  const track = await fetchTrackByUrl(url)
  const filename = `${track.user.username}-${track.title}`
  const outputDir = 'assets/soundcloud-dl'

  console.log('downloading track:', filename)
  // ffmpeg does not create missing directories for its output file
  await mkdir(outputDir, { recursive: true })
  await downloadTrack(track, {
    destination: join(outputDir, sanitizeFilename(filename)),
  })
}