// teidesu-scripts/scripts/media/soundcloud-dl.ts
// 2025-01-24 09:56:11 +00:00
//
// 424 lines
// 12 KiB
// TypeScript

import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import { ffetchAddons } from '@fuman/fetch'
import { assert, asyncPool, base64, sleep } from '@fuman/utils'
import { load } from 'cheerio'
import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici'
import { z } from 'zod'
import { $, ProcessOutput, question } from 'zx'
import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts'
import { generateOpusImageBlob } from '../../utils/opus.ts'
// Query-string parameters sent with every request made through `ffetchApi`.
const SC_API_DEFAULT_QUERY = {
    client_id: '4BowhSywvkJtklODQDzjNMq9sK9wyDJ4',
    app_version: '1736857534',
    app_locale: 'en',
}

// Headers sent with every request made through `ffetchApi`.
// The OAuth value comes from `getEnv('SOUNDCLOUD_TOKEN')`.
const SC_API_DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
    'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
}

/**
 * Fetch client for `https://api-v2.soundcloud.com`.
 *
 * Extends the shared base client with the default query/headers above and
 * installs the `rateLimitHandler` addon, which individual requests configure
 * through their `rateLimit` option.
 */
const ffetchApi = ffetchBase.extend({
    baseUrl: 'https://api-v2.soundcloud.com',
    query: SC_API_DEFAULT_QUERY,
    addons: [ffetchAddons.rateLimitHandler()],
    headers: SC_API_DEFAULT_HEADERS,
})
// Cookie header value carrying the token returned by `getEnv('SOUNDCLOUD_TOKEN')`.
const SC_WEB_COOKIE = `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`

/**
 * Fetch client for `https://soundcloud.com` pages.
 *
 * Relative URLs are resolved against the site root and every request
 * carries the `oauth_token` cookie.
 */
const ffetchHtml = ffetchBase.extend({
    baseUrl: 'https://soundcloud.com',
    headers: { Cookie: SC_WEB_COOKIE },
})
/** Rewrites the `-large.jpg` suffix of an artwork URL to `-t500x500.jpg`. */
function toT500ArtworkUrl(artworkUrl: string): string {
    return artworkUrl.replace('-large.jpg', '-t500x500.jpg')
}

/** Schema of one entry of a track's `media.transcodings` list. */
const ScTranscoding = z.object({
    url: z.string(),
    preset: z.string(),
    format: z.object({
        protocol: z.string(),
        mime_type: z.string(),
    }),
    quality: z.string(),
    is_legacy_transcoding: z.boolean(),
})

/**
 * Schema of a fully populated track object.
 *
 * `artwork_url` may be `null`; when it is a string it is rewritten by
 * `toT500ArtworkUrl` during parsing.
 */
const ScTrack = z.object({
    id: z.number(),
    kind: z.literal('track'),
    title: z.string(),
    duration: z.number(),
    description: z.string().nullable(),
    permalink_url: z.string(),
    artwork_url: z.string().transform(toT500ArtworkUrl).nullable(),
    media: z.object({
        transcodings: z.array(ScTranscoding),
    }),
    track_authorization: z.string(),
    user: z.object({
        username: z.string(),
        permalink: z.string(),
    }),
})
/** Parsed (output) type of the {@link ScTrack} schema. */
type ScTrack = z.infer<typeof ScTrack>
/** Schema of a playlist entry that carries only the track `id` and `kind`. */
const ScTrackStub = z.object({
    id: z.number(),
    kind: z.literal('track'),
})

/**
 * Schema of a playlist object.
 *
 * Each element of `tracks` is validated as a full `ScTrack` first and falls
 * back to the id-only stub; a missing `tracks` field parses as an empty array.
 */
const ScPlaylist = z.object({
    id: z.number(),
    title: z.string(),
    duration: z.number(),
    permalink_url: z.string(),
    genre: z.string().nullish(),
    description: z.string().nullish(),
    track_count: z.number(),
    user: z.object({
        username: z.string(),
    }),
    tracks: z.union([ScTrack, ScTrackStub]).array().default(() => []),
})
/** Parsed (output) type of the {@link ScPlaylist} schema. */
type ScPlaylist = z.infer<typeof ScPlaylist>
/**
 * Schema of one item of a likes collection.
 *
 * Both `track` and `playlist` are optional; consumers check which of the
 * two is present.
 */
const ScLike = z.object({
    created_at: z.string(),
    kind: z.literal('like'),
    track: z.optional(ScTrack),
    playlist: z.optional(ScPlaylist),
})
/** The parts of a `window.__sc_hydration` entry that this script reads. */
const ScHydrationEntry = z.object({
    hydratable: z.string().optional(),
    data: z.unknown(),
})

/**
 * Extracts the `window.__sc_hydration` array embedded in a page.
 *
 * @param html  raw HTML of the page
 * @returns the hydration entries, reduced to their `hydratable` tag and `data` payload
 * @throws Error when the page has no hydration script; `SyntaxError` when the
 *   payload is not valid JSON; a zod error when it is not an array of objects
 */
function extractHydrationData(html: string): z.infer<typeof ScHydrationEntry>[] {
    const assignment = 'window.__sc_hydration = '
    const $ = load(html)
    const script = $('script:contains(window.__sc_hydration = )')

    // `.html()` yields null when the selection is empty
    const source = script.html()
    const start = source == null ? -1 : source.indexOf(assignment)
    if (source == null || start === -1) {
        throw new Error('no hydration data found in page')
    }

    // keep only the assigned expression, tolerating surrounding whitespace,
    // and drop the statement terminator if there is one
    let json = source.slice(start + assignment.length).trim()
    if (json.endsWith(';')) {
        json = json.slice(0, -1)
    }

    return z.array(ScHydrationEntry).parse(JSON.parse(json))
}
/**
 * Loads a track page and parses the track embedded in its hydration data.
 *
 * @param url  URL of the track page
 * @returns the validated track
 * @throws Error('no track found') when no `sound` hydration entry exists
 */
async function fetchTrackByUrl(url: string) {
    const page = await ffetchHtml(url).text()
    const entries = extractHydrationData(page)
    const sound = entries.find((entry: { hydratable?: unknown }) => entry.hydratable === 'sound')
    if (sound) {
        return ScTrack.parse(sound.data)
    }
    throw new Error('no track found')
}
/**
 * Loads a playlist page and parses the playlist embedded in its hydration data.
 *
 * @param url  URL of the playlist page
 * @returns the validated playlist
 * @throws Error('no playlist found') when no `playlist` hydration entry exists
 */
async function fetchPlaylistByUrl(url: string) {
    const page = await ffetchHtml(url).text()
    const entries = extractHydrationData(page)
    const found = entries.find((entry: { hydratable?: unknown }) => entry.hydratable === 'playlist')
    if (found) {
        return ScPlaylist.parse(found.data)
    }
    throw new Error('no playlist found')
}
/**
 * Requests `/playlists/{id}` from the API and validates the response
 * against `ScPlaylist`.
 *
 * @param id  numeric playlist id
 */
async function fetchPlaylistById(id: number) {
    const query = { linked_partitioning: '1' }
    const request = ffetchApi(`/playlists/${id}`, { query })
    return request.parsedJson(ScPlaylist)
}
/**
 * Requests `/tracks` for a batch of ids (sent comma-separated in the `ids`
 * query parameter) and validates the response as an array of `ScTrack`.
 *
 * @param trackIds  numeric track ids to resolve
 */
async function fetchTracksById(trackIds: number[]) {
    const query = { ids: trackIds.join(',') }
    const responseSchema = z.array(ScTrack)
    return ffetchApi('/tracks', { query }).parsedJson(responseSchema)
}
/**
 * Downloads one track with ffmpeg, tagging it and embedding cover art when
 * the track has artwork.
 *
 * The first transcoding with `quality === 'hq'` or preset `opus_0_0` is used,
 * otherwise the last one listed. Its stream URL is resolved through the API
 * and ffmpeg copies the audio stream (`-c copy`) into
 * `${opts.destination}.${ext}`, where `ext` is derived from the MIME type.
 *
 * @param track  track to download
 * @param opts.destination  output path without extension
 * @param opts.onRateLimit  invoked with the wait time when the API request is rate limited
 * @param opts.onCdnRateLimit  invoked when ffmpeg reports `429 Too Many Requests`
 * @throws Error when the track has no transcodings or an unrecognised MIME
 *   type; rethrows any ffmpeg failure other than a 429
 */
async function downloadTrack(track: ScTrack, opts: {
    /** download destination (filename without extension) */
    destination: string
    onRateLimit?: (waitTime: number) => void
    onCdnRateLimit?: () => void
}) {
    // find the best transcoding: first hq / opus entry, else the last one
    const transcodings = track.media.transcodings
    const transcoding = transcodings.find(t => t.quality === 'hq' || t.preset === 'opus_0_0')
        ?? transcodings[transcodings.length - 1]
    if (!transcoding) {
        throw new Error(`track ${track.id} has no transcodings`)
    }

    // derive the extension up front so an unusable format fails before any request
    const mimeMatch = transcoding.format.mime_type.match(/^audio\/(\w+)(;|$)/)
    if (!mimeMatch) {
        throw new Error(`unsupported mime type: ${transcoding.format.mime_type}`)
    }
    let ext = mimeMatch[1]
    if (ext === 'mp4') ext = 'm4a'
    // `audio/mpeg` is the registered MIME type of MP3; without this mapping
    // the mp3 cover-art branch below could never match
    if (ext === 'mpeg') ext = 'mp3'

    const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
    const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null

    const { url: hlsUrl } = await ffetchApi(transcoding.url, {
        query: {
            track_authorization: track.track_authorization,
        },
        rateLimit: {
            isRejected(res) {
                return res.status === 429
            },
            defaultWaitTime: 60_000,
            maxRetries: 10,
            onRateLimitExceeded(_res, waitTime) {
                opts.onRateLimit?.(waitTime)
            },
        },
    }).parsedJson(z.object({
        url: z.string(),
    }))

    const filename = `${opts.destination}.${ext}`
    const params: string[] = [
        '-y',
        '-i',
        hlsUrl,
    ]

    try {
        if (artworkBytes) {
            if (ext === 'mp3') {
                // cover goes in as a second input mapped as a video stream
                await writeFile(artworkPath, artworkBytes)
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '1:v:0',
                    '-id3v2_version',
                    '3',
                    '-metadata:s:v',
                    'title=Album cover',
                    '-metadata:s:v',
                    'comment=Cover (front)',
                )
            } else if (ext === 'ogg') {
                // ogg carries the cover as a base64 `metadata_block_picture` tag
                const blob = base64.encode(await generateOpusImageBlob(artworkBytes))
                params.push(
                    '-metadata',
                    `metadata_block_picture=${blob}`,
                )
            } else if (ext === 'm4a') {
                await writeFile(artworkPath, artworkBytes)
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '1',
                    '-disposition:v',
                    'attached_pic',
                )
            }
        }

        params.push(
            '-map',
            '0:a',
            '-c',
            'copy',
            '-metadata',
            `title=${track.title}`,
            '-metadata',
            `artist=${track.user.username}`,
            '-metadata',
            `comment=${track.description ?? ''}`,
            filename,
        )

        // retry for as long as ffmpeg reports a 429; anything else is fatal
        while (true) {
            try {
                await $`ffmpeg ${params}`.quiet(true)
                break
            } catch (e) {
                if (!(e instanceof ProcessOutput)) {
                    throw e
                }
                if (e.stderr.includes('429 Too Many Requests')) {
                    opts.onCdnRateLimit?.()
                    await sleep(10_000)
                    continue
                }
                throw e
            }
        }
    } finally {
        // remove the temporary artwork even when ffmpeg failed
        await rm(artworkPath, { force: true })
    }
}
/**
 * Downloads every track of a playlist into one directory.
 *
 * Entries that are only id stubs are first resolved through
 * `fetchTracksById` in batches of 20. Files are named
 * `<position>. <artist> - <title>`, with the 1-based playlist position
 * zero-padded to a common width.
 *
 * @param playlist  playlist whose `tracks` should be downloaded
 * @param params.destination  target directory; defaults to
 *   `assets/soundcloud-dl/<user> - <playlist title>`
 */
async function downloadPlaylist(playlist: ScPlaylist, params: {
    destination?: string
} = {}) {
    const tracks: ScTrack[] = []
    const tracksToFetch = new Set<number>()
    const trackIdToPosition = new Map<number, number>()

    for (let i = 0; i < playlist.tracks.length; i++) {
        const track = playlist.tracks[i]
        trackIdToPosition.set(track.id, i + 1)
        // only full track objects have a `user`; stubs must be fetched
        if ('user' in track) {
            tracks.push(track)
        } else {
            tracksToFetch.add(track.id)
        }
    }

    const spinnies = new Spinnies()

    if (tracksToFetch.size) {
        let remaining = tracksToFetch.size
        spinnies.add('fetching', { text: `fetching ${remaining} tracks` })
        await asyncPool(chunks(Array.from(tracksToFetch), 20), async (ids) => {
            const res = await fetchTracksById(Array.from(ids))
            for (const track of res) {
                tracks.push(track)
            }
            remaining -= ids.length
            spinnies.update('fetching', { text: `fetching ${remaining} tracks` })
        })
        spinnies.succeed('fetching', { text: `fetched ${tracks.length} tracks` })
    }

    const destDir = params.destination ?? join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
    await mkdir(destDir, { recursive: true })

    // Pad to the digit count of the highest position. ceil(log10(n)) is one
    // short at exact powers of ten (10 -> 1, 100 -> 2), and positions run up
    // to the playlist length, not the number of resolved tracks.
    const posPadSize = String(playlist.tracks.length).length

    await asyncPool(tracks, async (track) => {
        const position = trackIdToPosition.get(track.id)!
        const filename = `${position.toString().padStart(posPadSize, '0')}. ${track.user.username} - ${track.title}`
        spinnies.add(`${track.id}`, { text: filename })
        await downloadTrack(track, {
            destination: join(destDir, sanitizeFilename(filename)),
            onRateLimit: (wait) => {
                spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
            },
            onCdnRateLimit: () => {
                spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
            },
        })
        spinnies.remove(`${track.id}`)
    })

    console.log('done')
    spinnies.stopAll()
}
/**
 * Downloads everything a user has liked.
 *
 * The user's id and like counts are read from the profile page's hydration
 * data; the likes collection is then paged through 100 at a time. Liked
 * tracks are written to `assets/soundcloud-dl/<username>-likes`, and each
 * liked playlist is re-fetched by id and downloaded into its own
 * sub-directory there.
 *
 * @param username  profile path segment of the user
 * @throws Error('no user found') when the profile page has no `user`
 *   hydration entry, or when a pagination link carries no `offset`
 */
async function downloadLikes(username: string) {
    const spinnies = new Spinnies()
    spinnies.add('collect', { text: 'collecting likes...' })

    const userPage = await ffetchHtml(`/${username}`).text()
    const hydrationData = extractHydrationData(userPage)
    const user = hydrationData.find((it: { hydratable?: unknown }) => it.hydratable === 'user')
    if (!user) throw new Error('no user found')
    const userData = z.object({
        likes_count: z.number(),
        playlist_likes_count: z.number(),
        id: z.number(),
    }).parse(user.data)

    const tracks: ScTrack[] = []
    const playlists: ScPlaylist[] = []

    const updateSpinner = () => {
        const total = userData.likes_count + userData.playlist_likes_count
        // a user without likes would otherwise render as "NaN%"
        const percent = total > 0 ? Math.floor((tracks.length + playlists.length) / total * 100) : 100
        spinnies.update('collect', {
            text: `[${percent}%] collecting liked tracks: ${tracks.length}/${userData.likes_count}, playlists: ${playlists.length}/${userData.playlist_likes_count}`,
        })
    }
    updateSpinner()

    let offset = '0'
    while (true) {
        const res = await ffetchApi(`/users/${userData.id}/likes`, {
            query: {
                limit: 100,
                offset,
                linked_partitioning: '1',
            },
        }).parsedJson(z.object({
            collection: z.array(ScLike),
            next_href: z.string().nullable(),
        }))

        for (const like of res.collection) {
            if (like.track) {
                tracks.push(like.track)
            } else if (like.playlist) {
                playlists.push(like.playlist)
            } else {
                console.warn('unknown like type:', like.created_at)
            }
        }
        updateSpinner()

        if (!res.next_href) break
        // the next page is addressed by the `offset` parameter of `next_href`
        const nextOffset = new URL(res.next_href).searchParams.get('offset')
        if (nextOffset == null) {
            throw new Error(`likes pagination link has no offset: ${res.next_href}`)
        }
        offset = nextOffset
    }
    spinnies.succeed('collect', { text: `collected ${tracks.length} tracks and ${playlists.length} playlists` })

    spinnies.add('tracks', { text: 'downloading tracks...' })
    let downloaded = 0
    const updateTracksSpinner = () => {
        spinnies.update('tracks', { text: `[${downloaded}/${tracks.length}] downloading tracks...` })
    }
    updateTracksSpinner()

    const baseDir = join('assets/soundcloud-dl', `${sanitizeFilename(username)}-likes`)
    await mkdir(baseDir, { recursive: true })

    // Download the collected tracks. This loop used to be commented out, so
    // liked tracks were gathered but never saved and the summary always said 0.
    await asyncPool(tracks, async (track) => {
        const filename = `${track.user.username} - ${track.title}`
        spinnies.add(`${track.id}`, { text: filename })
        await downloadTrack(track, {
            destination: join(baseDir, sanitizeFilename(filename)),
            onRateLimit: (wait) => {
                spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
            },
            onCdnRateLimit: () => {
                spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
            },
        })
        spinnies.remove(`${track.id}`)
        downloaded += 1
        updateTracksSpinner()
    })

    spinnies.succeed('tracks', { text: `downloaded ${downloaded} tracks` })
    spinnies.stopAll()

    for (const playlist of playlists) {
        console.log(`\uDB83\uDCB8 ${playlist.title}`)
        // re-fetch by id before downloading rather than using the liked copy
        const fullPlaylist = await fetchPlaylistById(playlist.id)
        await downloadPlaylist(fullPlaylist, {
            destination: join(baseDir, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)),
        })
    }
}
// Entry point: the URL comes from the first CLI argument or an interactive
// prompt, and its path decides what is downloaded:
//   /<user>/sets/...  -> playlist
//   /<user>/likes     -> everything the user liked
//   anything else     -> single track
const url = process.argv[2] ?? await question('url > ')
if (!url.startsWith('https://soundcloud.com/')) {
    console.error('url must start with https://soundcloud.com/')
    process.exit(1)
}

// `[\w-]` rather than `[a-z0-9-]` so profile names with underscores are
// routed here too (assumes SoundCloud permalinks allow them); dots are
// escaped so they only match literally
const isPlaylistUrl = /^https:\/\/soundcloud\.com\/[\w-]+\/sets\//i.test(url)
const likesMatch = url.match(/^https:\/\/soundcloud\.com\/([\w-]+)\/likes/i)

if (isPlaylistUrl) {
    await downloadPlaylist(await fetchPlaylistByUrl(url))
} else if (likesMatch) {
    await downloadLikes(likesMatch[1])
} else {
    const track = await fetchTrackByUrl(url)
    const filename = `${track.user.username} - ${track.title}`
    console.log('downloading track:', filename)
    // the other modes create their output directory themselves; do the same
    // here so ffmpeg does not fail on a fresh checkout
    const outputDir = 'assets/soundcloud-dl'
    await mkdir(outputDir, { recursive: true })
    await downloadTrack(track, {
        destination: join(outputDir, sanitizeFilename(filename)),
    })
}