import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path'

import { ffetchAddons } from '@fuman/fetch'
import { assert, asyncPool, base64 } from '@fuman/utils'
import { load } from 'cheerio'
import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici'
import { z } from 'zod'
import { $, question } from 'zx'

import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts'
import { generateOpusImageBlob } from '../../utils/opus.ts'

/** Root directory that all downloads are written into. */
const DOWNLOAD_DIR = 'assets/soundcloud-dl'

/** Text preceding the JSON payload inside the hydration `<script>` tag. */
const HYDRATION_PREFIX = 'window.__sc_hydration = '

/** Client for the SoundCloud JSON API; waits and retries when a request is answered with HTTP 429. */
const ffetchApi = ffetchBase.extend({
    baseUrl: 'https://api-v2.soundcloud.com',
    // @ts-expect-error FIXME: type error suppressed by the original author; root cause not documented
    query: {
        client_id: '4BowhSywvkJtklODQDzjNMq9sK9wyDJ4',
        app_version: '1736857534',
        app_locale: 'en',
    },
    addons: [
        ffetchAddons.rateLimitHandler(),
    ],
    rateLimit: {
        isRejected(res) {
            return res.status === 429
        },
        defaultWaitTime: 10_000,
    },
    headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
        'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
    },
})

/** Client for the soundcloud.com website (HTML pages); also used to fetch artwork. */
const ffetchHtml = ffetchBase.extend({
    baseUrl: 'https://soundcloud.com',
    headers: {
        Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`,
    },
    extra: {
        // NOTE(review): hard-coded local proxy; presumably a developer-specific setup, confirm before reuse
        // @ts-expect-error FIXME: type error suppressed by the original author; root cause not documented
        dispatcher: new ProxyAgent('http://127.0.0.1:7891'),
    },
})

/** Full track object as validated by this script. */
const ScTrack = z.object({
    id: z.number(),
    kind: z.literal('track'),
    title: z.string(),
    duration: z.number(),
    permalink_url: z.string(),
    // swap the `-large.jpg` artwork variant for the `-t500x500.jpg` one
    artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(),
    media: z.object({
        transcodings: z.array(z.object({
            url: z.string(),
            preset: z.string(),
            format: z.object({
                protocol: z.string(),
                mime_type: z.string(),
            }),
            quality: z.string(),
            is_legacy_transcoding: z.boolean(),
        })),
    }),
    track_authorization: z.string(),
    user: z.object({
        username: z.string(),
        permalink: z.string(),
    }),
})
type ScTrack = z.infer<typeof ScTrack>

/** Playlist object; each entry of `tracks` is either a full track or a stub holding only `id` and `kind`. */
const ScPlaylist = z.object({
    id: z.number(),
    title: z.string(),
    duration: z.number(),
    permalink_url: z.string(),
    genre: z.string(),
    description: z.string().nullable(),
    track_count: z.number(),
    user: z.object({
        username: z.string(),
    }),
    tracks: z.array(z.union([
        ScTrack,
        z.object({
            id: z.number(),
            kind: z.literal('track'),
        }),
    ])),
})
type ScPlaylist = z.infer<typeof ScPlaylist>

/**
 * One entry of the hydration array. Only the two fields this script reads are
 * declared, and both are left unconstrained so that unrelated entries never
 * make validation fail.
 */
const HydrationEntry = z.object({
    hydratable: z.unknown(),
    data: z.unknown(),
})

/**
 * Extract the `window.__sc_hydration` array embedded in a SoundCloud HTML page.
 *
 * @param html  raw HTML of the page
 * @returns  the parsed hydration entries
 * @throws if the hydration script is missing, is not valid JSON, or is not an array of objects
 */
function extractHydrationData(html: string) {
    const $ = load(html)
    const script = $('script:contains(window.__sc_hydration = )')

    const source = script.html()
    if (source === null) throw new Error('no hydration data found')

    // drop the assignment prefix and the last character (presumably the trailing `;`)
    const json = source.replace(HYDRATION_PREFIX, '').slice(0, -1)

    return z.array(HydrationEntry).parse(JSON.parse(json))
}

/**
 * Fetch a track page and return the track described by its hydration data.
 *
 * @param url  track page URL
 * @throws if the page has no `sound` hydration entry or the entry fails validation
 */
async function fetchTrackByUrl(url: string) {
    const html = await ffetchHtml(url).text()
    const hydrationData = extractHydrationData(html)

    const track = hydrationData.find(it => it.hydratable === 'sound')
    if (!track) throw new Error('no track found')

    return ScTrack.parse(track.data)
}

/**
 * Fetch a playlist page and return the playlist described by its hydration data.
 *
 * @param url  playlist page URL
 * @throws if the page has no `playlist` hydration entry or the entry fails validation
 */
async function fetchPlaylistByUrl(url: string) {
    const html = await ffetchHtml(url).text()
    const hydrationData = extractHydrationData(html)

    const playlist = hydrationData.find(it => it.hydratable === 'playlist')
    if (!playlist) throw new Error('no playlist found')

    return ScPlaylist.parse(playlist.data)
}

/**
 * Fetch full track objects for the given ids in a single `/tracks` request.
 *
 * @param trackIds  ids to fetch (sent as one comma-separated `ids` query parameter)
 */
async function fetchTracksById(trackIds: number[]) {
    return ffetchApi('/tracks', {
        query: {
            ids: trackIds.join(','),
        },
    }).parsedJson(z.array(ScTrack))
}

type ScTranscoding = ScTrack['media']['transcodings'][number]

/**
 * Choose the transcoding to download: the first one that is either `hq`
 * quality or the `opus_0_0` preset, otherwise the last one listed.
 *
 * @returns the chosen transcoding, or `undefined` when the list is empty
 */
function pickTranscoding(transcodings: ScTranscoding[]): ScTranscoding | undefined {
    const preferred = transcodings.find(t => t.quality === 'hq' || t.preset === 'opus_0_0')
    return preferred ?? transcodings[transcodings.length - 1]
}

/**
 * Derive the output file extension from an `audio/*` mime type.
 *
 * @throws if the mime type is not of the form `audio/<subtype>[;...]`
 */
function audioExtensionFromMime(mimeType: string): string {
    const subtype = mimeType.match(/^audio\/(\w+)(;|$)/)?.[1]
    if (!subtype) throw new Error(`unsupported mime type: ${mimeType}`)

    if (subtype === 'mp4') return 'm4a'
    // `audio/mpeg` is the registered mime type for MP3; without this mapping
    // the mp3 cover-art branch in downloadTrack could never be reached
    if (subtype === 'mpeg') return 'mp3'
    return subtype
}

/**
 * Download a single track with ffmpeg, copying the audio stream as-is and
 * attaching title/artist metadata plus cover art where the container is handled.
 *
 * @param track  track to download
 * @param opts  download options
 * @throws if the track has no transcodings, its mime type is unsupported, or a request/ffmpeg fails
 */
async function downloadTrack(track: ScTrack, opts: {
    /* download destination (filename without extension) */
    destination: string
}) {
    const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
    const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null

    // find the best transcoding
    const transcoding = pickTranscoding(track.media.transcodings)
    if (!transcoding) throw new Error(`no transcodings available for track ${track.id}`)

    const { url: hlsUrl } = await ffetchApi(transcoding.url, {
        query: {
            track_authorization: track.track_authorization,
        },
    }).parsedJson(z.object({
        url: z.string(),
    }))

    const ext = audioExtensionFromMime(transcoding.format.mime_type)
    const filename = `${opts.destination}.${ext}`

    const params: string[] = [
        '-y',
        '-i',
        hlsUrl,
    ]

    try {
        if (artworkBytes) {
            if (ext === 'mp3') {
                await writeFile(artworkPath, artworkBytes)
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '1:v:0',
                    '-id3v2_version',
                    '3',
                    '-metadata:s:v',
                    'title=Album cover',
                    '-metadata:s:v',
                    'comment=Cover (front)',
                )
            } else if (ext === 'ogg') {
                // ogg gets the picture inline as a metadata tag, so no temp file is written
                const blob = base64.encode(await generateOpusImageBlob(artworkBytes))
                params.push(
                    '-metadata',
                    `metadata_block_picture=${blob}`,
                )
            } else if (ext === 'm4a') {
                await writeFile(artworkPath, artworkBytes)
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '1',
                    '-disposition:v',
                    'attached_pic',
                )
            }
        }

        params.push(
            '-map',
            '0:a',
            '-c',
            'copy',
            '-metadata',
            `title=${track.title}`,
            '-metadata',
            `artist=${track.user.username}`,
            filename,
        )

        await $`ffmpeg ${params}`.quiet(true)
    } finally {
        // always remove the temporary artwork, even when ffmpeg fails
        await rm(artworkPath, { force: true })
    }
}

/**
 * Download every track of a playlist into
 * `assets/soundcloud-dl/<user> - <playlist title>/`, prefixing each file with
 * its zero-padded position in the playlist.
 *
 * @param playlist  playlist to download; stub entries are resolved through the API first
 */
async function downloadPlaylist(playlist: ScPlaylist) {
    const tracks: ScTrack[] = []
    const tracksToFetch = new Set<number>()
    const trackIdToPosition = new Map<number, number>()

    for (let i = 0; i < playlist.tracks.length; i++) {
        const track = playlist.tracks[i]
        trackIdToPosition.set(track.id, i + 1)
        if ('user' in track) {
            tracks.push(track)
        } else {
            tracksToFetch.add(track.id)
        }
    }

    const spinnies = new Spinnies()

    if (tracksToFetch.size) {
        let remaining = tracksToFetch.size
        spinnies.add('fetching', { text: `fetching ${remaining} tracks` })
        await asyncPool(chunks(Array.from(tracksToFetch), 20), async (ids) => {
            const res = await fetchTracksById(Array.from(ids))
            for (const track of res) {
                tracks.push(track)
            }
            remaining -= ids.length
            spinnies.update('fetching', { text: `fetching ${remaining} tracks` })
        })
        spinnies.succeed('fetching')
    }

    const destDir = join(DOWNLOAD_DIR, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
    await mkdir(destDir, { recursive: true })

    // positions run from 1 to playlist.tracks.length, so pad to the digit count
    // of the largest one (ceil(log10(n)) is one digit short when n is 10, 100, ...)
    const posPadSize = String(playlist.tracks.length).length

    await asyncPool(tracks, async (track) => {
        const position = trackIdToPosition.get(track.id)!
        const filename = `${position.toString().padStart(posPadSize, '0')}. ${track.user.username} - ${track.title}`

        spinnies.add(`${track.id}`, { text: filename })
        await downloadTrack(track, {
            // sanitize so that path separators in a title cannot escape destDir
            destination: join(destDir, sanitizeFilename(filename)),
        })
        spinnies.remove(`${track.id}`)
    }, { limit: 8 })

    console.log('done')
    spinnies.stopAll()
}

// entry point: the url comes from the first CLI argument or an interactive prompt
const url = process.argv[2] ?? await question('url > ')

if (!url.startsWith('https://soundcloud.com/')) {
    console.error('url must start with https://soundcloud.com/')
    process.exit(1)
}

// any `<user>/sets/...` url is a playlist; the user segment may contain characters such as `_`
if (url.match(/^https:\/\/soundcloud\.com\/[^/]+\/sets\//i)) {
    await downloadPlaylist(await fetchPlaylistByUrl(url))
} else {
    const track = await fetchTrackByUrl(url)
    const filename = `${track.user.username}-${track.title}`
    console.log('downloading track:', filename)
    // make sure the output directory exists before ffmpeg writes into it
    await mkdir(DOWNLOAD_DIR, { recursive: true })
    await downloadTrack(track, {
        destination: join(DOWNLOAD_DIR, sanitizeFilename(filename)),
    })
}