import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import { ffetchAddons } from '@fuman/fetch'
import { assert, asyncPool, base64, sleep } from '@fuman/utils'
import { load } from 'cheerio'
import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici'
import { z } from 'zod'
import { $, ProcessOutput, question } from 'zx'
import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts'
import { generateOpusImageBlob } from '../../utils/opus.ts'

/** Root directory every download ends up under. */
const OUTPUT_DIR = 'assets/soundcloud-dl'

/** Client for the JSON API; every request carries the query parameters and OAuth header below. */
const ffetchApi = ffetchBase.extend({
    baseUrl: 'https://api-v2.soundcloud.com',
    query: {
        client_id: '4BowhSywvkJtklODQDzjNMq9sK9wyDJ4',
        app_version: '1738322252',
        app_locale: 'en',
    },
    addons: [
        ffetchAddons.rateLimitHandler(),
    ],
    headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
        'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
    },
})

/** Client for the website itself (HTML pages and artwork), authenticated through the cookie. */
const ffetchHtml = ffetchBase.extend({
    baseUrl: 'https://soundcloud.com',
    headers: {
        Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`,
    },
})

/** A fully populated track object. */
const ScTrack = z.object({
    id: z.number(),
    kind: z.literal('track'),
    title: z.string(),
    duration: z.number(),
    description: z.string().nullable(),
    permalink_url: z.string(),
    // swap the `-large` artwork variant for the `-t500x500` one (presumably a higher resolution)
    artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(),
    media: z.object({
        transcodings: z.array(z.object({
            url: z.string(),
            preset: z.string(),
            format: z.object({
                protocol: z.string(),
                mime_type: z.string(),
            }),
            quality: z.string(),
            is_legacy_transcoding: z.boolean(),
        })),
    }),
    track_authorization: z.string(),
    user: z.object({
        username: z.string(),
        permalink: z.string(),
    }),
})
type ScTrack = z.infer<typeof ScTrack>

/**
 * A playlist. Entries of `tracks` are either full tracks or stubs carrying only
 * `id` and `kind`; stubs have to be resolved through {@link fetchTracksById}.
 */
const ScPlaylist = z.object({
    id: z.number(),
    title: z.string(),
    duration: z.number(),
    permalink_url: z.string(),
    genre: z.string().nullish(),
    description: z.string().nullish(),
    track_count: z.number(),
    user: z.object({
        username: z.string(),
    }),
    tracks: z.array(z.union([
        ScTrack,
        z.object({
            id: z.number(),
            kind: z.literal('track'),
        }),
    ])).default(() => []),
})
type ScPlaylist = z.infer<typeof ScPlaylist>

/** A user profile, including the counters used for progress reporting. */
const ScUser = z.object({
    id: z.number(),
    kind: z.literal('user'),
    permalink_url: z.string(),
    username: z.string(),
    likes_count: z.number(),
    track_count: z.number(),
    playlist_likes_count: z.number(),
})
type ScUser = z.infer<typeof ScUser>

/** One entry of a user's likes; carries either a track or a playlist. */
const ScLike = z.object({
    created_at: z.string(),
    kind: z.literal('like'),
    track: ScTrack.optional(),
    playlist: ScPlaylist.optional(),
})

/**
 * One entry of the `window.__sc_hydration` array embedded in a page.
 * Only the two fields this script reads are declared; `data` is validated
 * with the matching zod schema at the call site.
 */
interface HydrationEntry {
    hydratable: string
    data: unknown
}

/**
 * Extracts the `window.__sc_hydration` array from a SoundCloud HTML page.
 *
 * @param html  raw HTML of the page
 * @returns the parsed hydration entries
 * @throws if the page has no hydration script or its payload is not an array
 */
function extractHydrationData(html: string): HydrationEntry[] {
    const doc = load(html)
    const source = doc('script:contains(window.__sc_hydration = )').html()
    if (source == null) {
        throw new Error('no hydration data found on the page')
    }

    // the script body is `window.__sc_hydration = [...];` - drop the assignment and the trailing `;`
    const parsed: unknown = JSON.parse(source.replace('window.__sc_hydration = ', '').slice(0, -1))
    if (!Array.isArray(parsed)) {
        throw new TypeError('unexpected hydration data format')
    }

    // NOTE(review): entries are assumed to have the `{ hydratable, data }` shape; only `data` is validated later
    return parsed as HydrationEntry[]
}

/**
 * Loads a playlist from its public page.
 *
 * @param url  playlist page url
 * @throws if the page contains no `playlist` hydration entry
 */
async function fetchPlaylistByUrl(url: string): Promise<ScPlaylist> {
    const html = await ffetchHtml(url).text()
    const playlist = extractHydrationData(html).find(it => it.hydratable === 'playlist')
    if (!playlist) throw new Error('no playlist found')

    return ScPlaylist.parse(playlist.data)
}

/** Loads a playlist from the API by its numeric id. */
async function fetchPlaylistById(id: number) {
    return ffetchApi(`/playlists/${id}`, {
        query: {
            linked_partitioning: '1',
        },
    }).parsedJson(ScPlaylist)
}

/** Loads full track objects for the given ids with a single `/tracks` request. */
async function fetchTracksById(trackIds: number[]) {
    return ffetchApi('/tracks', {
        query: {
            ids: trackIds.join(','),
        },
    }).parsedJson(z.array(ScTrack))
}

type ScTranscoding = ScTrack['media']['transcodings'][number]

/**
 * Picks the transcoding to download: the first one that is either `hq` quality
 * or the `opus_0_0` preset, falling back to the last one listed.
 *
 * @throws if the track has no transcodings at all
 */
function pickTranscoding(track: ScTrack): ScTranscoding {
    const all = track.media.transcodings
    const picked = all.find(t => t.quality === 'hq' || t.preset === 'opus_0_0') ?? all.at(-1)
    if (!picked) {
        throw new Error(`track ${track.id} has no transcodings`)
    }

    return picked
}

/**
 * Maps an `audio/*` mime type (optionally followed by `;` parameters) to a file extension.
 *
 * @throws if the mime type is not of the form `audio/<subtype>`
 */
function extensionForMimeType(mimeType: string): string {
    const subtype = mimeType.match(/^audio\/(\w+)(;|$)/)?.[1]
    if (subtype === undefined) {
        throw new Error(`unsupported mime type: ${mimeType}`)
    }

    if (subtype === 'mp4') return 'm4a'
    // `audio/mpeg` is the mime type of mp3; without this mapping the mp3 artwork branch is unreachable
    if (subtype === 'mpeg') return 'mp3'
    return subtype
}

/**
 * Downloads a single track with ffmpeg, copying the audio stream as-is and
 * attaching title/artist/comment metadata and (when available) the artwork.
 *
 * @param track  track to download
 * @param opts  destination and optional progress callbacks
 * @throws whatever ffmpeg or the network calls throw, except CDN 429 errors, which are retried
 */
async function downloadTrack(track: ScTrack, opts: {
    /** download destination (filename without extension) */
    destination: string
    /** called when the API rate limit is hit, with the time to wait in milliseconds */
    onRateLimit?: (waitTime: number) => void
    /** called when ffmpeg reports a 429 from the media server */
    onCdnRateLimit?: () => void
}): Promise<void> {
    const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
    const artworkBytes = track.artwork_url
        ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer())
        : null

    // find the best transcoding
    const transcoding = pickTranscoding(track)

    const { url: hlsUrl } = await ffetchApi(transcoding.url, {
        query: {
            track_authorization: track.track_authorization,
        },
        rateLimit: {
            isRejected(res) {
                return res.status === 429
            },
            defaultWaitTime: 60_000,
            maxRetries: 10,
            onRateLimitExceeded(_res, waitTime) {
                opts.onRateLimit?.(waitTime)
            },
        },
    }).parsedJson(z.object({
        url: z.string(),
    }))

    const ext = extensionForMimeType(transcoding.format.mime_type)
    const filename = `${opts.destination}.${ext}`

    const params: string[] = [
        '-y',
        '-i',
        hlsUrl,
    ]

    try {
        if (artworkBytes) {
            if (ext === 'mp3') {
                await writeFile(artworkPath, artworkBytes)
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '1:v:0',
                    '-id3v2_version',
                    '3',
                    '-metadata:s:v',
                    'title=Album cover',
                    '-metadata:s:v',
                    'comment=Cover (front)',
                )
            } else if (ext === 'ogg') {
                // ogg takes the cover as a base64 `metadata_block_picture` tag instead of a second input
                const blob = base64.encode(await generateOpusImageBlob(artworkBytes))
                params.push(
                    '-metadata',
                    `metadata_block_picture=${blob}`,
                )
            } else if (ext === 'm4a') {
                await writeFile(artworkPath, artworkBytes)
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '1',
                    '-disposition:v',
                    'attached_pic',
                )
            }
        }

        params.push(
            '-map',
            '0:a',
            '-c',
            'copy',
            '-metadata',
            `title=${track.title}`,
            '-metadata',
            `artist=${track.user.username}`,
            '-metadata',
            `comment=${track.description ?? ''}`,
            filename,
        )

        // retry for as long as the media server keeps answering 429; any other failure is fatal
        while (true) {
            try {
                await $`ffmpeg ${params}`.quiet(true)
                break
            } catch (e) {
                if (!(e instanceof ProcessOutput)) {
                    throw e
                }

                if (e.stderr.includes('429 Too Many Requests')) {
                    opts.onCdnRateLimit?.()
                    await sleep(10_000)
                    continue
                }

                throw e
            }
        }
    } finally {
        // remove the temporary artwork even when ffmpeg failed
        await rm(artworkPath, { force: true })
    }
}

/**
 * Downloads one track while showing a dedicated spinner line for it.
 * The line is keyed by the track id and removed once the download succeeds.
 */
async function downloadTrackWithSpinner(
    spinnies: Spinnies,
    track: ScTrack,
    params: { label: string, destination: string },
): Promise<void> {
    const key = `${track.id}`
    const { label, destination } = params

    spinnies.add(key, { text: label })
    await downloadTrack(track, {
        destination,
        onRateLimit: (wait) => {
            spinnies.update(key, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${label}` })
        },
        onCdnRateLimit: () => {
            spinnies.update(key, { text: `[cdn rate limit] ${label}` })
        },
    })
    spinnies.remove(key)
}

/** Whole-number percentage of `done` out of `total`; an empty total counts as complete. */
function percentOf(done: number, total: number): number {
    return total > 0 ? Math.floor(done / total * 100) : 100
}

/**
 * Reads the `offset` query parameter of a `next_href` pagination link.
 *
 * @throws if the link has no `offset` parameter
 */
function nextOffset(nextHref: string): string {
    const offset = new URL(nextHref).searchParams.get('offset')
    if (offset === null) {
        throw new Error(`pagination link has no offset: ${nextHref}`)
    }

    return offset
}

/**
 * Downloads every track of a playlist, prefixing file names with the
 * zero-padded position of the track in the playlist.
 *
 * @param playlist  playlist to download
 * @param params.destination  target directory; defaults to `<user> - <title>` under the output root
 */
async function downloadPlaylist(playlist: ScPlaylist, params: {
    destination?: string
} = {}): Promise<void> {
    const tracks: ScTrack[] = []
    const tracksToFetch = new Set<number>()
    const trackIdToPosition = new Map<number, number>()

    for (const [index, track] of playlist.tracks.entries()) {
        trackIdToPosition.set(track.id, index + 1)
        // only full track objects carry `user`; stubs have to be fetched separately
        if ('user' in track) {
            tracks.push(track)
        } else {
            tracksToFetch.add(track.id)
        }
    }

    const spinnies = new Spinnies()

    if (tracksToFetch.size) {
        let remaining = tracksToFetch.size
        spinnies.add('fetching', { text: `fetching ${remaining} tracks` })
        await asyncPool(chunks(Array.from(tracksToFetch), 20), async (ids) => {
            const res = await fetchTracksById(Array.from(ids))
            tracks.push(...res)
            remaining -= ids.length
            spinnies.update('fetching', { text: `fetching ${remaining} tracks` })
        })
        spinnies.succeed('fetching', { text: `fetched ${tracks.length} tracks` })
    }

    const destDir = params.destination
        ?? join(OUTPUT_DIR, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
    await mkdir(destDir, { recursive: true })

    // width of the largest position, so that e.g. a 10-track playlist yields 01..10
    const posPadSize = String(playlist.tracks.length).length

    await asyncPool(tracks, async (track) => {
        // every id here was taken from `playlist.tracks`, directly or through `tracksToFetch`
        const position = trackIdToPosition.get(track.id)!
        const filename = `${position.toString().padStart(posPadSize, '0')}. ${track.user.username} - ${track.title}`

        await downloadTrackWithSpinner(spinnies, track, {
            label: filename,
            destination: join(destDir, sanitizeFilename(filename)),
        })
    })

    console.log('done')
    spinnies.stopAll()
}

/**
 * Downloads everything a user has liked: first the liked tracks, then each
 * liked playlist into its own sub-directory.
 *
 * @param username  user permalink (the path segment of the profile url)
 * @throws if the profile page contains no `user` hydration entry
 */
async function downloadLikes(username: string): Promise<void> {
    const spinnies = new Spinnies()
    spinnies.add('collect', { text: 'collecting likes...' })

    const userPage = await ffetchHtml(`/${username}`).text()
    const user = extractHydrationData(userPage).find(it => it.hydratable === 'user')
    if (!user) throw new Error('no user found')
    const userData = ScUser.parse(user.data)

    const tracks: ScTrack[] = []
    const playlists: ScPlaylist[] = []

    const updateSpinner = () => {
        const percent = percentOf(
            tracks.length + playlists.length,
            userData.likes_count + userData.playlist_likes_count,
        )
        spinnies.update('collect', {
            text: `[${percent}%] collecting liked tracks: ${tracks.length}/${userData.likes_count}, playlists: ${playlists.length}/${userData.playlist_likes_count}`,
        })
    }
    updateSpinner()

    let offset = '0'
    while (true) {
        const res = await ffetchApi(`/users/${userData.id}/likes`, {
            query: {
                limit: 100,
                offset,
                linked_partitioning: '1',
            },
        }).parsedJson(z.object({
            collection: z.array(ScLike),
            next_href: z.string().nullable(),
        }))

        for (const like of res.collection) {
            if (like.track) {
                tracks.push(like.track)
            } else if (like.playlist) {
                playlists.push(like.playlist)
            } else {
                console.warn('unknown like type:', like.created_at)
            }
        }

        updateSpinner()

        if (!res.next_href) break
        offset = nextOffset(res.next_href)
    }

    spinnies.succeed('collect', { text: `collected ${tracks.length} tracks and ${playlists.length} playlists` })

    spinnies.add('tracks', { text: 'downloading tracks...' })
    let downloaded = 0
    const updateTracksSpinner = () => {
        spinnies.update('tracks', { text: `[${downloaded}/${tracks.length}] downloading tracks...` })
    }
    updateTracksSpinner()

    const baseDir = join(OUTPUT_DIR, `${sanitizeFilename(username)}-likes`)
    await mkdir(baseDir, { recursive: true })

    await asyncPool(tracks, async (track) => {
        const filename = `${track.user.username} - ${track.title}`
        await downloadTrackWithSpinner(spinnies, track, {
            label: filename,
            destination: join(baseDir, sanitizeFilename(filename)),
        })

        downloaded += 1
        updateTracksSpinner()
    })

    spinnies.succeed('tracks', { text: `downloaded ${downloaded} tracks` })
    spinnies.stopAll()

    for (const playlist of playlists) {
        console.log(`\uDB83\uDCB8 ${playlist.title}`)

        // re-fetch each liked playlist by id before downloading it
        const fullPlaylist = await fetchPlaylistById(playlist.id)
        await downloadPlaylist(fullPlaylist, {
            destination: join(baseDir, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)),
        })
    }
}

/**
 * Downloads every track uploaded by a user into `<username>-tracks`.
 *
 * @param user  profile of the user whose uploads should be downloaded
 */
async function downloadUser(user: ScUser): Promise<void> {
    const tracks: ScTrack[] = []

    const spinnies = new Spinnies()
    spinnies.add('collect')
    const updateSpinner = () => {
        const percent = percentOf(tracks.length, user.track_count)
        spinnies.update('collect', {
            text: `[${percent}%] collecting user tracks: ${tracks.length}/${user.track_count}`,
        })
    }
    updateSpinner()

    let offset = '0'
    while (true) {
        const res = await ffetchApi(`/users/${user.id}/tracks`, {
            query: {
                limit: 100,
                offset,
                linked_partitioning: '1',
            },
        }).parsedJson(z.object({
            collection: z.array(ScTrack),
            next_href: z.string().nullable(),
        }))

        tracks.push(...res.collection)
        updateSpinner()

        if (!res.next_href) break
        offset = nextOffset(res.next_href)
    }

    spinnies.succeed('collect', { text: `collected ${tracks.length} tracks` })

    spinnies.add('tracks', { text: 'downloading tracks...' })
    let downloaded = 0
    const updateTracksSpinner = () => {
        spinnies.update('tracks', { text: `[${downloaded}/${tracks.length}] downloading tracks...` })
    }
    updateTracksSpinner()

    const baseDir = join(OUTPUT_DIR, `${sanitizeFilename(user.username)}-tracks`)
    await mkdir(baseDir, { recursive: true })

    await asyncPool(tracks, async (track) => {
        const filename = track.title
        await downloadTrackWithSpinner(spinnies, track, {
            label: filename,
            destination: join(baseDir, sanitizeFilename(filename)),
        })

        downloaded += 1
        updateTracksSpinner()
    })

    spinnies.succeed('tracks', { text: `downloaded ${downloaded} tracks` })
    spinnies.stopAll()
}

// entry point: the url comes from the first cli argument or is asked for interactively
const url = process.argv[2] ?? await question('url > ')

if (!url.startsWith('https://soundcloud.com/')) {
    console.error('url must start with https://soundcloud.com/')
    process.exit(1)
}

// NOTE(review): `\w` also admits `_` in user permalinks - confirm SoundCloud allows it
const likesMatch = url.match(/^https:\/\/soundcloud\.com\/([\w-]+)\/likes/i)

if (/^https:\/\/soundcloud\.com\/[\w-]+\/sets\//i.test(url)) {
    await downloadPlaylist(await fetchPlaylistByUrl(url))
} else if (likesMatch) {
    await downloadLikes(likesMatch[1])
} else {
    // anything else is either a single track page or a user profile; tell them apart by hydration data
    const html = await ffetchHtml(url).text()
    const hydrationData = extractHydrationData(html)

    const trackData = hydrationData.find(it => it.hydratable === 'sound')
    const userData = hydrationData.find(it => it.hydratable === 'user')

    if (trackData) {
        const track = ScTrack.parse(trackData.data)
        const filename = `${track.user.username} - ${track.title}`
        console.log('downloading track:', filename)

        // unlike the other modes, nothing else creates the output directory on this path
        await mkdir(OUTPUT_DIR, { recursive: true })
        await downloadTrack(track, {
            destination: join(OUTPUT_DIR, sanitizeFilename(filename)),
        })
    } else if (userData) {
        await downloadUser(ScUser.parse(userData.data))
    } else {
        console.error('no track or user found at this url')
        process.exit(1)
    }
}