import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path'

import { ffetchAddons } from '@fuman/fetch'
import { asyncPool, base64 } from '@fuman/utils'
import { load } from 'cheerio'
import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici'
import { z } from 'zod'
import { $ } from 'zx'

import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts'
import { generateOpusImageBlob } from '../../utils/opus.ts'

/**
 * Client for the SoundCloud v2 JSON API.
 *
 * Configured with a fixed set of query parameters, a rate-limit handler that
 * treats HTTP 429 as "rejected", and an OAuth token read from the
 * `SOUNDCLOUD_TOKEN` environment variable.
 */
const ffetchApi = ffetchBase.extend({
    baseUrl: 'https://api-v2.soundcloud.com',
    // @ts-expect-error lol fixme
    query: {
        client_id: '4BowhSywvkJtklODQDzjNMq9sK9wyDJ4',
        app_version: '1736857534',
        app_locale: 'en',
    },
    addons: [
        ffetchAddons.rateLimitHandler(),
    ],
    rateLimit: {
        isRejected(res) {
            return res.status === 429
        },
        // NOTE(review): presumably milliseconds - confirm against @fuman/fetch docs
        defaultWaitTime: 10_000,
    },
    headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
        'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
    },
})

/**
 * Client for the SoundCloud website (HTML pages; also used below for artwork).
 *
 * Sends the token from `SOUNDCLOUD_TOKEN` as the `oauth_token` cookie and
 * passes an undici `ProxyAgent` pointing at `http://127.0.0.1:7891` as the
 * dispatcher.
 */
const ffetchHtml = ffetchBase.extend({
    baseUrl: 'https://soundcloud.com',
    headers: {
        Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`,
    },
    extra: {
        // @ts-expect-error lol fixme
        dispatcher: new ProxyAgent('http://127.0.0.1:7891'),
    },
})

/** Schema of a fully-populated track object. */
const ScTrack = z.object({
    id: z.number(),
    kind: z.literal('track'),
    title: z.string(),
    duration: z.number(),
    permalink_url: z.string(),
    // rewrite the `-large.jpg` artwork variant to the `-t500x500.jpg` one
    artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(),
    media: z.object({
        transcodings: z.array(z.object({
            url: z.string(),
            preset: z.string(),
            format: z.object({
                protocol: z.string(),
                mime_type: z.string(),
            }),
            quality: z.string(),
            is_legacy_transcoding: z.boolean(),
        })),
    }),
    track_authorization: z.string(),
    user: z.object({
        username: z.string(),
        permalink: z.string(),
    }),
})
type ScTrack = z.infer<typeof ScTrack>

/** A single entry of `ScTrack.media.transcodings`. */
type ScTranscoding = ScTrack['media']['transcodings'][number]

/**
 * Schema of a playlist object. Entries of `tracks` are either full tracks or
 * stubs carrying only `id` and `kind`.
 */
const ScPlaylist = z.object({
    id: z.number(),
    title: z.string(),
    duration: z.number(),
    permalink_url: z.string(),
    genre: z.string(),
    description: z.string().nullable(),
    track_count: z.number(),
    user: z.object({
        username: z.string(),
    }),
    tracks: z.array(z.union([
        ScTrack,
        z.object({
            id: z.number(),
            kind: z.literal('track'),
        }),
    ])),
})
type ScPlaylist = z.infer<typeof ScPlaylist>

/** Shape of the `window.__sc_hydration` array, as far as this script relies on it. */
const HydrationData = z.array(z.object({
    hydratable: z.string(),
    data: z.unknown(),
}))
type HydrationData = z.infer<typeof HydrationData>

/**
 * Extracts the `window.__sc_hydration` payload from a SoundCloud HTML page.
 *
 * @param html  Raw HTML of the page
 * @returns  The parsed and validated hydration entries
 * @throws  If the hydration script is missing, or its contents are not the expected JSON
 */
function extractHydrationData(html: string): HydrationData {
    // named `doc` rather than `$` so it does not shadow the zx `$` import
    const doc = load(html)
    const source = doc('script:contains(window.__sc_hydration = )').html()
    if (source == null) throw new Error('no hydration data found')

    // drop the assignment prefix and the last character (expected to be the trailing `;`)
    const json = source.replace('window.__sc_hydration = ', '').slice(0, -1)

    return HydrationData.parse(JSON.parse(json))
}

/**
 * Fetches a track page and parses the track embedded in its hydration data.
 *
 * @throws  If the page contains no `sound` hydration entry
 */
async function fetchTrackByUrl(url: string): Promise<ScTrack> {
    const html = await ffetchHtml(url).text()
    const hydrationData = extractHydrationData(html)
    const track = hydrationData.find(it => it.hydratable === 'sound')
    if (!track) throw new Error('no track found')

    return ScTrack.parse(track.data)
}

/**
 * Fetches a playlist page and parses the playlist embedded in its hydration data.
 *
 * @throws  If the page contains no `playlist` hydration entry
 */
async function fetchPlaylistByUrl(url: string): Promise<ScPlaylist> {
    const html = await ffetchHtml(url).text()
    const hydrationData = extractHydrationData(html)
    const playlist = hydrationData.find(it => it.hydratable === 'playlist')
    if (!playlist) throw new Error('no playlist found')

    return ScPlaylist.parse(playlist.data)
}

/** Fetches full track objects for the given ids with a single `/tracks` API request. */
async function fetchTracksById(trackIds: number[]) {
    return ffetchApi('/tracks', {
        query: {
            ids: trackIds.join(','),
        },
    }).parsedJson(z.array(ScTrack))
}

/**
 * Picks the most desirable transcoding without mutating the input.
 *
 * Priority, highest first: non-legacy over legacy, `hq` over `sq`,
 * `opus_0_0` preset over anything else.
 *
 * @returns  The best transcoding, or `undefined` if the list is empty
 */
function pickBestTranscoding(transcodings: readonly ScTranscoding[]): ScTranscoding | undefined {
    // sort a copy so the best candidate ends up first
    return [...transcodings].sort((a, b) => {
        // prefer non-legacy transcodings
        if (!a.is_legacy_transcoding && b.is_legacy_transcoding) return -1
        if (a.is_legacy_transcoding && !b.is_legacy_transcoding) return 1

        // prefer hq
        if (a.quality === 'hq' && b.quality === 'sq') return -1
        if (a.quality === 'sq' && b.quality === 'hq') return 1

        // prefer opus
        if (a.preset === 'opus_0_0' && b.preset !== 'opus_0_0') return -1
        if (a.preset !== 'opus_0_0' && b.preset === 'opus_0_0') return 1

        return 0
    })[0]
}

/**
 * Downloads a single track with ffmpeg, tagging it with title and artist and
 * embedding the artwork when the container is mp3 or ogg.
 *
 * @param track  Track to download
 * @param opts  Download options
 * @throws  If the track has no transcodings, or the chosen transcoding has an unexpected mime type
 */
async function downloadTrack(track: ScTrack, opts: {
    /** download destination (filename without extension) */
    destination: string
}): Promise<void> {
    const transcoding = pickBestTranscoding(track.media.transcodings)
    if (!transcoding) throw new Error(`no transcodings available for track ${track.id}`)

    const mimeMatch = transcoding.format.mime_type.match(/^audio\/(\w+)(;|$)/)
    if (!mimeMatch) throw new Error(`unexpected mime type: ${transcoding.format.mime_type}`)
    // `audio/mpeg` is the standard mime type for mp3, map it to the usual extension
    const ext = mimeMatch[1] === 'mpeg' ? 'mp3' : mimeMatch[1]

    const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
    const artworkBytes = track.artwork_url
        ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer())
        : null

    // resolve the actual stream url for the chosen transcoding
    const { url: hlsUrl } = await ffetchApi(transcoding.url, {
        query: {
            track_authorization: track.track_authorization,
        },
    }).parsedJson(z.object({
        url: z.string(),
    }))

    const filename = `${opts.destination}.${ext}`
    const params: string[] = [
        '-y',
        '-i',
        hlsUrl,
        '-c',
        'copy',
    ]

    let artworkWritten = false
    try {
        if (ext === 'mp3') {
            if (artworkBytes) {
                // mp3 artwork is passed to ffmpeg as a second input file
                await writeFile(artworkPath, artworkBytes)
                artworkWritten = true
                params.push(
                    '-i',
                    artworkPath,
                    '-map',
                    '0:a',
                    '-map',
                    '1:0',
                )
            }
            // zx escapes every array item as a single argument, so the values
            // must not carry shell-style quotes (they would end up in the tags)
            params.push(
                '-id3v2_version',
                '3',
                '-metadata:s:v',
                'title=Album cover',
                '-metadata:s:v',
                'comment=Cover (front)',
            )
        } else if (ext === 'ogg' && artworkBytes) {
            // ogg artwork is embedded as a base64 `metadata_block_picture` tag
            const blob = base64.encode(await generateOpusImageBlob(artworkBytes))
            params.push(
                '-metadata',
                `metadata_block_picture=${blob}`,
            )
        }

        params.push(
            '-metadata',
            `title=${track.title}`,
            '-metadata',
            `artist=${track.user.username}`,
            filename,
        )

        await $`ffmpeg ${params}`.quiet(true)
    } finally {
        // do not leave the temporary artwork file behind, even if ffmpeg failed
        if (artworkWritten) await rm(artworkPath, { force: true })
    }
}

/**
 * Downloads every track of a playlist into
 * `assets/soundcloud-dl/<user> - <playlist title>/`, prefixing each file with
 * its zero-padded position in the playlist.
 *
 * Tracks present only as stubs are first resolved through the API in chunks of 20.
 */
async function downloadPlaylist(playlist: ScPlaylist): Promise<void> {
    const tracks: ScTrack[] = []
    const tracksToFetch = new Set<number>()
    const trackIdToPosition = new Map<number, number>()

    for (let i = 0; i < playlist.tracks.length; i++) {
        const track = playlist.tracks[i]
        trackIdToPosition.set(track.id, i + 1)
        // only full track objects carry `user`; stubs have to be fetched separately
        if ('user' in track) {
            tracks.push(track)
        } else {
            tracksToFetch.add(track.id)
        }
    }

    const spinnies = new Spinnies()

    if (tracksToFetch.size) {
        let remaining = tracksToFetch.size
        spinnies.add('fetching', { text: `fetching ${remaining} tracks` })
        await asyncPool(chunks(Array.from(tracksToFetch), 20), async (ids) => {
            const res = await fetchTracksById(Array.from(ids))
            for (const track of res) {
                tracks.push(track)
            }
            remaining -= ids.length
            spinnies.update('fetching', { text: `fetching ${remaining} tracks` })
        })
        spinnies.succeed('fetching')
    }

    const destDir = join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
    await mkdir(destDir, { recursive: true })

    // width of the largest position, e.g. 10 tracks -> 2 digits
    const posPadSize = String(playlist.tracks.length).length

    await asyncPool(tracks, async (track) => {
        const position = trackIdToPosition.get(track.id)
        if (position === undefined) throw new Error(`track ${track.id} is not part of the playlist`)

        const filename = `${position.toString().padStart(posPadSize, '0')}. ${track.user.username} - ${track.title}`
        spinnies.add(`${track.id}`, { text: filename })
        await downloadTrack(track, {
            // titles may contain path separators and other unsafe characters
            destination: join(destDir, sanitizeFilename(filename)),
        })
        spinnies.remove(`${track.id}`)
    }, { limit: 8 })

    console.log('done')
    spinnies.stopAll()
}

await downloadPlaylist(await fetchPlaylistByUrl('https://soundcloud.com/user-398958278/sets/l2grace'))