// mirror of https://git.stupid.fish/teidesu/scripts.git
// synced 2025-07-28 02:32:11 +10:00
// 424 lines · 12 KiB · TypeScript
import { mkdir, rm, writeFile } from 'node:fs/promises'
|
|
import { join } from 'node:path'
|
|
import { ffetchAddons } from '@fuman/fetch'
|
|
import { assert, asyncPool, base64, sleep } from '@fuman/utils'
|
|
import { load } from 'cheerio'
|
|
import Spinnies from 'spinnies'
|
|
import { ProxyAgent } from 'undici'
|
|
import { z } from 'zod'
|
|
import { $, ProcessOutput, question } from 'zx'
|
|
import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
|
|
import { sanitizeFilename } from '../../utils/fs.ts'
|
|
import { chunks, getEnv } from '../../utils/misc.ts'
|
|
import { generateOpusImageBlob } from '../../utils/opus.ts'
|
|
|
|
/** Query parameters attached to every request made through `ffetchApi`. */
const scApiDefaultQuery = {
  client_id: '4BowhSywvkJtklODQDzjNMq9sK9wyDJ4',
  app_version: '1736857534',
  app_locale: 'en',
}

/** Headers attached to every request made through `ffetchApi`; the token is read from `SOUNDCLOUD_TOKEN`. */
const scApiDefaultHeaders = {
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
  'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
}

/**
 * Fetch client for `https://api-v2.soundcloud.com`, with the rate-limit
 * handler addon installed.
 */
const ffetchApi = ffetchBase.extend({
  baseUrl: 'https://api-v2.soundcloud.com',
  query: scApiDefaultQuery,
  addons: [ffetchAddons.rateLimitHandler()],
  headers: scApiDefaultHeaders,
})
|
|
/** Cookie header value carrying the token read from `SOUNDCLOUD_TOKEN`. */
const scOauthCookie = `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`

/** Fetch client for `https://soundcloud.com` pages, sending the oauth cookie. */
const ffetchHtml = ffetchBase.extend({
  baseUrl: 'https://soundcloud.com',
  headers: { Cookie: scOauthCookie },
})
|
|
|
|
/** One downloadable encoding of a track, as listed under `media.transcodings`. */
const ScTranscoding = z.object({
  url: z.string(),
  preset: z.string(),
  format: z.object({
    protocol: z.string(),
    mime_type: z.string(),
  }),
  quality: z.string(),
  is_legacy_transcoding: z.boolean(),
})

/** The subset of the track owner's fields that this script reads. */
const ScTrackUser = z.object({
  username: z.string(),
  permalink: z.string(),
})

/** Schema of a full SoundCloud track object. */
const ScTrack = z.object({
  id: z.number(),
  kind: z.literal('track'),
  title: z.string(),
  duration: z.number(),
  description: z.string().nullable(),
  permalink_url: z.string(),
  // rewrite the `-large.jpg` artwork url suffix to the `-t500x500.jpg` variant
  artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(),
  media: z.object({
    transcodings: z.array(ScTranscoding),
  }),
  track_authorization: z.string(),
  user: ScTrackUser,
})
type ScTrack = z.infer<typeof ScTrack>
|
|
|
|
/** Partial track entry carrying only `id` and `kind`; accepted when the full `ScTrack` schema does not match. */
const ScTrackStub = z.object({
  id: z.number(),
  kind: z.literal('track'),
})

/** Schema of a SoundCloud playlist (set). */
const ScPlaylist = z.object({
  id: z.number(),
  title: z.string(),
  duration: z.number(),
  permalink_url: z.string(),
  genre: z.string().nullish(),
  description: z.string().nullish(),
  track_count: z.number(),
  user: z.object({
    username: z.string(),
  }),
  // the full schema is tried first, then the stub; a missing list becomes []
  tracks: z.array(z.union([ScTrack, ScTrackStub])).default(() => []),
})
type ScPlaylist = z.infer<typeof ScPlaylist>
|
|
|
|
/** Fields of one entry in a user's likes collection; it may carry a track, a playlist, or neither. */
const scLikeShape = {
  created_at: z.string(),
  kind: z.literal('like'),
  track: ScTrack.optional(),
  playlist: ScPlaylist.optional(),
}

/** Schema of a single "like" item. */
const ScLike = z.object(scLikeShape)
|
|
|
|
/** One element of the `window.__sc_hydration` array, as consumed by this script. */
interface ScHydrationEntry {
  hydratable: string
  data: unknown
}

/**
 * Extract the `window.__sc_hydration` array embedded in a SoundCloud HTML page.
 *
 * @param html  raw HTML of the page
 * @returns the parsed hydration entries (only checked to be an array; callers validate `data`)
 * @throws if the hydration script is missing or does not contain a JSON array
 */
function extractHydrationData(html: string): ScHydrationEntry[] {
  const prefix = 'window.__sc_hydration = '

  const $ = load(html)
  const scriptBody = $('script:contains(window.__sc_hydration = )').html()
  if (scriptBody == null) {
    throw new Error('hydration data not found: page has no window.__sc_hydration script')
  }

  // strip the assignment prefix and the trailing `;` (only if it is actually there)
  let json = scriptBody.replace(prefix, '').trim()
  if (json.endsWith(';')) json = json.slice(0, -1)

  const parsed: unknown = JSON.parse(json)
  if (!Array.isArray(parsed)) {
    throw new TypeError('hydration data is not an array')
  }

  // element shape is not validated here; each caller parses `data` with its own schema
  return parsed as ScHydrationEntry[]
}
|
|
|
|
/**
 * Load a track page and parse the track from its hydration data.
 *
 * @param url  url of the track page (requested through `ffetchHtml`)
 * @throws if the page has no `sound` hydration entry, or it fails `ScTrack` validation
 */
async function fetchTrackByUrl(url: string) {
  const page = await ffetchHtml(url).text()

  for (const entry of extractHydrationData(page)) {
    // the first `sound` entry is the one we want
    if (entry.hydratable === 'sound') {
      return ScTrack.parse(entry.data)
    }
  }

  throw new Error('no track found')
}
|
|
|
|
/**
 * Load a playlist page and parse the playlist from its hydration data.
 *
 * @param url  url of the playlist page (requested through `ffetchHtml`)
 * @throws if the page has no `playlist` hydration entry, or it fails `ScPlaylist` validation
 */
async function fetchPlaylistByUrl(url: string) {
  const page = await ffetchHtml(url).text()

  for (const entry of extractHydrationData(page)) {
    // the first `playlist` entry is the one we want
    if (entry.hydratable === 'playlist') {
      return ScPlaylist.parse(entry.data)
    }
  }

  throw new Error('no playlist found')
}
|
|
|
|
/**
 * Fetch a playlist from the api by its numeric id.
 *
 * @param id  playlist id
 * @returns the response validated against `ScPlaylist`
 */
async function fetchPlaylistById(id: number) {
  const endpoint = `/playlists/${id}`
  const query = { linked_partitioning: '1' }

  return ffetchApi(endpoint, { query }).parsedJson(ScPlaylist)
}
|
|
|
|
/**
 * Fetch several tracks from the api in a single request.
 *
 * @param trackIds  ids to request; sent as one comma-separated `ids` query parameter
 * @returns the response validated as an array of `ScTrack`
 */
async function fetchTracksById(trackIds: number[]) {
  const ids = trackIds.join(',')
  const TrackList = z.array(ScTrack)

  return ffetchApi('/tracks', { query: { ids } }).parsedJson(TrackList)
}
|
|
|
|
/**
 * Download a single track with ffmpeg, copying the audio stream and attaching
 * title/artist/comment metadata plus cover art (when the track has artwork).
 *
 * @param track  track to download
 * @param opts.destination  output path without extension; the extension is derived from the transcoding's mime type
 * @param opts.onRateLimit  called with the wait time when the api request for the stream url is rate limited
 * @param opts.onCdnRateLimit  called when ffmpeg reports `429 Too Many Requests`, before retrying
 * @throws if the track has no transcodings, its mime type is not `audio/*`, or ffmpeg fails for any reason other than a 429
 */
async function downloadTrack(track: ScTrack, opts: {
  /* download destination (filename without extension) */
  destination: string
  onRateLimit?: (waitTime: number) => void
  onCdnRateLimit?: () => void
}) {
  // find the best transcoding: the first one that is `hq` or opus wins,
  // otherwise fall back to the last one listed
  const { transcodings } = track.media
  const transcoding = transcodings.find(t => t.quality === 'hq' || t.preset === 'opus_0_0')
    ?? transcodings[transcodings.length - 1]
  if (!transcoding) {
    throw new Error(`track ${track.id} has no transcodings`)
  }

  const mimeMatch = transcoding.format.mime_type.match(/^audio\/(\w+)(;|$)/)
  if (!mimeMatch) {
    throw new Error(`track ${track.id}: unsupported mime type ${transcoding.format.mime_type}`)
  }
  let ext = mimeMatch[1]
  if (ext === 'mp4') ext = 'm4a'
  // `audio/mpeg` is the mime type of mp3; without this the mp3 cover-art branch below never runs
  if (ext === 'mpeg') ext = 'mp3'
  const filename = `${opts.destination}.${ext}`

  const artworkDir = 'assets'
  const artworkPath = join(artworkDir, `sc-tmp-${track.id}.jpg`)
  const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null

  // resolve the actual stream url for the chosen transcoding
  const { url: hlsUrl } = await ffetchApi(transcoding.url, {
    query: {
      track_authorization: track.track_authorization,
    },
    rateLimit: {
      isRejected(res) {
        return res.status === 429
      },
      defaultWaitTime: 60_000,
      maxRetries: 10,
      onRateLimitExceeded(_res, waitTime) {
        opts.onRateLimit?.(waitTime)
      },
    },
  }).parsedJson(z.object({
    url: z.string(),
  }))

  const params: string[] = [
    '-y',
    '-i',
    hlsUrl,
  ]

  try {
    if (artworkBytes) {
      if (ext === 'mp3' || ext === 'm4a') {
        // these containers take the cover as a second ffmpeg input, so it has to exist on disk
        await mkdir(artworkDir, { recursive: true })
        await writeFile(artworkPath, artworkBytes)
        params.push('-i', artworkPath)

        if (ext === 'mp3') {
          params.push(
            '-map',
            '1:v:0',
            '-id3v2_version',
            '3',
            '-metadata:s:v',
            'title=Album cover',
            '-metadata:s:v',
            'comment=Cover (front)',
          )
        } else {
          params.push(
            '-map',
            '1',
            '-disposition:v',
            'attached_pic',
          )
        }
      } else if (ext === 'ogg') {
        // ogg carries the cover inline as a base64 `metadata_block_picture` tag
        const blob = base64.encode(await generateOpusImageBlob(artworkBytes))
        params.push(
          '-metadata',
          `metadata_block_picture=${blob}`,
        )
      }
    }

    params.push(
      '-map',
      '0:a',
      '-c',
      'copy',
      '-metadata',
      `title=${track.title}`,
      '-metadata',
      `artist=${track.user.username}`,
      '-metadata',
      `comment=${track.description ?? ''}`,
      filename,
    )

    // retry for as long as ffmpeg reports a 429; any other failure is fatal
    while (true) {
      try {
        await $`ffmpeg ${params}`.quiet(true)
        break
      } catch (e) {
        if (!(e instanceof ProcessOutput)) {
          throw e
        }
        if (e.stderr.includes('429 Too Many Requests')) {
          opts.onCdnRateLimit?.()
          await sleep(10_000)
          continue
        }

        throw e
      }
    }
  } finally {
    // always remove the temporary artwork file, even when ffmpeg failed
    await rm(artworkPath, { force: true })
  }
}
|
|
|
|
/**
 * Download every track of a playlist into one directory, prefixing each file
 * with its zero-padded position in the playlist.
 *
 * Playlist entries that only carry an id are first resolved to full tracks
 * via `fetchTracksById`, in chunks of 20.
 *
 * @param playlist  playlist to download
 * @param params.destination  target directory; defaults to `assets/soundcloud-dl/<user> - <title>`
 */
async function downloadPlaylist(playlist: ScPlaylist, params: {
  destination?: string
} = {}) {
  const tracks: ScTrack[] = []
  const tracksToFetch = new Set<number>()
  const trackIdToPosition = new Map<number, number>()

  playlist.tracks.forEach((track, index) => {
    trackIdToPosition.set(track.id, index + 1)
    if ('user' in track) {
      // full track object, usable as-is
      tracks.push(track)
    } else {
      // id-only stub, needs to be fetched
      tracksToFetch.add(track.id)
    }
  })

  const spinnies = new Spinnies()

  if (tracksToFetch.size) {
    let remaining = tracksToFetch.size
    spinnies.add('fetching', { text: `fetching ${remaining} tracks` })
    await asyncPool(chunks(Array.from(tracksToFetch), 20), async (ids) => {
      const res = await fetchTracksById(Array.from(ids))
      for (const track of res) {
        tracks.push(track)
      }
      remaining -= ids.length
      spinnies.update('fetching', { text: `fetching ${remaining} tracks` })
    })
    spinnies.succeed('fetching', { text: `fetched ${tracks.length} tracks` })
  }

  const destDir = params.destination ?? join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
  await mkdir(destDir, { recursive: true })

  // pad to the digit count of the highest position; `ceil(log10(n))` is one
  // short at exact powers of ten (10 tracks would give a width of 1)
  const posPadSize = String(playlist.tracks.length).length

  await asyncPool(tracks, async (track) => {
    const position = trackIdToPosition.get(track.id)
    if (position === undefined) {
      throw new Error(`track ${track.id} is not part of the playlist`)
    }
    const filename = `${position.toString().padStart(posPadSize, '0')}. ${track.user.username} - ${track.title}`

    spinnies.add(`${track.id}`, { text: filename })
    await downloadTrack(track, {
      destination: join(destDir, sanitizeFilename(filename)),
      onRateLimit: (wait) => {
        spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
      },
      onCdnRateLimit: () => {
        spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
      },
    })

    spinnies.remove(`${track.id}`)
  })

  console.log('done')
  spinnies.stopAll()
}
|
|
|
|
/**
 * Download everything a user has liked: first the liked tracks, then each
 * liked playlist (into its own sub-directory).
 *
 * Output goes to `assets/soundcloud-dl/<username>-likes`.
 *
 * @param username  permalink of the user, as it appears in the profile url
 * @throws if the profile page has no `user` hydration entry, or pagination returns a `next_href` without an offset
 */
async function downloadLikes(username: string) {
  const spinnies = new Spinnies()
  spinnies.add('collect', { text: 'collecting likes...' })

  const userPage = await ffetchHtml(`/${username}`).text()
  const hydrationData = extractHydrationData(userPage)
  const user = hydrationData.find((it: { hydratable: string }) => it.hydratable === 'user')
  if (!user) throw new Error('no user found')
  const userData = z.object({
    likes_count: z.number(),
    playlist_likes_count: z.number(),
    id: z.number(),
  }).parse(user.data)

  const tracks: ScTrack[] = []
  const playlists: ScPlaylist[] = []
  const expectedTotal = userData.likes_count + userData.playlist_likes_count
  const updateSpinner = () => {
    // avoid NaN% for a user without any likes
    const percent = expectedTotal > 0
      ? Math.floor((tracks.length + playlists.length) / expectedTotal * 100)
      : 100
    spinnies.update('collect', {
      text: `[${percent}%] collecting liked tracks: ${tracks.length}/${userData.likes_count}, playlists: ${playlists.length}/${userData.playlist_likes_count}`,
    })
  }
  updateSpinner()

  // walk the paginated likes collection, following the offset in `next_href`
  let offset = '0'
  while (true) {
    const res = await ffetchApi(`/users/${userData.id}/likes`, {
      query: {
        limit: 100,
        offset,
        linked_partitioning: '1',
      },
    }).parsedJson(z.object({
      collection: z.array(ScLike),
      next_href: z.string().nullable(),
    }))

    for (const like of res.collection) {
      if (like.track) {
        tracks.push(like.track)
      } else if (like.playlist) {
        playlists.push(like.playlist)
      } else {
        console.warn('unknown like type:', like.created_at)
      }
    }

    updateSpinner()

    if (!res.next_href) break
    const nextOffset = new URL(res.next_href).searchParams.get('offset')
    if (nextOffset === null) {
      throw new Error(`likes pagination: next_href has no offset: ${res.next_href}`)
    }
    offset = nextOffset
  }

  spinnies.succeed('collect', { text: `collected ${tracks.length} tracks and ${playlists.length} playlists` })

  spinnies.add('tracks', { text: 'downloading tracks...' })
  let downloaded = 0
  const updateTracksSpinner = () => {
    spinnies.update('tracks', { text: `[${downloaded}/${tracks.length}] downloading tracks...` })
  }
  updateTracksSpinner()

  const baseDir = join('assets/soundcloud-dl', `${sanitizeFilename(username)}-likes`)
  await mkdir(baseDir, { recursive: true })

  // download the liked tracks themselves (this step was previously disabled,
  // so the counter always stayed at 0 and only playlists were downloaded)
  await asyncPool(tracks, async (track) => {
    const filename = `${track.user.username} - ${track.title}`
    spinnies.add(`${track.id}`, { text: filename })
    await downloadTrack(track, {
      destination: join(baseDir, sanitizeFilename(filename)),
      onRateLimit: (wait) => {
        spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
      },
      onCdnRateLimit: () => {
        spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
      },
    })
    spinnies.remove(`${track.id}`)
    downloaded += 1
    updateTracksSpinner()
  })

  spinnies.succeed('tracks', { text: `downloaded ${downloaded} tracks` })
  spinnies.stopAll()

  for (const playlist of playlists) {
    console.log(`\uDB83\uDCB8 ${playlist.title}`)

    // the liked entry may not include the track list, so re-fetch the playlist by id
    const fullPlaylist = await fetchPlaylistById(playlist.id)
    await downloadPlaylist(fullPlaylist, {
      destination: join(baseDir, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)),
    })
  }
}
|
|
|
|
// entry point: takes the url from argv (or prompts for it) and dispatches on its shape
const url = (process.argv[2] ?? await question('url > ')).trim()
if (!url.startsWith('https://soundcloud.com/')) {
  console.error('url must start with https://soundcloud.com/')
  process.exit(1)
}

// `\w` so that usernames containing underscores are recognised too
const playlistUrlRe = /^https:\/\/soundcloud\.com\/[\w-]+\/sets\//i
// `/likes` must be the whole path segment, so a track slug like `likes-foo` is not treated as a likes page
const likesUrlRe = /^https:\/\/soundcloud\.com\/([\w-]+)\/likes\/?(?:[?#]|$)/i

const likesMatch = likesUrlRe.exec(url)

if (playlistUrlRe.test(url)) {
  await downloadPlaylist(await fetchPlaylistByUrl(url))
} else if (likesMatch) {
  await downloadLikes(likesMatch[1])
} else {
  const track = await fetchTrackByUrl(url)
  const filename = `${track.user.username} - ${track.title}`
  console.log('downloading track:', filename)

  // make sure the output directory exists before ffmpeg writes into it
  const outDir = 'assets/soundcloud-dl'
  await mkdir(outDir, { recursive: true })
  await downloadTrack(track, {
    destination: join(outDir, sanitizeFilename(filename)),
  })
}
|