chore: update public repo

This commit is contained in:
desu-bot 2025-02-19 02:30:26 +00:00
parent c118bcbfc3
commit f02ccb6029
No known key found for this signature in database
12 changed files with 510 additions and 2 deletions

1
scripts/misc/shikimori/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/_very-secret-ratelimit-bypass.ts

View file

@ -0,0 +1,53 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Scrapes the `/related` payload of every title in one collection into SQLite.
// The collection is chosen by the first CLI argument: `manga` or `ranobe`;
// any other value (or no argument) selects animes.
function pickCollection(kind: string | undefined) {
  switch (kind) {
    case 'manga':
      return 'mangas'
    case 'ranobe':
      return 'ranobe'
    default:
      return 'animes'
  }
}

const collection = pickCollection(process.argv[2])

const db = new Database('assets/shikimori.db')
db.exec(`
create table if not exists ${collection} (
id integer primary key,
data text not null
);
create table if not exists ${collection}_related (
id integer primary key,
data text not null
);
`)

// Both statements upsert a JSON blob keyed by the title id.
function prepareUpsert(table: string) {
  return db.prepare(`insert into ${table} (id, data) values (?, ?) on conflict (id) do update set data = excluded.data`)
}
const insertQuery = prepareUpsert(collection)
const insertRelatedQuery = prepareUpsert(`${collection}_related`)

// The first entry of the id-descending listing supplies the upper bound of the scan.
const newestFirst = await ffetchShiki(`/api/${collection}?order=id_desc`).json<any>()
const maxId = newestFirst[0].id
console.log('max id: %d', maxId)

const counter = counterIter(1, maxId)
await asyncPool(counter.iter, async (id) => {
  const atProgressMark = id % 1000 === 0
  if (atProgressMark) {
    console.log('currently at %d', id)
  }

  // Alternate mode (disabled): store the title itself instead of its relations.
  // const data = await ffetchShiki(`/api/${collection}/${id}`, {
  // validateResponse: false,
  // }).json<any>()
  // if (data.code === 404) {
  // return
  // }
  // insertQuery.run(id, JSON.stringify(data))

  const related = await ffetchShiki(`/api/${collection}/${id}/related`, {
    validateResponse: false,
  }).json<any>()

  // Ids answering with `code: 404` are skipped; everything else is stored.
  if (related.code !== 404) {
    insertRelatedQuery.run(id, JSON.stringify(related))
  }
}, { limit: 64 })

View file

@ -0,0 +1,30 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Scrapes the paginated `/api/bans` listing into the `bans` table.
const database = new Database('assets/shikimori.db')
database.exec(`
create table if not exists bans (
id integer primary key,
data text not null
);
`)
const upsertBan = database.prepare('insert into bans (id, data) values (?, ?) on conflict (id) do update set data = excluded.data')

// Open-ended page counter; it is stopped by the first page that comes back empty.
const pages = counterIter(1)

/** Downloads one listing page and stores every ban found on it. */
async function scrapeBansPage(page: number): Promise<void> {
  if (page % 100 === 0) {
    console.log('currently at page %d', page)
  }

  const bans = await ffetchShiki(`/api/bans?page=${page}`).json<any>()
  if (bans.length) {
    for (const ban of bans) {
      upsertBan.run(ban.id, JSON.stringify(ban))
    }
  } else {
    pages.end()
  }
}

await asyncPool(pages.iter, scrapeBansPage, { limit: 64 })

View file

@ -0,0 +1,59 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Scrapes every character from the REST API into the `characters` table.
const db = new Database('assets/shikimori.db')
db.pragma('journal_mode = WAL')
db.exec(`
create table if not exists characters (
id integer primary key,
data text not null
);
`)
const insertQuery = db.prepare('insert into characters (id, data) values (?, ?) on conflict (id) do update set data = excluded.data')

/**
 * Fetches one page (50 entries) of the GraphQL `characters` listing.
 *
 * @param page page number to request
 * @returns the ids found on that page; an empty array is treated by the caller
 *   as "this page is past the end of the listing"
 */
async function fetchCharacterIds(page: number): Promise<number[]> {
  console.log('trying page %d', page)
  const res = await ffetchShiki.post('/api/graphql', {
    json: {
      query: `{characters(page: ${page}, limit: 50) { id }}`,
    },
  }).json<any>()
  const items: Array<{ id: string | number }> = res.data.characters
  // Number() makes the coercion that Math.max would otherwise do implicitly explicit.
  return items.map(item => Number(item.id))
}

// find maxId with binary search for the last non-empty listing page.
// NOTE(review): this presumes the listing has at most 20000 pages and that the
// highest id sits on its last page — TODO confirm the default ordering.
let lowPage = 1
let highPage = 20000
let lowPageIds: number[] | undefined
while (lowPage < highPage) {
  // Upper midpoint: guarantees progress when `lowPage = midPage` and makes sure
  // the current upper bound itself gets probed (the floor midpoint never
  // reached it, which could leave the real last page unvisited).
  const midPage = Math.ceil((lowPage + highPage) / 2)
  const ids = await fetchCharacterIds(midPage)
  if (ids.length) {
    lowPage = midPage
    lowPageIds = ids
  } else {
    highPage = midPage - 1
  }
}
// The search never probed any non-empty page (e.g. the range collapsed onto
// page 1), so fetch the surviving page directly.
if (lowPageIds === undefined) {
  lowPageIds = await fetchCharacterIds(lowPage)
}
// An entirely empty listing leaves maxId at 0, so the scan below does nothing.
const maxId = lowPageIds.length ? Math.max(...lowPageIds) : 0
console.log('max id: %d', maxId)

const counter = counterIter(1, maxId)
await asyncPool(counter.iter, async (id) => {
  if (id % 1000 === 0) {
    console.log('currently at %d', id)
  }
  const data = await ffetchShiki(`/api/characters/${id}`, {
    validateResponse: false,
  }).json<any>()
  // Gaps in the id space answer with `code: 404` and are skipped.
  if (data.code === 404) {
    return
  }
  insertQuery.run(id, JSON.stringify(data))
}, { limit: 64 })

View file

@ -0,0 +1,49 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Scrapes all clubs in two passes: collect ids from the listing, then fetch each club.
const database = new Database('assets/shikimori.db')
database.pragma('journal_mode = WAL')
database.exec(`
create table if not exists clubs (
id integer primary key,
data text not null
);
`)
const upsertClub = database.prepare('insert into clubs (id, data) values (?, ?) on conflict (id) do update set data = excluded.data')

// Pass 1: collect clubs ids from the paginated listing (50 per page).
const clubIds = new Set<number>()
const listingPages = counterIter(1)
await asyncPool(listingPages.iter, async (page) => {
  const clubs = await ffetchShiki('/api/clubs', {
    query: { page, limit: 50 },
    validateResponse: false,
  }).json<any>()

  if (clubs.length) {
    for (const club of clubs) {
      clubIds.add(club.id)
    }
  } else {
    // A response without entries stops the page counter.
    listingPages.end()
  }
}, { limit: 16 })
console.log('collected %d clubs', clubIds.size)

// Pass 2: download the full record of every collected club.
await asyncPool(clubIds, async (clubId, position) => {
  if (position % 100 === 0) {
    console.log('currently at %d', position)
  }

  const club = await ffetchShiki(`/api/clubs/${clubId}`).json<any>()
  // NOTE(review): unlike the listing request, this one keeps response
  // validation on, so whether a 404 body ever reaches this check depends on
  // the base client's behaviour — confirm.
  if (club.code !== 404) {
    upsertClub.run(clubId, JSON.stringify(club))
  }
}, { limit: 64 })

View file

@ -0,0 +1,37 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Scrapes comments by id, starting at 11312000, into the `comments` table.
const db = new Database('assets/shikimori.db')
db.pragma('journal_mode = WAL')
db.exec(`
create table if not exists comments (
id integer primary key,
data text not null
);
`)
const insertQuery = db.prepare('insert into comments (id, data) values (?, ?) on conflict (id) do update set data = excluded.data')

// The upper bound is unknown, so the scan is open-ended and stops once more
// than 10k 404 responses have been seen without a successful one in between.
const counter = counterIter(11312000)
let consequent404 = 0
// Set once the stop condition has fired. Requests already in flight keep
// finishing after `counter.end()`; without this guard each of their 404s
// would call `end()` and print the stop message again.
let stopped = false
await asyncPool(counter.iter, async (id) => {
  if (id % 1000 === 0) {
    console.log('currently at %d', id)
  }
  const data = await ffetchShiki(`/api/comments/${id}`, {
    validateResponse: false,
  }).json<any>()
  if (data.code === 404) {
    consequent404++
    if (consequent404 > 10_000 && !stopped) {
      stopped = true
      counter.end()
      console.log('10k consequent 404-s, stopping')
    }
    return
  }
  // Any existing comment resets the streak.
  consequent404 = 0
  insertQuery.run(id, JSON.stringify(data))
}, { limit: 64 })

View file

@ -0,0 +1,59 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Scrapes every person from the REST API into the `people` table.
const db = new Database('assets/shikimori.db')
db.pragma('journal_mode = WAL')
db.exec(`
create table if not exists people (
id integer primary key,
data text not null
);
`)
const insertQuery = db.prepare('insert into people (id, data) values (?, ?) on conflict (id) do update set data = excluded.data')

/**
 * Fetches one page (50 entries) of the GraphQL `people` listing.
 *
 * @param page page number to request
 * @returns the ids found on that page; an empty array is treated by the caller
 *   as "this page is past the end of the listing"
 */
async function fetchPeopleIds(page: number): Promise<number[]> {
  console.log('trying page %d', page)
  const res = await ffetchShiki.post('/api/graphql', {
    json: {
      query: `{people(page: ${page}, limit: 50) { id }}`,
    },
  }).json<any>()
  const items: Array<{ id: string | number }> = res.data.people
  // Number() makes the coercion that Math.max would otherwise do implicitly explicit.
  return items.map(item => Number(item.id))
}

// find maxId with binary search for the last non-empty listing page.
// NOTE(review): this presumes the listing has at most 20000 pages and that the
// highest id sits on its last page — TODO confirm the default ordering.
let lowPage = 1
let highPage = 20000
let lowPageIds: number[] | undefined
while (lowPage < highPage) {
  // Upper midpoint: guarantees progress when `lowPage = midPage` and makes sure
  // the current upper bound itself gets probed (the floor midpoint never
  // reached it, which could leave the real last page unvisited).
  const midPage = Math.ceil((lowPage + highPage) / 2)
  const ids = await fetchPeopleIds(midPage)
  if (ids.length) {
    lowPage = midPage
    lowPageIds = ids
  } else {
    highPage = midPage - 1
  }
}
// The search never probed any non-empty page (e.g. the range collapsed onto
// page 1), so fetch the surviving page directly.
if (lowPageIds === undefined) {
  lowPageIds = await fetchPeopleIds(lowPage)
}
// An entirely empty listing leaves maxId at 0, so the scan below does nothing.
const maxId = lowPageIds.length ? Math.max(...lowPageIds) : 0
console.log('max id: %d', maxId)

const counter = counterIter(1, maxId)
await asyncPool(counter.iter, async (id) => {
  if (id % 1000 === 0) {
    console.log('currently at %d', id)
  }
  const data = await ffetchShiki(`/api/people/${id}`, {
    validateResponse: false,
  }).json<any>()
  // Gaps in the id space answer with `code: 404` and are skipped.
  if (data.code === 404) {
    return
  }
  insertQuery.run(id, JSON.stringify(data))
}, { limit: 64 })

View file

@ -0,0 +1,129 @@
import { asyncPool } from '@fuman/utils'
import Database from 'better-sqlite3'
import { counterIter, ffetchShiki } from './utils.ts'
// Schema for the `users` table: one JSON blob per user id.
const USERS_SCHEMA_SQL = `
create table if not exists users (
id integer primary key,
data text not null
);
`
// Upsert, so re-running the scraper refreshes rows that are already stored.
const USERS_UPSERT_SQL = 'insert into users (id, data) values (?, ?) on conflict (id) do update set data = excluded.data'

const db = new Database('assets/shikimori.db')
db.pragma('journal_mode = WAL')
db.exec(USERS_SCHEMA_SQL)
const insertQuery = db.prepare(USERS_UPSERT_SQL)
/**
 * Collects the complete friends list of a user.
 *
 * Requests `/api/users/{userId}/friends` page by page (100 per page, starting
 * at page 1) until a response without entries comes back.
 *
 * @param userId id of the user whose friends are fetched
 * @returns every friend entry received, in page order
 */
async function fetchUserFriends(userId: number) {
  const friends: any[] = []
  let page = 1
  while (true) {
    const batch = await ffetchShiki(`/api/users/${userId}/friends`, {
      query: { page, limit: 100 },
      validateResponse: false,
    }).json<any>()

    if (!batch.length) {
      return friends
    }

    for (const friend of batch) {
      friends.push(friend)
    }
    page += 1
  }
}
/**
 * Collects all anime or manga rates of a user.
 *
 * Requests `/api/users/{userId}/{kind}_rates` page by page (1000 per page,
 * starting at page 1) until the response is `null` or has no entries. Each
 * entry is slimmed down in place before it is kept: the `user` field is
 * dropped and the nested title object is replaced by a `{kind}_id` field.
 *
 * @param userId id of the user whose rates are fetched
 * @param kind which rate list to read
 * @returns the slimmed-down rate entries, in page order
 */
async function fetchUserRates(userId: number, kind: 'anime' | 'manga') {
  const endpoint = `/api/users/${userId}/${kind}_rates`
  const idField = `${kind}_id`
  const rates: any[] = []

  let page = 1
  while (true) {
    const batch = await ffetchShiki(endpoint, {
      query: { page, limit: 1000 },
      validateResponse: false,
    }).json<any>()

    const exhausted = batch === null || !batch.length
    if (exhausted) {
      break
    }

    for (const rate of batch) {
      // clean up unnecessary data before inserting
      delete rate.user
      const title = rate[kind]
      if (title) {
        rate[idField] = title.id
        delete rate[kind]
      }
      rates.push(rate)
    }
    page += 1
  }

  return rates
}
/**
 * Collects the complete history of a user.
 *
 * Requests `/api/users/{userId}/history` page by page (100 per page) until a
 * response without entries comes back. Entries carrying a `target` are
 * slimmed down in place: the nested object is replaced by `target_type`
 * (`'anime'` when the target url starts with `/animes/`, otherwise `'manga'`)
 * and `target_id`.
 *
 * @param userId id of the user whose history is fetched
 * @returns the history entries, in page order
 */
async function fetchUserHistory(userId: number) {
  const list: any[] = []
  // Pagination starts at 1, matching the friends and rates helpers in this
  // file. Starting at 0 is presumed wrong: the API documents pages as 1-based,
  // so page 0 would either repeat page 1 or fail — confirm against the API.
  for (let page = 1; ; page++) {
    const data = await ffetchShiki(`/api/users/${userId}/history`, {
      query: { page, limit: 100 },
      validateResponse: false,
    }).json<any>()
    // Anything that is not a non-empty array (empty page, `null`, an error
    // object) ends the pagination instead of throwing on `.length`.
    if (!Array.isArray(data) || !data.length) {
      break
    }
    for (const item of data) {
      if (item.target) {
        item.target_type = item.target.url.startsWith('/animes/') ? 'anime' : 'manga'
        item.target_id = item.target.id
        delete item.target
      }
      list.push(item)
    }
  }
  return list
}
// Scrapes users by id, starting at 467800. The upper bound is unknown, so the
// scan is open-ended and stops once more than 1k 404 responses have been seen
// without a successful one in between.
const counter = counterIter(467800)
let consequent404 = 0
// Set once the stop condition has fired. Requests already in flight keep
// finishing after `counter.end()`; without this guard each of their 404s
// would call `end()` and print the stop message again.
let stopped = false
await asyncPool(counter.iter, async (id) => {
  if (id % 100 === 0) {
    console.log('currently at %d', id)
  }
  const data = await ffetchShiki(`/api/users/${id}`, {
    validateResponse: false,
  }).json<any>()
  if (data.code === 404) {
    consequent404++
    if (consequent404 > 1_000 && !stopped) {
      stopped = true
      counter.end()
      console.log('1k consequent 404-s, stopping')
    }
    return
  }
  // Any existing user resets the streak.
  consequent404 = 0
  // fetch extra data (all five lookups run concurrently)
  const [
    favsData,
    friends,
    animeRates,
    mangaRates,
    history,
  ] = await Promise.all([
    ffetchShiki(`/api/users/${id}/favourites`).json<any>(),
    fetchUserFriends(id),
    fetchUserRates(id, 'anime'),
    fetchUserRates(id, 'manga'),
    fetchUserHistory(id),
  ])
  // The extras are stored alongside the profile under a single `_extra` key.
  data._extra = {
    favs: favsData,
    friends,
    animeRates,
    mangaRates,
    history,
  }
  insertQuery.run(id, JSON.stringify(data))
}, { limit: 32 })

View file

@ -0,0 +1,39 @@
import { ffetch as ffetchBase } from '../../../utils/fetch.ts'
import { rateLimitBypass } from './_very-secret-ratelimit-bypass.ts'
// Browser-like headers sent with every request made through `ffetchShiki`.
const SHIKI_DEFAULT_HEADERS = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
  'Accept-Language': 'en-US,en;q=0.9',
  'Accept-Encoding': 'gzip, deflate, br',
}

/**
 * Fetch client preconfigured for `https://shikimori.one`.
 *
 * Extends the project's base client with the base URL, the default headers
 * and an empty `retry` options object. The options exported by the
 * git-ignored `_very-secret-ratelimit-bypass.ts` module are spread last, so
 * they override any of the keys above.
 */
export const ffetchShiki = ffetchBase.extend({
  baseUrl: 'https://shikimori.one',
  headers: SHIKI_DEFAULT_HEADERS,
  retry: {},
  ...(rateLimitBypass as any),
})
/**
 * Creates an ascending integer counter that can be stopped from the outside.
 *
 * @param start first value produced (defaults to 0)
 * @param end last value produced, inclusive (defaults to `Infinity`, i.e. unbounded)
 * @returns `iter`, an iterable iterator yielding `start, start + 1, …, end`,
 *   and `end()`, which makes every later `next()` call report completion
 */
export function counterIter(start = 0, end = Infinity) {
  let upcoming = start
  let stopped = false

  const iter: IterableIterator<number> = {
    // The iterator is its own iterable, so it can be handed to `for…of` and friends.
    [Symbol.iterator]() {
      return iter
    },
    next() {
      const exhausted = stopped || upcoming > end
      if (exhausted) {
        return { value: undefined, done: true }
      }
      const value = upcoming
      upcoming += 1
      return { value, done: false }
    },
  }

  const stop = (): void => {
    stopped = true
  }

  return { iter, end: stop }
}