// mirror of https://git.stupid.fish/teidesu/scripts.git
// synced 2025-07-28 02:32:11 +10:00
// original file: 147 lines, 3.9 KiB, TypeScript
import { FramedReader, type IReadable, TextDelimiterCodec } from '@fuman/io'
|
|
|
|
/**
 * Parsing options for the CSV reader.
 *
 * The row parser compares `delimiter`, `quote` and `quoteEscape` against
 * individual characters of a line, so each of them is expected to be a
 * single character.
 */
interface CsvReaderOptions {
    /**
     * string that separates lines in the input stream
     * @default '\n'
     */
    lineDelimiter: string

    /**
     * character that separates values within a line
     * @default ','
     */
    delimiter: string

    /**
     * character that opens and closes a quoted value
     * @default '"'
     */
    quote: string

    /**
     * character that, inside a quoted value and directly followed by
     * `quote`, produces a literal quote character
     * @default '"'
     */
    quoteEscape: string

    /**
     * if true, missing values in a line will be treated as empty strings
     * @default false
     */
    assumeEmptyValues: boolean

    /**
     * whether to treat header line as a data line
     * (column names are then taken from the schema, which becomes required)
     * @default false
     */
    includeHeader: boolean
}
|
|
|
|
/**
 * Streaming CSV reader: pulls delimiter-separated lines from a stream and
 * turns each data line into an object keyed by column name.
 *
 * Column names come from the first non-blank line of the stream, unless
 * `includeHeader` is set, in which case they come from `schema` and the
 * first line is already treated as data. When both a header line and a
 * `schema` are present, they must match exactly.
 *
 * Every line is parsed on its own, so a quoted value cannot contain the
 * line delimiter.
 */
export class CsvReader<const Fields extends string[] = string[]> {
    #codec: FramedReader<string>
    readonly options: CsvReaderOptions
    #schema?: Fields
    /** column names; unset until the header line has been consumed */
    #header?: string[]

    /**
     * @param stream  source of the raw csv bytes
     * @param options  parsing options; omitted members fall back to their defaults
     * @throws if `includeHeader` is true but no `schema` was given
     */
    constructor(
        stream: IReadable,
        options: Partial<CsvReaderOptions> & {
            /** fields that are expected in the csv */
            schema?: Fields
        } = {},
    ) {
        this.options = {
            lineDelimiter: '\n',
            delimiter: ',',
            quote: '"',
            quoteEscape: '"',
            assumeEmptyValues: false,
            includeHeader: false,
            ...options,
        }

        this.#codec = new FramedReader(stream, new TextDelimiterCodec(this.options.lineDelimiter))
        this.#schema = options.schema

        if (this.options.includeHeader) {
            if (!options.schema) throw new Error('schema is required if includeHeader is true')
            // there is no header line to consume, so column names come from the schema
            this.#header = options.schema
        }
    }

    /**
     * Read the next record from the stream.
     *
     * Blank (whitespace-only) lines are skipped. The header line, if one is
     * expected, is consumed transparently before the first record.
     *
     * @returns the record keyed by column name, or `null` once the stream is exhausted
     * @throws if the header does not match the schema, or a line is malformed
     *   (stray quote, too many values, or missing values while
     *   `assumeEmptyValues` is off)
     */
    async read(): Promise<Record<Fields[number], string> | null> {
        while (true) {
            const raw = await this.#codec.read()
            // only a missing frame ends the stream; an empty string is just a blank line
            if (raw == null) return null

            const line = raw.trim()
            if (line === '') continue

            if (!this.#header) {
                const header = line.split(this.options.delimiter).map(s => s.trim())
                this.#header = header

                // the schema is optional: without one the header is accepted as-is
                const schema = this.#schema
                if (schema && JSON.stringify(schema) !== JSON.stringify(header)) {
                    throw new Error(`schema and header do not match (expected ${schema.join(', ')}; got ${header.join(', ')})`)
                }
                continue
            }

            return this.#parseRow(line, this.#header)
        }
    }

    /** Split a single data line into values and pair them with the column names. */
    #parseRow(line: string, header: string[]): Record<Fields[number], string> {
        const { delimiter, quote, quoteEscape, assumeEmptyValues } = this.options

        const obj: Record<string, string> = {}
        let insideQuote = false
        let fieldIdx = 0
        let value = ''

        // store the value collected so far under the next column name
        const commit = (): void => {
            const name = header[fieldIdx]
            // running past the last column means the line has an extra value
            if (name === undefined) throw new Error('too many fields')
            obj[name] = value
            fieldIdx += 1
            value = ''
        }

        for (let i = 0; i < line.length; i++) {
            const ch = line[i]
            const next = line[i + 1]

            // escaped quote inside a quoted value: emit one quote and skip the pair
            if (insideQuote && ch === quoteEscape && next === quote) {
                i++
                value += quote
                continue
            }

            if (ch === quote) {
                if (!insideQuote) {
                    // a quote may only open at the very start of a value
                    if (value !== '') {
                        throw new Error('unexpected open quote mid-value')
                    }
                    insideQuote = true
                    continue
                }

                // a closing quote must be followed by a delimiter or the end of the line
                if (i !== line.length - 1 && next !== delimiter) {
                    throw new Error(`unexpected close quote mid-value at ${i}`)
                }

                insideQuote = false
                continue
            }

            if (insideQuote) {
                // delimiters are literal while inside quotes
                value += ch
                continue
            }

            if (ch === delimiter) {
                commit()
                continue
            }

            value += ch
        }

        // the last value has no trailing delimiter
        commit()

        if (fieldIdx < header.length) {
            const missing = header.slice(fieldIdx)
            if (!assumeEmptyValues) {
                throw new Error(`missing values for fields: ${missing.join(', ')}`)
            }
            for (const name of missing) obj[name] = ''
        }

        return obj as Record<Fields[number], string>
    }

    /** Iterate over the remaining records; iteration ends when `read()` returns `null`. */
    [Symbol.asyncIterator](): AsyncIterableIterator<Record<Fields[number], string>> {
        const iter: AsyncIterableIterator<Record<Fields[number], string>> = {
            next: async () => {
                const obj = await this.read()
                if (!obj) return { done: true, value: undefined }
                return { done: false, value: obj }
            },
            [Symbol.asyncIterator]: () => iter,
        }

        return iter
    }
}
|