teidesu-scripts/utils/csv.ts
2025-05-14 09:39:22 +00:00

147 lines
3.9 KiB
TypeScript

import { FramedReader, type IReadable, TextDelimiterCodec } from '@fuman/io'
/**
 * Parsing options accepted by `CsvReader`.
 *
 * Every field is optional at the call site (the constructor takes a `Partial`
 * of this interface and fills in the defaults listed below).
 */
interface CsvReaderOptions {
/**
 * separator between records; handed to the `TextDelimiterCodec` that frames the input stream
 * @default '\n'
 */
lineDelimiter: string
/**
 * separator between fields within a record.
 * data lines are scanned one character at a time and each character is compared
 * against this value, so it is expected to be a single character
 * @default ','
 */
delimiter: string
/**
 * character that opens and closes a quoted value (compared one character at a time)
 * @default '"'
 */
quote: string
/**
 * character that, inside a quoted value and immediately followed by `quote`,
 * produces a literal `quote` in the value
 * @default '"'
 */
quoteEscape: string
/**
 * if true, missing values in a line will be treated as empty strings;
 * if false, a line with fewer values than header fields is an error
 * @default false
 */
assumeEmptyValues: boolean
/**
 * whether to treat header line as a data line.
 * when true the input is assumed to have no header line of its own: the `schema`
 * passed to the reader is used as the header (and is therefore required), and the
 * first line is parsed as data
 * @default false
 */
includeHeader: boolean
}
/**
 * Streaming CSV reader that parses one record per framed line.
 *
 * The input stream is split into lines using `options.lineDelimiter`; every
 * non-blank line is parsed into an object keyed by the header field names.
 * The header is taken from the first non-blank line of the input, unless
 * `includeHeader` is set, in which case `schema` is used as the header and the
 * first line is treated as data.
 *
 * Quoted values may contain the delimiter and escaped quotes, but because the
 * input is framed line by line a quoted value cannot span several lines.
 *
 * Instances are async-iterable: `for await (const row of reader) { ... }`.
 */
export class CsvReader<const Fields extends string[] = string[]> {
  /** reader that yields one frame (line) of the input stream per `read()` call */
  #codec: FramedReader<string>
  /** effective options: defaults overlaid with whatever the caller passed */
  readonly options: CsvReaderOptions
  /** field names the caller expects, if any */
  #schema?: Fields
  /** field names used as keys of parsed rows; unset until the header is known */
  #header?: string[]

  /**
   * @param stream  source of the raw csv data
   * @param options  parsing options, plus an optional `schema`
   * @throws if `includeHeader` is true but no `schema` was provided
   */
  constructor(
    stream: IReadable,
    options: Partial<CsvReaderOptions> & {
      /** fields that are expected in the csv */
      schema?: Fields
    },
  ) {
    this.options = {
      lineDelimiter: '\n',
      delimiter: ',',
      quote: '"',
      quoteEscape: '"',
      assumeEmptyValues: false,
      includeHeader: false,
      ...options,
    }
    this.#codec = new FramedReader(stream, new TextDelimiterCodec(this.options.lineDelimiter))
    this.#schema = options.schema

    if (this.options.includeHeader) {
      // there is no header line in the input, so the schema is the only source of field names
      if (!options.schema) throw new Error('schema is required if includeHeader is true')
      this.#header = options.schema
    }
  }

  /**
   * Reads and parses the next data line.
   *
   * Blank lines are skipped. If the header is not known yet, the first
   * non-blank line is consumed as the header (and checked against `schema`
   * when one was provided) before the next data line is read.
   *
   * @returns the parsed row, or `null` once the underlying reader has no more lines
   * @throws if the header does not match the schema, a quote appears in the
   *   middle of a value, or a line has too many / too few values
   */
  async read(): Promise<Record<Fields[number], string> | null> {
    for (;;) {
      const raw = await this.#codec.read()
      // only a missing frame ends the input; an empty frame is just a blank line
      if (raw == null) return null

      const line = raw.trim()
      if (line === '') continue

      if (!this.#header) {
        const header = line.split(this.options.delimiter).map(s => s.trim())
        this.#header = header

        // a schema is optional: without one, whatever the file declares is accepted
        const schema = this.#schema
        if (schema) {
          const matches = header.length === schema.length
            && header.every((name, idx) => name === schema[idx])
          if (!matches) {
            throw new Error(`schema and header are not the same (expected ${schema.join(', ')}; got ${header.join(', ')})`)
          }
        }
        continue
      }

      return this.#parseLine(line, this.#header)
    }
  }

  /** Splits a single non-blank line into values and maps them onto `header`. */
  #parseLine(line: string, header: readonly string[]): Record<Fields[number], string> {
    const { delimiter, quote, quoteEscape, assumeEmptyValues } = this.options
    const row: Record<string, string> = {}
    let insideQuote = false
    let fieldIdx = 0
    let value = ''

    // stores the value collected so far under the next header field
    const commit = (): void => {
      const name = header[fieldIdx]
      // running past the end of the header means the line has more values than fields
      if (name === undefined) throw new Error('too many fields')
      row[name] = value
      fieldIdx += 1
      value = ''
    }

    for (let i = 0; i < line.length; i++) {
      const ch = line[i]

      // escaped quote inside a quoted value: emit one literal quote, skip both characters
      if (insideQuote && ch === quoteEscape && line[i + 1] === quote) {
        value += quote
        i++
        continue
      }

      if (ch === quote) {
        if (!insideQuote) {
          // an opening quote is only valid at the very start of a value
          if (value !== '') throw new Error('unexpected open quote mid-value')
          insideQuote = true
          continue
        }
        // a closing quote must be followed by a delimiter or the end of the line
        if (i !== line.length - 1 && line[i + 1] !== delimiter) {
          throw new Error(`unexpected close quote mid-value at ${i}`)
        }
        insideQuote = false
        continue
      }

      if (!insideQuote && ch === delimiter) {
        commit()
        continue
      }

      value += ch
    }
    // the last value has no trailing delimiter
    // NOTE: a quote left open at the end of the line is not reported; the rest of the line becomes the value
    commit()

    if (fieldIdx < header.length) {
      const missing = header.slice(fieldIdx)
      if (!assumeEmptyValues) {
        throw new Error(`missing values for fields: ${missing.join(', ')}`)
      }
      for (const name of missing) row[name] = ''
    }

    return row as Record<Fields[number], string>
  }

  /** Iterates over the remaining rows, finishing when `read()` returns `null`. */
  [Symbol.asyncIterator](): AsyncIterableIterator<Record<Fields[number], string>> {
    const iter: AsyncIterableIterator<Record<Fields[number], string>> = {
      next: async () => {
        const row = await this.read()
        if (!row) return { done: true, value: undefined }
        return { done: false, value: row }
      },
      [Symbol.asyncIterator]: () => iter,
    }
    return iter
  }
}