import { FramedReader, type IReadable, TextDelimiterCodec } from '@fuman/io'

/**
 * Parsing options for {@link CsvReader}.
 *
 * `delimiter`, `quote` and `quoteEscape` are each compared against a single
 * character of the line at a time, so a multi-character value never matches.
 */
interface CsvReaderOptions {
    /** @default '\n' */
    lineDelimiter: string
    /** @default ',' */
    delimiter: string
    /** @default '"' */
    quote: string
    /** @default '"' */
    quoteEscape: string
    /**
     * if true, missing values in a line will be treated as empty strings
     * @default false
     */
    assumeEmptyValues: boolean
    /**
     * whether to treat all data from the readable as data (requires `schema` to be set)
     * @default false
     */
    skipHeader: boolean
}

/**
 * Splits the header line into field names.
 *
 * The header is split on the delimiter only (quotes are not interpreted)
 * and every name is trimmed.
 */
function parseCsvHeader(line: string, delimiter: string): string[] {
    return line.split(delimiter).map(name => name.trim())
}

/** Returns true when both lists contain the same names in the same order. */
function sameFields(expected: readonly string[], actual: readonly string[]): boolean {
    return expected.length === actual.length
        && expected.every((name, idx) => name === actual[idx])
}

/**
 * Parses a single (already trimmed, non-empty) data line into a record
 * keyed by the names in `header`.
 *
 * @throws Error on a quote opening mid-value, a closing quote that is not
 *   followed by a delimiter or end of line, more values than header fields,
 *   or (unless `assumeEmptyValues` is set) fewer values than header fields.
 */
function parseCsvLine(
    line: string,
    header: readonly string[],
    options: CsvReaderOptions,
): Record<string, string> {
    const { delimiter, quote, quoteEscape, assumeEmptyValues } = options

    const row: Record<string, string> = {}
    let fieldIdx = 0
    let value = ''
    let insideQuote = false

    // Stores a finished value under the next header name. Checking the bound
    // here (rather than after incrementing) guarantees that an extra value is
    // rejected instead of being stored under the key "undefined".
    const commit = (finished: string): void => {
        const field = header[fieldIdx]
        if (field === undefined) {
            throw new Error('too many fields')
        }
        row[field] = finished
        fieldIdx += 1
    }

    for (let i = 0; i < line.length; i++) {
        const char = line[i]

        // escaped quote inside a quoted value: emit one literal quote
        if (insideQuote && char === quoteEscape && line[i + 1] === quote) {
            i++
            value += quote
            continue
        }

        if (char === quote) {
            if (!insideQuote) {
                if (value !== '') {
                    throw new Error('unexpected open quote mid-value')
                }
                insideQuote = true
                continue
            }

            // a closing quote must be the last character of the value
            if (i !== line.length - 1 && line[i + 1] !== delimiter) {
                throw new Error(`unexpected close quote mid-value at ${i}`)
            }
            insideQuote = false
            continue
        }

        if (!insideQuote && char === delimiter) {
            commit(value)
            value = ''
            continue
        }

        value += char
    }

    // NOTE: an unterminated quote is not reported; the value simply runs to
    // the end of the line.
    commit(value)

    if (fieldIdx < header.length) {
        if (!assumeEmptyValues) {
            throw new Error(`missing values for fields: ${header.slice(fieldIdx).join(', ')}`)
        }
        for (const field of header.slice(fieldIdx)) {
            row[field] = ''
        }
    }

    return row
}

/**
 * Line-by-line CSV reader on top of a {@link FramedReader}.
 *
 * The first non-blank line is taken as the header unless `skipHeader` is set,
 * in which case `schema` provides the field names. Every following non-blank
 * line is returned as a record mapping field name to its string value.
 */
export class CsvReader<Fields extends string[] = string[]> {
    #codec: FramedReader<string>
    #schema?: Fields
    #header?: string[]

    readonly options: CsvReaderOptions

    /**
     * @param stream  source of the raw CSV data
     * @param options  parsing options, plus an optional `schema`
     * @throws Error if `skipHeader` is set without a `schema`
     */
    constructor(
        stream: IReadable,
        options: Partial<CsvReaderOptions> & {
            /** fields that are expected in the csv */
            schema?: Fields
        } = {},
    ) {
        this.options = {
            lineDelimiter: '\n',
            delimiter: ',',
            quote: '"',
            quoteEscape: '"',
            assumeEmptyValues: false,
            skipHeader: false,
            ...options,
        }
        this.#codec = new FramedReader(stream, new TextDelimiterCodec(this.options.lineDelimiter))
        this.#schema = options.schema

        if (options.skipHeader) {
            if (!options.schema) throw new Error('schema is required if skipHeader is true')
            // there is no header line in the data, so the schema acts as one
            this.#header = options.schema
        }
    }

    /**
     * Reads the next record.
     *
     * Lines are trimmed, and lines that are empty after trimming are skipped.
     *
     * @returns the next record, or `null` once the underlying reader is exhausted
     * @throws Error if the header does not match the provided `schema`,
     *   or if a data line is malformed (see the thrown message)
     */
    async read(): Promise<Record<Fields[number], string> | null> {
        for (;;) {
            const raw = await this.#codec.read()
            // Only a missing frame ends the read; an empty string is a blank
            // line and is skipped below. (Assumes FramedReader.read() yields
            // null at end of input.)
            if (raw == null) return null

            const line = raw.trim()
            if (line === '') continue

            if (!this.#header) {
                const header = parseCsvHeader(line, this.options.delimiter)
                this.#header = header

                // the header can only be validated when a schema was given
                const schema = this.#schema
                if (schema && !sameFields(schema, header)) {
                    throw new Error(`schema and header are not the same (expected ${schema.join(', ')}; got ${header.join(', ')})`)
                }
                continue
            }

            return parseCsvLine(line, this.#header, this.options) as Record<Fields[number], string>
        }
    }

    /** Iterates over all remaining records, ending when {@link read} returns `null`. */
    [Symbol.asyncIterator](): AsyncIterableIterator<Record<Fields[number], string>> {
        const iter: AsyncIterableIterator<Record<Fields[number], string>> = {
            next: async () => {
                const obj = await this.read()
                if (!obj) return { done: true, value: undefined }

                return { done: false, value: obj }
            },
            [Symbol.asyncIterator]: () => iter,
        }

        return iter
    }
}