// utils/csv.ts
// Provenance: commit 8c04afc6d2c2d5b8fdc9eac17648efdfc30989f4
// (desu-bot, Wed, 14 May 2025 09:39:22 +0000, "chore: update public repo").

import { FramedReader, type IReadable, TextDelimiterCodec } from '@fuman/io'

/**
 * Options controlling how {@link CsvReader} splits and interprets its input.
 *
 * `delimiter`, `quote` and `quoteEscape` are compared against one character
 * of the line at a time, so they are effectively single-character values.
 */
interface CsvReaderOptions {
  /** @default '\n' */
  lineDelimiter: string
  /** @default ',' */
  delimiter: string
  /** @default '"' */
  quote: string
  /** @default '"' */
  quoteEscape: string

  /**
   * if true, missing values in a line will be treated as empty strings
   * @default false
   */
  assumeEmptyValues: boolean

  /**
   * whether to treat header line as a data line
   *
   * when true, `schema` must be provided: it is used as the header and the
   * first line of the stream is parsed as a regular row.
   * @default false
   */
  includeHeader: boolean
}

/**
 * Line-oriented CSV reader that yields one `field name -> value` record per
 * non-blank line.
 *
 * Unless `includeHeader` is set, the first non-blank line is consumed as the
 * header; when a `schema` is given the header must match it exactly.
 *
 * NOTE(review): `Fields` is constrained to `string[]` because the schema is
 * joined for error messages and stored as the header; confirm this is the
 * intended constraint/default for the type parameter.
 */
export class CsvReader<Fields extends string[] = string[]> {
  // presumably yields one string frame per `lineDelimiter`-separated line and
  // `null` once the stream is exhausted; verify against @fuman/io
  #codec: FramedReader<string>
  readonly options: CsvReaderOptions
  #schema?: Fields
  #header?: string[]

  /**
   * @param stream  source to read the csv from
   * @param options  parser options plus an optional expected `schema`
   * @throws Error if `includeHeader` is true but no `schema` was passed
   */
  constructor(
    stream: IReadable,
    options: Partial<CsvReaderOptions> & {
      /** fields that are expected in the csv */
      schema?: Fields
    } = {},
  ) {
    const { schema, ...overrides } = options

    this.options = {
      lineDelimiter: '\n',
      delimiter: ',',
      quote: '"',
      quoteEscape: '"',
      assumeEmptyValues: false,
      includeHeader: false,
      ...overrides,
    }

    this.#codec = new FramedReader(stream, new TextDelimiterCodec(this.options.lineDelimiter))
    this.#schema = schema

    if (this.options.includeHeader) {
      if (!schema) throw new Error('schema is required if includeHeader is true')
      // there is no header line to consume: the schema *is* the header
      this.#header = schema
    }
  }

  /**
   * Read the next record.
   *
   * Blank (whitespace-only) lines are skipped. Each line is trimmed before
   * parsing, which also drops a trailing `\r` from CRLF input.
   *
   * @returns the parsed record, or `null` once the stream is exhausted
   * @throws Error on header/schema mismatch, malformed quoting, too many
   *   fields, or (unless `assumeEmptyValues`) missing fields
   */
  async read(): Promise<Record<Fields[number], string> | null> {
    for (;;) {
      const frame = await this.#codec.read()
      // only `null` marks end of stream; an empty frame is just a blank line
      if (frame == null) return null

      const line = frame.trim()
      if (line === '') continue

      if (!this.#header) {
        this.#header = this.#parseHeader(line)
        continue
      }

      return this.#parseLine(line, this.#header) as Record<Fields[number], string>
    }
  }

  /** Split the header line into trimmed field names and validate them against the schema, if any. */
  #parseHeader(line: string): string[] {
    const header = line.split(this.options.delimiter).map(s => s.trim())
    const schema = this.#schema

    if (schema !== undefined) {
      const matches = schema.length === header.length
        && schema.every((name, idx) => name === header[idx])
      if (!matches) {
        throw new Error(`schema and header do not match (expected ${schema.join(', ')}; got ${header.join(', ')})`)
      }
    }

    return header
  }

  /** Parse a single non-empty data line into a record keyed by `header`. */
  #parseLine(line: string, header: readonly string[]): Record<string, string> {
    const { delimiter, quote, quoteEscape, assumeEmptyValues } = this.options
    const row: Record<string, string> = {}

    let insideQuote = false
    let fieldIdx = 0
    let value = ''

    // store the accumulated value under the next header name
    const commitField = (): void => {
      const name = header[fieldIdx]
      if (name === undefined) throw new Error('too many fields')
      row[name] = value
      fieldIdx += 1
      value = ''
    }

    for (let i = 0; i < line.length; i++) {
      const ch = line[i]
      const next = line[i + 1]

      // escaped quote inside a quoted value -> literal quote character
      if (ch === quoteEscape && insideQuote && next === quote) {
        i++
        value += quote
        continue
      }

      if (ch === quote) {
        if (!insideQuote) {
          if (value !== '') {
            throw new Error('unexpected open quote mid-value')
          }
          insideQuote = true
          continue
        }

        // a closing quote must be followed by a delimiter or the end of line
        if (i !== line.length - 1 && next !== delimiter) {
          throw new Error(`unexpected close quote mid-value at ${i}`)
        }

        insideQuote = false
        continue
      }

      if (!insideQuote && ch === delimiter) {
        commitField()
        continue
      }

      value += ch
    }

    if (insideQuote) {
      throw new Error('unterminated quoted value')
    }

    // the last field has no trailing delimiter
    commitField()

    if (fieldIdx < header.length) {
      if (!assumeEmptyValues) {
        throw new Error(`missing values for fields: ${header.slice(fieldIdx).join(', ')}`)
      }
      for (const name of header.slice(fieldIdx)) {
        row[name] = ''
      }
    }

    return row
  }

  /** Iterate over the remaining records; finishes when {@link read} returns `null`. */
  [Symbol.asyncIterator](): AsyncIterableIterator<Record<Fields[number], string>> {
    const iter: AsyncIterableIterator<Record<Fields[number], string>> = {
      next: async () => {
        const obj = await this.read()
        if (!obj) return { done: true, value: undefined }
        return { done: false, value: obj }
      },
      [Symbol.asyncIterator]: () => iter,
    }

    return iter
  }
}