fix dtypes not used on csv parse by risenW · Pull Request #657 · javascriptdata/danfojs · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
11 changes: 7 additions & 4 deletions src/danfojs-base/io/browser/io.csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,16 @@ import Papa from 'papaparse'
*/
const $readCSV = async (file: any, options?: CsvInputOptionsBrowser): Promise<DataFrame> => {
const frameConfig = options?.frameConfig || {}
const hasStringType = frameConfig.dtypes?.includes("string")

return new Promise((resolve, reject) => {
let hasError = false;

Papa.parse(file, {
header: true,
dynamicTyping: true,
dynamicTyping: !hasStringType,
skipEmptyLines: 'greedy',
delimiter: ",",
...options,
error: (error) => {
hasError = true;
Expand Down Expand Up @@ -108,12 +110,13 @@ const $streamCSV = async (file: string, callback: (df: DataFrame) => void, optio
return new Promise((resolve, reject) => {
let count = 0
let hasError = false;

const hasStringType = frameConfig.dtypes?.includes("string")
Papa.parse(file, {
...options,
dynamicTyping: true,
header: true,
download: true,
dynamicTyping: !hasStringType,
delimiter: ",",
...options,
step: results => {
if (hasError) return;
try {
Expand Down
7 changes: 5 additions & 2 deletions src/danfojs-base/io/node/io.csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,16 @@ import fs from 'fs'
*/
const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promise<DataFrame> => {
const frameConfig = options?.frameConfig || {}
const hasStringType = frameConfig.dtypes?.includes("string")

if (filePath.startsWith("http") || filePath.startsWith("https")) {
return new Promise((resolve, reject) => {
let hasError = false;
const optionsWithDefaults = {
header: true,
dynamicTyping: true,
dynamicTyping: !hasStringType,
skipEmptyLines: 'greedy',
delimiter: ",",
...options,
}

Expand Down Expand Up @@ -116,7 +118,8 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis

Papa.parse(fileStream, {
header: true,
dynamicTyping: true,
dynamicTyping: !hasStringType,
delimiter: ",",
...options,
error: (error) => {
hasError = true;
Expand Down
35 changes: 35 additions & 0 deletions src/danfojs-browser/tests/io/csv.reader.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,41 @@ describe("readCSV", function () {
assert.ok(error instanceof Error);
}
});

// Reads a one-column CSV with an explicit "string" dtype and checks that the
// digits come back exactly as written, leading zeros included.
it("Preserves leading zeros when dtype is string", async function () {
  const expectedCodes = [ "012345", "001234" ];
  // Both data rows of the single "codes" column start with zeros.
  const source = new File([ "codes\n012345\n001234" ], "leading_zeros.csv", { type: "text/csv" });
  const readOptions = { frameConfig: { dtypes: [ "string" ] } };

  const frame = await dfd.readCSV(source, readOptions);

  // Cell values and the reported column dtype
  assert.deepEqual(frame.values, expectedCodes.map((code) => [ code ]));
  assert.deepEqual(frame.dtypes, [ "string" ]);

  // The JSON export must carry the same string values
  assert.deepEqual(dfd.toJSON(frame), expectedCodes.map((code) => ({ codes: code })));
});

// Reads the same leading-zero CSV without any options and checks that the
// column comes back as numbers with an "int32" dtype.
it("Converts to numbers when dtype is not string", async function () {
  const expectedCodes = [ 12345, 1234 ];
  // Same content as the string-dtype case: both rows start with zeros.
  const source = new File([ "codes\n012345\n001234" ], "leading_zeros.csv", { type: "text/csv" });

  // No options are passed, so no dtype is requested
  const frame = await dfd.readCSV(source);

  // Cell values and the reported column dtype
  assert.deepEqual(frame.values, expectedCodes.map((code) => [ code ]));
  assert.deepEqual(frame.dtypes, [ "int32" ]);

  // The JSON export must carry the same numeric values
  assert.deepEqual(dfd.toJSON(frame), expectedCodes.map((code) => ({ codes: code })));
});
});

// describe("streamCSV", function () {
Expand Down
55 changes: 54 additions & 1 deletion src/danfojs-node/test/io/csv.reader.test.ts