feat(dataframe): add limit, distinct, dropColumns, withColumnRenamed … · apache/datafusion-java@fe54208 · GitHub
Skip to content

Commit fe54208

Browse files
authored
feat(dataframe): add limit, distinct, dropColumns, withColumnRenamed (#30)
1 parent 9dacdc8 commit fe54208

3 files changed

Lines changed: 304 additions & 0 deletions

File tree

native/src/lib.rs

Lines changed: 81 additions & 0 deletions

src/main/java/org/apache/datafusion/DataFrame.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,61 @@ public DataFrame filter(String predicate) {
116116
return new DataFrame(filterRows(nativeHandle, predicate));
117117
}
118118

119+
/**
120+
* Take the first {@code fetch} rows. Equivalent to {@link #limit(int, int)} with {@code skip =
121+
* 0}. The receiver remains usable and must still be closed independently.
122+
*/
123+
public DataFrame limit(int fetch) {
124+
return limit(0, fetch);
125+
}
126+
127+
/**
128+
* Skip {@code skip} rows, then take the next {@code fetch} rows. Both arguments must be
129+
* non-negative. The receiver remains usable and must still be closed independently.
130+
*/
131+
public DataFrame limit(int skip, int fetch) {
132+
if (skip < 0) {
133+
throw new IllegalArgumentException("skip must be non-negative, was " + skip);
134+
}
135+
if (fetch < 0) {
136+
throw new IllegalArgumentException("fetch must be non-negative, was " + fetch);
137+
}
138+
if (nativeHandle == 0) {
139+
throw new IllegalStateException("DataFrame is closed or already collected");
140+
}
141+
return new DataFrame(limitRows(nativeHandle, skip, fetch));
142+
}
143+
144+
/**
145+
* Deduplicate rows across all columns. The receiver remains usable and must still be closed
146+
* independently.
147+
*/
148+
public DataFrame distinct() {
149+
if (nativeHandle == 0) {
150+
throw new IllegalStateException("DataFrame is closed or already collected");
151+
}
152+
return new DataFrame(distinctRows(nativeHandle));
153+
}
154+
155+
/**
156+
* Drop the named columns. The inverse of {@link #select(String...)}. The receiver remains usable
157+
* and must still be closed independently.
158+
*/
159+
public DataFrame dropColumns(String... columnNames) {
160+
if (nativeHandle == 0) {
161+
throw new IllegalStateException("DataFrame is closed or already collected");
162+
}
163+
return new DataFrame(dropColumns(nativeHandle, columnNames));
164+
}
165+
166+
/** Rename a column. The receiver remains usable and must still be closed independently. */
167+
public DataFrame withColumnRenamed(String oldName, String newName) {
168+
if (nativeHandle == 0) {
169+
throw new IllegalStateException("DataFrame is closed or already collected");
170+
}
171+
return new DataFrame(renameColumn(nativeHandle, oldName, newName));
172+
}
173+
119174
/**
120175
* Materialize this DataFrame as Parquet at {@code path}. The path is treated as a directory
121176
* unless overridden via {@link ParquetWriteOptions#singleFileOutput(boolean)}. The receiver
@@ -168,6 +223,14 @@ public void close() {
168223

169224
private static native long filterRows(long handle, String predicate);
170225

226+
private static native long limitRows(long handle, int skip, int fetch);
227+
228+
private static native long distinctRows(long handle);
229+
230+
private static native long dropColumns(long handle, String[] columnNames);
231+
232+
private static native long renameColumn(long handle, String oldName, String newName);
233+
171234
private static native void writeParquetWithOptions(
172235
long handle,
173236
String path,

src/test/java/org/apache/datafusion/DataFrameTransformationsTest.java

Lines changed: 160 additions & 0 deletions

0 commit comments

Comments
 (0)