perf: split row groups by file range (morsel splitting) by Dandandan · Pull Request #23285 · apache/datafusion · GitHub
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions datafusion/common/src/config.rs
4 changes: 4 additions & 0 deletions datafusion/common/src/file_options/parquet_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ impl ParquetOptions {
pushdown_filters: _,
reorder_filters: _,
force_filter_selections: _, // not used for writer props
split_row_groups_by_range: _, // not used for writer props
allow_single_file_parallelism: _,
maximum_parallel_row_group_writers: _,
maximum_buffered_record_batches_per_stream: _,
Expand Down Expand Up @@ -494,6 +495,7 @@ mod tests {
pushdown_filters: defaults.pushdown_filters,
reorder_filters: defaults.reorder_filters,
force_filter_selections: defaults.force_filter_selections,
split_row_groups_by_range: defaults.split_row_groups_by_range,
allow_single_file_parallelism: defaults.allow_single_file_parallelism,
maximum_parallel_row_group_writers: defaults
.maximum_parallel_row_group_writers,
Expand Down Expand Up @@ -613,6 +615,8 @@ mod tests {
pushdown_filters: global_options_defaults.pushdown_filters,
reorder_filters: global_options_defaults.reorder_filters,
force_filter_selections: global_options_defaults.force_filter_selections,
split_row_groups_by_range: global_options_defaults
.split_row_groups_by_range,
allow_single_file_parallelism: global_options_defaults
.allow_single_file_parallelism,
maximum_parallel_row_group_writers: global_options_defaults
Expand Down
7 changes: 7 additions & 0 deletions datafusion/datasource-parquet/src/access_plan.rs
456 changes: 441 additions & 15 deletions datafusion/datasource-parquet/src/opener/mod.rs

Large diffs are not rendered by default.

Loading
Loading