1

I managed to load a Parquet file based on the examples and documentation of Rust's Apache Arrow implementation.

    use arrow::record_batch::RecordBatchReader;
    use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
    use parquet::file::reader::SerializedFileReader;
    use std::fs::File;
    use std::path::Path;
    use std::rc::Rc;

    // Open the Parquet file and wrap it in the low-level Parquet file reader.
    let file = File::open(&Path::new("./path_to/file.parquet")).unwrap();
    let file_reader = SerializedFileReader::new(file).unwrap();
    // Adapt the Parquet reader so it can expose an Arrow schema and record batches.
    let mut arrow_reader = ParquetFileArrowReader::new(Rc::new(file_reader));

    println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap());

    // 2048 is the batch size handed to the record reader.
    let mut record_batch_reader = arrow_reader.get_record_reader(2048).unwrap();

I was able to display the name and type of columns of each batch:

    loop {
       let record_batch = record_batch_reader.next_batch().unwrap().unwrap();
       if record_batch.num_rows() > 0 {
           println!("Schema: {}.", record_batch.schema());
       }
    }

However, I am quite confused about how to display the contents of the columns. How can I retrieve the contents of the first column and print them?

1 Answer 1

1

The latest version of Apache Arrow seems to have a pretty-printing module. Unfortunately, it is not in the latest published package (1.0.1).

use arrow::util::pretty;
// `print_batches` takes a slice of record batches and returns a `Result`,
// so view the single batch as a one-element slice (no clone, no move) and
// surface any formatting error instead of silently dropping it.
pretty::print_batches(std::slice::from_ref(&batch)).unwrap();

The manual way to do it is through downcasting.

// For an Int32 column. `downcast_ref` returns an `Option`: it is `None` when
// the column is not actually an `Int32Array`.
let col = batch.column(0).as_any().downcast_ref::<arrow::array::Int32Array>();

// For a Utf8 string column (alternative to the line above; again an `Option`):
let col = batch.column(0).as_any().downcast_ref::<arrow::array::StringArray>();

Then you can simply print it:

// `col` is the `Option` returned by `downcast_ref`, so the debug output is
// wrapped in `Some(...)`, or is `None` if the downcast did not match.
println!("Columns: {:?}.", col);
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.