peroxide/structure/
dataframe.rs

1//! Pandas-like dataframe & series.
2//!
3//! ## Series
4//!
5//! ### 1. Declare Series
6//!
7//! * To declare series, you should have `Vec<T>` where `T` is one of following types.
8//!
9//! | Primitive type | DType   |
10//! | :-----:  | :-----: |
11//! | `usize`  | `USIZE` |
12//! | `u8`     | `U8`    |
13//! | `u16`    | `U16`   |
14//! | `u32`    | `U32`   |
15//! | `u64`    | `U64`   |
16//! | `isize`  | `ISIZE` |
17//! | `i8`     | `I8`    |
18//! | `i16`    | `I16`   |
19//! | `i32`    | `I32`   |
20//! | `i64`    | `I64`   |
21//! | `f32`    | `F32`   |
22//! | `f64`    | `F64`   |
23//! | `bool`   | `Bool`  |
24//! | `char`   | `Char`  |
25//! | `String` | `Str`   |
26//!
//! * Once you have a `Vec<T>`, build the series with `Series::new(Vec<T>)`.
28//!
29//! ### 2. Methods for Series
30//!
31//! * `TypedVector<T> trait for Series`
32//!     
33//!     ```ignore
34//!     pub trait TypedVector<T> {
35//!         fn new(v: Vec<T>) -> Self;
36//!         fn to_vec(&self) -> Vec<T>;
37//!         fn as_slice(&self) -> &[T];
38//!         fn as_slice_mut(&mut self) -> &mut [T];
39//!         fn at_raw(&self, i: usize) -> T;
40//!         fn push(&mut self, elem: T);
41//!     }
42//!     ```
43//!
44//! * `Series` methods
45//!
46//!     ```ignore
47//!     impl Series {
48//!         // Core
49//!         pub fn at(&self, i: usize) -> Scalar;
50//!         pub fn len(&self) -> usize;
51//!         pub fn to_type(&self, dtype: DType) -> Series;
52//!         pub fn as_type(&mut self, dtype: DType);
53//!         pub fn select_indices(&self, indices: &[usize]) -> Series;
54//!         pub fn to_f64_vec(&self) -> anyhow::Result<Vec<f64>>;
55//!
56//!         // Statistics (numeric types only, except min/max)
57//!         pub fn sum(&self) -> anyhow::Result<f64>;
58//!         pub fn mean(&self) -> anyhow::Result<f64>;
59//!         pub fn var(&self) -> anyhow::Result<f64>;
60//!         pub fn sd(&self) -> anyhow::Result<f64>;
61//!         pub fn min(&self) -> anyhow::Result<Scalar>;
62//!         pub fn max(&self) -> anyhow::Result<Scalar>;
63//!     }
64//!     ```
65//!
66//!     * `at` is simple getter for `Series`. It returns `Scalar`.
67//!     * `as_type` is a method for mutable type casting.
68//!         * All types can be changed to `Str`.
69//!         * All integer & float types can be exchanged.
70//!         * `Bool, Char` can be changed to `Str` or `U8` only.
71//!         * `U8` can be changed to all types.
72//!     * `select_indices` selects elements by indices, returning a new Series.
73//!     * `to_f64_vec` converts numeric Series to `Vec<f64>` (bridge for statistics).
74//!     * `sum`, `mean`, `var`, `sd` convert to `f64` internally via `to_f64_vec`.
75//!     * `min`, `max` preserve the original type and return `Scalar`. Works on all ordered types including `Char` and `String`.
76//!
77//! ### 3. Example
78//!
79//! ```rust
80//! extern crate peroxide;
81//! use peroxide::fuga::*;
82//!
83//! fn main() {
84//!     let a = Series::new(vec![1, 2, 3, 4]);
85//!     let b = Series::new(vec!['a', 'b', 'c', 'd']);
86//!     let mut c = Series::new(vec![true, false, false, true]);
87//!
88//!     a.print();       // print for Series
89//!     b.dtype.print(); // print for dtype of Series (=Char)
90//!     c.as_type(U8);   // Bool => U8
91//!
92//!     assert_eq!(c.dtype, U8);
93//!
94//!     // Select by indices
95//!     let d = a.select_indices(&[0, 2]);
96//!     assert_eq!(d, Series::new(vec![1, 3]));
97//!
98//!     // Statistics
99//!     let e = Series::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
100//!     assert_eq!(e.sum().unwrap(), 15.0);
101//!     assert_eq!(e.mean().unwrap(), 3.0);
102//!     assert_eq!(e.min().unwrap(), Scalar::new(1.0f64));
103//!     assert_eq!(e.max().unwrap(), Scalar::new(5.0f64));
104//! }
105//! ```
106//!
107//! ## DataFrame
108//!
109//! ### 1. Declare DataFrame
110//!
111//! * To declare dataframe, use constructor.
112//!     * `DataFrame::new(Vec<Series>)`
113//!
114//! ```rust
115//! extern crate peroxide;
116//! use peroxide::fuga::*;
117//!
118//! fn main() {
119//!     // 1-1. Empty DataFrame
120//!     let mut df = DataFrame::new(vec![]);
121//!
122//!     // 1-2. Push Series
123//!     df.push("a", Series::new(vec![1, 2, 3, 4]));
124//!     df.push("b", Series::new(vec![0.1, 0.2, 0.3, 0.4]));
125//!     df.push("c", Series::new(vec!['a', 'b', 'c', 'd']));
126//!
127//!     // 1-3. Print
128//!     df.print();
129//!
130//!     // 2-1. Construct Series first
131//!     let a = Series::new(vec![1, 2, 3, 4]);
132//!     let b = Series::new(vec![0.1, 0.2, 0.3, 0.4]);
133//!     let c = Series::new(vec!['a', 'b', 'c', 'd']);
134//!
//!     // 2-2. Declare DataFrame with existing Series
136//!     let mut dg = DataFrame::new(vec![a, b, c]);
137//!
138//!     // 2-3. Print or Set header
//!     dg.print();                         // Default header: 0 1 2
140//!     dg.set_header(vec!["a", "b", "c"]); // Change header
141//! }
142//! ```
143//!
144//! ### 2. Methods for DataFrame
145//!
146//! * `DataFrame` method
147//!
148//!     ```ignore
149//!     impl DataFrame {
150//!         // Constructor & Basic
151//!         pub fn new(v: Vec<Series>) -> Self;
152//!         pub fn header(&self) -> &Vec<String>;
153//!         pub fn header_mut(&mut self) -> &mut Vec<String>;
154//!         pub fn set_header(&mut self, new_header: Vec<&str>);
155//!         pub fn push(&mut self, name: &str, series: Series);
156//!         pub fn drop(&mut self, col_header: &str);
157//!         pub fn row(&self, i: usize) -> DataFrame;
158//!         pub fn spread(&self) -> String;
159//!         pub fn as_types(&mut self, dtypes: Vec<DType>);
160//!         pub fn filter_by<F>(&self, column: &str, f: F) -> anyhow::Result<DataFrame>;
161//!         pub fn mask(&self, mask: &Series) -> anyhow::Result<DataFrame>;
162//!         pub fn select_rows(&self, indices: &[usize]) -> DataFrame;
163//!
164//!         // Shape & Info
165//!         pub fn nrow(&self) -> usize;
166//!         pub fn ncol(&self) -> usize;
167//!         pub fn shape(&self) -> (usize, usize);
168//!         pub fn dtypes(&self) -> Vec<DType>;
169//!         pub fn is_empty(&self) -> bool;
170//!         pub fn contains(&self, col_header: &str) -> bool;
171//!
172//!         // Row Operations
173//!         pub fn head(&self, n: usize) -> DataFrame;
174//!         pub fn tail(&self, n: usize) -> DataFrame;
175//!         pub fn slice(&self, offset: usize, length: usize) -> DataFrame;
176//!
177//!         // Column Operations
178//!         pub fn select(&self, columns: &[&str]) -> DataFrame;
179//!         pub fn rename(&mut self, old: &str, new: &str);
180//!         pub fn column_names(&self) -> Vec<&str>;
181//!         pub fn select_dtypes(&self, dtypes: &[DType]) -> DataFrame;
182//!
183//!         // Statistics (numeric columns only)
184//!         pub fn describe(&self) -> DataFrame;
185//!         pub fn sum(&self) -> DataFrame;
186//!         pub fn mean(&self) -> DataFrame;
187//!     }
188//!     ```
189//!
//!     * `push(&mut self, name: &str, series: Series)`: push a header & Series pair
191//!     * `drop(&mut self, col_header: &str)`: drop specific column by header
192//!     * `row(&self, i: usize) -> DataFrame` : Extract $i$-th row as new DataFrame
193//!     * `filter_by(&self, column, f)` : Filter DataFrame by specific column
194//!     * `mask(&self, mask: &Series)` : Mask DataFrame by boolean Series
195//!     * `select_rows(&self, indices)` : Select rows by indices
196//!     * `nrow`, `ncol`, `shape` : Row count (max column length), column count, `(nrow, ncol)` tuple
197//!     * `dtypes` : `Vec<DType>` of each column's type
198//!     * `is_empty` : `true` if no columns or no rows
199//!     * `contains(col_header)` : `true` if the column exists
200//!     * `head(n)`, `tail(n)` : First / last `n` rows
201//!     * `slice(offset, length)` : Row slice starting at `offset`
202//!     * `select(columns)` : Select columns by name (panics on missing)
203//!     * `rename(old, new)` : Rename a column in-place
204//!     * `column_names` : `Vec<&str>` of all headers
205//!     * `select_dtypes(dtypes)` : Select columns matching given DTypes
206//!     * `describe` : Computes count / mean / sd / min / max for each numeric column
207//!     * `sum`, `mean` : Single-row DataFrame with column-wise sum / mean
208//!
209//! * `WithCSV` trait
210//!
211//!     ```ignore
212//!     pub trait WithCSV: Sized {
213//!         fn write_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
214//!         fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>>;
215//!     }
216//!     ```
217//!
//!     * Requires the `csv` feature.
219//!
220//!     ```rust
221//!     // Example for CSV
222//!     #[macro_use]
223//!     extern crate peroxide;
224//!     use peroxide::fuga::*;
225//!
226//!     fn main() -> Result<(), Box<dyn Error>> {
227//!     # #[cfg(feature="csv")]
228//!     # {
229//!         // Write CSV
230//!         let mut df = DataFrame::new(vec![]);
231//!         df.push("a", Series::new(vec!['x', 'y', 'z']));
232//!         df.push("b", Series::new(vec![0, 1, 2]));
233//!         df.push("c", Series::new(c!(0.1, 0.2, 0.3)));
234//!         df.write_csv("example_data/doc_csv.csv")?;
235//!
236//!         // Read CSV
237//!         let mut dg = DataFrame::read_csv("example_data/doc_csv.csv", ',')?;
238//!         dg.as_types(vec![Char, I32, F64]);
239//!
240//!         assert_eq!(df, dg);
241//!     # }
242//!
243//!         Ok(())
244//!     }
245//!     ```
246//!
247//! * `WithNetCDF` trait
248//!
249//!     ```ignore
250//!     pub trait WithNetCDF: Sized {
251//!         fn write_nc(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
252//!         fn read_nc(file_path: &str) -> Result<Self, Box<dyn Error>>;
253//!         fn read_nc_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>>;
254//!     }
255//!     ```
256//!
//!     * Requires the `nc` feature.
//!     * Requires the `libnetcdf` dependency.
//!     * `Char` and `Bool` columns are saved as `U8`, so reading a `Char` or `Bool` column back from an nc file requires an explicit type cast.
260//!
261//!     ```
262//!     #[macro_use]
263//!     extern crate peroxide;
264//!     use peroxide::fuga::*;
265//!
266//!     fn main() -> Result<(), Box<dyn Error>> {
267//!     #    #[cfg(feature = "nc")]
268//!     #    {
269//!         // Write netcdf
270//!         let mut df = DataFrame::new(vec![]);
271//!         df.push("a", Series::new(vec!['x', 'y', 'z']));
272//!         df.push("b", Series::new(vec![0, 1, 2]));
273//!         df.push("c", Series::new(c!(0.1, 0.2, 0.3)));
274//!         df.write_nc("example_data/doc_nc.nc")?;
275//!
276//!         // Read netcdf
277//!         let mut dg = DataFrame::read_nc("example_data/doc_nc.nc")?;
278//!         dg["a"].as_type(Char); // Char, Bool are only read/written as U8 type
279//!
280//!         assert_eq!(df, dg);
281//!     #    }
282//!
283//!         Ok(())
284//!     }
285//!     ```
286//!
287//! * `WithParquet` trait
288//!
289//!     ```ignore
290//!     pub trait WithParquet: Sized {
291//!         fn write_parquet(&self, file_path: &str, compression: Compression) -> Result<(), Box<dyn Error>>;
292//!         fn read_parquet(file_path: &str) -> Result<Self, Box<dyn Error>>;
293//!     }
294//!     ```
295//!
//!     * Requires the `parquet` feature.
//!     * `Char` columns are saved as `String`, so a `Char` column read back from a parquet file has `String` type.
//!     * **Caution**: when `Bool` columns have different lengths, missing values are filled with `false`.
299//!     ```
300//!     #[macro_use]
301//!     extern crate peroxide;
302//!     use peroxide::fuga::*;
303//!     
304//!     fn main() -> Result<(), Box<dyn Error>> {
305//!     #    #[cfg(feature = "parquet")]
306//!     #    {
307//!         // Write parquet
308//!         let mut df = DataFrame::new(vec![]);
309//!         df.push("a", Series::new(vec!['x', 'y', 'z']));
310//!         df.push("b", Series::new(vec![0, 1, 2]));
311//!         df.push("c", Series::new(c!(0.1, 0.2, 0.3)));
312//!         df.write_parquet("example_data/doc_pq.parquet", SNAPPY)?;
313//!
314//!         // Read parquet
315//!         let mut dg = DataFrame::read_parquet("example_data/doc_pq.parquet")?;
316//!         dg["a"].as_type(Char); // Char is only read/written as String type
317//!
318//!         assert_eq!(df, dg);
319//!     #    }
320//!
321//!         Ok(())
322//!     }
323//!     ```
324
325use crate::traits::math::Vector;
326use crate::util::{print::LowerExpWithPlus, useful::tab};
327#[cfg(feature = "parquet")]
328use arrow::datatypes::{
329    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
330    UInt64Type, UInt8Type,
331};
332use std::cmp::{max, min};
333#[cfg(feature = "csv")]
334use std::collections::HashMap;
335#[cfg(feature = "parquet")]
336use indexmap::IndexMap;
337#[cfg(any(feature = "csv", feature = "nc", feature = "parquet"))]
338use std::error::Error;
339use std::fmt;
340use std::ops::{Index, IndexMut};
341#[cfg(feature = "parquet")]
342use std::sync::Arc;
343use DType::{Bool, Char, Str, F32, F64, I16, I32, I64, I8, ISIZE, U16, U32, U64, U8, USIZE};
344
345#[cfg(feature = "parquet")]
346use arrow::{
347    array::{Array, BooleanArray, PrimitiveArray, StringArray},
348    datatypes::{DataType, Field, Schema},
349};
350#[cfg(feature = "csv")]
351use csv::{ReaderBuilder, WriterBuilder};
352#[cfg(feature = "nc")]
353use netcdf::{
354    types::VariableType,
355    variable::{Variable, VariableMut},
356    Numeric,
357};
358#[cfg(feature = "parquet")]
359use parquet::{
360    arrow::arrow_reader::ParquetRecordBatchReaderBuilder,
361    arrow::arrow_writer::compute_leaves,
362    arrow::arrow_writer::get_column_writers,
363    arrow::arrow_writer::ArrowLeafColumn,
364    arrow::ArrowSchemaConverter,
365    basic::Compression,
366    file::properties::WriterProperties,
367    file::writer::{SerializedFileWriter, SerializedRowGroupWriter},
368};
369
370// =============================================================================
371// Enums
372// =============================================================================
373
/// Tag naming the element type held by a `Series` or `Scalar`.
///
/// Each variant corresponds to one supported Rust type (see the table in the
/// module documentation). The declaration order is kept as-is because numeric
/// casts of the enum (`DType::U8 as usize`) depend on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DType {
    /// `usize`
    USIZE,
    /// `u8`
    U8,
    /// `u16`
    U16,
    /// `u32`
    U32,
    /// `u64`
    U64,
    /// `isize`
    ISIZE,
    /// `i8`
    I8,
    /// `i16`
    I16,
    /// `i32`
    I32,
    /// `i64`
    I64,
    /// `f32`
    F32,
    /// `f64`
    F64,
    /// `bool`
    Bool,
    /// `String`
    Str,
    /// `char`
    Char,
}
393
/// Type-erased column storage: one `Vec<T>` per supported element type.
///
/// The variant names mirror `DType`, so the variant in use tells which element
/// type the vector holds.
#[derive(Clone, Debug, PartialEq)]
pub enum DTypeArray {
    /// Vector of `usize`
    USIZE(Vec<usize>),
    /// Vector of `u8`
    U8(Vec<u8>),
    /// Vector of `u16`
    U16(Vec<u16>),
    /// Vector of `u32`
    U32(Vec<u32>),
    /// Vector of `u64`
    U64(Vec<u64>),
    /// Vector of `isize`
    ISIZE(Vec<isize>),
    /// Vector of `i8`
    I8(Vec<i8>),
    /// Vector of `i16`
    I16(Vec<i16>),
    /// Vector of `i32`
    I32(Vec<i32>),
    /// Vector of `i64`
    I64(Vec<i64>),
    /// Vector of `f32`
    F32(Vec<f32>),
    /// Vector of `f64`
    F64(Vec<f64>),
    /// Vector of `bool`
    Bool(Vec<bool>),
    /// Vector of `String`
    Str(Vec<String>),
    /// Vector of `char`
    Char(Vec<char>),
}
413
/// Type-erased single value: one payload per supported element type.
///
/// The variant names mirror `DType`. Do not reorder the variants: the derived
/// `PartialOrd` compares the variant position first and the payload second, so
/// the declaration order is part of the observable ordering.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum DTypeValue {
    /// A `usize` value
    USIZE(usize),
    /// A `u8` value
    U8(u8),
    /// A `u16` value
    U16(u16),
    /// A `u32` value
    U32(u32),
    /// A `u64` value
    U64(u64),
    /// An `isize` value
    ISIZE(isize),
    /// An `i8` value
    I8(i8),
    /// An `i16` value
    I16(i16),
    /// An `i32` value
    I32(i32),
    /// An `i64` value
    I64(i64),
    /// An `f32` value
    F32(f32),
    /// An `f64` value
    F64(f64),
    /// A `bool` value
    Bool(bool),
    /// A `String` value
    Str(String),
    /// A `char` value
    Char(char),
}
433
434// =============================================================================
435// Structs
436// =============================================================================
437
438/// Generic `DataFrame` structure
439///
440/// # Example
441///
442/// ```rust
443/// extern crate peroxide;
444/// use peroxide::fuga::*;
445///
446/// fn main() {
447///     // 1. Series to DataFrame
448///     // 1-1. Declare Series
449///     let a = Series::new(vec![1, 2, 3, 4]);
450///     let b = Series::new(vec![true, false, false, true]);
451///     let c = Series::new(vec![0.1, 0.2, 0.3, 0.4]);
452///
453///     // 1-2. Declare DataFrame (default header: 0, 1, 2)
454///     let mut df = DataFrame::new(vec![a, b, c]);
455///     df.set_header(vec!["a", "b", "c"]);
456///     df.print(); // Pretty print for DataFrame
457///
458///     // 2. Empty DataFrame
459///     let mut dg = DataFrame::new(vec![]);
460///     dg.push("a", Series::new(vec![1,2,3,4]));
461///     dg.push("b", Series::new(vec![true, false, false, true]));
462///     dg.push("c", Series::new(vec![0.1, 0.2, 0.3, 0.4]));
463///     dg.print();
464///
465///     assert_eq!(df, dg);
466/// }
467/// ```
468#[derive(Debug, Clone, PartialEq)]
469pub struct DataFrame {
470    pub data: Vec<Series>,
471    pub ics: Vec<String>,
472}
473
474/// Generic Series
475///
476/// # Example
477///
478/// ```rust
479/// extern crate peroxide;
480/// use peroxide::fuga::*;
481///
482/// fn main() {
483///     // Declare Series with Vec<T> (T: primitive type)
484///     let a = Series::new(vec![1i32, 2, 3, 4]);
485///     a.print();                      // print for Series
486///     a.dtype.print();              // print for dtype of Series
487///
488///     let b: &[i32] = a.as_slice();   // Borrow series to &[T]
489///     let c: Vec<i32> = a.to_vec();   // Series to Vec<T> (clone)
490///     
491///     // ...
492/// }
493/// ```
494#[derive(Debug, Clone, PartialEq)]
495pub struct Series {
496    pub values: DTypeArray,
497    pub dtype: DType,
498}
499
500/// Generic Scalar
501#[derive(Debug, Clone, PartialEq)]
502pub struct Scalar {
503    pub value: DTypeValue,
504    pub dtype: DType,
505}
506
507// =============================================================================
508// Traits
509// =============================================================================
/// Conversion between a concrete value of type `T` and a type-erased scalar.
///
/// In this file the trait is implemented for `Scalar` by `impl_typed_scalar!`.
pub trait TypedScalar<T> {
    /// Wraps the concrete value `s`.
    fn new(s: T) -> Self
    where
        Self: Sized;

    /// Consumes the scalar and returns the concrete value.
    ///
    /// The `Scalar` implementations generated in this file panic when the
    /// stored value is not of type `T`.
    fn unwrap(self) -> T;
}
516
/// Typed, `Vec<T>`-like access to a container whose elements have type `T`.
///
/// In this file the trait is implemented for `Series` by `impl_typed_vector!`,
/// once per supported element type. The per-method notes below describe those
/// generated implementations.
pub trait TypedVector<T> {
    /// Builds the container from an owned vector.
    fn new(v: Vec<T>) -> Self;

    /// Copies the elements out into a `Vec<T>`.
    fn to_vec(&self) -> Vec<T>;

    /// Borrows the elements as a slice.
    fn as_slice(&self) -> &[T];

    /// Mutably borrows the elements as a slice.
    fn as_slice_mut(&mut self) -> &mut [T];

    /// Returns a copy of the element at index `i`.
    fn at_raw(&self, i: usize) -> T;

    /// Appends `elem` at the end.
    fn push(&mut self, elem: T);

    /// Returns a new container holding `f(x)` for every element `x`.
    fn map<F>(&self, f: F) -> Self
    where
        F: Fn(T) -> T;

    /// Applies `f` to every element in place.
    fn mut_map<F>(&mut self, f: F)
    where
        F: Fn(&mut T);

    /// Folds the elements from left to right, starting from `init`.
    fn fold<F>(&self, init: T, f: F) -> T
    where
        F: Fn(T, T) -> T;

    /// Returns a new container with the elements for which `f` is `true`.
    fn filter<F>(&self, f: F) -> Self
    where
        F: Fn(&T) -> bool;

    /// Returns a new container with at most the first `n` elements.
    fn take(&self, n: usize) -> Self;

    /// Returns a new container without the first `n` elements.
    fn skip(&self, n: usize) -> Self;

    /// Returns the leading elements for which `f` is `true`.
    fn take_while<F>(&self, f: F) -> Self
    where
        F: Fn(&T) -> bool;

    /// Drops the leading elements for which `f` is `true` and returns the rest.
    fn skip_while<F>(&self, f: F) -> Self
    where
        F: Fn(&T) -> bool;

    /// Combines the elements of `self` and `other` pairwise with `f`.
    fn zip_with<F>(&self, f: F, other: &Self) -> Self
    where
        F: Fn(T, T) -> T;
}
534
535// =============================================================================
536// Macros & Private functions
537// =============================================================================
// Implements `TypedScalar<$type>` for `Scalar`.
//
// `$type` is the concrete Rust type and `$dtype` is the identically named
// variant of `DTypeValue` / `DType`. `$dtype` is also used as a bare expression
// in the panic message, so the `DType` variants must be in scope at the call
// site.
macro_rules! impl_typed_scalar {
    ($type:ty, $dtype:ident) => {
        impl TypedScalar<$type> for Scalar {
            fn new(s: $type) -> Self {
                let value = DTypeValue::$dtype(s);
                let dtype = DType::$dtype;
                Self { value, dtype }
            }

            fn unwrap(self) -> $type {
                // Only the matching variant can be unwrapped; anything else is
                // a caller error.
                if let DTypeValue::$dtype(inner) = self.value {
                    inner
                } else {
                    panic!("Can't unwrap {:?} value", $dtype)
                }
            }
        }
    };
}
557
// Implements `TypedVector<$type>` for `Series`.
//
// `$type` is the element type and `$dtype` is the identically named variant of
// `DTypeArray` / `DType`. `$dtype` is also used as a bare expression in the
// panic messages, so the `DType` variants must be in scope at the call site.
//
// Every accessor panics with "Can't convert to <dtype> vector" when the series
// stores a different variant.
//
// The combinators iterate over the borrowed slice and clone only the elements
// that end up in the result, instead of cloning the whole vector first.
macro_rules! impl_typed_vector {
    ($type:ty, $dtype:ident) => {
        impl TypedVector<$type> for Series {
            fn new(v: Vec<$type>) -> Self {
                Self {
                    values: DTypeArray::$dtype(v),
                    dtype: DType::$dtype,
                }
            }

            fn to_vec(&self) -> Vec<$type> {
                let items: &[$type] = self.as_slice();
                items.to_vec()
            }

            fn as_slice(&self) -> &[$type] {
                match &self.values {
                    DTypeArray::$dtype(v) => v,
                    _ => panic!("Can't convert to {:?} vector", $dtype),
                }
            }

            fn as_slice_mut(&mut self) -> &mut [$type] {
                match &mut self.values {
                    DTypeArray::$dtype(v) => v,
                    _ => panic!("Can't convert to {:?} vector", $dtype),
                }
            }

            fn at_raw(&self, i: usize) -> $type {
                let items: &[$type] = self.as_slice();
                items[i].clone()
            }

            fn push(&mut self, elem: $type) {
                // Needs the `Vec` itself (not a slice) to be able to grow it.
                let v: &mut Vec<$type> = match &mut self.values {
                    DTypeArray::$dtype(v) => v,
                    _ => panic!("Can't convert to {:?} vector", $dtype),
                };
                v.push(elem);
            }

            fn map<F: Fn($type) -> $type>(&self, f: F) -> Self {
                let items: &[$type] = self.as_slice();
                Series::new(items.iter().cloned().map(f).collect::<Vec<$type>>())
            }

            fn mut_map<F: Fn(&mut $type)>(&mut self, f: F) {
                let items: &mut [$type] = self.as_slice_mut();
                items.iter_mut().for_each(f);
            }

            fn fold<F: Fn($type, $type) -> $type>(&self, init: $type, f: F) -> $type {
                let items: &[$type] = self.as_slice();
                items.iter().cloned().fold(init, f)
            }

            fn filter<F: Fn(&$type) -> bool>(&self, f: F) -> Self {
                let items: &[$type] = self.as_slice();
                Series::new(
                    items
                        .iter()
                        .filter(|x| f(*x))
                        .cloned()
                        .collect::<Vec<$type>>(),
                )
            }

            fn take(&self, n: usize) -> Self {
                let items: &[$type] = self.as_slice();
                Series::new(items.iter().take(n).cloned().collect::<Vec<$type>>())
            }

            fn skip(&self, n: usize) -> Self {
                let items: &[$type] = self.as_slice();
                Series::new(items.iter().skip(n).cloned().collect::<Vec<$type>>())
            }

            fn take_while<F: Fn(&$type) -> bool>(&self, f: F) -> Self {
                let items: &[$type] = self.as_slice();
                Series::new(
                    items
                        .iter()
                        .take_while(|x| f(*x))
                        .cloned()
                        .collect::<Vec<$type>>(),
                )
            }

            fn skip_while<F: Fn(&$type) -> bool>(&self, f: F) -> Self {
                let items: &[$type] = self.as_slice();
                Series::new(
                    items
                        .iter()
                        .skip_while(|x| f(*x))
                        .cloned()
                        .collect::<Vec<$type>>(),
                )
            }

            fn zip_with<F: Fn($type, $type) -> $type>(&self, f: F, other: &Self) -> Self {
                // The result is as long as the shorter of the two inputs.
                let lhs: &[$type] = self.as_slice();
                let rhs: &[$type] = other.as_slice();
                Series::new(
                    lhs.iter()
                        .cloned()
                        .zip(rhs.iter().cloned())
                        .map(|(x, y)| f(x, y))
                        .collect::<Vec<$type>>(),
                )
            }
        }
    };
}
652
// Evaluates `$value` with the expected type `$type`, then hands the result to
// `$wrapper`. The typed `let` is what lets a generic `$value` expression
// resolve to the requested type.
macro_rules! dtype_case {
    ($type:ty, $value:expr, $wrapper: expr) => {{
        let typed: $type = $value;
        $wrapper(typed)
    }};
}
659
// Dispatches on a `DType` value and runs `dtype_case!` with the matching Rust
// type. The `DType` variants must be in scope as bare identifiers.
//
// Forms:
//   dtype_match!(dtype, value, wrapper)             -- every dtype, plain `T`
//   dtype_match!(dtype, value, wrapper; Functor)    -- every dtype, `Functor<T>`
//   dtype_match!(N; dtype, value, wrapper)          -- restricted set, plain `T`
//   dtype_match!(N; dtype, value, wrapper; Functor) -- restricted set, `Functor<T>`
//
// The `N;` forms accept only the fixed-width integer and float dtypes; every
// other dtype (including `USIZE` and `ISIZE`) panics with
// "Can't use <dtype> to numeric".
macro_rules! dtype_match {
    ($dtype:expr, $value:expr, $wrapper:expr) => {{
        match $dtype {
            USIZE => dtype_case!(usize, $value, $wrapper),
            U8 => dtype_case!(u8, $value, $wrapper),
            U16 => dtype_case!(u16, $value, $wrapper),
            U32 => dtype_case!(u32, $value, $wrapper),
            U64 => dtype_case!(u64, $value, $wrapper),
            ISIZE => dtype_case!(isize, $value, $wrapper),
            I8 => dtype_case!(i8, $value, $wrapper),
            I16 => dtype_case!(i16, $value, $wrapper),
            I32 => dtype_case!(i32, $value, $wrapper),
            I64 => dtype_case!(i64, $value, $wrapper),
            F32 => dtype_case!(f32, $value, $wrapper),
            F64 => dtype_case!(f64, $value, $wrapper),
            Bool => dtype_case!(bool, $value, $wrapper),
            Char => dtype_case!(char, $value, $wrapper),
            Str => dtype_case!(String, $value, $wrapper),
        }
    }};

    ($dtype:expr, $value:expr, $wrapper:expr; $functor:ident) => {{
        match $dtype {
            USIZE => dtype_case!($functor<usize>, $value, $wrapper),
            U8 => dtype_case!($functor<u8>, $value, $wrapper),
            U16 => dtype_case!($functor<u16>, $value, $wrapper),
            U32 => dtype_case!($functor<u32>, $value, $wrapper),
            U64 => dtype_case!($functor<u64>, $value, $wrapper),
            ISIZE => dtype_case!($functor<isize>, $value, $wrapper),
            I8 => dtype_case!($functor<i8>, $value, $wrapper),
            I16 => dtype_case!($functor<i16>, $value, $wrapper),
            I32 => dtype_case!($functor<i32>, $value, $wrapper),
            I64 => dtype_case!($functor<i64>, $value, $wrapper),
            F32 => dtype_case!($functor<f32>, $value, $wrapper),
            F64 => dtype_case!($functor<f64>, $value, $wrapper),
            Bool => dtype_case!($functor<bool>, $value, $wrapper),
            Char => dtype_case!($functor<char>, $value, $wrapper),
            Str => dtype_case!($functor<String>, $value, $wrapper),
        }
    }};

    (N; $dtype:expr, $value:expr, $wrapper:expr) => {{
        match $dtype {
            U8 => dtype_case!(u8, $value, $wrapper),
            U16 => dtype_case!(u16, $value, $wrapper),
            U32 => dtype_case!(u32, $value, $wrapper),
            U64 => dtype_case!(u64, $value, $wrapper),
            I8 => dtype_case!(i8, $value, $wrapper),
            I16 => dtype_case!(i16, $value, $wrapper),
            I32 => dtype_case!(i32, $value, $wrapper),
            I64 => dtype_case!(i64, $value, $wrapper),
            F32 => dtype_case!(f32, $value, $wrapper),
            F64 => dtype_case!(f64, $value, $wrapper),
            // A match arm must end with `,`; a trailing `;` here made every
            // expansion of this form a syntax error.
            _ => panic!("Can't use {} to numeric", $dtype),
        }
    }};

    (N; $dtype:expr, $value:expr, $wrapper:expr; $functor:ident) => {{
        match $dtype {
            U8 => dtype_case!($functor<u8>, $value, $wrapper),
            U16 => dtype_case!($functor<u16>, $value, $wrapper),
            U32 => dtype_case!($functor<u32>, $value, $wrapper),
            U64 => dtype_case!($functor<u64>, $value, $wrapper),
            I8 => dtype_case!($functor<i8>, $value, $wrapper),
            I16 => dtype_case!($functor<i16>, $value, $wrapper),
            I32 => dtype_case!($functor<i32>, $value, $wrapper),
            I64 => dtype_case!($functor<i64>, $value, $wrapper),
            F32 => dtype_case!($functor<f32>, $value, $wrapper),
            F64 => dtype_case!($functor<f64>, $value, $wrapper),
            _ => panic!("Can't use {} to numeric", $dtype),
        }
    }};
}
733
// Helpers for laying out scalar values as text.
//
// `set_space!(elem)` returns the display string for `elem`. Float values use
// whichever of `fmt_lower_exp(2)` and `to_string()` is strictly shorter, and
// the plain `to_string()` form wins a tie. Other dtypes use `to_string()`.
//
// `set_space!(elem, space)` grows `space` to the length of that same display
// string when it is larger than the current value.
//
// `$elem` is expanded more than once, so it should be cheap and free of side
// effects.
macro_rules! set_space {
    ($elem:expr) => {{
        match $elem.dtype {
            F32 => {
                let raw: f32 = $elem.unwrap();
                let plain = raw.to_string();
                let sci = raw.fmt_lower_exp(2);

                if plain.len() <= sci.len() {
                    plain
                } else {
                    sci
                }
            }
            F64 => {
                let raw: f64 = $elem.unwrap();
                let plain = raw.to_string();
                let sci = raw.fmt_lower_exp(2);

                if plain.len() <= sci.len() {
                    plain
                } else {
                    sci
                }
            }
            _ => $elem.to_string(),
        }
    }};

    ($elem:expr, $space:expr) => {{
        match $elem.dtype {
            F32 => {
                let raw: f32 = $elem.unwrap();
                let width = min(raw.fmt_lower_exp(2).len(), raw.to_string().len());
                $space = max($space, width);
            }
            F64 => {
                let raw: f64 = $elem.unwrap();
                let width = min(raw.fmt_lower_exp(2).len(), raw.to_string().len());
                $space = max($space, width);
            }
            _ => {
                let width = $elem.to_string().len();
                $space = max($space, width);
            }
        }
    }};
}
785
// Renders an indexable collection of floats as `[a, b, c]`.
//
// Each element uses whichever of `fmt_lower_exp(2)` and `to_string()` is
// strictly shorter, and the plain `to_string()` form wins a tie. An empty
// collection renders as `[]`.
macro_rules! format_float_vec {
    ($self:expr) => {{
        let count = $self.len();
        let mut out = String::from("[");
        for idx in 0..count {
            // Separator goes before every element except the first.
            if idx > 0 {
                out.push_str(", ");
            }
            let plain = $self[idx].to_string();
            let sci = $self[idx].fmt_lower_exp(2);
            out.push_str(if plain.len() <= sci.len() { &plain } else { &sci });
        }
        out.push_str("]");
        out
    }};
}
804
/// ty1 -> ty2
///
/// Converts a `Vec<$ty1>` into a `Vec<$ty2>` element by element with an `as`
/// cast (so the usual `as` truncation / saturation rules apply), then passes
/// the new vector to `$wrapper`.
macro_rules! type_cast_vec {
    ($ty1:ty, $ty2:ty, $to_vec:expr, $wrapper:expr) => {{
        let source: Vec<$ty1> = $to_vec;
        let mut converted: Vec<$ty2> = Vec::with_capacity(source.len());
        for item in source {
            converted.push(item as $ty2);
        }
        $wrapper(converted)
    }};
}
813
// Converts a `Vec<$ty1>` into a `Vec<String>` by calling `to_string()` on every
// element, then passes the strings to `$wrapper`.
macro_rules! string_cast_vec {
    ($ty1:ty, $to_vec:expr, $wrapper:expr) => {{
        let source: Vec<$ty1> = $to_vec;
        let mut rendered: Vec<String> = Vec::with_capacity(source.len());
        for item in source {
            rendered.push(item.to_string());
        }
        $wrapper(rendered)
    }};
}
821
// Parses every string of `$to_vec` into `$ty2` and passes the parsed vector to
// `$wrapper`.
//
// `$to_vec` is copied with `.to_vec()` first, so it may be a `Vec<String>` or a
// `&[String]`. Panics (via `unwrap`) on the first string that does not parse.
macro_rules! type_parse_vec {
    ($ty2:ty, $to_vec:expr, $wrapper:expr) => {{
        let raw: Vec<String> = $to_vec.to_vec();
        let parsed: Vec<$ty2> = raw
            .iter()
            .map(|text| text.parse::<$ty2>().unwrap())
            .collect();
        $wrapper(parsed)
    }};
}
829
// Parses a vector of strings into the element type selected by the target
// dtype `$dt2`, delegating the actual parsing to `type_parse_vec!`.
//
// Every dtype is supported; the `DType` variants must be in scope as bare
// identifiers.
macro_rules! dtype_parse_vec_part {
    ($dt2:expr, $to_vec:expr, $wrapper:expr) => {{
        match $dt2 {
            // Unsigned integers
            U8 => type_parse_vec!(u8, $to_vec, $wrapper),
            U16 => type_parse_vec!(u16, $to_vec, $wrapper),
            U32 => type_parse_vec!(u32, $to_vec, $wrapper),
            U64 => type_parse_vec!(u64, $to_vec, $wrapper),
            USIZE => type_parse_vec!(usize, $to_vec, $wrapper),
            // Signed integers
            I8 => type_parse_vec!(i8, $to_vec, $wrapper),
            I16 => type_parse_vec!(i16, $to_vec, $wrapper),
            I32 => type_parse_vec!(i32, $to_vec, $wrapper),
            I64 => type_parse_vec!(i64, $to_vec, $wrapper),
            ISIZE => type_parse_vec!(isize, $to_vec, $wrapper),
            // Floats
            F32 => type_parse_vec!(f32, $to_vec, $wrapper),
            F64 => type_parse_vec!(f64, $to_vec, $wrapper),
            // Non-numeric
            Bool => type_parse_vec!(bool, $to_vec, $wrapper),
            Char => type_parse_vec!(char, $to_vec, $wrapper),
            Str => type_parse_vec!(String, $to_vec, $wrapper),
        }
    }};
}
851
// Casts a `Vec<$ty1>` to the element type selected by the target dtype `$dt2`.
//
// Numeric targets delegate to `type_cast_vec!` (an `as` cast per element) and
// `Str` delegates to `string_cast_vec!`. Any other target panics with
// "Can't convert to <dtype>". The `DType` variants must be in scope as bare
// identifiers.
macro_rules! dtype_cast_vec_part {
    ($ty1:ty, $dt2:expr, $to_vec:expr, $wrapper:expr) => {{
        match $dt2 {
            // Unsigned integers
            U8 => type_cast_vec!($ty1, u8, $to_vec, $wrapper),
            U16 => type_cast_vec!($ty1, u16, $to_vec, $wrapper),
            U32 => type_cast_vec!($ty1, u32, $to_vec, $wrapper),
            U64 => type_cast_vec!($ty1, u64, $to_vec, $wrapper),
            USIZE => type_cast_vec!($ty1, usize, $to_vec, $wrapper),
            // Signed integers
            I8 => type_cast_vec!($ty1, i8, $to_vec, $wrapper),
            I16 => type_cast_vec!($ty1, i16, $to_vec, $wrapper),
            I32 => type_cast_vec!($ty1, i32, $to_vec, $wrapper),
            I64 => type_cast_vec!($ty1, i64, $to_vec, $wrapper),
            ISIZE => type_cast_vec!($ty1, isize, $to_vec, $wrapper),
            // Floats
            F32 => type_cast_vec!($ty1, f32, $to_vec, $wrapper),
            F64 => type_cast_vec!($ty1, f64, $to_vec, $wrapper),
            // Text
            Str => string_cast_vec!($ty1, $to_vec, $wrapper),
            // Remaining targets are not handled by this helper.
            _ => panic!("Can't convert to {}", $dt2),
        }
    }};
}
872
// Cast a typed vector from dtype `$dt1` to dtype `$dt2`.
//
// * `$dt1`     - source dtype (selects the element type of `$to_vec`)
// * `$dt2`     - target dtype
// * `$to_vec`  - expression producing the source `Vec<_>`; its element type is
//                fixed by the type annotation inside each arm
// * `$wrapper` - callable applied to the converted vector (e.g. `Series::new`)
//
// Conversions that are not handled panic with a "Can't convert ..." message:
// * numeric sources accept every numeric target and `Str`; only `U8` additionally
//   accepts `Bool` (non-zero is `true`) and `Char`
// * `Str` sources are parsed element-wise via `dtype_parse_vec_part!`
// * `Char` sources accept `Char` (identity), `Str` and `U8`
// * `Bool` sources accept `Bool` (identity) and `U8` (`true` becomes 1)
macro_rules! dtype_cast_vec {
    ($dt1:expr, $dt2:expr, $to_vec:expr, $wrapper:expr) => {{
        match $dt1 {
            USIZE => dtype_cast_vec_part!(usize, $dt2, $to_vec, $wrapper),
            U8 => match $dt2 {
                Bool => {
                    let y: Vec<u8> = $to_vec;
                    let x: Vec<bool> = y.into_iter().map(|x| x != 0).collect();
                    $wrapper(x)
                }
                Char => {
                    let y: Vec<u8> = $to_vec;
                    let x: Vec<char> = y.into_iter().map(|x| x as char).collect();
                    $wrapper(x)
                }
                _ => dtype_cast_vec_part!(u8, $dt2, $to_vec, $wrapper),
            },
            U16 => dtype_cast_vec_part!(u16, $dt2, $to_vec, $wrapper),
            U32 => dtype_cast_vec_part!(u32, $dt2, $to_vec, $wrapper),
            U64 => dtype_cast_vec_part!(u64, $dt2, $to_vec, $wrapper),
            ISIZE => dtype_cast_vec_part!(isize, $dt2, $to_vec, $wrapper),
            I8 => dtype_cast_vec_part!(i8, $dt2, $to_vec, $wrapper),
            I16 => dtype_cast_vec_part!(i16, $dt2, $to_vec, $wrapper),
            I32 => dtype_cast_vec_part!(i32, $dt2, $to_vec, $wrapper),
            I64 => dtype_cast_vec_part!(i64, $dt2, $to_vec, $wrapper),
            F32 => dtype_cast_vec_part!(f32, $dt2, $to_vec, $wrapper),
            F64 => dtype_cast_vec_part!(f64, $dt2, $to_vec, $wrapper),
            Str => dtype_parse_vec_part!($dt2, $to_vec, $wrapper),
            Char => match $dt2 {
                // Identity cast, mirroring the `Bool -> Bool` arm below, so that
                // re-casting a `Char` column to `Char` no longer panics.
                Char => {
                    let y: Vec<char> = $to_vec;
                    $wrapper(y)
                }
                Str => string_cast_vec!(char, $to_vec, $wrapper),
                U8 => {
                    let y: Vec<char> = $to_vec;
                    // `as u8` keeps only the low byte of the code point.
                    let x: Vec<u8> = y.into_iter().map(|x| x as u8).collect();
                    $wrapper(x)
                }
                _ => panic!("Can't convert char type to {}", $dt2),
            },
            Bool => match $dt2 {
                U8 => {
                    let y: Vec<bool> = $to_vec;
                    let x: Vec<u8> = y.into_iter().map(|x| if x { 1 } else { 0 }).collect();
                    $wrapper(x)
                }
                Bool => {
                    let y: Vec<bool> = $to_vec;
                    $wrapper(y)
                }
                _ => panic!("Can't convert bool type to {}", $dt2),
            },
        }
    }};
}
925
/// Number of elements in `x`; the vector is consumed by the call.
fn len<T>(x: Vec<T>) -> usize {
    Vec::len(&x)
}
929
/// Render any `Display` value as an owned `String`.
fn to_string<T>(x: T) -> String
where
    T: fmt::Display,
{
    ToString::to_string(&x)
}
933
/// Map a [`DType`] to the netCDF basic type used to store it.
///
/// `USIZE`/`ISIZE` share the 64-bit integer types, and `Bool`/`Char` are stored
/// as unsigned bytes alongside `U8`.
///
/// # Panics
///
/// Panics for `Str`, which has no netCDF basic type in this mapping.
#[cfg(feature = "nc")]
fn dtype_to_vtype(dt: DType) -> netcdf::types::BasicType {
    use netcdf::types::BasicType as Nc;

    match dt {
        U8 | Bool | Char => Nc::Ubyte,
        U16 => Nc::Ushort,
        U32 => Nc::Uint,
        USIZE | U64 => Nc::Uint64,
        I8 => Nc::Byte,
        I16 => Nc::Short,
        I32 => Nc::Int,
        ISIZE | I64 => Nc::Int64,
        F32 => Nc::Float,
        F64 => Nc::Double,
        Str => panic!("Can't convert type to netcdf::types::BasicType"),
    }
}
954
/// Map a netCDF basic type back to the [`DType`] of the same width.
///
/// This is not a strict inverse of `dtype_to_vtype`: unsigned bytes always come
/// back as `U8` and 64-bit integers as `U64`/`I64`.
#[cfg(feature = "nc")]
fn vtype_to_dtype(dv: netcdf::types::BasicType) -> DType {
    use netcdf::types::BasicType as Nc;

    match dv {
        // floating point
        Nc::Float => F32,
        Nc::Double => F64,
        // signed integers
        Nc::Byte => I8,
        Nc::Short => I16,
        Nc::Int => I32,
        Nc::Int64 => I64,
        // unsigned integers
        Nc::Ubyte => U8,
        Nc::Ushort => U16,
        Nc::Uint => U32,
        Nc::Uint64 => U64,
        // text
        Nc::Char => Char,
    }
}
971
/// Write every element of `v` into the netCDF variable `var`.
///
/// Both optional arguments of `put_values` are passed as `None`.
#[cfg(feature = "nc")]
fn nc_put_value<T: Numeric>(var: &mut VariableMut, v: Vec<T>) -> Result<(), netcdf::error::Error> {
    let values: &[T] = v.as_slice();
    var.put_values(values, None, None)
}
976
/// Read all values of the netCDF variable `val` into a new [`Series`].
///
/// `v` is used as the receiving buffer: it is resized to `val.len()` (padding
/// with `T::default()`), filled by `values_to`, and then moved into the series.
///
/// # Errors
///
/// Propagates any error returned by `Variable::values_to`.
#[cfg(feature = "nc")]
fn nc_read_value<T: Numeric + Default + Clone>(
    val: &Variable,
    v: Vec<T>,
) -> Result<Series, netcdf::error::Error>
where
    Series: TypedVector<T>,
{
    let mut v = v;
    v.resize_with(val.len(), Default::default);
    val.values_to(&mut v, None, None)?;
    // The buffer is owned and not used afterwards, so hand it over instead of cloning it.
    Ok(Series::new(v))
}
990
/// Map a [`DType`] to the Arrow `DataType` used when writing parquet.
///
/// `USIZE`/`ISIZE` are stored as 64-bit integers; `Char` shares `Utf8` with `Str`.
#[cfg(feature = "parquet")]
fn dtype_to_arrow(dt: DType) -> DataType {
    match dt {
        // unsigned integers
        U8 => DataType::UInt8,
        U16 => DataType::UInt16,
        U32 => DataType::UInt32,
        USIZE | U64 => DataType::UInt64,
        // signed integers
        I8 => DataType::Int8,
        I16 => DataType::Int16,
        I32 => DataType::Int32,
        ISIZE | I64 => DataType::Int64,
        // floating point
        F32 => DataType::Float32,
        F64 => DataType::Float64,
        // everything else
        Bool => DataType::Boolean,
        Str | Char => DataType::Utf8,
    }
}
1011
/// Map an Arrow `DataType` back to a [`DType`].
///
/// # Panics
///
/// Hits `unimplemented!()` for every Arrow type not listed below
/// (including `Float16`, which has no `DType` counterpart yet).
#[cfg(feature = "parquet")]
fn arrow_to_dtype(dt: DataType) -> DType {
    match dt {
        // unsigned integers
        DataType::UInt8 => U8,
        DataType::UInt16 => U16,
        DataType::UInt32 => U32,
        DataType::UInt64 => U64,
        // signed integers
        DataType::Int8 => I8,
        DataType::Int16 => I16,
        DataType::Int32 => I32,
        DataType::Int64 => I64,
        // floating point
        // DataType::Float16 => DType::F16,
        DataType::Float32 => F32,
        DataType::Float64 => F64,
        // boolean and text
        DataType::Boolean => Bool,
        DataType::Utf8 => Str,
        _ => unimplemented!(),
    }
}
1031
// Convert one column into an Arrow array of exactly `$length` slots and push it
// onto `$chunk_vec`.
//
// * `$ty`        - element type of the column
// * `$to_arr`    - constructor taking `Vec<Option<$ty>>` (e.g. `StringArray::from`)
// * `$value`     - expression producing the column as `Vec<$ty>`
// * `$chunk_vec` - vector of `Arc<dyn Array>` receiving the result
// * `$length`    - number of slots (`usize`); shorter columns are padded with
//                  `None`, longer ones are cut off at `$length`
#[cfg(feature = "parquet")]
macro_rules! dtype_case_to_arrow {
    ($ty:ty, $to_arr:expr, $value:expr, $chunk_vec:expr; $length:expr) => {{
        let v: Vec<$ty> = $value;
        // Consume the column instead of indexing and cloning each element;
        // `repeat_with` supplies the `None` padding and `take` fixes the length.
        let v_wrap = v
            .into_iter()
            .map(Some)
            .chain(::std::iter::repeat_with(|| None))
            .take($length)
            .collect::<Vec<_>>();
        let arr = $to_arr(v_wrap);
        $chunk_vec.push(Arc::from(arr) as Arc<dyn Array>);
    }};
}
1049
// Pick the Arrow array type for a column of dtype `$kind` and delegate the
// conversion to `dtype_case_to_arrow!`.
//
// `USIZE`/`ISIZE` columns are written through the 64-bit integer arrays and
// `Char` columns are stringified first and written as a `StringArray`.
#[cfg(feature = "parquet")]
macro_rules! dtype_match_to_arrow {
    ($kind:expr, $column:expr, $arrays:expr; $rows:expr) => {{
        match $kind {
            // unsigned integers
            U8 => dtype_case_to_arrow!(u8, PrimitiveArray::<UInt8Type>::from, $column, $arrays; $rows),
            U16 => dtype_case_to_arrow!(u16, PrimitiveArray::<UInt16Type>::from, $column, $arrays; $rows),
            U32 => dtype_case_to_arrow!(u32, PrimitiveArray::<UInt32Type>::from, $column, $arrays; $rows),
            USIZE | U64 => dtype_case_to_arrow!(u64, PrimitiveArray::<UInt64Type>::from, $column, $arrays; $rows),
            // signed integers
            I8 => dtype_case_to_arrow!(i8, PrimitiveArray::<Int8Type>::from, $column, $arrays; $rows),
            I16 => dtype_case_to_arrow!(i16, PrimitiveArray::<Int16Type>::from, $column, $arrays; $rows),
            I32 => dtype_case_to_arrow!(i32, PrimitiveArray::<Int32Type>::from, $column, $arrays; $rows),
            ISIZE | I64 => dtype_case_to_arrow!(i64, PrimitiveArray::<Int64Type>::from, $column, $arrays; $rows),
            // floating point
            F32 => dtype_case_to_arrow!(f32, PrimitiveArray::<Float32Type>::from, $column, $arrays; $rows),
            F64 => dtype_case_to_arrow!(f64, PrimitiveArray::<Float64Type>::from, $column, $arrays; $rows),
            // boolean and text
            Bool => dtype_case_to_arrow!(bool, BooleanArray::from, $column, $arrays; $rows),
            Str => dtype_case_to_arrow!(String, StringArray::from, $column, $arrays; $rows),
            Char => {
                let chars: Vec<char> = $column;
                let strings = chars.into_iter().map(|c| c.to_string()).collect::<Vec<_>>();
                dtype_case_to_arrow!(String, StringArray::from, strings, $arrays; $rows)
            }
        }
    }};
}
1076
1077fn add_vec<T: std::ops::Add<T, Output = T> + Clone>(v: Vec<T>, w: Vec<T>) -> Series
1078where
1079    Series: TypedVector<T>,
1080{
1081    Series::new(v.into_iter().zip(w).map(|(x, y)| x + y).collect::<Vec<T>>())
1082}
1083
1084fn sub_vec<T: std::ops::Sub<T, Output = T> + Clone>(v: Vec<T>, w: Vec<T>) -> Series
1085where
1086    Series: TypedVector<T>,
1087{
1088    Series::new(v.into_iter().zip(w).map(|(x, y)| x - y).collect::<Vec<T>>())
1089}
1090
1091fn mul_scalar<T: std::ops::Mul<T, Output = T> + Clone + Copy>(v: Vec<T>, s: T) -> Series
1092where
1093    Series: TypedVector<T>,
1094{
1095    Series::new(v.into_iter().map(|x| x * s).collect::<Vec<T>>())
1096}
1097
1098// =============================================================================
1099// Implementations of DType variables
1100// =============================================================================
1101impl DType {
1102    /// Check for static numeric type
1103    pub fn is_numeric(&self) -> bool {
1104        match self {
1105            Bool => false,
1106            Str => false,
1107            Char => false,
1108            USIZE => false,
1109            ISIZE => false,
1110            _ => true,
1111        }
1112    }
1113
1114    pub fn is_integer(&self) -> bool {
1115        match self {
1116            Bool => false,
1117            Str => false,
1118            Char => false,
1119            F32 => false,
1120            F64 => false,
1121            _ => true,
1122        }
1123    }
1124}
1125
1126impl fmt::Display for DType {
1127    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1128        let st = match self {
1129            USIZE => "usize",
1130            U8 => "u8",
1131            U16 => "u16",
1132            U32 => "u32",
1133            U64 => "u64",
1134            ISIZE => "isize",
1135            I8 => "i8",
1136            I16 => "i16",
1137            I32 => "i32",
1138            I64 => "i64",
1139            F32 => "f32",
1140            F64 => "f64",
1141            Bool => "bool",
1142            Char => "char",
1143            Str => "String",
1144        };
1145        write!(f, "{}", st)
1146    }
1147}
1148
1149impl fmt::Display for DTypeArray {
1150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1151        let st = match self {
1152            DTypeArray::USIZE(v) => format!("array: {:?}\ndtype: usize", v),
1153            DTypeArray::U8(v) => format!("array: {:?}\ndtype: u8", v),
1154            DTypeArray::U16(v) => format!("array: {:?}\ndtype: u16", v),
1155            DTypeArray::U32(v) => format!("array: {:?}\ndtype: u32", v),
1156            DTypeArray::U64(v) => format!("array: {:?}\ndtype: u64", v),
1157            DTypeArray::ISIZE(v) => format!("array: {:?}\ndtype: isize", v),
1158            DTypeArray::I8(v) => format!("array: {:?}\ndtype: i8", v),
1159            DTypeArray::I16(v) => format!("array: {:?}\ndtype: i16", v),
1160            DTypeArray::I32(v) => format!("array: {:?}\ndtype: i32", v),
1161            DTypeArray::I64(v) => format!("array: {:?}\ndtype: i64", v),
1162            DTypeArray::F32(v) => format!("array: {}\ndtype: f32", format_float_vec!(v)),
1163            DTypeArray::F64(v) => format!("array: {}\ndtype: f64", format_float_vec!(v)),
1164            DTypeArray::Bool(v) => format!("array: {:?}\ndtype: bool", v),
1165            DTypeArray::Str(v) => format!("array: {:?}\ndtype: String", v),
1166            DTypeArray::Char(v) => format!("array: {:?}\ndtype: char", v),
1167        };
1168        write!(f, "{}", st)
1169    }
1170}
1171
1172// =============================================================================
1173// Implementations for Scalar & Series
1174// =============================================================================
1175
impl Scalar {
    /// Scalar to length 1 Series
    ///
    /// Consumes the scalar; the value is unwrapped according to `self.dtype`
    /// and placed in a one-element vector that is handed to `Series::new`.
    pub fn to_series(self) -> Series {
        dtype_match!(self.dtype, vec![self.unwrap()], Series::new; Vec)
    }

    /// Scalar to `String`
    ///
    /// Consumes the scalar and renders the unwrapped value through the
    /// module-level `to_string` helper (i.e. its `Display` output).
    /// Only the value is rendered; the `Display` impl for `Scalar` is what
    /// appends the dtype.
    pub fn to_string(self) -> String {
        dtype_match!(self.dtype, self.unwrap(), to_string)
    }
}
1186
1187impl Series {
1188    /// Getter for Series
1189    ///
1190    /// # Examples
1191    ///
1192    /// ```rust
1193    /// extern crate peroxide;
1194    /// use peroxide::fuga::*;
1195    ///
1196    /// fn main() {
1197    ///     let a = Series::new(vec![1i32,2,3,4]);
1198    ///     let x = a.at(0);
1199    ///
1200    ///     assert_eq!(x, Scalar::new(1i32));
1201    /// }
1202    /// ```
1203    pub fn at(&self, i: usize) -> Scalar {
1204        dtype_match!(self.dtype, self.at_raw(i), Scalar::new)
1205    }
1206
1207    /// Length for Series
1208    pub fn len(&self) -> usize {
1209        dtype_match!(self.dtype, self.as_slice().to_vec(), len; Vec)
1210    }
1211
1212    /// Explicit type casting for Series
1213    pub fn to_type(&self, dtype: DType) -> Series {
1214        dtype_cast_vec!(self.dtype, dtype, self.to_vec(), Series::new)
1215    }
1216
1217    /// Type casting for Series
1218    ///
1219    /// # Examples
1220    ///
1221    /// ```rust
1222    /// extern crate peroxide;
1223    /// use peroxide::fuga::*;
1224    ///
1225    /// fn main() {
1226    ///     let mut a = Series::new(vec![1i32, 2, 3, 4]);
1227    ///     a.as_type(USIZE);
1228    ///     
1229    ///     assert_eq!(a, Series::new(vec![1usize, 2, 3, 4]));
1230    /// }
1231    /// ```
1232    pub fn as_type(&mut self, dtype: DType) {
1233        let x = self.to_type(dtype);
1234        self.dtype = x.dtype;
1235        self.values = x.values;
1236    }
1237
1238    /// Select elements by indices, returning a new Series
1239    ///
1240    /// # Examples
1241    ///
1242    /// ```rust
1243    /// extern crate peroxide;
1244    /// use peroxide::fuga::*;
1245    ///
1246    /// fn main() {
1247    ///     let a = Series::new(vec![10, 20, 30, 40, 50]);
1248    ///     let b = a.select_indices(&[0, 2, 4]);
1249    ///     assert_eq!(b, Series::new(vec![10, 30, 50]));
1250    /// }
1251    /// ```
1252    pub fn select_indices(&self, indices: &[usize]) -> Series {
1253        macro_rules! extract_by_indices {
1254            ($array:expr, $type:ty) => {{
1255                let values: Vec<$type> = indices.iter().map(|&i| $array[i].clone()).collect();
1256                Series::new(values)
1257            }};
1258        }
1259
1260        match &self.values {
1261            DTypeArray::USIZE(v) => extract_by_indices!(v, usize),
1262            DTypeArray::U8(v) => extract_by_indices!(v, u8),
1263            DTypeArray::U16(v) => extract_by_indices!(v, u16),
1264            DTypeArray::U32(v) => extract_by_indices!(v, u32),
1265            DTypeArray::U64(v) => extract_by_indices!(v, u64),
1266            DTypeArray::ISIZE(v) => extract_by_indices!(v, isize),
1267            DTypeArray::I8(v) => extract_by_indices!(v, i8),
1268            DTypeArray::I16(v) => extract_by_indices!(v, i16),
1269            DTypeArray::I32(v) => extract_by_indices!(v, i32),
1270            DTypeArray::I64(v) => extract_by_indices!(v, i64),
1271            DTypeArray::F32(v) => extract_by_indices!(v, f32),
1272            DTypeArray::F64(v) => extract_by_indices!(v, f64),
1273            DTypeArray::Bool(v) => extract_by_indices!(v, bool),
1274            DTypeArray::Str(v) => extract_by_indices!(v, String),
1275            DTypeArray::Char(v) => extract_by_indices!(v, char),
1276        }
1277    }
1278
1279    /// Convert numeric Series to `Vec<f64>`
1280    ///
1281    /// Supports all integer and float types. Non-numeric types (Bool, Char, Str) return an error.
1282    pub fn to_f64_vec(&self) -> anyhow::Result<Vec<f64>> {
1283        match self.dtype {
1284            Bool | Char | Str => anyhow::bail!("Cannot convert {} Series to f64", self.dtype),
1285            _ => {
1286                let converted = self.to_type(F64);
1287                Ok(TypedVector::<f64>::to_vec(&converted))
1288            }
1289        }
1290    }
1291
1292    // =========================================================================
1293    // Statistics
1294    // =========================================================================
1295
1296    /// Sum of all elements (numeric types only)
1297    pub fn sum(&self) -> anyhow::Result<f64> {
1298        let v = self.to_f64_vec()?;
1299        Ok(v.iter().sum())
1300    }
1301
1302    /// Mean of all elements (numeric types only, Welford's algorithm)
1303    pub fn mean(&self) -> anyhow::Result<f64> {
1304        use crate::statistics::stat::Statistics;
1305        let v = self.to_f64_vec()?;
1306        anyhow::ensure!(!v.is_empty(), "Cannot compute mean of empty Series");
1307        Ok(v.mean())
1308    }
1309
1310    /// Variance of all elements (numeric types only, sample variance)
1311    pub fn var(&self) -> anyhow::Result<f64> {
1312        use crate::statistics::stat::Statistics;
1313        let v = self.to_f64_vec()?;
1314        anyhow::ensure!(v.len() > 1, "Cannot compute variance of Series with fewer than 2 elements");
1315        Ok(v.var())
1316    }
1317
1318    /// Standard deviation of all elements (numeric types only)
1319    pub fn sd(&self) -> anyhow::Result<f64> {
1320        use crate::statistics::stat::Statistics;
1321        let v = self.to_f64_vec()?;
1322        anyhow::ensure!(v.len() > 1, "Cannot compute sd of Series with fewer than 2 elements");
1323        Ok(v.sd())
1324    }
1325
1326    /// Minimum value, preserving original type
1327    pub fn min(&self) -> anyhow::Result<Scalar> {
1328        anyhow::ensure!(self.len() > 0, "Cannot compute min of empty Series");
1329
1330        macro_rules! typed_min {
1331            ($v:expr, $dtype:ident) => {{
1332                let min_val = $v.iter().cloned().reduce(|a, b| if a <= b { a } else { b }).unwrap();
1333                Ok(Scalar { value: DTypeValue::$dtype(min_val), dtype: DType::$dtype })
1334            }};
1335        }
1336
1337        match &self.values {
1338            DTypeArray::USIZE(v) => typed_min!(v, USIZE),
1339            DTypeArray::U8(v) => typed_min!(v, U8),
1340            DTypeArray::U16(v) => typed_min!(v, U16),
1341            DTypeArray::U32(v) => typed_min!(v, U32),
1342            DTypeArray::U64(v) => typed_min!(v, U64),
1343            DTypeArray::ISIZE(v) => typed_min!(v, ISIZE),
1344            DTypeArray::I8(v) => typed_min!(v, I8),
1345            DTypeArray::I16(v) => typed_min!(v, I16),
1346            DTypeArray::I32(v) => typed_min!(v, I32),
1347            DTypeArray::I64(v) => typed_min!(v, I64),
1348            DTypeArray::F32(v) => typed_min!(v, F32),
1349            DTypeArray::F64(v) => typed_min!(v, F64),
1350            DTypeArray::Bool(v) => typed_min!(v, Bool),
1351            DTypeArray::Char(v) => typed_min!(v, Char),
1352            DTypeArray::Str(v) => typed_min!(v, Str),
1353        }
1354    }
1355
1356    /// Maximum value, preserving original type
1357    pub fn max(&self) -> anyhow::Result<Scalar> {
1358        anyhow::ensure!(self.len() > 0, "Cannot compute max of empty Series");
1359
1360        macro_rules! typed_max {
1361            ($v:expr, $dtype:ident) => {{
1362                let max_val = $v.iter().cloned().reduce(|a, b| if a >= b { a } else { b }).unwrap();
1363                Ok(Scalar { value: DTypeValue::$dtype(max_val), dtype: DType::$dtype })
1364            }};
1365        }
1366
1367        match &self.values {
1368            DTypeArray::USIZE(v) => typed_max!(v, USIZE),
1369            DTypeArray::U8(v) => typed_max!(v, U8),
1370            DTypeArray::U16(v) => typed_max!(v, U16),
1371            DTypeArray::U32(v) => typed_max!(v, U32),
1372            DTypeArray::U64(v) => typed_max!(v, U64),
1373            DTypeArray::ISIZE(v) => typed_max!(v, ISIZE),
1374            DTypeArray::I8(v) => typed_max!(v, I8),
1375            DTypeArray::I16(v) => typed_max!(v, I16),
1376            DTypeArray::I32(v) => typed_max!(v, I32),
1377            DTypeArray::I64(v) => typed_max!(v, I64),
1378            DTypeArray::F32(v) => typed_max!(v, F32),
1379            DTypeArray::F64(v) => typed_max!(v, F64),
1380            DTypeArray::Bool(v) => typed_max!(v, Bool),
1381            DTypeArray::Char(v) => typed_max!(v, Char),
1382            DTypeArray::Str(v) => typed_max!(v, Str),
1383        }
1384    }
1385}
1386
1387impl Vector for Series {
1388    type Scalar = Scalar;
1389
1390    /// Add series
1391    ///
1392    /// # Example
1393    ///
1394    /// ```rust
1395    /// extern crate peroxide;
1396    /// use peroxide::fuga::*;
1397    ///
1398    /// fn main() {
1399    ///     let a = Series::new(vec![1,2,3]);
1400    ///     let b = Series::new(vec![3,2,1]);
1401    ///     let c = a.add_vec(&b);
1402    ///     assert_eq!(c, Series::new(vec![4,4,4]));
1403    /// }
1404    /// ```
1405    fn add_vec(&self, rhs: &Self) -> Self {
1406        assert_eq!(self.dtype, rhs.dtype, "DTypes are not same (add_vec)");
1407        dtype_match!(
1408            N;
1409            self.dtype,
1410            self.to_vec(),
1411            |x| add_vec(x, rhs.to_vec());
1412            Vec
1413        )
1414    }
1415
1416    /// Sub series
1417    ///
1418    /// # Example
1419    ///
1420    /// ```rust
1421    /// extern crate peroxide;
1422    /// use peroxide::fuga::*;
1423    ///
1424    /// fn main() {
1425    ///     let a = Series::new(vec![4,5,6]);
1426    ///     let b = Series::new(vec![1,2,3]);
1427    ///     let c = a.sub_vec(&b);
1428    ///     assert_eq!(c, Series::new(vec![3,3,3]));
1429    /// }
1430    /// ```
1431    fn sub_vec(&self, rhs: &Self) -> Self {
1432        assert_eq!(self.dtype, rhs.dtype, "DTypes are not same (add_vec)");
1433        dtype_match!(
1434            N;
1435            self.dtype,
1436            self.to_vec(),
1437            |x| sub_vec(x, rhs.to_vec());
1438            Vec
1439        )
1440    }
1441
1442    /// Mul Scalar
1443    ///
1444    /// # Example
1445    ///
1446    /// ```rust
1447    /// extern crate peroxide;
1448    /// use peroxide::fuga::*;
1449    ///
1450    /// fn main() {
1451    ///     let a = Series::new(vec![1,2,3]);
1452    ///     let b = Scalar::new(2);
1453    ///     let c = a.mul_scalar(b);
1454    ///     assert_eq!(c, Series::new(vec![2,4,6]));
1455    /// }
1456    /// ```
1457    fn mul_scalar(&self, rhs: Self::Scalar) -> Self {
1458        assert_eq!(self.dtype, rhs.dtype, "DTypes are not same (mul_scalar)");
1459
1460        dtype_match!(
1461            N;
1462            self.dtype,
1463            self.to_vec(),
1464            |x| mul_scalar(x, rhs.unwrap());
1465            Vec
1466        )
1467    }
1468}
1469
// One invocation per supported primitive type, pairing the Rust type with its
// `DType` variant. NOTE(review): presumably this generates the typed
// constructor/unwrap impls for `Scalar` - confirm against `impl_typed_scalar!`.
impl_typed_scalar!(usize, USIZE);
impl_typed_scalar!(u8, U8);
impl_typed_scalar!(u16, U16);
impl_typed_scalar!(u32, U32);
impl_typed_scalar!(u64, U64);
impl_typed_scalar!(isize, ISIZE);
impl_typed_scalar!(i8, I8);
impl_typed_scalar!(i16, I16);
impl_typed_scalar!(i32, I32);
impl_typed_scalar!(i64, I64);
impl_typed_scalar!(f32, F32);
impl_typed_scalar!(f64, F64);
impl_typed_scalar!(bool, Bool);
impl_typed_scalar!(char, Char);
impl_typed_scalar!(String, Str);

// Same type/variant table for the vector side. NOTE(review): presumably this
// generates `TypedVector<T> for Series` - confirm against `impl_typed_vector!`.
impl_typed_vector!(usize, USIZE);
impl_typed_vector!(u8, U8);
impl_typed_vector!(u16, U16);
impl_typed_vector!(u32, U32);
impl_typed_vector!(u64, U64);
impl_typed_vector!(isize, ISIZE);
impl_typed_vector!(i8, I8);
impl_typed_vector!(i16, I16);
impl_typed_vector!(i32, I32);
impl_typed_vector!(i64, I64);
impl_typed_vector!(f32, F32);
impl_typed_vector!(f64, F64);
impl_typed_vector!(bool, Bool);
impl_typed_vector!(char, Char);
impl_typed_vector!(String, Str);
1501
1502impl fmt::Display for Scalar {
1503    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1504        let st = format!("{}, dtype:{}", self.clone().to_string(), self.dtype);
1505        write!(f, "{}", st)
1506    }
1507}
1508
1509// impl FPVector for Series {
1510//     type Scalar = Scalar;
1511//
1512//     fn fmap<F>(&self, f: F) -> Self where
1513//         F: Fn(Self::Scalar) -> Self::Scalar {
1514//         dtype_match!(
1515//             self.dtype,
1516//             self.to_vec(),
1517//             |x| map(x, f);
1518//             Vec
1519//         )
1520//     }
1521//
1522//     fn reduce<F, T>(&self, init: T, f: F) -> Self::Scalar where
1523//         F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar,
1524//         T: Into<Self::Scalar> {
1525//         dtype_match!(
1526//             self.dtype,
1527//             self.to_vec(),
1528//             |x| reduce(x, f);
1529//             Vec
1530//         )
1531//     }
1532//
1533//     fn zip_with<F>(&self, f: F, other: &Self) -> Self where
1534//         F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar {
1535//         dtype_match!(
1536//             self.dtype,
1537//             self.to_vec(),
1538//             |x| zip_with(x, other.to_vec(), f);
1539//             Vec
1540//         )
1541//     }
1542//
1543//     fn filter<F>(&self, f: F) -> Self where
1544//         F: Fn(Self::Scalar) -> bool {
1545//         dtype_match!(
1546//             self.dtype,
1547//             self.to_vec(),
1548//             |x| filter(x, f);
1549//             Vec
1550//         )
1551//     }
1552//
1553//     fn take(&self, n: usize) -> Self {
1554//         dtype_match!(
1555//             self.dtype,
1556//             self.to_vec(),
1557//             |x| take(x, n);
1558//             Vec
1559//         )
1560//     }
1561//
1562//     fn skip(&self, n: usize) -> Self {
1563//         dtype_match!(
1564//             self.dtype,
1565//             self.to_vec(),
1566//             |x| skip(x, n);
1567//             Vec
1568//         )
1569//     }
1570//
1571//     fn sum(&self) -> Self::Scalar {
1572//         todo!()
1573//     }
1574//
1575//     fn prod(&self) -> Self::Scalar {
1576//         todo!()
1577//     }
1578// }
1579
1580// =============================================================================
1581// Implementation for DataFrame
1582// =============================================================================
1583
1584impl DataFrame {
1585    /// Declare new DataFrame with `Vec<Series>`
1586    pub fn new(v: Vec<Series>) -> Self {
1587        let ics = (0usize..v.len()).map(|x| x.to_string()).collect();
1588
1589        Self { data: v, ics }
1590    }
1591
1592    pub fn header(&self) -> &Vec<String> {
1593        &self.ics
1594    }
1595
1596    pub fn header_mut(&mut self) -> &mut Vec<String> {
1597        &mut self.ics
1598    }
1599
1600    /// Change header
1601    pub fn set_header(&mut self, new_header: Vec<&str>) {
1602        assert_eq!(self.ics.len(), new_header.len(), "Improper Header length!");
1603        self.ics = new_header.into_iter().map(|x| x.to_string()).collect();
1604    }
1605
1606    /// Push new pair of head, Series to DataFrame
1607    pub fn push(&mut self, name: &str, series: Series) {
1608        if !self.ics.is_empty() {
1609            assert_eq!(
1610                self.ics.iter().find(|x| x.as_str() == name),
1611                None,
1612                "Repetitive index!"
1613            );
1614        }
1615        self.ics.push(name.to_string());
1616        self.data.push(series);
1617    }
1618
1619    /// Extract specific row as DataFrame
1620    pub fn row(&self, i: usize) -> DataFrame {
1621        let mut df = DataFrame::new(vec![]);
1622        for (j, series) in self.data.iter().enumerate() {
1623            let s = series.at(i);
1624            let new_series = s.to_series();
1625            df.push(&self.ics[j], new_series);
1626        }
1627        df
1628    }
1629
    /// Render the DataFrame as an aligned text table.
    ///
    /// The first line holds the column names; each following line starts with a
    /// row label `r[i]` and then one cell per column. Columns shorter than the
    /// longest one get blank cells. When the longest column has more than 100
    /// elements, only the first five and the last five rows are printed,
    /// separated by a line of `...`. The result has no trailing newline.
    ///
    /// NOTE(review): cell text and widths come from `set_space!` and padding
    /// from `tab`; their exact formatting is defined elsewhere in the crate.
    pub fn spread(&self) -> String {
        // r: number of rows of the table = length of the longest column
        let r: usize = self
            .data
            .iter()
            .fold(0, |max_len, column| max(max_len, column.len()));
        let h = self.header();

        let mut result = String::new();

        // Abbreviated layout for long tables
        if r > 100 {
            // Width of the row-label column grows with the number of digits of r
            let lc1 = ((r as f64).log10() as usize) + 5;
            result.push_str(&tab("", lc1));

            // Header line; also records the width chosen for each column
            let mut space_vec: Vec<usize> = vec![];
            for i in 0..self.data.len() {
                let v = &self[i];
                let mut space = 0usize;
                // Only the elements that can be printed are measured: the first five ...
                for j in 0..v.len().min(5) {
                    let elem = v.at(j);
                    set_space!(elem, space);
                }
                // ... and, if the column reaches into the last five rows, its last five
                if v.len() >= r - 5 {
                    for j in v.len() - 5..v.len() {
                        let elem = v.at(j);
                        set_space!(elem, space);
                    }
                }
                // At least 5 wide, and always wide enough for the column name
                space = max(space + 1, 5);
                let k = &h[i];
                if k.len() >= space {
                    space = k.len() + 1;
                }
                result.push_str(&tab(k, space));
                space_vec.push(space);
            }
            result.push('\n');

            // First five rows
            for i in 0..5 {
                result.push_str(&tab(&format!("r[{}]", i), lc1));
                for j in 0..self.data.len() {
                    let v = &self[j];
                    let space = space_vec[j];
                    if i < v.len() {
                        let elem = v.at(i);
                        let st = set_space!(elem);
                        result.push_str(&tab(&st, space));
                    } else {
                        // Column is shorter than this row: blank cell
                        result.push_str(&tab("", space));
                    }
                }
                result.push('\n');
            }
            // Separator line marking the omitted rows
            result.push_str(&tab("...", lc1));
            for &space in space_vec.iter() {
                result.push_str(&tab("...", space));
            }
            result.push('\n');
            // Last five rows
            for i in r - 5..r {
                result.push_str(&tab(&format!("r[{}]", i), lc1));
                for j in 0..self.data.len() {
                    let v = &self[j];
                    let space = space_vec[j];
                    if i < v.len() {
                        let elem = v.at(i);
                        let st = set_space!(elem);
                        result.push_str(&tab(&st, space));
                    } else {
                        result.push_str(&tab("", space));
                    }
                }
                // No newline after the final row
                if i == r - 1 {
                    break;
                }
                result.push('\n');
            }
            return result;
        }

        // Full layout (at most 100 rows): fixed label width of 5
        result.push_str(&tab("", 5));
        let mut space_vec: Vec<usize> = vec![];

        // Header line; every element of every column is measured
        for i in 0..self.data.len() {
            let v = &self[i];
            let mut space = 0usize;
            for j in 0..v.len() {
                let elem = v.at(j);
                set_space!(elem, space)
            }
            // At least 5 wide, and always wide enough for the column name
            space = max(space + 1, 5);
            let k = &h[i];
            if k.len() >= space {
                space = k.len() + 1;
            }
            result.push_str(&tab(k, space));
            space_vec.push(space);
        }
        result.push('\n');

        // Data rows
        for i in 0..r {
            result.push_str(&tab(&format!("r[{}]", i), 5));
            for j in 0..self.data.len() {
                let v = &self[j];
                let space = space_vec[j];
                if i < v.len() {
                    let elem = v.at(i);
                    let st = set_space!(elem);
                    result.push_str(&tab(&st, space));
                } else {
                    // Column is shorter than this row: blank cell
                    result.push_str(&tab("", space));
                }
            }
            // No newline after the final row
            if i == (r - 1) {
                break;
            }
            result.push('\n');
        }
        result
    }
1748
1749    /// Type casting for DataFrame
1750    ///
1751    /// # Examples
1752    ///
1753    /// ```rust
1754    /// extern crate peroxide;
1755    /// use peroxide::fuga::*;
1756    ///
1757    /// fn main() {
1758    ///     let a = Series::new(vec![1i32, 2, 3, 4]);
1759    ///     let b = Series::new(vec![true, false, false, true]);
1760    ///     
1761    ///     let mut df = DataFrame::new(vec![a, b]);    // I32, Bool
1762    ///     df.as_types(vec![USIZE, U8]);               // USIZE, U8
1763    ///
1764    ///     let c = Series::new(vec![1usize, 2, 3, 4]);
1765    ///     let d = Series::new(vec![1u8, 0, 0, 1]);
1766    ///     let dg = DataFrame::new(vec![c, d]);
1767    ///
1768    ///     assert_eq!(df, dg);
1769    /// }
1770    /// ```
1771    pub fn as_types(&mut self, dtypes: Vec<DType>) {
1772        assert_eq!(
1773            self.data.len(),
1774            dtypes.len(),
1775            "Length of dtypes are not compatible with DataFrame"
1776        );
1777        for (i, dtype) in dtypes.into_iter().enumerate() {
1778            self[i].as_type(dtype);
1779        }
1780    }
1781
1782    /// Drop specific column by header
1783    ///
1784    /// # Examples
1785    ///
1786    /// ```rust
1787    /// extern crate peroxide;
1788    /// use peroxide::fuga::*;
1789    ///
1790    /// fn main() {
1791    ///     let a = Series::new(vec![1,2,3,4]);
1792    ///     let b = Series::new(vec![5,6,7,8]);
1793    ///
1794    ///     let mut df = DataFrame::new(vec![a.clone(), b]);
1795    ///     df.set_header(vec!["a", "b"]);
1796    ///
1797    ///     let mut dg = DataFrame::new(vec![a]);
1798    ///     dg.set_header(vec!["a"]);
1799    ///
1800    ///     df.drop("b");
1801    ///
1802    ///     assert_eq!(df, dg);
1803    /// }
1804    /// ```
1805    pub fn drop(&mut self, col_header: &str) {
1806        match self.ics.iter().position(|h| h == col_header) {
1807            Some(index) => {
1808                self.data.remove(index);
1809                self.ics.remove(index);
1810            }
1811            None => panic!("Can't drop header '{}'", col_header),
1812        }
1813    }
1814
1815    /// Filter DataFrame by specific column
1816    pub fn filter_by<F>(&self, column: &str, predicate: F) -> anyhow::Result<DataFrame>
1817    where
1818        F: Fn(Scalar) -> bool,
1819    {
1820        let series = match self.ics.iter().position(|x| x.as_str() == column) {
1821            Some(i) => &self.data[i],
1822            None => anyhow::bail!("Column '{}' not found in DataFrame", column),
1823        };
1824
1825        let mut indices = Vec::new();
1826        for i in 0..series.len() {
1827            let value = series.at(i);
1828            if predicate(value) {
1829                indices.push(i);
1830            }
1831        }
1832
1833        let mut new_df = DataFrame::new(vec![]);
1834        for (col_idx, col_series) in self.data.iter().enumerate() {
1835            let filtered_series = col_series.select_indices(&indices);
1836            new_df.push(&self.ics[col_idx], filtered_series);
1837        }
1838
1839        Ok(new_df)
1840    }
1841
1842    /// Mask DataFrame with a boolean Series
1843    pub fn mask(&self, mask: &Series) -> anyhow::Result<DataFrame> {
1844        if mask.len() != self.data[0].len() {
1845            anyhow::bail!(
1846                "Mask length ({}) does not match DataFrame row count ({})",
1847                mask.len(),
1848                self.data[0].len()
1849            );
1850        }
1851
1852        if mask.dtype != DType::Bool {
1853            anyhow::bail!("Mask Series must be of type Bool, but got {}", mask.dtype);
1854        }
1855
1856        let bool_mask: &[bool] = mask.as_slice();
1857        let ics: Vec<usize> = bool_mask
1858            .iter()
1859            .enumerate()
1860            .filter_map(|(i, &b)| if b { Some(i) } else { None })
1861            .collect();
1862
1863        Ok(self.select_rows(&ics))
1864    }
1865
1866    /// Select rows based on indices
1867    pub fn select_rows(&self, indices: &[usize]) -> DataFrame {
1868        let mut new_df = DataFrame::new(vec![]);
1869        for (col_idx, col_series) in self.data.iter().enumerate() {
1870            let filtered_series = col_series.select_indices(indices);
1871            new_df.push(&self.ics[col_idx], filtered_series);
1872        }
1873        new_df
1874    }
1875
1876    // =========================================================================
1877    // Shape / Info
1878    // =========================================================================
1879
1880    /// Number of rows (max column length)
1881    pub fn nrow(&self) -> usize {
1882        self.data.iter().fold(0, |acc, s| max(acc, s.len()))
1883    }
1884
1885    /// Number of columns
1886    pub fn ncol(&self) -> usize {
1887        self.data.len()
1888    }
1889
1890    /// Shape as (nrow, ncol)
1891    pub fn shape(&self) -> (usize, usize) {
1892        (self.nrow(), self.ncol())
1893    }
1894
1895    /// DType of each column
1896    pub fn dtypes(&self) -> Vec<DType> {
1897        self.data.iter().map(|s| s.dtype).collect()
1898    }
1899
1900    /// Check if the DataFrame has no columns or no rows
1901    pub fn is_empty(&self) -> bool {
1902        self.data.is_empty() || self.nrow() == 0
1903    }
1904
1905    /// Check if the DataFrame contains a column with the given header
1906    pub fn contains(&self, col_header: &str) -> bool {
1907        self.ics.iter().any(|x| x.as_str() == col_header)
1908    }
1909
1910    // =========================================================================
1911    // Row Operations
1912    // =========================================================================
1913
1914    /// Return the first `n` rows
1915    pub fn head(&self, n: usize) -> DataFrame {
1916        let nrow = self.nrow();
1917        let end = n.min(nrow);
1918        let indices: Vec<usize> = (0..end).collect();
1919        self.select_rows(&indices)
1920    }
1921
1922    /// Return the last `n` rows
1923    pub fn tail(&self, n: usize) -> DataFrame {
1924        let nrow = self.nrow();
1925        let start = nrow.saturating_sub(n);
1926        let indices: Vec<usize> = (start..nrow).collect();
1927        self.select_rows(&indices)
1928    }
1929
1930    /// Return a slice of rows starting at `offset` with the given `length`
1931    pub fn slice(&self, offset: usize, length: usize) -> DataFrame {
1932        let nrow = self.nrow();
1933        let end = (offset + length).min(nrow);
1934        let indices: Vec<usize> = (offset..end).collect();
1935        self.select_rows(&indices)
1936    }
1937
1938    // =========================================================================
1939    // Column Operations
1940    // =========================================================================
1941
1942    /// Select specific columns by name, returning a new DataFrame
1943    ///
1944    /// Panics if any column name does not exist.
1945    pub fn select(&self, columns: &[&str]) -> DataFrame {
1946        let mut new_df = DataFrame::new(vec![]);
1947        for &col in columns {
1948            let i = self
1949                .ics
1950                .iter()
1951                .position(|x| x.as_str() == col)
1952                .unwrap_or_else(|| panic!("Column '{}' not found in DataFrame", col));
1953            new_df.push(col, self.data[i].clone());
1954        }
1955        new_df
1956    }
1957
1958    /// Rename a column in-place
1959    ///
1960    /// Panics if the old column name does not exist.
1961    pub fn rename(&mut self, old: &str, new: &str) {
1962        let i = self
1963            .ics
1964            .iter()
1965            .position(|x| x.as_str() == old)
1966            .unwrap_or_else(|| panic!("Column '{}' not found in DataFrame", old));
1967        self.ics[i] = new.to_string();
1968    }
1969
1970    /// Return column names as `Vec<&str>`
1971    pub fn column_names(&self) -> Vec<&str> {
1972        self.ics.iter().map(|s| s.as_str()).collect()
1973    }
1974
1975    /// Select columns whose dtype is in the given list
1976    pub fn select_dtypes(&self, dtypes: &[DType]) -> DataFrame {
1977        let mut new_df = DataFrame::new(vec![]);
1978        for (i, series) in self.data.iter().enumerate() {
1979            if dtypes.contains(&series.dtype) {
1980                new_df.push(&self.ics[i], series.clone());
1981            }
1982        }
1983        new_df
1984    }
1985
1986    // =========================================================================
1987    // DataFrame-level Statistics
1988    // =========================================================================
1989
1990    /// Compute descriptive statistics for numeric columns
1991    ///
1992    /// Returns a DataFrame with rows: count, mean, sd, min, max
1993    /// and one column per numeric column from the original DataFrame.
1994    pub fn describe(&self) -> DataFrame {
1995        use crate::statistics::stat::Statistics;
1996
1997        let stat_labels = vec!["count", "mean", "sd", "min", "max"];
1998        let mut result = DataFrame::new(vec![]);
1999        result.push("stat", Series::new(stat_labels.iter().map(|s| s.to_string()).collect::<Vec<String>>()));
2000
2001        for (i, series) in self.data.iter().enumerate() {
2002            if let Ok(v) = series.to_f64_vec() {
2003                if v.is_empty() {
2004                    continue;
2005                }
2006                let count = v.len() as f64;
2007                let mean = v.mean();
2008                let sd = if v.len() > 1 { v.sd() } else { 0.0 };
2009                let min_val = v.iter().cloned().fold(f64::INFINITY, f64::min);
2010                let max_val = v.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
2011                result.push(
2012                    &self.ics[i],
2013                    Series::new(vec![count, mean, sd, min_val, max_val]),
2014                );
2015            }
2016        }
2017
2018        result
2019    }
2020
2021    /// Sum of each numeric column as a single-row DataFrame
2022    pub fn sum(&self) -> DataFrame {
2023        let mut result = DataFrame::new(vec![]);
2024        for (i, series) in self.data.iter().enumerate() {
2025            if let Ok(v) = series.to_f64_vec() {
2026                let s: f64 = v.iter().sum();
2027                result.push(&self.ics[i], Series::new(vec![s]));
2028            }
2029        }
2030        result
2031    }
2032
2033    /// Mean of each numeric column as a single-row DataFrame
2034    pub fn mean(&self) -> DataFrame {
2035        use crate::statistics::stat::Statistics;
2036
2037        let mut result = DataFrame::new(vec![]);
2038        for (i, series) in self.data.iter().enumerate() {
2039            if let Ok(v) = series.to_f64_vec() {
2040                if v.is_empty() {
2041                    continue;
2042                }
2043                let m = v.mean();
2044                result.push(&self.ics[i], Series::new(vec![m]));
2045            }
2046        }
2047        result
2048    }
2049}
2050
2051impl Index<&str> for DataFrame {
2052    type Output = Series;
2053
2054    fn index(&self, index: &str) -> &Self::Output {
2055        let i = self.ics.iter().position(|x| x.as_str() == index).unwrap();
2056        &self.data[i]
2057    }
2058}
2059
2060impl IndexMut<&str> for DataFrame {
2061    fn index_mut(&mut self, index: &str) -> &mut Self::Output {
2062        let i = self.ics.iter().position(|x| x.as_str() == index).unwrap();
2063        &mut self.data[i]
2064    }
2065}
2066
2067impl Index<usize> for DataFrame {
2068    type Output = Series;
2069
2070    fn index(&self, index: usize) -> &Self::Output {
2071        &self.data[index]
2072    }
2073}
2074
2075impl IndexMut<usize> for DataFrame {
2076    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
2077        &mut self.data[index]
2078    }
2079}
2080
2081impl fmt::Display for DataFrame {
2082    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2083        write!(f, "{}", self.spread())
2084    }
2085}
2086
2087// =============================================================================
2088// IO Implementations
2089// =============================================================================
2090
/// CSV input/output for a type
///
/// Only available when the `csv` feature is enabled.
#[cfg(feature = "csv")]
pub trait WithCSV: Sized {
    /// Write `self` to the CSV file at `file_path`.
    fn write_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
    /// Read a value from the CSV file at `file_path`, whose fields are
    /// separated by `delimiter`.
    fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>>;
}
2097
#[cfg(feature = "csv")]
impl WithCSV for DataFrame {
    /// Write the DataFrame to a CSV file
    ///
    /// The header is written first, followed by one record per row up to the
    /// length of the longest column. Shorter columns are padded with empty
    /// fields.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be created or a record cannot be
    /// written.
    fn write_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
        let mut wtr = WriterBuilder::new().from_path(file_path)?;
        let r: usize = self
            .data
            .iter()
            .fold(0, |max_len, column| max(max_len, column.len()));
        // The header is only read here, so it is passed without cloning.
        wtr.write_record(self.header())?;

        for i in 0..r {
            let record: Vec<String> = self
                .data
                .iter()
                .map(|v| {
                    if i < v.len() {
                        v.at(i).to_string()
                    } else {
                        String::new()
                    }
                })
                .collect();
            wtr.write_record(record)?;
        }
        wtr.flush()?;
        Ok(())
    }

    /// Read a CSV file with the given delimiter
    ///
    /// The first record is used as the header. Every column is read as a
    /// `String` series. Empty fields are skipped, so columns may end up with
    /// different lengths.
    ///
    /// # Errors
    ///
    /// Returns an error if `delimiter` does not fit in a single byte, if the
    /// file cannot be opened, or if a record cannot be parsed.
    fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>> {
        // The reader takes a single-byte delimiter. A plain `as u8` cast would
        // silently truncate wider chars to an unrelated byte, so reject them.
        if (delimiter as u32) > 0xFF {
            return Err(format!(
                "CSV delimiter must be a single-byte character, but got '{}'",
                delimiter
            )
            .into());
        }

        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .delimiter(delimiter as u8)
            .from_path(file_path)?;

        let mut result = DataFrame::new(vec![]);
        for h in rdr.headers()?.iter() {
            result.push(h, Series::new(Vec::<String>::new()));
        }

        for rec in rdr.deserialize() {
            let record: HashMap<String, String> = rec?;
            // Map iteration order is irrelevant: each value goes to the
            // column named by its own key.
            for (head, value) in record {
                if !value.is_empty() {
                    result[head.as_str()].push(value);
                }
            }
        }

        Ok(result)
    }
}
2150
/// NetCDF input/output for a type
///
/// Only available when the `nc` feature is enabled.
#[cfg(feature = "nc")]
pub trait WithNetCDF: Sized {
    /// Write `self` to the NetCDF file at `file_path`.
    fn write_nc(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
    /// Read a value from the NetCDF file at `file_path`.
    fn read_nc(file_path: &str) -> Result<Self, Box<dyn Error>>;
    /// Read only the variables named in `header` from the NetCDF file at
    /// `file_path`.
    fn read_nc_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>>;
}
2158
#[cfg(feature = "nc")]
impl WithNetCDF for DataFrame {
    /// Write the DataFrame to a NetCDF file
    ///
    /// Each column becomes one variable named after its header, with its own
    /// dimension named `"{i}th col"` whose size is the column length.
    /// `USIZE` columns are stored as `u64`, `ISIZE` as `i64`, and `Bool` and
    /// `Char` as `u8`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file, a dimension or a variable cannot be
    /// created, or if values cannot be written.
    fn write_nc(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
        let mut f = netcdf::create(file_path)?;

        for (i, h) in self.header().iter().enumerate() {
            let dim_name = format!("{}th col", i);
            let v = &self[h.as_str()];
            let dim = v.len();
            f.add_dimension(&dim_name, dim)?;
            match v.dtype {
                dtype if dtype.is_numeric() => {
                    let vtype = dtype_to_vtype(dtype);
                    let var = &mut f.add_variable_with_type(
                        h,
                        &[&dim_name],
                        &VariableType::Basic(vtype),
                    )?;
                    dtype_match!(N; dtype, v.to_vec(), |v| nc_put_value(var, v); Vec)?;
                }
                Str => {
                    // Strings are written one element at a time.
                    let var = &mut f.add_string_variable(h, &[&dim_name])?;
                    let v_s: &[String] = v.as_slice();
                    for (i, s) in v_s.iter().enumerate() {
                        var.put_string(s, Some(&[i]))?;
                    }
                }
                USIZE => {
                    let v = v.to_type(U64);
                    let var = &mut f.add_variable::<u64>(h, &[&dim_name])?;
                    let v_slice: &[u64] = v.as_slice();
                    var.put_values(v_slice, None, None)?;
                }
                ISIZE => {
                    let v = v.to_type(I64);
                    let var = &mut f.add_variable::<i64>(h, &[&dim_name])?;
                    let v_slice: &[i64] = v.as_slice();
                    var.put_values(v_slice, None, None)?;
                }
                Bool => {
                    let v = v.to_type(U8);
                    let var = &mut f.add_variable::<u8>(h, &[&dim_name])?;
                    let v_slice: &[u8] = v.as_slice();
                    var.put_values(v_slice, None, None)?;
                }
                Char => {
                    let v = v.to_type(U8);
                    let var = &mut f.add_variable::<u8>(h, &[&dim_name])?;
                    let v_slice: &[u8] = v.as_slice();
                    var.put_values(v_slice, None, None)?;
                }
                _ => unreachable!(),
            }
        }

        Ok(())
    }

    /// Read every variable of a NetCDF file into a DataFrame
    ///
    /// String variables become `String` series. All other variables are read
    /// according to their basic NetCDF type.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or a variable cannot be
    /// read.
    fn read_nc(file_path: &str) -> Result<Self, Box<dyn Error>> {
        let f = netcdf::open(file_path)?;
        let mut df = DataFrame::new(vec![]);
        for v in f.variables() {
            let h = v.name();
            if v.vartype().is_string() {
                let mut data: Vec<String> = vec![Default::default(); v.len()];
                for i in 0..v.len() {
                    data[i] = v.string_value(Some(&[i]))?;
                }
                df.push(&h, Series::new(data));
            } else {
                let dtype = vtype_to_dtype(v.vartype().as_basic().unwrap());
                let series = dtype_match!(N; dtype, vec![], |vec| nc_read_value(&v, vec); Vec)?;
                df.push(&h, series);
            }
        }
        Ok(df)
    }

    /// Read netcdf to DataFrame with specific header
    ///
    /// Only the variables named in `header` are read, in the order given.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, if a requested variable
    /// does not exist in the file, or if a variable cannot be read.
    ///
    /// # Example
    ///
    /// ```
    /// #[macro_use]
    /// extern crate peroxide;
    /// use peroxide::fuga::*;
    ///
    /// fn main() -> Result<(), Box<dyn Error>> {
    ///     let mut df = DataFrame::new(vec![]);
    ///     df.push("a", Series::new(vec![1,2,3,4]));
    ///     df.push("b", Series::new(vec!['a', 'b', 'c', 'd']));
    ///     df.push("c", Series::new(c!(0.1, 0.2, 0.3, 0.4)));
    ///     df.write_nc("example_data/doc_nc2.nc")?;
    ///
    ///     let dg = DataFrame::read_nc_by_header("example_data/doc_nc2.nc", vec!["a", "c"])?;
    ///
    ///     df.drop("b");
    ///
    ///     assert_eq!(df, dg);
    ///
    ///     Ok(())
    /// }
    /// ```
    fn read_nc_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>> {
        let f = netcdf::open(file_path)?;
        let mut df = DataFrame::new(vec![]);
        for h in header {
            // A missing variable is a recoverable input error, so report it
            // through the `Result` instead of panicking.
            let v = match f.variable(h) {
                Some(val) => val,
                None => {
                    return Err(
                        format!("There are no corresponding values for '{}'", h).into(),
                    )
                }
            };
            if v.vartype().is_string() {
                let mut data: Vec<String> = vec![Default::default(); v.len()];
                for i in 0..v.len() {
                    data[i] = v.string_value(Some(&[i]))?;
                }
                df.push(h, Series::new(data));
            } else {
                let dtype = vtype_to_dtype(v.vartype().as_basic().unwrap());
                let series = dtype_match!(N; dtype, vec![], |vec| nc_read_value(&v, vec); Vec)?;
                df.push(h, series);
            }
        }
        Ok(df)
    }
}
2287
/// Parquet input/output for a type
///
/// Only available when the `parquet` feature is enabled.
#[cfg(feature = "parquet")]
pub trait WithParquet {
    /// Write `self` to the parquet file at `file_path` using the given
    /// `compression`.
    fn write_parquet(
        &self,
        file_path: &str,
        compression: Compression,
    ) -> Result<(), Box<dyn Error>>;
    /// Read a value from the parquet file at `file_path`.
    fn read_parquet(file_path: &str) -> Result<Self, Box<dyn Error>>
    where
        Self: Sized;
    // fn read_parquet_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>> where Self: Sized;
}
2301
/// Appends one Arrow column chunk to the series stored under a header.
///
/// The array is downcast to `$arrow_type`, converted to a `Vec<$rust_type>`
/// by the extraction expression, and appended to any values already stored
/// under `$h`. The merged vector is then stored in the map as a new `Series`.
///
/// # Arguments
/// - `$hash_map`: The mutable map from header to column series.
/// - `$h`: The column name (header).
/// - `$arr`: The `ArrayRef` (the raw column data from Arrow).
/// - `$arrow_type`: The concrete Arrow array type to downcast to (e.g., `BooleanArray`).
/// - `$rust_type`: The target Rust element type (e.g., `bool`).
/// - `|$concrete_array:ident| $extract_body:expr`: A closure-like expression that turns
///   the downcast array into a `Vec<$rust_type>`.
///
/// # Panics
/// Panics if `$arr` is not actually a `$arrow_type`.
#[cfg(feature = "parquet")]
macro_rules! process_column {
    ($hash_map:expr, $h:expr, $arr:expr, $arrow_type:ty, $rust_type:ty, |$concrete_array:ident| $extract_body:expr) => {{
        // Recover the concrete Arrow array behind the type-erased reference.
        let $concrete_array = $arr.as_any().downcast_ref::<$arrow_type>().unwrap();
        let fresh: Vec<$rust_type> = $extract_body;

        // Prepend whatever was collected for this header by earlier batches.
        let combined: Vec<$rust_type> = match $hash_map.get($h) {
            Some(previous) => {
                let mut joined: Vec<$rust_type> = previous.to_vec();
                joined.extend(fresh);
                joined
            }
            None => fresh,
        };
        $hash_map.insert($h.clone(), Series::new(combined));
    }};
}
2333
#[cfg(feature = "parquet")]
impl WithParquet for DataFrame {
    /// Write DataFrame to parquet
    ///
    /// All columns go into a single row group. Each Arrow leaf column is
    /// encoded on its own worker thread, and the resulting chunks are then
    /// appended to the row group.
    ///
    /// # Errors
    ///
    /// Returns an error if the schema cannot be converted, the file cannot be
    /// created, or a column fails to encode or write. A panicking worker
    /// thread is also reported as an error.
    fn write_parquet(
        &self,
        file_path: &str,
        compression: Compression,
    ) -> Result<(), Box<dyn Error>> {
        let mut schema_vec = vec![];
        let mut arr_vec = vec![];

        let max_length = self.data.iter().fold(0usize, |acc, x| acc.max(x.len()));

        for h in self.header().iter() {
            let v = &self[h.as_str()];
            let field = Field::new(h.as_str(), dtype_to_arrow(v.dtype), false);

            dtype_match_to_arrow!(v.dtype, v.to_vec(), arr_vec; max_length);
            schema_vec.push(field);
        }

        let schema = Arc::new(Schema::new(schema_vec));
        let parquet_schema = ArrowSchemaConverter::new()
            .convert(&schema)
            .map_err(|e| format!("Failed to convert schema: {}", e))?;
        let writer_properties = WriterProperties::builder()
            .set_compression(compression)
            .build();
        let props = Arc::new(writer_properties);

        // One worker thread per column writer. Each drains its channel and
        // returns the closed column chunk.
        let col_writers = get_column_writers(&parquet_schema, &props, &schema)?;
        let mut workers: Vec<_> = col_writers
            .into_iter()
            .map(|mut col_writer| {
                let (send, recv) = std::sync::mpsc::channel::<ArrowLeafColumn>();
                let handle = std::thread::spawn(move || {
                    for col in recv {
                        col_writer.write(&col)?;
                    }
                    col_writer.close()
                });
                (handle, send)
            })
            .collect();

        let root_schema = parquet_schema.root_schema_ptr();
        let mut output_file = std::fs::File::create(file_path)?;
        let mut writer = SerializedFileWriter::new(&mut output_file, root_schema, props.clone())?;

        let mut row_group_writer: SerializedRowGroupWriter<'_, _> = writer.next_row_group()?;

        let mut worker_iter = workers.iter_mut();
        for (arr, field) in arr_vec.iter().zip(&schema.fields) {
            for leaves in compute_leaves(field, &Arc::new(arr))? {
                // Report a leaf/writer count mismatch as an error, not a panic.
                worker_iter
                    .next()
                    .ok_or("More Arrow leaf columns than parquet column writers")?
                    .1
                    .send(leaves)?;
            }
        }

        for (handle, send) in workers {
            use parquet::arrow::arrow_writer::ArrowColumnChunk;

            // Closing the channel lets the worker's receive loop finish.
            drop(send);
            // First `?`: the worker thread panicked. Second `?`: the column
            // writer returned an error.
            let chunk: ArrowColumnChunk = handle
                .join()
                .map_err(|_| "Parquet column writer thread panicked")??;
            chunk.append_to_row_group(&mut row_group_writer)?;
        }
        row_group_writer.close()?;
        writer.close()?;

        Ok(())
    }

    /// Read parquet to DataFrame
    ///
    /// The reader is first built with the largest possible batch size. On
    /// failure the batch size is divided by 10 and the build is retried until
    /// it reaches zero. Column chunks from all batches are concatenated per
    /// header, in first-seen order.
    ///
    /// Null entries in `Char` and `Str` columns are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, the reader cannot be
    /// built at any batch size, or a record batch cannot be read.
    fn read_parquet(file_path: &str) -> Result<Self, Box<dyn Error>>
    where
        Self: Sized,
    {
        use parquet::arrow::arrow_reader::ParquetRecordBatchReader;

        let mut df = DataFrame::new(vec![]);

        let file = std::fs::File::open(file_path)?;
        let builder = ParquetRecordBatchReaderBuilder::try_new(file.try_clone()?)?;
        let schema = builder.schema();
        let fields = schema.fields.clone();
        let mut batch_size = usize::MAX; // Use maximum batch size
        let reader: ParquetRecordBatchReader = loop {
            let builder = ParquetRecordBatchReaderBuilder::try_new(file.try_clone()?)?;
            let reader = builder.with_batch_size(batch_size).build();
            match reader {
                Ok(r) => break r,
                Err(e) => {
                    if batch_size > 0 {
                        batch_size /= 10; // Reduce batch size if error occurs
                    } else {
                        println!(
                            "Failed to read parquet file: {} with eventually batch size 1",
                            e
                        );
                        return Err(Box::new(e));
                    }
                }
            }
        };
        let all_batches: Vec<_> = reader.collect::<Result<Vec<_>, _>>()?;

        // The map keeps insertion order, so columns keep their file order.
        let mut hash_map = IndexMap::<String, Series>::new();
        for batch in all_batches {
            let arrs = batch.columns();

            for (field, arr) in fields.iter().zip(arrs) {
                let h = field.name();
                let dt = field.data_type();
                let at = arrow_to_dtype(dt.clone());
                match at {
                    Bool => process_column!(hash_map, h, arr, BooleanArray, bool, |d| d
                        .values()
                        .iter()
                        .collect()),
                    Char => process_column!(hash_map, h, arr, StringArray, char, |d| d
                        .iter()
                        .filter_map(|opt_s| opt_s.and_then(|s| s.chars().next()))
                        .collect()),
                    Str => process_column!(hash_map, h, arr, StringArray, String, |d| d
                        .iter()
                        .filter_map(|opt_s| opt_s.map(String::from))
                        .collect()),
                    USIZE => {
                        process_column!(hash_map, h, arr, PrimitiveArray<UInt64Type>, usize, |d| d
                            .values()
                            .iter()
                            .map(|&x| x as usize)
                            .collect())
                    }
                    U8 => process_column!(hash_map, h, arr, PrimitiveArray<UInt8Type>, u8, |d| d
                        .values()
                        .to_vec()),
                    U16 => {
                        process_column!(hash_map, h, arr, PrimitiveArray<UInt16Type>, u16, |d| d
                            .values()
                            .to_vec())
                    }
                    U32 => {
                        process_column!(hash_map, h, arr, PrimitiveArray<UInt32Type>, u32, |d| d
                            .values()
                            .to_vec())
                    }
                    U64 => {
                        process_column!(hash_map, h, arr, PrimitiveArray<UInt64Type>, u64, |d| d
                            .values()
                            .to_vec())
                    }
                    ISIZE => {
                        process_column!(hash_map, h, arr, PrimitiveArray<Int64Type>, isize, |d| d
                            .values()
                            .iter()
                            .map(|&x| x as isize)
                            .collect())
                    }
                    I8 => process_column!(hash_map, h, arr, PrimitiveArray<Int8Type>, i8, |d| d
                        .values()
                        .to_vec()),
                    I16 => process_column!(hash_map, h, arr, PrimitiveArray<Int16Type>, i16, |d| d
                        .values()
                        .to_vec()),
                    I32 => process_column!(hash_map, h, arr, PrimitiveArray<Int32Type>, i32, |d| d
                        .values()
                        .to_vec()),
                    I64 => process_column!(hash_map, h, arr, PrimitiveArray<Int64Type>, i64, |d| d
                        .values()
                        .to_vec()),
                    F32 => {
                        process_column!(hash_map, h, arr, PrimitiveArray<Float32Type>, f32, |d| d
                            .values()
                            .to_vec())
                    }
                    F64 => {
                        process_column!(hash_map, h, arr, PrimitiveArray<Float64Type>, f64, |d| d
                            .values()
                            .to_vec())
                    }
                }
            }
        }

        for (h, data) in hash_map {
            df.push(&h, data);
        }

        Ok(df)
    }
}