# Source code for geochemistrypi.data_mining.data.imputation
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from rich import print
from sklearn.impute import SimpleImputer
from ..constants import SECTION
from .data_readiness import float_input
# [docs]
def imputer(data: pd.DataFrame, method: str) -> tuple[dict, np.ndarray]:
    """Apply imputation on missing values.

    Parameters
    ----------
    data : pd.DataFrame
        The dataset with missing values.

    method : str
        The imputation method. One of "Mean Value", "Median Value",
        "Most Frequent Value" or "Constant(Specified Value)".

    Returns
    -------
    imputation_config : dict
        The imputation configuration: a single-entry dict mapping the
        imputer's class name to its ``get_params()`` result.

    data_imputed : np.ndarray
        The dataset after imputing.

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported imputation methods.
    """
    # Menu label -> SimpleImputer strategy name.
    strategies = {
        "Mean Value": "mean",
        "Median Value": "median",
        "Most Frequent Value": "most_frequent",
        "Constant(Specified Value)": "constant",
    }
    try:
        strategy = strategies[method]
    except KeyError:
        # Same exception type as the plain dict lookup, but the message now
        # tells the caller which labels are accepted.
        raise KeyError(
            f"Unsupported imputation method {method!r}; "
            f"expected one of {list(strategies)}."
        ) from None

    if strategy == "constant":
        # The constant strategy needs an explicit fill value, which is
        # obtained through float_input (presumably an interactive prompt
        # with 0 as the default -- confirm against data_readiness).
        filled_value = float_input(0, SECTION[2], "@Specified Value: ")
        simple_imputer = SimpleImputer(missing_values=np.nan, strategy=strategy, fill_value=filled_value)
    else:
        simple_imputer = SimpleImputer(missing_values=np.nan, strategy=strategy)

    data_imputed = simple_imputer.fit_transform(data)
    # Report success only once the transform has actually completed, so a
    # failing imputation never announces itself as successful.
    print(f"Successfully fill the missing values with the {strategy} value " f"of each feature column respectively.")
    imputation_config = {type(simple_imputer).__name__: simple_imputer.get_params()}
    return imputation_config, data_imputed