Source code for impyute.util.describe

""" impyute.util.describe """
from impyute.util import find_null


def describe(data):
    """Summarize the null values found in ``data``.

    Parameters
    ----------
    data: numpy.ndarray
        The data you want to get a description from. It must expose a
        ``size`` attribute giving its total number of elements.

    Returns
    -------
    dict
        null_xy:
            Whatever ``find_null(data)`` returns (presumably the indices of
            all null points -- confirm against ``find_null``).
        null_n: int
            Total number of null points, i.e. ``len(null_xy)``.
        pmissing_n: float
            Fraction (between 0 and 1) of the elements of ``data`` that are
            null. ``0.0`` when ``data`` has no elements.

    """
    # TODO: missingness, pmissing_rows/cols, null_rows/cols, mean_rows/cols
    # and std_dev are planned description fields that are not computed yet.
    null_xy = find_null(data)
    null_n = len(null_xy)

    # ``size`` is the element count over all dimensions; the previous
    # ``len(data.flatten)`` took the length of the method object itself and
    # raised TypeError on every call.
    total_n = data.size

    # Convert before dividing so the ratio is not truncated by integer
    # division, and avoid dividing by zero for an empty array.
    pmissing_n = null_n / float(total_n) if total_n else 0.0

    return {"null_xy": null_xy,
            "null_n": null_n,
            "pmissing_n": pmissing_n}