Source code for impyute.imputation.ts.moving_window

import numpy as np
from impyute.ops import matrix
from impyute.ops import wrapper
# pylint: disable=invalid-name, too-many-arguments, too-many-locals, too-many-branches, broad-except, len-as-condition

[docs]@wrapper.wrappers @wrapper.checks def moving_window(data, nindex=None, wsize=5, errors="coerce", func=np.mean, inplace=False): """ Interpolate the missing values based on nearby values. For example, with an array like this: array([[-1.24940, -1.38673, -0.03214945, 0.08255145, -0.007415], [ 2.14662, 0.32758 , -0.82601414, 1.78124027, 0.873998], [-0.41400, -0.977629, nan, -1.39255344, 1.680435], [ 0.40975, 1.067599, 0.29152388, -1.70160145, -0.565226], [-0.54592, -1.126187, 2.04004377, 0.16664863, -0.010677]]) Using a `k` or window size of 3. The one missing value would be set to -1.18509122. The window operates on the horizontal axis. Usage ----- The parameters default the function to a moving mean. You may want to change the default window size: moving_window(data, wsize=10) To only look at past data (null value is at the rightmost index in the window): moving_window(data, nindex=-1) To use a custom function: moving_window(data, func=np.median) You can also do something like take 1.5x the max of previous values in the window: moving_window(data, func=lambda arr: max(arr) * 1.50, nindex=-1) Parameters ---------- data: numpy.ndarray 2D matrix to impute. nindex: int Null index. Index of the null value inside the moving average window. Use cases: Say you wanted to make value skewed toward the left or right side. 0 would only take the average of values from the right and -1 would only take the average of values from the left wsize: int Window size. Size of the moving average window/area of values being used for each local imputation. This number includes the missing value. errors: {"raise", "coerce", "ignore"} Errors will occur with the indexing of the windows - for example if there is a nan at data[x][0] and `nindex` is set to -1 or there is a nan at data[x][-1] and `nindex` is set to 0. `"raise"` will raise an error, `"coerce"` will try again using an nindex set to the middle and `"ignore"` will just leave it as a nan. inplace: {True, False} Whether to return a copy or run on the passed-in array Returns ------- numpy.ndarray Imputed data. """ if errors == "ignore": raise Exception("`errors` value `ignore` not implemented yet. Sorry!") if not inplace: data = data.copy() if nindex is None: # If using equal window side lengths assert wsize % 2 == 1, "The parameter `wsize` should not be even "\ "if the value `nindex` is not set since it defaults to the midpoint "\ "and an even `wsize` makes the midpoint ambiguous" wside_left = wsize // 2 wside_right = wsize // 2 else: # If using custom window side lengths assert nindex < wsize, "The null index must be smaller than the window size" if nindex == -1: wside_left = wsize - 1 wside_right = 0 else: wside_left = nindex wside_right = wsize - nindex - 1 while True: nan_xy = matrix.nan_indices(data) n_nan_prev = len(nan_xy) for x_i, y_i in nan_xy: left_i = max(0, y_i-wside_left) right_i = min(len(data), y_i+wside_right+1) window = data[x_i, left_i: right_i] window_not_null = window[~np.isnan(window)] if len(window_not_null) > 0: try: data[x_i][y_i] = func(window_not_null) continue except Exception as e: if errors == "raise": raise e if errors == "coerce": # If either the window has a length of 0 or the aggregate function fails somehow, # do a fallback of just trying the best we can by using it as the middle and trying # to recalculate. Use temporary wside_left/wside_right, for only the calculation of # this specific problamatic value wside_left_tmp = wsize // 2 wside_right_tmp = wside_left_tmp left_i_tmp = max(0, y_i-wside_left_tmp) right_i_tmp = min(len(data), y_i+wside_right_tmp+1) window = data[x_i, left_i_tmp:right_i_tmp] window_not_null = window[~np.isnan(window)] try: data[x_i][y_i] = func(window_not_null) except Exception as e: print("Exception:", e) if n_nan_prev == len(matrix.nan_indices(data)): break return data