Source code for impyute.imputation.ts.locf
import numpy as np
from impyute.ops import matrix
from impyute.ops import wrapper
from impyute.ops import error
@wrapper.wrappers
@wrapper.checks
def locf(data, axis=0):
    """ Last Observation Carried Forward

    Fill each missing (NaN) value with the value that precedes it in the
    same series. If the series starts with one or more NaNs, there is
    nothing to carry forward, so the first non-NaN value found further along
    the series is carried backward into every leading NaN instead.

    Parameters
    ----------
    data: numpy.ndarray
        2D data to impute.
    axis: int (optional)
        0 if each row of `data` is a series (gaps are filled along the row);
        the data is transposed before filling.
        1 if each column of `data` is a series (gaps are filled down the
        column).

    Returns
    -------
    numpy.ndarray
        Imputed data. When `axis` is 0 this function body returns the array
        in the transposed orientation it worked on.

    Raises
    ------
    error.BadInputError
        If `axis` is neither 0 nor 1.
    Exception
        If a series starts with NaN and contains no non-NaN value at all.

    """
    if axis == 0:
        # NOTE(review): the transposed array is what gets returned below, so
        # the result has the input's shape swapped. Kept as-is because
        # callers may already compensate for it -- confirm before changing.
        data = np.transpose(data)
    elif axis != 1:
        raise error.BadInputError("Error: Axis value is invalid, please use either 0 (row format) or 1 (column format)")

    n_rows = np.shape(data)[0]
    for x_i, y_i in matrix.nan_indices(data):
        if x_i > 0:
            # Simplest scenario: copy the value one row back.
            # Assumes matrix.nan_indices yields indices in row order, so the
            # previous row has already been filled -- TODO confirm.
            data[x_i][y_i] = data[x_i - 1][y_i]
            continue

        # Leading NaN (first row): scan every later row, *including the last
        # one*, for the first observed value in this column.
        for x_next in range(1, n_rows):
            if not np.isnan(data[x_next][y_i]):
                break
        else:
            raise Exception("Error: Entire Column is NaN")

        # Carry that value backward into all rows that precede it.
        fill_value = data[x_next][y_i]
        for x_fill in range(x_next):
            data[x_fill][y_i] = fill_value
    return data