# Code
def best_fit_line(x_values, y_values):
    """Find the slope and intercept of the least-squares line through x, y.

    Uses the mean-based closed form
        m = (mean(x) * mean(y) - mean(x * y)) / (mean(x)**2 - mean(x**2))
        b = mean(y) - m * mean(x)

    Args:
        x_values: sequence of x coordinates.
        y_values: sequence of y coordinates, paired with x_values by position.
            If the lengths differ, the x*y products only cover the shorter
            length (zip truncates) while the plain means use every value.

    Returns:
        (m, b): slope and intercept. When every x is identical the
        denominator is zero (vertical line); the slope is then reported as
        0.0 and the intercept becomes mean(y).
    """
    # https://enlight.nyc/projects/linear-regression
    # Each mean is computed exactly once and reused below.
    x_mean = mean(x_values)
    y_mean = mean(y_values)
    xy_mean = mean([x * y for x, y in zip(x_values, y_values)])
    x_squared_mean = mean([x**2 for x in x_values])

    normalizer = x_mean**2 - x_squared_mean
    if normalizer != 0.0:
        m = (x_mean * y_mean - xy_mean) / normalizer
    else:
        # All x values coincide: fall back to a flat line instead of dividing by zero.
        m = 0.0
    b = y_mean - m * x_mean
    return m, b
def r_squared_value(original_y, predicted_y):
    """Measure goodness of fit by comparing original y values to predicted ones.

    Computes 1 - SS_res / SS_tot, where SS_res is the summed squared distance
    between predictions and originals, and SS_tot is the summed squared
    distance between the originals and their own mean.

    Args:
        original_y: sequence of observed y values.
        predicted_y: sequence of predicted y values, paired by position
            (pairs beyond the shorter sequence are ignored by zip).

    Returns:
        1.0 for a perfect fit (i.e. 100% = 1.0) and 0.0 for a fit no better
        than the horizontal mean line. The value is negative when the
        prediction is worse than the mean line. Returns 0.0 when the original
        values have no variance (all equal) or when original_y is empty.
    """
    # https://enlight.nyc/projects/linear-regression
    if len(original_y) == 0:
        # Nothing to compare; avoids asking for the mean of an empty sequence.
        return 0.0

    # Squared error of the regression line.
    squared_error_regr = sum(
        (prediction - original) ** 2
        for prediction, original in zip(predicted_y, original_y)
    )

    # Squared error of the horizontal line at the mean of the y values.
    # The mean is computed once, not once per element.
    y_mean = mean(original_y)
    squared_error_y_mean = sum((y_mean - original) ** 2 for original in original_y)

    if squared_error_y_mean > 0.0:
        return 1 - (squared_error_regr / squared_error_y_mean)
    return 0.0
def slope_intercept_linear_regression(list_of_points):
    """Fit a line to the points and report how well it fits.

    list_of_points = [(x0,y0), (x1,y1) ...

    Returns a tuple (r_squared, slope, intercept): the slope and intercept
    come from best_fit_line, and r_squared from r_squared_value applied to
    the original y values versus the fitted line evaluated at each x.
    """
    # The points are used in the order given; no sorting by x is performed.
    xs = [point_x for point_x, _ in list_of_points]
    ys = [point_y for _, point_y in list_of_points]

    # Slope and intercept of the fitted line.
    slope, intercept = best_fit_line(xs, ys)

    # Goodness of fit: compare the observed y values to the line's y values.
    fitted_ys = [(slope * point_x) + intercept for point_x in xs]
    fit_quality = r_squared_value(ys, fitted_ys)

    return fit_quality, slope, intercept
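# Test whether the list of coordinate values is roughly increasing AND lies on a line.
# NOTE(review): no "increasing" test follows this comment here - confirm whether it was removed or lives elsewhere.
# Test whether the list of coordinate values is roughly decreasing AND lies on a line.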
def roughly_linear_decreasing(list_of_points, tolerance=0.4):
    """Return True when the points lie roughly on a downward-sloping line.

    list_of_points = [(x0,y0), (x1,y1) ...

    Two conditions must hold:
      * the r^2 of the fitted line exceeds 1.0 - tolerance, and
      * the line's angle is strictly between -87 and -5 degrees, i.e. the
        slope is negative but neither nearly flat nor nearly vertical.
    """
    r_squared, slope, _intercept = slope_intercept_linear_regression(list_of_points)

    # Guard clause: a poor fit is rejected before the angle is examined.
    if not r_squared > (1.0 - tolerance):
        return False

    # Convert the slope to an angle in degrees.
    angle_degrees = math.atan(slope) / math.pi * 180
    return -87.0 < angle_degrees < -5.0
# Simple test: is the list of coordinate values roughly decreasing, regardless of whether it forms a line?
def roughly_decreasing_list(list_of_values, tolerance=0.7):
    """Return True when most adjacent pairs in the list are non-increasing.

    Args:
        list_of_values: sequence of comparable values.
        tolerance: fraction of adjacent pairs that must be exceeded; with the
            default, strictly more than 70% of the pairs must satisfy
            left >= right.

    Returns:
        True if the number of non-increasing adjacent pairs is strictly
        greater than tolerance * (number of pairs). Sequences with fewer than
        two values have no pairs to compare and return False.
    """
    pair_count = len(list_of_values) - 1
    if pair_count < 1:
        # An empty list would otherwise make the threshold negative
        # (tolerance * -1) and report True; treat it like a single value.
        return False

    non_increasing_pairs = sum(
        1
        for left, right in zip(list_of_values, list_of_values[1:])
        if left >= right
    )
    return non_increasing_pairs > tolerance * pair_count