import numpy as np
from typing import List, Tuple

Data = List[Tuple[float, float]]

# A straight line passes exactly through any 1 or 2 points, so residuals only
# carry information (rather than floating-point noise) from 3 points upward.
_MIN_POINTS_FOR_OUTLIER_TEST = 3


def drop_outliers(data: Data, cutoff: float) -> Data:
    """Iteratively remove the worst-fitting point from a linear fit.

    On each pass a degree-1 least-squares polynomial is fitted to the
    remaining points. The point with the largest squared residual is removed
    when that residual is strictly greater than ``cutoff`` times the sum of
    all squared residuals; the fit is then repeated. The process stops as
    soon as no point exceeds that threshold, or when fewer than three points
    remain (a line through two points is exact, so there is nothing
    meaningful left to test).

    Args:
        data: Sequence of ``(x, y)`` pairs. It is not modified.
        cutoff: Fraction of the total squared residual that a single point
            must exceed to be treated as an outlier.

    Returns:
        A new list of ``(x, y)`` tuples, in their original order, holding the
        points that were kept. Inputs with fewer than three points
        (including an empty input) are returned unchanged as a new list.
    """
    points = [(x, y) for x, y in data]
    xs = np.array([x for x, _ in points], dtype=float)
    ys = np.array([y for _, y in points], dtype=float)

    # Iterative rather than recursive so that a long run of outliers cannot
    # exhaust the interpreter's recursion limit.
    while len(points) >= _MIN_POINTS_FOR_OUTLIER_TEST:
        slope, intercept = np.polyfit(xs, ys, 1)
        sq_residuals = (ys - (slope * xs + intercept)) ** 2

        # argmax returns the first index of the maximum, i.e. the earliest
        # point among ties.
        worst = int(np.argmax(sq_residuals))

        # Written as "not (a > b)" so that a NaN on either side ends the
        # loop instead of removing a point.
        if not sq_residuals[worst] > sq_residuals.sum() * cutoff:
            break

        del points[worst]
        xs = np.delete(xs, worst)
        ys = np.delete(ys, worst)

    return points


def regress(data: Data, cutoff: float) -> Tuple[float, float]:
    """Fit a straight line to ``data`` after discarding outliers.

    Outliers are removed with :func:`drop_outliers` using ``cutoff``; a
    degree-1 least-squares polynomial is then fitted to what remains.

    Args:
        data: Sequence of ``(x, y)`` pairs.
        cutoff: Outlier threshold forwarded to :func:`drop_outliers`.

    Returns:
        ``(slope, intercept)`` of the fitted line, i.e. the two coefficients
        returned by ``np.polyfit(..., 1)`` in that order.

    Raises:
        Whatever ``np.polyfit`` raises for unusable input (for example when
        no points are left to fit).
    """
    kept = drop_outliers(data, cutoff)
    xs = [x for x, _ in kept]
    ys = [y for _, y in kept]
    slope, intercept = np.polyfit(xs, ys, 1)
    return (slope, intercept)


# Kept at the bottom of the module so it executes after the definitions above
# exist. NOTE(review): run_tests is not visible here; presumably it exercises
# this module on import -- confirm before moving or guarding this line.
import run_tests