最小二乗法におけるロバスト推定

最適化
最小二乗法におけるロバスト推定; 最小二乗法におけるロバスト回帰; Robust Estimation in Least Squares Method

概要

ロバスト推定とは外れ値を含むデータに対し、外れ値の影響を受けないようデータを説明する近似直線または曲線を得る手法である。
最小二乗法では外れ値を含んでいると近似直線または曲線が大きくズレてしまう問題がある。
そこで重みつき最小二乗法を用いる。外れ値に対する重みを小さくすることで近似関数に与える影響を小さくしようとするのがこの手法の考え方である。

重みつき最小二乗法については重み付き直線フィッティングと重み付き多項式フィッティングを参照のこと。

ロバスト推定(直線)

アルゴリズム

・初期化
　-全データの重み\(w\)を1として最小二乗法を行う。
・収束計算
　-近似関数との誤差\(d\)と許容誤差\(W\)から以下の式で重み\(w\)を求める。
\(\quad \displaystyle w_{new} = \begin{cases} \left( 1 - \left( \frac{d}{W} \right)^2 \right)^2 &\quad |d| \lt W \\ 0 &\quad |d| \geq W \end{cases} \)
　-更新された重み\(w_{new}\)で重み付き最小二乗法を行う。

許容誤差\(W\)の定め方は様々ある。当サイトでは絶対誤差の中央値の1.25倍とする。

ソースコード


namespace DataFitting {

    /// <summary>ロバスト線形フィッティング</summary>
    public class RobustLinearFittingMethod : FittingMethod {

        /// <summary>コンストラクタ</summary>
        public RobustLinearFittingMethod(FittingData[] data_list, bool is_enable_section) : base(data_list, is_enable_section ? 2 : 1) {
            IsEnableSection = is_enable_section;
        }

        /// <summary>y切片を有効にするか</summary>
        public bool IsEnableSection { get; private set; }

        /// <summary>フィッティング値</summary>
        public override double FittingValue(double x, Vector parameters) {
            if(parameters == null) {
                throw new ArgumentNullException(nameof(parameters));
            }
            if(parameters.Dim != ParametersCount) {
                throw new ArgumentException(nameof(parameters));
            }

            if(IsEnableSection) {
                return parameters[0] + parameters[1] * x;
            }
            else {
                return parameters[0] * x;
            }
        }

        /// <summary>フィッティング</summary>
        public Vector ExecureFitting(int converge_times = 8) {
            int n = data_list.Length;
            double err_threshold, inv_err;
            double[] weight_list = new double[n], err_list = new double[n], sort_err_list;
            Vector err, coef = null;
            WeightedLinearFittingMethod fitting;

            for(int i = 0; i < n; i++) {
                weight_list[i] = 1;
            }

            while(converge_times > 0) {
                fitting = new WeightedLinearFittingMethod(data_list, weight_list, IsEnableSection);
                
                coef = fitting.ExecureFitting();

                err = fitting.Error(coef);

                for(int i = 0; i < n; i++) {
                    err_list[i] = Math.Abs(err[i]);
                }

                sort_err_list = (double[])err_list.Clone();

                Array.Sort(sort_err_list);

                err_threshold = sort_err_list[n / 2] * 1.25;
                if(err_threshold <= 1e-14) {
                    break;
                }

                inv_err = 1 / err_threshold;

                for(int i = 0; i < n; i++) {
                    if(err_list[i] >= err_threshold) {
                        weight_list[i] = 0;
                    }
                    else {
                        double r = (1 - err_list[i] * inv_err);
                        weight_list[i] = r * r;
                    }
                }

                converge_times--;
            }

            return coef;
        }
    }

    /// <summary>ロバスト多項式フィッティング</summary>
    public class RobustPolynomialFittingMethod : FittingMethod {

        /// <summary>コンストラクタ</summary>
        public RobustPolynomialFittingMethod(FittingData[] data_list, int degree, bool is_enable_section)
            : base(data_list, degree + (is_enable_section ? 1 : 0)) {

            this.Degree = degree;
            this.IsEnableSection = is_enable_section;
        }

        /// <summary>次数</summary>
        public int Degree {
            get; private set;
        }

        /// <summary>y切片を有効にするか</summary>
        public bool IsEnableSection { get; private set; }

        /// <summary>フィッティング値</summary>
        public override double FittingValue(double x, Vector coefficients) {
            if(IsEnableSection) {
                double y = coefficients[0], ploy_x = 1;

                for(int i = 1; i < coefficients.Dim; i++) {
                    ploy_x *= x;
                    y += ploy_x * coefficients[i];
                }

                return y;
            }
            else {
                double y = 0, ploy_x = 1;

                for(int i = 0; i < coefficients.Dim; i++) {
                    ploy_x *= x;
                    y += ploy_x * coefficients[i];
                }

                return y;
            }
        }

        /// <summary>フィッティング</summary>
        public Vector ExecureFitting(int converge_times = 8) {
            int n = data_list.Length;
            double err_threshold, inv_err;
            double[] weight_list = new double[n], err_list = new double[n], sort_err_list;
            Vector err, coef = null;
            WeightedPolynomialFittingMethod fitting;

            for(int i = 0; i < n; i++) {
                weight_list[i] = 1;
            }

            while(converge_times > 0) {
                fitting = new WeightedPolynomialFittingMethod(data_list, weight_list, Degree, IsEnableSection);
                
                coef = fitting.ExecureFitting();

                err = fitting.Error(coef);

                for(int i = 0; i < n; i++) {
                    err_list[i] = Math.Abs(err[i]);
                }

                sort_err_list = (double[])err_list.Clone();

                Array.Sort(sort_err_list);

                err_threshold = sort_err_list[n / 2] * 1.25;
                if(err_threshold <= 1e-14) {
                    break;
                }

                inv_err = 1 / err_threshold;

                for(int i = 0; i < n; i++) {
                    if(err_list[i] >= err_threshold) {
                        weight_list[i] = 0;
                    }
                    else {
                        double r = (1 - err_list[i] * inv_err);
                        weight_list[i] = r * r;
                    }
                }

                converge_times--;
            }

            return coef;
        }
    }
}