机器算法验证 - 局部平均序列的实线 - 吾爱随笔录

局部平均序列的实线

机器算法验证时间序列

2022-03-23 08:55:26

我有一个从气象站获取的时间序列数据，我必须从中提取位于精确时间点的值；问题是我拥有的车站时间序列每半小时只给我一次测量的平均值。有一些方法可以从中获得一条平滑线，所以我可以获取我需要的奇异值？

这张图可以更好地解释它：黑线是我拥有的离散值，蓝线是我想要的。实际上，它不一定是平滑的，它必须是最现实的，所以它必须尊重平均约束；换句话说，取实线（蓝色）线的随机点的期望值应该是我现在拥有的（黑色）局部平均值。

如果存在不同的选择，也许有些书解释了它们，如果你告诉我，我会很高兴。

2个回答

你想要的是一个保持插值的平均值。John D'Errico 有您正在寻找的确切解决方案，但是用 MATLAB 编写。

https://www.mathworks.com/matlabcentral/newsreader/view_thread/31378

function [y,spl]=mean_series(ymeans,n,EndConditions)
% mean_series: cubic spline resampling of series in x
% (n times), maintaining the mean
%
% arguments:
% ymeans - vector of means
% n - 
% EndConditions - flag specifying natural or not-a-knot 
% end conditions on the spline. 
% EndConditions == 0 --> natural
% EndConditions == 1 (default) --> not-a-knot
%
% y - interpolated series, y has the property that:
% ymeans == sum(reshape(y,n,length(x)))
%
% spl - cubic spline as a piecewise cubic Hermite function

% ensure that ymeans is a column vector
ymeans=ymeans(:);
nmeans=length(ymeans);

% things will fail unless there are at least two
% points in ymeans
if nmeans<2
  error 'I require length(ymeans)>=2 or dire things will happen'
end

% nmeans+1 implicit knots
nk=nmeans+1;

% implicit positions of points supplied
xmeans=(0:(nmeans-1))';

% implicit positions of knots
knots=(0:nmeans)'-0.5;

% knot spacing
delta=diff(knots);

% implicit coordinates of the points to be predicted
% (interpolations at these points)
x=linspace(knots(1),knots(end),nmeans*n+1)';
x(end)=[];
ny=n*nmeans;

% define the spline as a piecewise cubic Hermite at
% the knots. This gives us nk function values and
% nk derivatives.

% Force the spline to be C2. compute the matrix
% relating function values to first derivatives,
% comes about from second derivative continuity at
% the knots. these will be equality constraints on
% the unknown spline coefficients.
feq=zeros(nk-2,nk);
deq=zeros(nk-2,nk);
rhseq=zeros(nk-2,1);
for i=2:(nk-1)
  j=i-1;
  feq(j,i+[-1 0 1])=-[-6/delta(j)^2 , ...
      (6/delta(j)^2)-(6/delta(i)^2) , 6/delta(i)^2];
  deq(j,i+[-1 0 1])=[2/delta(j) , ...
      4/delta(j)+4/delta(i) , 2/delta(i)];
end

% next, "evaluate" the points through the implicitly
% defined spline. We do not yet have the coefficients
% defining the spline, but we will eventually.
% k specifies which knot interval the points fall in.
k=repmat(1:nmeans,n,1);
k=k(:);

t=(x-knots(k))./delta(k);
t2=t.*t;
t3=t2.*t;
s2=(1-t).*(1-t);
s3=s2.*(1-t);

% build the matrix sparsely for efficiency, then convert
% the system to full since its really not sparse enough
% to save much.
fmat=[3*s2-2*s3 , 3*t2-2*t3];
dmat=[-delta(k).*(s3-s2) , delta(k).*(t3-t2)];
imat=(1:ny)';
imat=[imat,imat];
jmat=[k,k+1];

fmat0=full(sparse(imat,jmat,fmat,ny,nk));
dmat0=full(sparse(imat,jmat,dmat,ny,nk));

% now, compute implicit means of every n points.
% do it via a matrix multiply.
M=kron(eye(nmeans,nmeans),ones(1,n)/n);
fmat=M*fmat0;
dmat=M*dmat0;

% Since these means are to be forced, specify them
% as equality constraints, appending them to the set
% of equalities.
feq=[feq;fmat];
deq=[deq;dmat];
rhseq=[rhseq;ymeans];

% End condotions are either: 'natural', 'not-a-knot'
% test for default on EndConditions

switch EndConditions
 case 0
  % natural boundary conditions, i.e., zero second
  % derivative at ends
  fe=zeros(2,nk);
  de=zeros(2,nk);
  % at the bottom knot:
  fe(1,1:2)=[-1, 1]*6/(delta(1)^2);
  de(1,1:2)=[-2, -1]*2/delta(1);
  % at the top knot:
  fe(2,nk+[-1 0])=[1, -1]*6/(delta(nk-1)^2);
  de(2,nk+[-1 0])=[1, 2]*2/delta(nk-1);
  % append to equality constraints matrix
  feq=[feq;fe];
  deq=[deq;de];
  rhseq=[rhseq;zeros(2,1)];
case 1
  % not-a-knot end conditions, 3rd derivative continuity at
  % 2nd knot and next to last knots
  fe=zeros(1+(nk>3),nk);
  de=fe;
  % at the second knot:
  fe(1,1:3)=[1 -1 0]*12/(delta(1)^3)-[0 1 -1]*12/(delta(2)^3);
  de(1,1:3)=[1 1 0]*6/(delta(1)^2)-[0 1 1]*6/(delta(2)^2);
  % at the top knot:
  if nk>3
    fe(2,nk+(-2:0))=[1 -1 0]*12/(delta(nk-2)^3)- ...
         [0 1 -1]*12/(delta(nk-1)^3);
    de(2,nk+(-2:0))=[1 1 0]*6/(delta(nk-2)^2)- ...
         [0 1 1]*6/(delta(nk-1)^2);
  end
  % append to equality constraints matrix
  feq=[feq;fe];
  deq=[deq;de];
  rhseq=[rhseq;zeros(1+(nk>3),1)];
end

% the second derivative regularizer is a quadratic form of the form
% s'*regmat*s,
% where s is the vector of (unknown) second derivatives at the knots.
regmat=zeros(nk,nk);
regmat(1,1:2)=[delta(1)/3 , delta(1)/6];
regmat(nk,nk+[-1 0])=[delta(nk-1)/6 , delta(nk-1)/3];

for i=2:(nk-1)
  regmat(i,i+[-1 0 1])=[delta(i-1)/6 , (delta(i-1)+delta(i))/3 , delta(i)/6];
end

% next, write the second derivatives as a function of the
% function values and first derivatives: s = [sf,sd]*[f;d]
sf=zeros(nk,nk);
sd=zeros(nk,nk);
for i=1:(nk-1)
  sf(i,i+[0 1])=[-1 1].*(6/(delta(i)^2));
  sd(i,i+[0 1])=[-4 -2]./delta(i);
end
sf(nk,nk+[-1 0])=[1 -1].*(6/(delta(nk-1)^2));
sd(nk,nk+[-1 0])=[2 4]./delta(nk-1);

regmat=[sf,sd]'*regmat*[sf,sd];

% ensure numerical symmetry of the hessian
regmat=regmat+regmat';

% now, solve this whole mess of a linear system
% using a quadratic programming package.
% assume quadprog from the optimization toolbox 2.0
H=regmat;
f=zeros(2*nk,1);
Aeq=[feq,deq];
beq=rhseq;
options=optimset('quadprog');
coef=quadprog(H,f,[],[],Aeq,beq,[],[],[],options);

% break the coeficients up into a spline.
% first column is the knots, the second column 
% is function values at the knots, the third column
% is first derivatives at the knots.
spl=[knots,reshape(coef,nk,2)];

% return predictions at the original points
y=fmat0*spl(:,2)+dmat0*spl(:,3);

我们可以测试一下这个功能：

ymeans=sin(1:22)';
n=3;
EndConditions=1;

[yhat,spl]=mean_series(ymeans,n,1);

plot(yhat);
hold on
plot(kron(ymeans,ones(n,1)));
hold off

美丽的！！

这是 yhat，重新整形，使每行的平均值等于原始输入：

>> reshape(yhat,3,22)'
ans =

    0.6038    0.8903    1.0303
    1.0413    0.9407    0.7459
    0.4743    0.1473   -0.1983
   -0.5214   -0.7848   -0.9642
   -1.0390   -0.9948   -0.8430
   -0.6010   -0.2903    0.0531
    0.3894    0.6811    0.9005
    1.0219    1.0263    0.9198
    0.7147    0.4279    0.0937
   -0.2493   -0.5639   -0.8188
   -0.9845   -1.0373   -0.9782
   -0.8141   -0.5570   -0.2387
    0.1042    0.4354    0.7210
    0.9276    1.0275    1.0167
    0.8968    0.6749    0.3792
    0.0434   -0.2981   -0.6090
   -0.8525   -0.9972   -1.0344
   -0.9610   -0.7792   -0.5128
   -0.1911    0.1548    0.4860
    0.7618    0.9471    1.0300
    1.0038    0.8673    0.6388
    0.3414   -0.0014   -0.3666

让我们确保为每组 n 个点保留平均值：

>>yhat_means=kron(eye(length(ymeans)),repmat(1/n,1,n))*yhat;
>>[yhat_means,ymeans]

ans =

    0.8415    0.8415
    0.9093    0.9093
    0.1411    0.1411
   -0.7568   -0.7568
   -0.9589   -0.9589
   -0.2794   -0.2794
    0.6570    0.6570
    0.9894    0.9894
    0.4121    0.4121
   -0.5440   -0.5440
   -1.0000   -1.0000
   -0.5366   -0.5366
    0.4202    0.4202
    0.9906    0.9906
    0.6503    0.6503
   -0.2879   -0.2879
   -0.9614   -0.9614
   -0.7510   -0.7510
    0.1499    0.1499
    0.9129    0.9129
    0.8367    0.8367
   -0.0089   -0.0089

我认为这段代码非常令人满意。我试图在 R 中复制它，但我还没有找到一个 QP 求解器，它可以很好地处理等式约束和无边界（Rcplex 包可能有效，但你必须为此付费，这很愚蠢，因为如果你'正在为软件付费，你会选择 MATLAB 或其他一些支持不错的软件）。以下具有 QP 求解器的 R 函数/包不适用于此问题（非排他性列表）：quadprog、ipop、ROI、auglag、nlminb、optimx、trust、COBYLA。

如果有人对该 R 代码感兴趣，这里有一个愚蠢的求解器（如果有一个好的求解器可用，您可以替换该行，代码将与 MATLAB 实现一样工作 - 并且不要如果您找到它，请忘记在此处发表评论）：

library(alabama)
library(fBasics)

# identity matrix
eye<-function(n){
  if(n==0){
    return(matrix(0,0,0))
  }else{
    M<-matrix(0,n,n)
    M[1+0:(n-1)*(n+1)]<-1
    return(M)
  }
}

# function inspired by matlab's sparse()
sparse<-function(i,j,v,n=0,m=0,nz=0){
  S<-matrix(nz,max(max(i),n),max(max(j),m))
  S[cbind(matrix(i,ncol=1),matrix(j,ncol=1))]<-matrix(v,ncol=1)
  return(S)
}

mean_series<-function(ymeans,n,EndConditions=1,Method="L-BFGS-B"){
  # mean_series: cubic spline resampling of series in x
  # (n times), maintaining the mean
  #
  # arguments:
  # ymeans - vector of means
  # n - 
  # EndConditions - flag specifying natural or not-a-knot 
  # end conditions on the spline. 
  # EndConditions <-<- 0 --> natural
  # EndConditions <-<- 1 (default) --> not-a-knot
  #
  # y - interpolated series, y has the property that:
  # ymeans <-<- sum(reshape(y,n,length(x)))
  #
  # spl - cubic spline as a piecewise cubic Hermite function

  # ensure that ymeans is a column vector
  ymeans<-matrix(ymeans,ncol = 1)
  nmeans<-length(ymeans)

  # things will fail unless there are at least two
  # points in ymeans
  if (nmeans<2){
    stop('I require length(ymeans)><-2 or dire things will happen')
  }

  # nmeans+1 implicit knots
  nk<-nmeans+1

  # implicit positions of knots
  knots<-cbind(0:nmeans)-0.5

  # knot spacing
  delta<-diff(knots)

  # implicit coordinates of the points to be predicted
  # (interpolations at these points)
  x<-seq(knots[1],knots[length(knots)],length.out =nmeans*n+1)
  x<-cbind(x[-length(x)])
  ny<-n*nmeans

  # define the spline as a piecewise cubic Hermite at
  # the knots. This gives us nk function values and
  # nk derivatives.

  # Force the spline to be C2. compute the matrix
  # relating function values to first derivatives,
  # comes about from second derivative continuity at
  # the knots. these will be equality constraints on
  # the unknown spline coefficients.
  feq<-matrix(0,nk-2,nk)
  deq<-matrix(0,nk-2,nk)
  rhseq<-matrix(0,nk-2,1)
  for(i in 2:(nk-1)){
    j<-i-1
    feq[j,i+(-1:1)]<--cbind(-6/delta[j]^2 ,(6/delta[j]^2)-(6/delta[i]^2) , 6/delta[i]^2)
    deq[j,i+(-1:1)]<- cbind(2/delta[j] , 4/delta[j]+4/delta[i] , 2/delta[i])
  }

  # next, "evaluate" the points through the implicitly
  # defined spline. We do not yet have the coefficients
  # defining the spline, but we will eventually.
  # k specifies which knot interval the points fall in.
  k<-matrix(1:nmeans,n*nmeans,1)

  t<-(x-knots[k])/delta[k]
  t2<-t*t
  t3<-t2*t
  s2<-(1-t)*(1-t)
  s3<-s2*(1-t)

  # build the matrix sparsely for efficiency, then convert
  # the system to full since its really not sparse enough
  # to save much.
  fmat<-cbind(3*s2-2*s3 , 3*t2-2*t3)
  dmat<-cbind(-delta[k]*(s3-s2) , delta[k]*(t3-t2))
  imat<-cbind(1:ny)
  imat<-cbind(imat,imat)
  jmat<-cbind(k,k+1)

  fmat0<-sparse(imat,jmat,fmat,ny,nk)
  dmat0<-sparse(imat,jmat,dmat,ny,nk)

  # now, compute implicit means of every n points.
  # do it via a matrix multiply.
  M<-kron(eye(nmeans),matrix(1,1,n)/n)
  fmat<-M%*%fmat0
  dmat<-M%*%dmat0

  # Since these means are to be forced, specify them
  # as equality constraints, appending them to the set
  # of equalities.
  feq<-rbind(feq,fmat)
  deq<-rbind(deq,dmat)
  rhseq<-rbind(rhseq,ymeans)

  # End condotions are either: 'natural', 'not-a-knot'
  # test for default on EndConditions

  if(EndConditions==0){
    # natural boundary conditions, i.e., zero second
    # derivative at ends
    fe<-matrix(0,2,nk)
    de<-matrix(0,2,nk)
    # at the bottom knot:
    fe[1,1:2]<-c(-1, 1)*6/(delta[1]^2)
    de[1,1:2]<-c(-2, -1)*2/delta[1]
    # at the top knot:
    fe[2,nk+(-1:0)]<-c(1,-1)*6/(delta[nk-1]^2)
    de[2,nk+(-1:0)]<-c(1,2)*2/delta[nk-1]
    # append to equality constraints matrix
    feq<-rbind(feq,fe)
    deq<-rbind(deq,de)
    rhseq<-rbind(rhseq,matrix(0,2,1))
  }
  if(EndConditions==1){
    # not-a-knot end conditions, 3rd derivative continuity at
    # 2nd knot and next to last knots
    fe<-matrix(0,1+(nk>3),nk)
    de<-fe
    # at the second knot:
    fe[1,1:3]<-c(1,-1,0)*12/(delta[1]^3)-c(0,1,-1)*12/(delta[2]^3)
    de[1,1:3]<-c(1,1,0)*6/(delta[1]^2)-c(0,1,1)*6/(delta[2]^2)
    # at the top knot:
    if(nk>3){
      fe[2,nk+(-2:0)]<-c(1,-1,0)*12/(delta[nk-2]^3)-c(0,1,-1)*12/(delta[nk-1]^3)
      de[2,nk+(-2:0)]<-c(1,1,0)*6/(delta[nk-2]^2)-c(0,1,1)*6/(delta[nk-1]^2)
    }
    # append to equality constraints matrix
    feq<-rbind(feq,fe)
    deq<-rbind(deq,de)
    rhseq<-rbind(rhseq,matrix(0,1+(nk>3),1))
  }

  # the second derivative regularizer is a quadratic form of the form
  # s'*regmat*s,
  # where s is the vector of (unknown) second derivatives at the knots.
  regmat<-matrix(0,nk,nk)
  regmat[1,1:2]<-cbind(delta[1]/3 , delta[1]/6)
  regmat[nk,nk+(-1:0)]<-cbind(delta[nk-1]/6 , delta[nk-1]/3)

  for(i in 2:(nk-1)){
    regmat[i,i+(-1:1)]<-c(delta[i-1]/6 , (delta[i-1]+delta[i])/3 , delta[i]/6)
  }

  # next, write the second derivatives as a function of the
  # function values and first derivatives: s <- [sf,sd]*[fd]
  sf<-matrix(0,nk,nk)
  sd<-matrix(0,nk,nk)
  for(i in 1:(nk-1)){
    sf[i,i+(0:1)]<-c(-1,1)*(6/(delta[i]^2))
    sd[i,i+(0:1)]<-c(-4,-2)/delta[i]
  }
  sf[nk,nk+(-1:0)]<-c(1,-1)*(6/(delta[nk-1]^2))
  sd[nk,nk+(-1:0)]<-c(2,4)/delta[nk-1]

  regmat<-t(cbind(sf,sd))%*%regmat%*%cbind(sf,sd)

  # ensure numerical symmetry of the hessian
  regmat<-regmat+t(regmat)

  # now, solve this whole mess of a linear system
  # using a quadratic programming package.
  # assume quadprog from the optimization toolbox 2.0
  H<-regmat
  f<-matrix(0,2*nk,1)
  Aeq<-cbind(feq,deq)
  beq<-rhseq


  # THIS QP SOLVER DOESN'T FREAKIN WORK AT ALL.
  coef<-cbind(auglag(cbind(rep(0,dim(H)[1])),
                     fn=function(x){t(x)%*%H%*%x},
                     hin=function(x){Aeq%*%x-beq}, 
                     hin.jac=function(x){Aeq}, 
                     heq=function(x){1}, 
                     heq.jac=function(x){rbind(rep(0,dim(Aeq)[2]))},
                     control.outer = list(method=Method,trace=FALSE,itmax=100))$par)



  # break the coeficients up into a spline.
  # first column is the knots, the second column 
  # is function values at the knots, the third column
  # is first derivatives at the knots.
  spl<-cbind(knots,matrix(coef,nk,2))

  # return predictions at the original points
  y<-fmat0%*%spl[,2]+dmat0%*%spl[,3]
  return(y)
}

让我们测试一下这个愚蠢的 QP 求解器，看看它是多么无用：

ymeans<-sin(1:22)
n<-3
yhat<-mean_series(ymeans,n)

lineplot(cbind(kron(ymeans,cbind(rep(1,n))),yhat),1:66,legend = FALSE)

我认为有许多方法可以满足您的需求 - 一些选项是：1. 内核平滑 2. 样条平滑 3. 移动平均

还有很多，但我认为这三个是不错的候选人。这是几种平滑技术的非常好的视觉解释。

问题是在技术上，在 $t_a$ 和 $t_b$ ，温度可以取您测量刻度上的任何值。幸运的是，由于我们可能至少可以假设适度的时间依赖性，温度在 $t_b$ 应该至少部分与温度有关 $t_a$ . 选择一个好的平滑方法和参数取决于您认为时间相关效应的强度、您希望平滑函数的平滑程度、您认为温度在未观察期间的变化程度。

希望有帮助！

其它你可能感兴趣的问题

上一篇delta 方法的名称从何而来？下一篇我应该如何学习深度学习？