1:ABC 2:D 3:B 4:D 5:B 6:A 7:C 8:CD
(1)请画出该数据的折线图。
import pandas as pd
# Load the 'AirPassengers' sheet, using its first column as the index
AirPassengers = pd.read_excel(
    'AirPassengers.xlsx',
    sheet_name='AirPassengers',
    index_col=0,
)
AirPassengers
value | |
---|---|
time | |
1949/1 | 112 |
1949/2 | 118 |
1949/3 | 132 |
1949/4 | 129 |
1949/5 | 121 |
... | ... |
1960/8 | 606 |
1960/9 | 508 |
1960/10 | 461 |
1960/11 | 390 |
1960/12 | 432 |
144 rows × 1 columns
import matplotlib.pyplot as plt #加载基本绘图包
AirPassengers.plot();
# Create the year variable from the first four characters of each index label
AirPassengers['Year'] = AirPassengers.index.str[:4]
AirPassengers
value | Year | |
---|---|---|
time | ||
1949/1 | 112 | 1949 |
1949/2 | 118 | 1949 |
1949/3 | 132 | 1949 |
1949/4 | 129 | 1949 |
1949/5 | 121 | 1949 |
... | ... | ... |
1960/8 | 606 | 1960 |
1960/9 | 508 | 1960 |
1960/10 | 461 | 1960 |
1960/11 | 390 | 1960 |
1960/12 | 432 | 1960 |
144 rows × 2 columns
# Build the yearly time series by summing 'value' within each year
yearly_groups = AirPassengers.groupby(['Year'])
yvalue = yearly_groups['value'].sum()
yvalue
Year 1949 1520 1950 1676 1951 2042 1952 2364 1953 2700 1954 2867 1955 3408 1956 3939 1957 4421 1958 4572 1959 5140 1960 5714 Name: value, dtype: int64
import statsmodels.api as sm
def trendmodel(y, x):
    """Fit a two-variable straight-line trend regression by OLS (textbook p. 163).

    A constant column is added to ``x`` before fitting, so the model has an
    intercept and a slope. The coefficient table and the coefficient of
    determination are printed as a side effect.

    Args:
        y: Dependent variable.
        x: Independent variable.

    Returns:
        The fitted values of the estimated model (``fm.fittedvalues``).
    """
    fm = sm.OLS(y, sm.add_constant(x)).fit()
    sfm = fm.summary2()
    print("模型检验:\n", sfm.tables[1])
    # Read R-squared from the fitted result instead of by row/column position
    # in the summary header table, which depends on the summary layout.
    # Three decimals are kept, matching what the summary displays.
    print("决定系数:", f"{fm.rsquared:.3f}")
    return fm.fittedvalues
import numpy as np
# Time index 1..n, one step per yearly total
X1 = np.arange(1, len(yvalue) + 1)
yvalue
# Trend forecasting with a linear model (textbook p. 164):
# fit the trend, then draw the data as points and the fitted values as a red line
L1 = trendmodel(yvalue, X1)
plt.plot(yvalue, 'o', L1, 'r-');
模型检验:
             Coef.    Std.Err.          t         P>|t|      [0.025       0.975]
const  873.515152  103.885467   8.408444  7.592912e-06  642.043906  1104.986397
x1     383.087413   14.115255  27.139956  1.067001e-10  351.636664   414.538162
决定系数: 0.987
C:\Users\Lenovo\anaconda3\lib\site-packages\scipy\stats\stats.py:1541: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=12 warnings.warn("kurtosistest only valid for n>=20 ... continuing "
AP = AirPassengers['value']
AP.mean()  # simple average method (textbook p. 165)
# Simple moving average method (textbook p. 166).
# Note: column 'M2' holds the 3-period mean and 'M4' the 5-period mean.
QtM = pd.DataFrame(AP)
QtM['M2'] = AP.rolling(window=3).mean()
QtM['M4'] = AP.rolling(window=5).mean()
QtM
value | M2 | M4 | |
---|---|---|---|
time | |||
1949/1 | 112 | NaN | NaN |
1949/2 | 118 | NaN | NaN |
1949/3 | 132 | 120.666667 | NaN |
1949/4 | 129 | 126.333333 | NaN |
1949/5 | 121 | 127.333333 | 122.4 |
... | ... | ... | ... |
1960/8 | 606 | 587.666667 | 539.2 |
1960/9 | 508 | 578.666667 | 548.6 |
1960/10 | 461 | 525.000000 | 546.4 |
1960/11 | 390 | 453.000000 | 517.4 |
1960/12 | 432 | 427.666667 | 479.4 |
144 rows × 3 columns
QtM.plot();
# Exponential smoothing forecast (textbook p. 167)
QtE = pd.DataFrame(AP, columns=['value'])
smooth_03 = AP.ewm(alpha=0.3)  # smoothing coefficient = 0.3
smooth_08 = AP.ewm(alpha=0.8)  # smoothing coefficient = 0.8
QtE['E3'] = smooth_03.mean()
QtE['E8'] = smooth_08.mean()
QtE
value | E3 | E8 | |
---|---|---|---|
time | |||
1949/1 | 112 | 112.000000 | 112.000000 |
1949/2 | 118 | 115.529412 | 117.000000 |
1949/3 | 132 | 123.050228 | 129.096774 |
1949/4 | 129 | 125.399131 | 129.019231 |
1949/5 | 121 | 123.812773 | 122.601793 |
... | ... | ... | ... |
1960/8 | 606 | 542.383959 | 605.183454 |
1960/9 | 508 | 532.068772 | 527.436691 |
1960/10 | 461 | 510.748140 | 474.287338 |
1960/11 | 390 | 474.523698 | 406.857468 |
1960/12 | 432 | 461.766589 | 426.971494 |
144 rows × 3 columns
QtE.plot();
# Load the 'BJsales' sheet, using its first column as the index
BJsales = pd.read_excel(
    'BJsales.xlsx',
    sheet_name='BJsales',
    index_col=0,
)
BJsales
value | |
---|---|
time | |
1 | 200.1 |
2 | 199.5 |
3 | 199.4 |
4 | 198.9 |
5 | 199.0 |
... | ... |
146 | 263.3 |
147 | 262.8 |
148 | 261.8 |
149 | 262.2 |
150 | 262.7 |
150 rows × 1 columns
BJsales.plot();
BJ = BJsales['value']
# Simple moving average method (textbook p. 166).
# Note: column 'M2' holds the 3-period mean and 'M4' the 5-period mean.
QtMBJ = pd.DataFrame(BJ)
QtMBJ['M2'] = BJ.rolling(window=3).mean()
QtMBJ['M4'] = BJ.rolling(window=5).mean()
QtMBJ
value | M2 | M4 | |
---|---|---|---|
time | |||
1 | 200.1 | NaN | NaN |
2 | 199.5 | NaN | NaN |
3 | 199.4 | 199.666667 | NaN |
4 | 198.9 | 199.266667 | NaN |
5 | 199.0 | 199.100000 | 199.38 |
... | ... | ... | ... |
146 | 263.3 | 262.433333 | 260.88 |
147 | 262.8 | 263.000000 | 261.94 |
148 | 261.8 | 262.633333 | 262.38 |
149 | 262.2 | 262.266667 | 262.60 |
150 | 262.7 | 262.233333 | 262.56 |
150 rows × 3 columns
# Exponential smoothing forecast (textbook p. 167)
QtEBJ = pd.DataFrame(BJ, columns=['value'])
bj_smooth_03 = BJ.ewm(alpha=0.3)  # smoothing coefficient = 0.3
bj_smooth_08 = BJ.ewm(alpha=0.8)  # smoothing coefficient = 0.8
QtEBJ['E3'] = bj_smooth_03.mean()
QtEBJ['E8'] = bj_smooth_08.mean()
QtEBJ
value | E3 | E8 | |
---|---|---|---|
time | |||
1 | 200.1 | 200.100000 | 200.100000 |
2 | 199.5 | 199.747059 | 199.600000 |
3 | 199.4 | 199.588584 | 199.438710 |
4 | 198.9 | 199.316739 | 199.007051 |
5 | 199.0 | 199.202521 | 199.001408 |
... | ... | ... | ... |
146 | 263.3 | 261.095916 | 263.132589 |
147 | 262.8 | 261.607141 | 262.866518 |
148 | 261.8 | 261.664999 | 262.013304 |
149 | 262.2 | 261.825499 | 262.162661 |
150 | 262.7 | 262.087849 | 262.592532 |
150 rows × 3 columns
QtEBJ.plot();
# Time index 1..n, one step per observation
XBJ = np.arange(1, len(BJ) + 1)
# Trend forecasting with a linear model (textbook p. 164):
# data drawn as points, fitted values as a red line
LBJ = trendmodel(BJ, XBJ)
plt.plot(BJ, 'o', LBJ, 'r-');
模型检验: Coef. Std.Err. t P>|t| [0.025 0.975] const 196.231919 1.511867 129.794429 2.367654e-154 193.244285 199.219554 x1 0.446968 0.017371 25.731131 1.686938e-56 0.412641 0.481295 决定系数: 0.817
EuStockMarkets数据集(数据来自Python数据包pydataset)包含了1991~1998年间欧洲主要股票交易市场的日收盘价。
该数据是时间序列格式,由1860行和4个变量构成。4个变量分别代表欧洲的4个主要股票市场:Germany DAX (Ibis),Switzerland SMI,France CAC,UK FTSE。
# Load the 'EuStockMarkets' sheet, using its first column as the index
EuStock = pd.read_excel(
    'EuStockMarkets.xlsx',
    sheet_name='EuStockMarkets',
    index_col=0,
)
EuStock
DAX | SMI | CAC | FTSE | |
---|---|---|---|---|
time | ||||
1 | 1628.75 | 1678.1 | 1772.8 | 2443.6 |
2 | 1613.63 | 1688.5 | 1750.5 | 2460.2 |
3 | 1606.51 | 1678.6 | 1718.0 | 2448.2 |
4 | 1621.04 | 1684.1 | 1708.1 | 2470.4 |
5 | 1618.16 | 1686.6 | 1723.1 | 2484.7 |
... | ... | ... | ... | ... |
1856 | 5460.43 | 7721.3 | 3939.5 | 5587.6 |
1857 | 5285.78 | 7447.9 | 3846.0 | 5432.8 |
1858 | 5386.94 | 7607.5 | 3945.7 | 5462.2 |
1859 | 5355.03 | 7552.6 | 3951.7 | 5399.5 |
1860 | 5473.72 | 7676.3 | 3995.0 | 5455.0 |
1860 rows × 4 columns
EuStock.plot();
# Simple moving average method (textbook p. 166); every element in a window
# carries the same weight.
# For each market the table gets three adjacent columns: the raw series, then
# '<market>M2' (3-period mean) and '<market>M4' (5-period mean). One loop
# replaces the per-market copy-pasted assignments and yields the same columns
# in the same order.
QtMEu1 = pd.DataFrame(index=EuStock.index)
for market in ('DAX', 'SMI', 'CAC', 'FTSE'):
    QtMEu1[market] = EuStock[market]
    QtMEu1[market + 'M2'] = EuStock[market].rolling(3).mean()
    QtMEu1[market + 'M4'] = EuStock[market].rolling(5).mean()
QtMEu1
DAX | DAXM2 | DAXM4 | SMI | SMIM2 | SMIM4 | CAC | CACM2 | CACM4 | FTSE | FTSEM2 | FTSEM4 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
time | ||||||||||||
1 | 1628.75 | NaN | NaN | 1678.1 | NaN | NaN | 1772.8 | NaN | NaN | 2443.6 | NaN | NaN |
2 | 1613.63 | NaN | NaN | 1688.5 | NaN | NaN | 1750.5 | NaN | NaN | 2460.2 | NaN | NaN |
3 | 1606.51 | 1616.296667 | NaN | 1678.6 | 1681.733333 | NaN | 1718.0 | 1747.100000 | NaN | 2448.2 | 2450.666667 | NaN |
4 | 1621.04 | 1613.726667 | NaN | 1684.1 | 1683.733333 | NaN | 1708.1 | 1725.533333 | NaN | 2470.4 | 2459.600000 | NaN |
5 | 1618.16 | 1615.236667 | 1617.618 | 1686.6 | 1683.100000 | 1683.18 | 1723.1 | 1716.400000 | 1734.50 | 2484.7 | 2467.766667 | 2461.42 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1856 | 5460.43 | 5528.956667 | 5584.068 | 7721.3 | 7840.133333 | 7926.76 | 3939.5 | 3983.333333 | 3994.86 | 5587.6 | 5620.700000 | 5646.14 |
1857 | 5285.78 | 5448.176667 | 5497.484 | 7447.9 | 7707.366667 | 7782.30 | 3846.0 | 3942.466667 | 3954.48 | 5432.8 | 5566.933333 | 5585.48 |
1858 | 5386.94 | 5377.716667 | 5451.918 | 7607.5 | 7592.233333 | 7715.16 | 3945.7 | 3910.400000 | 3948.34 | 5462.2 | 5494.200000 | 5551.42 |
1859 | 5355.03 | 5342.583333 | 5417.300 | 7552.6 | 7536.000000 | 7656.44 | 3951.7 | 3914.466667 | 3944.96 | 5399.5 | 5431.500000 | 5512.50 |
1860 | 5473.72 | 5405.230000 | 5392.380 | 7676.3 | 7612.133333 | 7601.12 | 3995.0 | 3964.133333 | 3935.58 | 5455.0 | 5438.900000 | 5467.42 |
1860 rows × 12 columns
# Time index 1..n, one step per row
XE = np.arange(1, len(EuStock) + 1)
# Trend forecasting with an exponential model (textbook p. 164):
# regress log(DAX) on time, then exponentiate the fitted values for plotting
LD = trendmodel(np.log(EuStock['DAX']), XE)
plt.plot(EuStock['DAX'], 'o', np.exp(LD), 'r-');
模型检验: Coef. Std.Err. t P>|t| [0.025 0.975] const 7.183568 0.006567 1093.816164 0.0 7.170688 7.196449 x1 0.000623 0.000006 101.884424 0.0 0.000611 0.000635 决定系数: 0.848
# Trend forecasting with an exponential model (textbook p. 164):
# regress log(SMI) on time, then exponentiate the fitted values for plotting
LS = trendmodel(np.log(EuStock['SMI']), XE)
plt.plot(EuStock['SMI'], 'o', np.exp(LS), 'r-');
模型检验: Coef. Std.Err. t P>|t| [0.025 0.975] const 7.300745 0.005777 1263.722456 0.0 7.289414 7.312075 x1 0.000776 0.000005 144.313308 0.0 0.000766 0.000787 决定系数: 0.918
(1)请画出该数据的折线图。
# Load the 'Johnson' sheet, using its first column as the index
Johnson = pd.read_excel(
    'Johnson.xlsx',
    sheet_name='Johnson',
    index_col=0,
)
Johnson
value | |
---|---|
time | |
1960.00 | 0.71 |
1960.25 | 0.63 |
1960.50 | 0.85 |
1960.75 | 0.44 |
1961.00 | 0.61 |
... | ... |
1979.75 | 9.99 |
1980.00 | 16.20 |
1980.25 | 14.67 |
1980.50 | 16.02 |
1980.75 | 11.61 |
84 rows × 1 columns
Johnson.plot();  # draw the series as a chart; the trailing ';' hides the returned object in the notebook
(2)分别用趋势预测方法和平滑预测方法进行预测。
# Create the year variable: index labels as text, first four characters kept
index_as_text = Johnson.index.astype(str)
Johnson['Year'] = index_as_text.str[:4]
Johnson
value | Year | |
---|---|---|
time | ||
1960.00 | 0.71 | 1960 |
1960.25 | 0.63 | 1960 |
1960.50 | 0.85 | 1960 |
1960.75 | 0.44 | 1960 |
1961.00 | 0.61 | 1961 |
... | ... | ... |
1979.75 | 9.99 | 1979 |
1980.00 | 16.20 | 1980 |
1980.25 | 14.67 | 1980 |
1980.50 | 16.02 | 1980 |
1980.75 | 11.61 | 1980 |
84 rows × 2 columns
# Build the yearly time series by summing 'value' within each year
johnson_by_year = Johnson.groupby(['Year'])
jvalue = johnson_by_year['value'].sum()
jvalue
Year 1960 2.63 1961 2.77 1962 3.01 1963 3.40 1964 4.16 1965 5.16 1966 6.06 1967 6.81 1968 8.19 1969 9.54 1970 13.50 1971 16.29 1972 19.35 1973 23.31 1974 25.20 1975 28.62 1976 31.77 1977 38.07 1978 45.00 1979 51.84 1980 58.50 Name: value, dtype: float64
# Time index 1..n, one step per yearly total
X2 = np.arange(1, len(jvalue) + 1)
jvalue
# Trend forecasting with a linear model (textbook p. 164):
# data drawn as points, fitted values as a red line
L2 = trendmodel(jvalue, X2)
plt.plot(jvalue, 'o', L2, 'r-');
模型检验: Coef. Std.Err. t P>|t| [0.025 0.975] const -9.615667 2.763893 -3.479031 2.512051e-03 -15.400560 -3.830773 x1 2.619519 0.220116 11.900643 2.981116e-10 2.158812 3.080227 决定系数: 0.882
# Time index 1..n, one step per yearly total
X2 = np.arange(1, len(jvalue) + 1)
jvalue
# Trend forecasting with an exponential model (textbook p. 164):
# regress log(jvalue) on time, then exponentiate the fitted values for plotting
L2 = trendmodel(np.log(jvalue), X2)
plt.plot(jvalue, 'o', np.exp(L2), 'r-');
模型检验: Coef. Std.Err. t P>|t| [0.025 0.975] const 0.664830 0.039199 16.960216 6.229450e-13 0.582785 0.746876 x1 0.166678 0.003122 53.391099 3.605778e-22 0.160144 0.173212 决定系数: 0.993
jo = Johnson['value']
jo.mean()  # simple average method (textbook p. 165)
4.799761904761905
# Simple moving average method (textbook p. 166).
# Note: column 'M2' holds the 3-period mean and 'M4' the 5-period mean.
QtM1 = pd.DataFrame(jo)
QtM1['M2'] = jo.rolling(window=3).mean()
QtM1['M4'] = jo.rolling(window=5).mean()
QtM1
value | M2 | M4 | |
---|---|---|---|
time | |||
1960.00 | 0.71 | NaN | NaN |
1960.25 | 0.63 | NaN | NaN |
1960.50 | 0.85 | 0.730000 | NaN |
1960.75 | 0.44 | 0.640000 | NaN |
1961.00 | 0.61 | 0.633333 | 0.648 |
... | ... | ... | ... |
1979.75 | 9.99 | 12.600000 | 12.150 |
1980.00 | 16.20 | 13.680000 | 13.608 |
1980.25 | 14.67 | 13.620000 | 13.734 |
1980.50 | 16.02 | 15.630000 | 14.346 |
1980.75 | 11.61 | 14.100000 | 13.698 |
84 rows × 3 columns
QtM1.plot();
# Exponential smoothing forecast (textbook p. 167)
QtE1 = pd.DataFrame(jo, columns=['value'])
jo_smooth_03 = jo.ewm(alpha=0.3)  # smoothing coefficient = 0.3
jo_smooth_08 = jo.ewm(alpha=0.8)  # smoothing coefficient = 0.8
QtE1['E3'] = jo_smooth_03.mean()
QtE1['E8'] = jo_smooth_08.mean()
QtE1
value | E3 | E8 | |
---|---|---|---|
time | |||
1960.00 | 0.71 | 0.710000 | 0.710000 |
1960.25 | 0.63 | 0.662941 | 0.643333 |
1960.50 | 0.85 | 0.748356 | 0.810000 |
1960.75 | 0.44 | 0.626621 | 0.513526 |
1961.00 | 0.61 | 0.620627 | 0.590730 |
... | ... | ... | ... |
1979.75 | 9.99 | 11.953112 | 10.887853 |
1980.00 | 16.20 | 13.227178 | 15.137571 |
1980.25 | 14.67 | 13.660025 | 14.763514 |
1980.50 | 16.02 | 14.368017 | 15.768703 |
1980.75 | 11.61 | 13.540612 | 12.441741 |
84 rows × 3 columns
QtE1.plot();  # plot the columns of QtE1 (value, E3, E8) on one chart