import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import preprocessing
from numpy.matlib import repmat
from openpyxl import Workbook
class Linear():
def __init__(self):
self.df = pd.read_excel('Weather_secondary_treatment.xlsx', sheet_name='Weather_secondary_treatment',
engine='openpyxl')
self.city = '杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、')
self.city.sort()
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用黑体显示中文
plt.rcParams['axes.unicode_minus'] = False
self.f = open('result.txt', mode='w')
self.if_file()
self.df_r = pd.read_excel('Result.xlsx', sheet_name='result', engine='openpyxl')
for city_name in self.city:
plt.clf()
x0, y0, num, xishu, ch0, xish, sol = self.find(city_name=city_name)
self.save(sol=sol, city_name=city_name)
self.PLOT(ch0=ch0, num=num, x0=x0, y0=y0, xishu=xishu, xish=xish)
plt.savefig('{}.jpg'.format(city_name))
self.df_r.to_excel('Result.xlsx', index=False, sheet_name='result')
def find(self, city_name):
i = self.city.index(city_name)
Highest = list(self.df['Highest'][i * 31:i * 31 + 31])
Lowest = list(self.df['Lowest'][i * 31:i * 31 + 31])
tianqi_situation = list(self.df['天气情况'][i * 31:i * 31 + 31])
fengli = list(self.df['风力'][i * 31:i * 31 + 31])
air_quality = list(self.df['空气质量'][i * 31:i * 31 + 31])
city_dataframe = pd.DataFrame(
{'tianqi_situation': tianqi_situation, 'fengli': fengli, "air_quality": air_quality, 'Highest': Highest,
'Lowest': Lowest})
city_matrix = np.array([tianqi_situation, fengli, air_quality, Highest, Lowest])
city_matrix = city_matrix.T
# 平均值
mu = np.mean(city_matrix, axis=0)
# 标准差
sig = np.std(city_matrix, axis=0)
rr = city_dataframe.corr()
data = preprocessing.scale(city_matrix)
x0 = city_matrix[:, :3]
y0 = city_matrix[:, 3:]
e0 = data[:, :3]
f0 = data[:, 3:]
num = 31
n = 3
m = 2
# 3阶单位阵
chg = np.identity(n)
w = np.zeros([n, n])
w_star = np.zeros([n, n])
t = np.zeros([num, n])
ss = []
Q_h2 = []
press_i = [0 for i in range(num)]
press = [0, 0, 0]
alpha = np.zeros([3, 1])
for i in range(n):
press_i = list(press_i)
# 点乘
matrix = e0.T @ f0 @ f0.T @ e0
# 求特征值
[val, vec] = np.linalg.eig(matrix)
val = val.argsort()
w[:, i] = vec[:, val[len(val) - 1]]
w_star[:, i] = chg @ w[:, i]
t[:, i] = e0 @ w[:, i]
alpha = np.array([list(e0.T @ t[:, i] / (t[:, i].T @ t[:, i]))])
w1 = np.array(w[:, i:i+1])
chg = chg @ (np.identity(n) - w1 @ alpha)
t1 = np.array(t[:, i:i+1])
e0 = e0 - t1 @ alpha
beta = np.linalg.pinv(np.c_[t[:, :i + 1], np.ones(31)]) @ f0
beta = np.delete(beta, (-1), axis=0)
cancha = f0 - t[:, :i + 1] @ beta
cancha = [[cancha[i][j] ** 2 for j in range(len(cancha[i]))] for i in range(len(cancha))]
cancha = np.array(cancha)
ss.append(cancha.sum())
for j in range(num):
t1 = t[:, :i + 1]
f1 = f0
she_t = t1[j:j + 1, :]
she_f = f1[j:j + 1, :]
t1 = np.delete(t1, j, axis=0)
f1 = np.delete(f1, j, axis=0)
beta1 = np.linalg.pinv(np.c_[t1, np.ones(num - 1)]) @ f1
beta1 = np.delete(beta1, (-1), axis=0)
cancha = she_f - she_t @ beta1
cancha = [[cancha[i][j] ** 2 for j in range(len(cancha[i]))] for i in range(len(cancha))]
cancha = np.array(cancha)
press_i[j] = cancha.sum()
press_i = np.array(press_i)
press[i] = press_i.sum()
if i > 0:
Q_h2.append(1 - press[i] / ss[i - 1])
# print('Q_h2[{}] = {}'.format(i, (1 - press[i] / ss[i - 1])))
else:
Q_h2.append(1)
if Q_h2[i] < 0.0975:
# print('Number of components proposedr = %d' % (i + 1))
r = i
break
beta_z = np.linalg.pinv(np.c_[t[:, :r + 1], np.ones(31)]) @ f0
beta_z = np.delete(beta_z, (-1), axis=0)
xishu = w_star[:, :r + 1] @ beta_z
mu_x = mu[:n]
mu_y = mu[n:]
sig_x = sig[:n]
sig_y = sig[n:]
ch0 = []
for i in range(m):
ch0.append(float(mu_y[i] - np.true_divide(mu_x, sig_x) * sig_y[i] @ xishu[:, i:i + 1]))
xish = np.zeros([3, 2])
for i in range(m):
xish[:, i] = np.true_divide(xishu[:, i], sig_x.T) * sig_y[i]
sol = np.r_[np.array([ch0]), xish]
# print(' city = {}\n x0 = {}\n y0 = {}\n num = {}\n xishu = {}\n ch0 = {}\n xish = {}\n sol = {}'.format(city, x0, y0, num, xishu, ch0, xish, sol))
return x0, y0, num, xishu, ch0, xish, sol
def PLOT(self, ch0, num, x0, y0, xishu, xish):
ch0 = repmat(ch0, num, 1)
y_hat = ch0 + x0 @ xish
y1max = y_hat.max(axis=0)
y2max = y0.max(axis=0)
ymax = np.r_[np.array([y1max]), np.array([y2max])].max(axis=0)
cancha = y_hat - y0
ax1 = plt.subplot(221)
ax2 = plt.subplot(222)
ax3 = plt.subplot(223)
plt.sca(ax1)
x = [i for i in range(int(ymax[0]))]
y = [i for i in range(int(ymax[0]))]
plt.plot(x, y, '-')
plt.plot(y_hat[:, 0], y0[:, 0], '*')
plt.sca(ax2)
x = [i for i in range(int(ymax[1]))]
y = [i for i in range(int(ymax[1]))]
plt.plot(x, y, '-')
plt.plot(y_hat[:, 1], y0[:, 1], 'o')
plt.sca(ax3)
x = np.arange(6)
plt.bar(x, height=xishu.reshape([1, 6], order='F')[0], width=0.5)
# plt.show()
def if_file(self):
try:
f = open('Result.xlsx')
f.close()
except IOError:
wb = Workbook()
sheet = wb.active
sheet.title = 'result'
row = ['City', 'x0', 'x1', 'x2', 'x3']
sheet.append(row)
wb.save('Result.xlsx')
def save(self, sol, city_name):
self.f.write('{}\n y1 = {} {}x1 {}x2 {}x3\ny2 = {} {}x1 {}x2 {}x3\n\n'.format(city_name,
sol[0][0] if sol[0][
0] < 0 else '+' + str(
sol[0][0]),
sol[1][0] if sol[1][
0] < 0 else '+' + str(
sol[1][0]),
sol[2][0] if sol[2][
0] < 0 else '+' + str(
sol[2][0]),
sol[3][0] if sol[3][
0] < 0 else '+' + str(
sol[3][0]),
sol[0][1] if sol[0][
1] < 0 else '+' + str(
sol[0][1]),
sol[1][1] if sol[1][
1] < 0 else '+' + str(
sol[1][1]),
sol[2][1] if sol[2][
1] < 0 else '+' + str(
sol[2][1]),
sol[3][1] if sol[3][
1] < 0 else '+' + str(
sol[3][1])))
row = [city_name, sol[0][0], sol[1][0], sol[2][0], sol[3][0]]
self.df_r.loc[len(list(self.df_r.index))] = row
row = [city_name, sol[0][1], sol[1][1], sol[2][1], sol[3][1]]
self.df_r.loc[len(list(self.df_r.index))] = row
if __name__ == '__main__':
Linear()