该代码为原创代码,版权归杭州电子科技大学-蹲pycharm的小火龙队,成员:陈志龙、潘一壬、金日翔 2021.5
如果需要请自取并自行改编
全部资料代码下载
其中 Linear.py 为偏最小二乘回归(PLS,最小二乘法的一种推广)的 Python 代码实现,想做其他最小二乘法相关项目的可以参考;代码的具体理解及注释可以参考相关书籍。
题目
城市天气分析
【背景说明】
我国幅员辽阔,跨纬度较广,距海远近差距较大,加之地势高低不同,地形类型及山脉走向多样,因而气温降水的组合多种多样,形成了多种多样的气候。气温,降水,温差,霜冻,风力等等对于人们的生产生活至关重要,天气对我们的影响是多方面的,比如农业生产,南方水稻北方小麦为主,受温度和降水影响;工业生产,风向对于大气污染会有影响,对于工厂选址也有影响,天气对生活的影响不言而喻。
【问题说明】
(1)要求参赛队员通过 http://www.tianqiapi.com/index/doc?version=history 提供的接口,使用爬虫爬取浙江省 11 个城市 2021 年 3 月份天气数据。
(2)对爬取数据进行处理,分别提取出各城市每天最高最低温,绘制其最高温最低温折线图,根据 31 组数据使用线性回归预测 4 月 1 日至 7 日最高温,最低温,最后取这 11 个城市的预测结果平均值作为浙江省 4 月 1 日至 7 日最高温最低温预测值。
【提交标准】
提交文件为:
1. 相关数据分析报告(pdf/pdf 各一份)
2. 相关代码文档(包括爬虫代码文档和分析代码文档,压缩包形式上交)
3. 爬取资料文件(压缩包形式上交)
备注:
爬取资料文件如过大,可上交部分资料文件并说明
分析代码建议以 Jupyter Notebook ipynb 文件格式上交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import numpy as np
from openpyxl import Workbook
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
from numpy.matlib import repmat
import requests
import time
import json
from Spider import Spider
from Spider_april import Spider_april
from Linear import Linear
from Find_average import Find_average
from Predict import Predict
from Plot import Plot
from Plot_april import Plot_april

# Driver script: every stage is a class imported above and is run simply by
# instantiating it. The stages are executed strictly in this order:
# Spider, Spider_april, Plot, Linear, Predict, Find_average, Plot_april.
for stage in (Spider, Spider_april, Plot, Linear, Predict, Find_average, Plot_april):
    stage()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import requests
from openpyxl import Workbook
import time
import pandas as pd
import json


class Spider():
    """Download March 2021 daily weather for the configured cities.

    Instantiating the class runs the whole job: it makes sure
    ``Weather.xlsx`` exists, requests one month of history per city,
    appends the rows to ``Weather.xlsx`` (de-duplicated and sorted) and
    finally writes a numeric version to ``Weather_secondary_treatment.xlsx``.
    """

    RAW_FILE = 'Weather.xlsx'
    RAW_SHEET = 'Weather'
    TREATED_FILE = 'Weather_secondary_treatment.xlsx'
    TREATED_SHEET = 'Weather_secondary_treatment'
    # Header row shared by both workbooks.
    COLUMNS = ['City', 'Date', 'Highest', 'Lowest', '天气情况', '风向', '风力', '空气质量', '空气质量描述', '空气质量等级']
    # Keys read from every daily record of the API response, in column order
    # (the city name is prepended as the first column).
    API_FIELDS = ('ymd', 'bWendu', 'yWendu', 'tianqi', 'fengxiang', 'fengli', 'aqi', 'aqiInfo', 'aqiLevel')
    # Number of daily records kept per city.
    DAYS = 31
    # Retry budget after a non-200 answer: per city and for the whole run.
    MAX_CITY_RETRIES = 10
    MAX_TOTAL_RETRIES = 50

    def __init__(self):
        # Request headers sent with every call. The key must be spelled
        # 'User-Agent'; 'user_agent' is just an unrelated custom header.
        self.headers = {
            'User-Agent': 'MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'}
        # Query-string parameters.
        # NOTE(review): `xxxxx` is a redacted credential placeholder and is an
        # undefined name as written - replace it with your own appid.
        self.appid = xxxxx
        self.appsecret = 'xxxxx'
        self.version = 'history'
        self.year = 2021
        self.month = 3
        self.city = '杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、')
        # One URL per city, in the same order as self.city.
        self.urls = [
            'https://tianqiapi.com/api?version={}&appid={}&appsecret={}&city={}&year={}&month={}'.format(
                self.version, self.appid, self.appsecret, name, self.year, self.month)
            for name in self.city]
        self.if_file()
        # Any list of city names taken from self.city may be passed here.
        self.result = self.spider(self.city)
        self.save_file(self.result)
        self.secondary_treatment()

    def if_file(self):
        """Create the raw workbook when it cannot be opened."""
        try:
            with open(self.RAW_FILE):
                pass
        except IOError:
            self.creat_Excel()

    def creat_Excel(self):
        """Write a new raw workbook that contains only the header row."""
        wb = Workbook()
        sheet = wb.active
        sheet.title = self.RAW_SHEET
        sheet.append(list(self.COLUMNS))
        wb.save(self.RAW_FILE)

    def spider(self, city_name):
        """Request the monthly history of every city in *city_name*.

        A non-200 answer is retried once per second, at most
        MAX_CITY_RETRIES times for one city and MAX_TOTAL_RETRIES times for
        the whole run. A city that still fails is skipped; exhausting the
        global budget, or asking for an unknown city, ends the program.

        :param city_name: iterable of city names, each present in self.city.
        :return: list of rows ``[city, ymd, bWendu, yWendu, tianqi,
            fengxiang, fengli, aqi, aqiInfo, aqiLevel]``.
        """
        result = []
        # Every decoded response (or a failure marker), printed at the end.
        result_sum = []
        total_retries = 0
        for city in city_name:
            if city not in self.city:
                print('City {} not find in Zhejiang province'.format(city))
                raise SystemExit
            print('Now spide {} city'.format(city))
            url = self.urls[self.city.index(city)]
            res = requests.get(url=url, headers=self.headers)
            city_retries = 0
            while res.status_code != 200 and city_retries < self.MAX_CITY_RETRIES:
                if total_retries >= self.MAX_TOTAL_RETRIES:
                    print('Please check your Internet')
                    raise SystemExit
                city_retries += 1
                total_retries += 1
                print('Error, the {} attemp in {} city, together {} attemp'.format(
                    city_retries, city, total_retries))
                time.sleep(1)
                res = requests.get(url=url, headers=self.headers)
            if res.status_code != 200:
                print('City {} can\'t find, go to next city'.format(city))
                result_sum.append({'city': city, 'status': None})
                continue
            print('City {} success'.format(city))
            payload = json.loads(res.text)
            result_sum.append(payload)
            for day in payload['data'][:self.DAYS]:
                result.append([city] + [day[field] for field in self.API_FIELDS])
            # Pause between two cities before the next request.
            time.sleep(5.01)
        print(result_sum)
        return result

    def save_file(self, result):
        """Append *result* to the raw workbook, then de-duplicate and sort it.

        :param result: rows as returned by :meth:`spider`.
        """
        df = pd.read_excel(self.RAW_FILE, sheet_name=self.RAW_SHEET, engine='openpyxl')
        for row in result:
            df.loc[len(df)] = row
        df = self.delete_same(df)
        df.sort_values(['City', 'Date'], ascending=True, inplace=True)
        df.to_excel(self.RAW_FILE, index=False, sheet_name=self.RAW_SHEET)

    def sort_values(self):
        """Sort the raw workbook by city and date (can be called on its own)."""
        df = pd.read_excel(self.RAW_FILE, sheet_name=self.RAW_SHEET, engine='openpyxl')
        df.sort_values(['City', 'Date'], ascending=True, inplace=True)
        df.to_excel(self.RAW_FILE, index=False, sheet_name=self.RAW_SHEET)

    def delete_same(self, df):
        """Return *df* without rows repeating the same City, Date and Highest.

        The first occurrence of every duplicate group is kept and the number
        of removed rows is printed.

        :param df: DataFrame with at least the columns City, Date, Highest.
        :return: a new, re-indexed DataFrame.
        """
        before = len(df)
        df = df.drop_duplicates(subset=['City', 'Date', 'Highest'], keep='first')
        df = df.reset_index(drop=True)
        print('共去除了{}调重复项目'.format(before - len(df)))
        return df

    def secondary_treatment(self):
        """Convert the raw workbook to numbers and save it as a second file.

        * Highest / Lowest: the last character is dropped and the remainder
          is parsed as an int.
        * 风力: looked up in a fixed table, '微' and '1级'..'17级' map to the
          values of ``fengli_change``.
        * 天气情况: average position (0-7) of the keywords of
          ``tianqi_situation`` that occur in the description. A description
          containing none of them raises ZeroDivisionError.
        """
        self.if_file_2()
        fengli = ['微'] + ['%d级' % (level + 1) for level in range(17)]
        fengli_change = [0.1, 0.85, 2.4, 4.35, 6.65, 9.3, 12.25, 15.45, 18.9, 22.55, 26.4, 30.5, 34.75, 39.15,
                         43.75, 48.5, 53.45, None]
        tianqi_situation = ['晴', '多云', '阴', '雾', '小雨', '中雨', '大雨', '暴雨']
        df = pd.read_excel(self.RAW_FILE, sheet_name=self.RAW_SHEET, engine='openpyxl')
        highest, lowest, wind, weather = [], [], [], []
        # Every row of the file is processed, whatever the number of cities.
        for i in range(len(df)):
            highest.append(int(df['Highest'][i][:-1]))
            lowest.append(int(df['Lowest'][i][:-1]))
            wind.append(fengli_change[fengli.index(df['风力'][i])])
            description = df['天气情况'][i]
            hits = [rank for rank, word in enumerate(tianqi_situation) if word in description]
            weather.append(sum(hits) / len(hits))
        df['风力'] = wind
        df['天气情况'] = weather
        df['Highest'] = highest
        df['Lowest'] = lowest
        df.to_excel(self.TREATED_FILE, index=False, sheet_name=self.TREATED_SHEET)

    def if_file_2(self):
        """Create the treated workbook (header row only) when it cannot be opened."""
        try:
            with open(self.TREATED_FILE):
                pass
        except IOError:
            wb = Workbook()
            sheet = wb.active
            sheet.title = self.TREATED_SHEET
            sheet.append(list(self.COLUMNS))
            wb.save(self.TREATED_FILE)


if __name__ == '__main__':
    Spider()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
import os
import requests
from openpyxl import Workbook
import time
import pandas as pd
import json


class Spider_april():
    """Download the weather of 1-7 April 2021 for the configured cities.

    Instantiating the class runs the whole job: it makes sure the raw
    workbook exists, requests the April history per city, keeps the first
    seven daily records, appends them to the raw workbook (de-duplicated and
    sorted) and finally writes a numeric version to a second workbook.
    """

    # The path strings are kept exactly as the other scripts spell them.
    RAW_FILE = '.\\4月1-7日\\Weather.xlsx'
    RAW_SHEET = 'Weather'
    TREATED_FILE = '.\\4月1-7日\\Weather_secondary_treatment.xlsx'
    TREATED_SHEET = 'Weather_secondary_treatment'
    # Header row shared by both workbooks.
    COLUMNS = ['City', 'Date', 'Highest', 'Lowest', '天气情况', '风向', '风力', '空气质量', '空气质量描述', '空气质量等级']
    # Keys read from every daily record of the API response, in column order
    # (the city name is prepended as the first column).
    API_FIELDS = ('ymd', 'bWendu', 'yWendu', 'tianqi', 'fengxiang', 'fengli', 'aqi', 'aqiInfo', 'aqiLevel')
    # Number of daily records kept per city.
    DAYS = 7
    # Retry budget after a non-200 answer: per city and for the whole run.
    MAX_CITY_RETRIES = 10
    MAX_TOTAL_RETRIES = 50

    def __init__(self):
        # Request headers sent with every call. The key must be spelled
        # 'User-Agent'; 'user_agent' is just an unrelated custom header.
        self.headers = {
            'User-Agent': 'MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'}
        # Query-string parameters.
        # NOTE(review): `xxxxx` is a redacted credential placeholder and is an
        # undefined name as written - replace it with your own appid.
        self.appid = xxxxx
        self.appsecret = 'xxxx'
        self.version = 'history'
        self.year = 2021
        self.month = 4
        self.city = '杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、')
        # One URL per city, in the same order as self.city.
        self.urls = [
            'https://tianqiapi.com/api?version={}&appid={}&appsecret={}&city={}&year={}&month={}'.format(
                self.version, self.appid, self.appsecret, name, self.year, self.month)
            for name in self.city]
        self.if_file()
        # Any list of city names taken from self.city may be passed here.
        self.result = self.spider(self.city)
        self.save_file(self.result)
        self.secondary_treatment()

    @staticmethod
    def _ensure_folder(path):
        """Create the directory part of *path* when there is one."""
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)

    def if_file(self):
        """Create the raw workbook when it cannot be opened."""
        try:
            with open(self.RAW_FILE):
                pass
        except IOError:
            self.creat_Excel()

    def creat_Excel(self):
        """Write a new raw workbook that contains only the header row."""
        wb = Workbook()
        sheet = wb.active
        sheet.title = self.RAW_SHEET
        sheet.append(list(self.COLUMNS))
        # Saving fails when the target folder is missing, so create it first.
        self._ensure_folder(self.RAW_FILE)
        wb.save(self.RAW_FILE)

    def spider(self, city_name):
        """Request the monthly history of every city in *city_name*.

        A non-200 answer is retried once per second, at most
        MAX_CITY_RETRIES times for one city and MAX_TOTAL_RETRIES times for
        the whole run. A city that still fails is skipped; exhausting the
        global budget, or asking for an unknown city, ends the program.

        :param city_name: iterable of city names, each present in self.city.
        :return: list of rows ``[city, ymd, bWendu, yWendu, tianqi,
            fengxiang, fengli, aqi, aqiInfo, aqiLevel]`` (DAYS per city).
        """
        result = []
        # Every decoded response (or a failure marker), printed at the end.
        result_sum = []
        total_retries = 0
        for city in city_name:
            if city not in self.city:
                print('City {} not find in Zhejiang province'.format(city))
                raise SystemExit
            print('Now spide {} city'.format(city))
            url = self.urls[self.city.index(city)]
            res = requests.get(url=url, headers=self.headers)
            city_retries = 0
            while res.status_code != 200 and city_retries < self.MAX_CITY_RETRIES:
                if total_retries >= self.MAX_TOTAL_RETRIES:
                    print('Please check your Internet')
                    raise SystemExit
                city_retries += 1
                total_retries += 1
                print('Error, the {} attemp in {} city, together {} attemp'.format(
                    city_retries, city, total_retries))
                time.sleep(1)
                res = requests.get(url=url, headers=self.headers)
            if res.status_code != 200:
                print('City {} can\'t find, go to next city'.format(city))
                result_sum.append({'city': city, 'status': None})
                continue
            print('City {} success'.format(city))
            payload = json.loads(res.text)
            result_sum.append(payload)
            for day in payload['data'][:self.DAYS]:
                result.append([city] + [day[field] for field in self.API_FIELDS])
            # Pause between two cities before the next request.
            time.sleep(5.01)
        print(result_sum)
        return result

    def save_file(self, result):
        """Append *result* to the raw workbook, then de-duplicate and sort it.

        :param result: rows as returned by :meth:`spider`.
        """
        df = pd.read_excel(self.RAW_FILE, sheet_name=self.RAW_SHEET, engine='openpyxl')
        for row in result:
            df.loc[len(df)] = row
        df = self.delete_same(df)
        df.sort_values(['City', 'Date'], ascending=True, inplace=True)
        df.to_excel(self.RAW_FILE, index=False, sheet_name=self.RAW_SHEET)

    def sort_values(self):
        """Sort the raw workbook by city and date (can be called on its own)."""
        df = pd.read_excel(self.RAW_FILE, sheet_name=self.RAW_SHEET, engine='openpyxl')
        df.sort_values(['City', 'Date'], ascending=True, inplace=True)
        df.to_excel(self.RAW_FILE, index=False, sheet_name=self.RAW_SHEET)

    def delete_same(self, df):
        """Return *df* without rows repeating the same City, Date and Highest.

        The first occurrence of every duplicate group is kept and the number
        of removed rows is printed.

        :param df: DataFrame with at least the columns City, Date, Highest.
        :return: a new, re-indexed DataFrame.
        """
        before = len(df)
        df = df.drop_duplicates(subset=['City', 'Date', 'Highest'], keep='first')
        df = df.reset_index(drop=True)
        print('共去除了{}调重复项目'.format(before - len(df)))
        return df

    def secondary_treatment(self):
        """Convert the raw workbook to numbers and save it as a second file.

        * Highest / Lowest: the last character is dropped and the remainder
          is parsed as an int.
        * 风力: looked up in a fixed table, '微' and '1级'..'17级' map to the
          values of ``fengli_change``.
        * 天气情况: average position (0-7) of the keywords of
          ``tianqi_situation`` that occur in the description. A description
          containing none of them raises ZeroDivisionError.
        """
        self.if_file_2()
        fengli = ['微'] + ['%d级' % (level + 1) for level in range(17)]
        fengli_change = [0.1, 0.85, 2.4, 4.35, 6.65, 9.3, 12.25, 15.45, 18.9, 22.55, 26.4, 30.5, 34.75, 39.15,
                         43.75, 48.5, 53.45, None]
        tianqi_situation = ['晴', '多云', '阴', '雾', '小雨', '中雨', '大雨', '暴雨']
        df = pd.read_excel(self.RAW_FILE, sheet_name=self.RAW_SHEET, engine='openpyxl')
        highest, lowest, wind, weather = [], [], [], []
        # Every row of the file is processed, whatever the number of cities.
        for i in range(len(df)):
            highest.append(int(df['Highest'][i][:-1]))
            lowest.append(int(df['Lowest'][i][:-1]))
            wind.append(fengli_change[fengli.index(df['风力'][i])])
            description = df['天气情况'][i]
            hits = [rank for rank, word in enumerate(tianqi_situation) if word in description]
            weather.append(sum(hits) / len(hits))
        df['风力'] = wind
        df['天气情况'] = weather
        df['Highest'] = highest
        df['Lowest'] = lowest
        df.to_excel(self.TREATED_FILE, index=False, sheet_name=self.TREATED_SHEET)

    def if_file_2(self):
        """Create the treated workbook (header row only) when it cannot be opened."""
        try:
            with open(self.TREATED_FILE):
                pass
        except IOError:
            wb = Workbook()
            sheet = wb.active
            sheet.title = self.TREATED_SHEET
            sheet.append(list(self.COLUMNS))
            self._ensure_folder(self.TREATED_FILE)
            wb.save(self.TREATED_FILE)


if __name__ == '__main__':
    Spider_april()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import pandas as pd
import matplotlib.pyplot as plt


class Plot:
    """Draw the March highest / lowest temperature curves read from Weather.xlsx.

    Instantiating the class saves one picture per city and then one picture
    with the curves of all cities together.
    """

    def __init__(self):
        # Workbook produced by the spider.
        self.df = pd.read_excel('Weather.xlsx', sheet_name='Weather', engine='openpyxl')
        # Figure set-up.
        plt.rcParams['font.family'] = ['Fangsong']
        plt.rcParams['axes.unicode_minus'] = False
        plt.figure(figsize=(6, 3), dpi=200)
        plt.subplots_adjust(hspace=0.5)
        # City names, sorted; a city's position selects its block of 31 rows
        # in the workbook and its line colour.
        self.city = sorted('杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、'))
        # One line colour per city.
        self.color = ['black', 'gray', 'lightcoral', 'red', 'darkgoldenrod', 'yellow', 'green', 'blue', 'cyan',
                      'purple', 'pink']
        # One picture per city; the figure is cleared after each save.
        for name in self.city:
            self.plot_Weather_Lowest(name)
            self.plot_Weather_Highest(name)
            plt.title('{}最高最低气温'.format(name))
            plt.savefig('./{}3月份每天最高气温和最低气温.jpg'.format(name))
            plt.clf()
        # All cities drawn on the same pair of panels.
        for name in self.city:
            self.plot_Weather_Lowest(name)
            self.plot_Weather_Highest(name)
        plt.title('浙江各城市最高最低气温')
        plt.legend(labels=self.city, bbox_to_anchor=(1, 1), fontsize=6.5, framealpha=0)
        plt.savefig('./浙江各个城市3月份每天最高气温和最低气温.jpg')

    def _draw(self, city_name, column, panel, style):
        """Plot the 31 values of *column* for *city_name* on sub-plot *panel*."""
        position = self.city.index(city_name)
        # Days of the month on the x axis.
        days = list(range(1, 32))
        axes = plt.subplot(2, 1, panel)
        cells = list(self.df[column][position * 31:(position + 1) * 31])
        # Each cell is text; its last character is dropped before int().
        temperatures = [int(cells[k][:-1]) for k in range(31)]
        axes.plot(days, temperatures, style, color=self.color[position], linewidth=0.5, markersize=0.6)
        plt.xticks(days, fontsize=5)
        plt.xlabel(column, fontsize=7)
        plt.yticks(fontsize=5)

    def plot_Weather_Highest(self, city_name):
        """Draw the highest temperatures of *city_name* on the upper panel."""
        self._draw(city_name, 'Highest', 1, '^-')

    def plot_Weather_Lowest(self, city_name):
        """Draw the lowest temperatures of *city_name* on the lower panel."""
        self._draw(city_name, 'Lowest', 2, 'v-')


if __name__ == '__main__':
    Plot()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import preprocessing
from numpy.matlib import repmat
from openpyxl import Workbook


class Linear():
    """Fit, per city, a partial least squares regression of the daily highest
    and lowest temperature on weather score, wind force and air quality.

    Instantiating the class reads ``Weather_secondary_treatment.xlsx``, fits
    every city, appends the coefficients to ``Result.xlsx`` (two rows per
    city: highest, then lowest), writes the equations to ``result.txt`` and
    saves one diagnostic picture ``<city>.jpg`` per city.

    NOTE(review): rows are appended to whatever ``Result.xlsx`` already
    contains, so a second run adds its rows after those of the first run.
    """

    def __init__(self):
        self.df = pd.read_excel('Weather_secondary_treatment.xlsx', sheet_name='Weather_secondary_treatment',
                                engine='openpyxl')
        # Sorted city names; a city's position selects its block of 31 rows.
        self.city = '杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、')
        self.city.sort()
        plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to display Chinese
        plt.rcParams['axes.unicode_minus'] = False
        # The report contains Chinese city names, hence the explicit encoding.
        self.f = open('result.txt', mode='w', encoding='utf-8')
        try:
            self.if_file()
            self.df_r = pd.read_excel('Result.xlsx', sheet_name='result', engine='openpyxl')
            for city_name in self.city:
                plt.clf()
                x0, y0, num, xishu, ch0, xish, sol = self.find(city_name=city_name)
                self.save(sol=sol, city_name=city_name)
                self.PLOT(ch0=ch0, num=num, x0=x0, y0=y0, xishu=xishu, xish=xish)
                plt.savefig('{}.jpg'.format(city_name))
        finally:
            # Always flush and release the report file.
            self.f.close()
        self.df_r.to_excel('Result.xlsx', index=False, sheet_name='result')

    def find(self, city_name):
        """Fit the regression for one city.

        :param city_name: a name contained in self.city.
        :return: tuple ``(x0, y0, num, xishu, ch0, xish, sol)`` where
            x0 (31x3) / y0 (31x2) are the raw predictors / responses,
            num is the sample count, xishu (3x2) the coefficients on
            standardised data, ch0 the two intercepts on the original scale,
            xish (3x2) the coefficients on the original scale and sol (4x2)
            the intercept row stacked on top of xish.
        """
        num = 31  # samples (days) per city
        n = 3     # predictors
        m = 2     # responses
        start = self.city.index(city_name) * num
        columns = ['天气情况', '风力', '空气质量', 'Highest', 'Lowest']
        city_matrix = np.array([list(self.df[c][start:start + num]) for c in columns]).T
        mu = np.mean(city_matrix, axis=0)
        sig = np.std(city_matrix, axis=0)
        data = preprocessing.scale(city_matrix)
        x0 = city_matrix[:, :n]
        y0 = city_matrix[:, n:]
        e0 = data[:, :n]
        f0 = data[:, n:]
        chg = np.identity(n)
        w = np.zeros([n, n])
        w_star = np.zeros([n, n])
        t = np.zeros([num, n])
        ss = []
        Q_h2 = []
        press = [0] * n
        # Index of the last component used. Defaults to all components so
        # that `r` is defined even when the stopping test below never fires.
        r = n - 1
        for k in range(n):
            # Weight vector: eigenvector of the largest eigenvalue.
            matrix = e0.T @ f0 @ f0.T @ e0
            val, vec = np.linalg.eig(matrix)
            order = val.argsort()
            w[:, k] = vec[:, order[-1]]
            w_star[:, k] = chg @ w[:, k]
            t[:, k] = e0 @ w[:, k]
            alpha = (e0.T @ t[:, k] / (t[:, k] @ t[:, k])).reshape(1, n)
            chg = chg @ (np.identity(n) - w[:, k:k + 1] @ alpha)
            # Deflate the predictors by the extracted component.
            e0 = e0 - t[:, k:k + 1] @ alpha
            # Residual sum of squares with the first k + 1 components; the
            # last row (intercept) of the least-squares solution is dropped.
            beta = np.linalg.pinv(np.c_[t[:, :k + 1], np.ones(num)]) @ f0
            beta = np.delete(beta, -1, axis=0)
            residual = f0 - t[:, :k + 1] @ beta
            ss.append((residual ** 2).sum())
            # Leave-one-out prediction error.
            press_i = np.zeros(num)
            for j in range(num):
                left_t = t[j:j + 1, :k + 1]
                left_f = f0[j:j + 1, :]
                rest_t = np.delete(t[:, :k + 1], j, axis=0)
                rest_f = np.delete(f0, j, axis=0)
                beta1 = np.linalg.pinv(np.c_[rest_t, np.ones(num - 1)]) @ rest_f
                beta1 = np.delete(beta1, -1, axis=0)
                press_i[j] = ((left_f - left_t @ beta1) ** 2).sum()
            press[k] = press_i.sum()
            Q_h2.append(1 - press[k] / ss[k - 1] if k > 0 else 1)
            if Q_h2[k] < 0.0975:
                r = k
                break
        beta_z = np.linalg.pinv(np.c_[t[:, :r + 1], np.ones(num)]) @ f0
        beta_z = np.delete(beta_z, -1, axis=0)
        xishu = w_star[:, :r + 1] @ beta_z
        mu_x, mu_y = mu[:n], mu[n:]
        sig_x, sig_y = sig[:n], sig[n:]
        # Back to the original scale. A 1-D column is used so the product is
        # a scalar; float() on a 1-element array is deprecated by NumPy.
        ch0 = [float(mu_y[k] - (mu_x / sig_x * sig_y[k]) @ xishu[:, k]) for k in range(m)]
        xish = np.zeros([n, m])
        for k in range(m):
            xish[:, k] = xishu[:, k] / sig_x * sig_y[k]
        sol = np.r_[np.array([ch0]), xish]
        return x0, y0, num, xishu, ch0, xish, sol

    def PLOT(self, ch0, num, x0, y0, xishu, xish):
        """Draw the diagnostic panels of one city on the current figure.

        Panels 221 / 222 show fitted against observed values of the first /
        second response together with the diagonal; panel 223 is a bar chart
        of the six standardised coefficients.
        """
        y_hat = repmat(ch0, num, 1) + x0 @ xish
        ymax = np.r_[np.array([y_hat.max(axis=0)]), np.array([y0.max(axis=0)])].max(axis=0)
        ax1 = plt.subplot(221)
        ax2 = plt.subplot(222)
        ax3 = plt.subplot(223)
        for axes, col, marker in ((ax1, 0, '*'), (ax2, 1, 'o')):
            plt.sca(axes)
            diagonal = list(range(int(ymax[col])))
            plt.plot(diagonal, diagonal, '-')
            plt.plot(y_hat[:, col], y0[:, col], marker)
        plt.sca(ax3)
        plt.bar(np.arange(6), height=xishu.reshape([1, 6], order='F')[0], width=0.5)
        # plt.show()

    def if_file(self):
        """Create Result.xlsx (header row only) when it cannot be opened."""
        try:
            with open('Result.xlsx'):
                pass
        except IOError:
            wb = Workbook()
            sheet = wb.active
            sheet.title = 'result'
            sheet.append(['City', 'x0', 'x1', 'x2', 'x3'])
            wb.save('Result.xlsx')

    @staticmethod
    def _signed(value):
        """Return *value* as text, prefixed with '+' unless it is negative."""
        return str(value) if value < 0 else '+' + str(value)

    def save(self, sol, city_name):
        """Write the two equations of a city to result.txt and queue them for Result.xlsx.

        :param sol: 4x2 array, row 0 the intercepts, rows 1-3 the
            coefficients; column 0 belongs to y1, column 1 to y2.
        :param city_name: label written in front of the equations.
        """
        terms = [self._signed(sol[row][col]) for col in range(2) for row in range(4)]
        self.f.write('{}\n y1 = {} {}x1 {}x2 {}x3\ny2 = {} {}x1 {}x2 {}x3\n\n'.format(city_name, *terms))
        for col in range(2):
            row = [city_name, sol[0][col], sol[1][col], sol[2][col], sol[3][col]]
            self.df_r.loc[len(self.df_r)] = row


if __name__ == '__main__':
    Linear()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from openpyxl import Workbook


class Predict(object):
    """Apply the stored regression coefficients to the April observations.

    Instantiating the class reads the treated April workbook and
    ``Result.xlsx``, computes a predicted highest and lowest temperature for
    every April row and appends observation plus prediction to
    ``Result_April.xlsx``.
    """

    def __init__(self):
        self.df = pd.read_excel('.\\4月1-7日\\Weather_secondary_treatment.xlsx',
                                sheet_name='Weather_secondary_treatment', engine='openpyxl')
        self.df_r = pd.read_excel('Result.xlsx', sheet_name='result', engine='openpyxl')
        # Sorted city names; city number c uses coefficient rows 2c (highest)
        # and 2c + 1 (lowest) of Result.xlsx.
        self.city = sorted('杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、'))
        self.if_file()
        self.df_r_a = pd.read_excel('Result_April.xlsx', sheet_name='result', engine='openpyxl')
        observed = self.df
        for i in range(len(observed.index)):
            high_row = self.city.index(observed['City'][i]) * 2
            self.predict_high = self._estimate(high_row, i)
            self.predict_low = self._estimate(high_row + 1, i)
            record = [observed[column][i]
                      for column in ('City', 'Date', '天气情况', '风力', '空气质量', 'Highest', 'Lowest')]
            record += [self.predict_high, self.predict_low]
            self.df_r_a.loc[len(self.df_r_a.index)] = record
        self.df_r_a.to_excel('Result_April.xlsx', index=False, sheet_name='result')

    def _estimate(self, coef_row, obs_row):
        """Evaluate x0 + x1*weather + x2*wind + x3*air quality.

        Coefficients come from row *coef_row* of Result.xlsx, the observed
        values from row *obs_row* of the April workbook.
        """
        coef = self.df_r
        obs = self.df
        return (coef['x0'][coef_row]
                + coef['x1'][coef_row] * obs['天气情况'][obs_row]
                + coef['x2'][coef_row] * obs['风力'][obs_row]
                + coef['x3'][coef_row] * obs['空气质量'][obs_row])

    def if_file(self):
        """Create Result_April.xlsx (header row only) when it cannot be opened."""
        try:
            with open('Result_April.xlsx'):
                pass
        except IOError:
            header = ['City', '时间', '天气情况', '风力', '空气质量', '实际最高温', '实际最低温', '预测最高温', '预测最低温']
            wb = Workbook()
            sheet = wb.active
            sheet.title = 'result'
            sheet.append(header)
            wb.save('Result_April.xlsx')


if __name__ == '__main__':
    Predict()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import pandas as pd
import numpy as np


class Find_average():
    """Average the per-city April temperatures into province-wide daily values.

    Instantiating the class reads ``Result_April.xlsx``, averages the
    predicted and the actual highest / lowest temperature over the 11 cities
    for each of the 7 days, prints the four arrays and writes them to
    ``result_april.txt``.
    """

    def __init__(self):
        self.df = pd.read_excel('Result_April.xlsx', sheet_name='result', engine='openpyxl')
        self.predict_high = self._daily_mean('预测最高温')
        self.predict_low = self._daily_mean('预测最低温')
        self.true_high = self._daily_mean('实际最高温')
        self.true_low = self._daily_mean('实际最低温')
        print(self.predict_high)
        print(self.predict_low)
        print(self.true_high)
        print(self.true_low)
        sections = (
            ('Predict_high', self.predict_high),
            ('Predict_low', self.predict_low),
            ('True_high', self.true_high),
            ('True_low', self.true_low),
        )
        # The context manager guarantees the report is flushed and closed;
        # the lines contain Chinese characters, hence the explicit encoding.
        with open('result_april.txt', 'w', encoding='utf-8') as f:
            for title, values in sections:
                f.write(title + '\n')
                for day, value in enumerate(values, start=1):
                    f.write('4月{}日 = {}\n'.format(day, value))

    def _daily_mean(self, column, cities=11, days=7):
        """Return the mean of *column* over all cities for every day.

        The sheet is read as *cities* consecutive blocks of *days* rows, so
        the value of city ``c`` on day ``d`` is taken from row
        ``c * days + d``.

        :param column: name of a numeric column of self.df.
        :param cities: number of city blocks.
        :param days: number of rows per city.
        :return: 1-D numpy array of length *days*.
        """
        values = self.df[column]
        table = np.array([[values[c * days + d] for c in range(cities)] for d in range(days)])
        return table.sum(axis=1) / cities


if __name__ == '__main__':
    Find_average()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import pandas as pd
import matplotlib.pyplot as plt


class Plot_april():
    """Plot actual against predicted temperatures of 1-7 April for every city.

    Instantiating the class reads ``Result_April.xlsx`` and saves one picture
    per city with the highest temperatures on the upper panel and the lowest
    temperatures on the lower panel (solid line: actual, dashed: predicted).
    """

    def __init__(self):
        # Workbook written by the prediction step. The name is spelled with a
        # capital 'A', exactly as that step writes it, so the file is also
        # found on case-sensitive file systems.
        self.df = pd.read_excel('Result_April.xlsx', sheet_name='result', engine='openpyxl')
        # Figure set-up.
        plt.rcParams['font.family'] = ['Fangsong']
        plt.rcParams['axes.unicode_minus'] = False
        plt.figure(figsize=(6, 3), dpi=200)
        plt.subplots_adjust(hspace=0.5)
        # City names, sorted; a city's position selects its block of 7 rows
        # in the workbook and its line colour.
        self.city = '杭州、宁波、温州、绍兴、湖州、嘉兴、金华、衢州、台州、丽水、舟山'.split('、')
        self.city.sort()
        # One line colour per city.
        self.color = ['black', 'gray', 'lightcoral', 'red', 'darkgoldenrod', 'yellow', 'green', 'blue', 'cyan',
                      'purple', 'pink']
        for name in self.city:
            self.plot_Weather_Lowest(name)
            self.plot_Weather_Highest(name)
            plt.title('{}最高最低气温'.format(name))
            plt.savefig('./{}4月份每天最高气温和最低气温.jpg'.format(name))
            plt.clf()

    def _draw(self, city_name, panel, actual_column, predicted_column, label):
        """Plot the actual and the predicted series of one city on *panel*."""
        position = self.city.index(city_name)
        first, last = position * 7, (position + 1) * 7
        # The x axis holds the 7 days.
        days = [d + 1 for d in range(7)]
        axes = plt.subplot(2, 1, panel)
        plt.sca(axes)
        for column, line_style in ((actual_column, '-'), (predicted_column, '--')):
            axes.plot(days, list(self.df[column][first:last]), line_style, color=self.color[position],
                      linewidth=0.5, markersize=0.6)
        plt.ylim(0, 50)
        plt.xticks(days, fontsize=5)
        plt.xlabel(label, fontsize=7)
        plt.yticks(fontsize=5)

    def plot_Weather_Highest(self, city_name):
        """Draw actual and predicted highest temperatures on the upper panel."""
        self._draw(city_name, 1, '实际最高温', '预测最高温', 'Highest')

    def plot_Weather_Lowest(self, city_name):
        """Draw actual and predicted lowest temperatures on the lower panel."""
        self._draw(city_name, 2, '实际最低温', '预测最低温', 'Lowest')


if __name__ == '__main__':
    Plot_april()