Categories: Python

by Jarno

Share

Predicting the number of COVID-19 cases for the coming 7 days with Multiple Linear Regression (a linear model fitted on polynomial features of the day number).

Using:

  • Python
  • PyCharm IDE
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model

# Dataset: NL-data: view-source:https://flo.uri.sh/story/469135/embed?auto=1#slide-0

# Script configuration.
CSV_PATH = 'coronaCasesNL_2nd_C.csv'  # expects at least the columns 'id' and 'cases'
POLY_DEGREE = 4       # polynomial features: 1, x, x^2, x^3, x^4
FORECAST_DAYS = 7     # how many days past the last sample to predict


def print_banner(title):
    """Print *title* between two 30-character dashed rules."""
    print('-' * 30)
    print(title)
    print('-' * 30)


def forecast_day_ids(num_samples, days):
    """Return the day ids 1..(num_samples + days) as a column vector.

    The upper bound is inclusive, so the result covers every observed day
    plus the full ``days``-long forecast horizon (the original script used
    ``range(1, num_samples + days)`` and therefore stopped one day short).

    Args:
        num_samples: number of observed days (ids are assumed to run 1..n).
        days: number of future days to append.

    Returns:
        ``numpy.ndarray`` of shape ``(num_samples + days, 1)``.
    """
    return np.arange(1, num_samples + days + 1).reshape(-1, 1)


def main():
    """Fit a polynomial regression on daily cases and plot a short forecast."""
    # Load data from the CSV file and keep only the columns the model uses.
    data = pd.read_csv(CSV_PATH, sep=',')
    data = data[['id', 'cases']]
    print_banner('HEAD')
    print(data.head())

    # Prepare the data: scikit-learn expects 2-D (n_samples, n_features) arrays.
    print_banner('PREPARE DATA')
    x = np.array(data['id']).reshape(-1, 1)
    y = np.array(data['cases']).reshape(-1, 1)
    plt.plot(y, '-m')

    # Set up the polynomial features; fit the transformer once on the
    # training ids and only ``transform`` new inputs afterwards.
    poly_features = PolynomialFeatures(degree=POLY_DEGREE)
    x = poly_features.fit_transform(x)

    # Training with data
    print_banner('TRAINING DATA')
    model = linear_model.LinearRegression()
    model.fit(x, y)
    # NOTE: ``score`` is the R^2 of the fit on the *training* data, not a
    # hold-out accuracy; the label below is kept as originally published.
    accuracy = model.score(x, y)
    print(f'Accuracy:{round(accuracy*100, 3)} %')
    y0 = model.predict(x)

    # Prediction
    days = FORECAST_DAYS
    # Count the samples directly instead of counting non-zero predictions,
    # which would under-count if a fitted value happened to be exactly 0.
    numvalues = len(y)
    print_banner('PREDICTION')
    print(f'Prediction - Cases after {days} days: ', end='')
    predicted = model.predict(poly_features.transform([[numvalues + days]]))
    # ``.item()`` extracts the scalar explicitly; ``int()`` on a (1, 1) array
    # is deprecated in recent NumPy. The value is truncated to whole people
    # and printed as a float to keep the original output format.
    print(float(int(predicted.item())), 'People')

    # Predict for every observed day plus the coming ``days`` days.
    x1 = forecast_day_ids(numvalues, days)
    y1 = model.predict(poly_features.transform(x1))

    # Plot the lines: forecast curve (red) and fitted curve (blue).
    plt.plot(y1, '--r')
    plt.plot(y0, '--b')

    # Plot titles and information
    plt.title(f'Number of COVID-19 cases in The Netherlands (03SEP20-31OKT20) + prediction {days} days')
    plt.xlabel('days')
    plt.ylabel('cases')
    plt.show()


if __name__ == '__main__':
    main()
coronaCasesNL_2nd_C.csv

date,id,total,average,cases
09/03/2020,1,72464,549,601
09/04/2020,2,73208,582,744
09/05/2020,3,73862,604,654
09/06/2020,4,74787,664,925
09/07/2020,5,75584,702,797
09/08/2020,6,76548,774,964
09/09/2020,7,77688,832,1140
09/10/2020,8,78511,864,823
09/11/2020,9,79781,939,1270
09/12/2020,10,81012,1021,1231
09/13/2020,11,82099,1045,1087
09/14/2020,12,83399,1116,1300
09/15/2020,13,84778,1176,1379
09/16/2020,14,86320,1233,1542
09/17/2020,15,88073,1366,1753
09/18/2020,16,90047,1467,1974
09/19/2020,17,91934,1560,1887
09/20/2020,18,93778,1668,1844
09/21/2020,19,95995,1799,2217
09/22/2020,20,98240,1923,2245
09/23/2020,21,100597,2040,2357
09/24/2020,22,103141,2153,2544
09/25/2020,23,105918,2267,2777
09/26/2020,24,108631,2385,2713
09/27/2020,25,111626,2550,2995
09/28/2020,26,114540,2649,2914
09/29/2020,27,117551,2759,3011
09/30/2020,28,120845,2893,3294
10/01/2020,29,124097,2994,3252
10/02/2020,30,127922,3143,3825
10/03/2020,31,131889,3323,3967
10/04/2020,32,135892,3467,4003
10/05/2020,33,140471,3704,4579
10/06/2020,34,144999,3921,4528
10/07/2020,35,149988,4163,4989
10/08/2020,36,155810,4530,5822
10/09/2020,37,161781,4837,5971
10/10/2020,38,168280,5199,6499
10/11/2020,39,174653,5537,6373
10/12/2020,40,181498,5861,6845
10/13/2020,41,188876,6268,7378
10/14/2020,42,196163,6596,7287
10/15/2020,43,203954,6878,7791
10/16/2020,44,211938,7165,7984
10/17/2020,45,220052,7396,8114
10/18/2020,46,228234,7654,8182
10/19/2020,47,236226,7818,7992
10/20/2020,48,244391,7931,8165
10/21/2020,49,253134,8139,8743
10/22/2020,50,262405,8350,9271
10/23/2020,51,272401,8638,9996
10/24/2020,52,281052,8714,8651
10/25/2020,53,291254,9003,10202
10/26/2020,54,301597,9339,10343
10/27/2020,55,311889,9643,10292
10/28/2020,56,319991,9551,8102
10/29/2020,57,330255,9693,10264
10/30/2020,58,341374,9853,11119
10/31/2020,59,351178,10018,9804