import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import dates as mpl_dates


# Hong Kong situation CSV served from chp.gov.hk
HK_COVID_CSV_URL = 'http://www.chp.gov.hk/files/misc/latest_situation_of_reported_cases_covid_19_eng.csv'
df_hk_covid = pd.read_csv(HK_COVID_CSV_URL)


df_hk_covid


# Summarise column dtypes and non-null counts before any preprocessing
df_hk_covid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 604 entries, 0 to 603
Data columns (total 10 columns):
 #   Column                                                Non-Null Count  Dtype  
---  ------                                                --------------  -----  
 0   As of date                                            604 non-null    object 
 1   As of time                                            194 non-null    object 
 2   Number of confirmed cases                             604 non-null    int64  
 3   Number of ruled out cases                             89 non-null     float64
 4   Number of cases still hospitalised for investigation  89 non-null     float64
 5   Number of cases fulfilling the reporting criteria     89 non-null     float64
 6   Number of death cases                                 604 non-null    int64  
 7   Number of discharge cases                             604 non-null    int64  
 8   Number of probable cases                              604 non-null    int64  
 9   Number of hospitalised cases in critical condition    515 non-null    float64
dtypes: float64(4), int64(4), object(2)
memory usage: 47.3+ KB


# The 'As of date' strings are day-first (e.g. "08/01/2020" is 8 January 2020),
# so parse them with dayfirst=True and store the datetime values back in place.
parsed_as_of_date = pd.to_datetime(df_hk_covid['As of date'], dayfirst=True)
df_hk_covid['As of date'] = parsed_as_of_date


# Confirm that the column dtype is now datetime64
df_hk_covid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 604 entries, 0 to 603
Data columns (total 10 columns):
 #   Column                                                Non-Null Count  Dtype         
---  ------                                                --------------  -----         
 0   As of date                                            604 non-null    datetime64[ns]
 1   As of time                                            194 non-null    object        
 2   Number of confirmed cases                             604 non-null    int64         
 3   Number of ruled out cases                             89 non-null     float64       
 4   Number of cases still hospitalised for investigation  89 non-null     float64       
 5   Number of cases fulfilling the reporting criteria     89 non-null     float64       
 6   Number of death cases                                 604 non-null    int64         
 7   Number of discharge cases                             604 non-null    int64         
 8   Number of probable cases                              604 non-null    int64         
 9   Number of hospitalised cases in critical condition    515 non-null    float64       
dtypes: datetime64[ns](1), float64(4), int64(4), object(1)
memory usage: 47.3+ KB


# Most recent reporting date in the dataset
latest_date = df_hk_covid['As of date'].max()
# Date column, reused as the x-axis of the time-series plots
dates = df_hk_covid['As of date']
latest_date

Timestamp('2021-09-02 00:00:00')


# Boolean mask selecting the row(s) reported on the most recent date
idx_latest_date = df_hk_covid['As of date'].eq(latest_date)
idx_latest_date

0      False
1      False
2      False
3      False
4      False
       ...  
599    False
600    False
601    False
602    False
603     True
Name: As of date, Length: 604, dtype: bool


# Cumulative confirmed-case series used by the trend plot
total_cases = df_hk_covid['Number of confirmed cases']


# Extract the scalar with .iloc before converting: calling int() directly on a
# one-element Series is deprecated in recent pandas releases, and it raises
# outright if the mask ever matches more than one row.
latest_total_cases = int(df_hk_covid.loc[idx_latest_date, 'Number of confirmed cases'].iloc[-1])


# Configure figure style and size.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so use whichever name is available.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
plt.figure(figsize=(12, 8))

# Plot the cumulative confirmed cases against the reporting date
plt.plot(dates, total_cases.values, linewidth=2)

# Format the date in the x-axis (slanted labels, e.g. "02 Sep, 2021")
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%d %b, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

# Plot axis and titles
plt.xlabel('')
plt.ylabel('Cases', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=13)
plt.suptitle('Accumulated SARS-CoV-2 Cases in Hong Kong by Time', fontsize=20)

# Annotate the latest cumulative count; the text is offset down and to the
# left of the last data point so it stays inside the axes.
plt.annotate(text=f"Total cases: {latest_total_cases}", xy=(latest_date, latest_total_cases),
             xycoords='data', xytext=(-90, -30), textcoords='offset points', fontsize=14)
plt.show()


# Daily new cases = day-over-day change of the cumulative count.
# The first row has no predecessor, so its NaN difference is replaced by 0.
df_hk_covid['Number of daily confirmed cases'] = df_hk_covid["Number of confirmed cases"].diff().fillna(0).astype(int)
daily_cases = df_hk_covid['Number of daily confirmed cases']


# Extract the scalar with .iloc before converting: int() on a one-element
# Series is deprecated in recent pandas releases.
today_cases = int(df_hk_covid.loc[idx_latest_date, 'Number of daily confirmed cases'].iloc[-1])
# Largest single-day increase over the whole period
max_cases = int(daily_cases.max())


# Configure figure style and size
plt.figure(figsize=(12, 8))

# Plot the daily confirmed cases against the reporting date
plt.plot(dates, daily_cases, '-', linewidth=2)

# Format the date in the x-axis
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%d %b, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

# Plot axis and titles
plt.xlabel('')
plt.xticks(fontsize=14)
plt.yticks(fontsize=13)
plt.ylabel('Daily Cases', fontsize=16)
plt.suptitle('SARS-CoV-2 Daily Confirmed Cases in Hong Kong', fontsize=20)

# Annotate the latest daily count next to the last data point
plt.annotate(text=f"Latest daily cases: {today_cases}", xy=(latest_date, today_cases),
             xycoords='data', xytext=(-100, 50), textcoords='offset points', fontsize=14)

# Annotate the peak at the peak itself. `dates` and `daily_cases` share the
# DataFrame index, so the label returned by idxmax() locates the peak date.
# Anchoring here (rather than at the latest point with a large hand-tuned
# offset) keeps the label on the peak when the data or figure size changes.
peak_date = dates.loc[daily_cases.idxmax()]
plt.annotate(text=f"Peak daily cases: {max_cases}", xy=(peak_date, max_cases),
             xycoords='data', xytext=(10, 0), textcoords='offset points',
             ha='left', va='center', fontsize=14)

plt.show()


# Active cases = confirmed cases that have neither died nor been discharged
df_hk_covid['Number of active cases'] = (
    df_hk_covid['Number of confirmed cases']
    - df_hk_covid['Number of death cases']
    - df_hk_covid['Number of discharge cases']
)


# Latest values of each category. The scalar is extracted with .iloc before
# converting because int() on a one-element Series is deprecated in recent
# pandas releases.
latest_row = df_hk_covid.loc[idx_latest_date]
latest_active_cases = int(latest_row["Number of active cases"].iloc[-1])
latest_death_cases = int(latest_row['Number of death cases'].iloc[-1])
latest_discharge_cases = int(latest_row['Number of discharge cases'].iloc[-1])


# For stack plots, we need an explicit Axes object; size the figure up front
fig, ax = plt.subplots(figsize=(12, 8))

# Define labels to show on the plot (same order as the stacked series below)
labels = ['Deaths', 'Discharged', 'Active']

# Plot the stack plot: deaths at the bottom, then discharged, then active
ax.stackplot(
    dates,
    df_hk_covid['Number of death cases'],
    df_hk_covid['Number of discharge cases'],
    df_hk_covid['Number of active cases'],
    labels=labels,
)

# Format the date in the x-axis
fig.autofmt_xdate()
date_format = mpl_dates.DateFormatter('%d %b, %Y')

# Plot legends, axis and titles
ax.legend(loc='upper left', fontsize=14)
ax.xaxis.set_major_locator(plt.MaxNLocator(18))
ax.xaxis.set_major_formatter(date_format)
plt.xticks(fontsize=14)
plt.yticks(fontsize=13)
ax.set_ylabel('Cases', fontsize=16)
fig.suptitle('Trend of SARS-CoV-2 Active Cases in Hong Kong', fontsize=20)

# Each layer is drawn on top of the ones below it, so the annotation anchors
# are the cumulative tops of the layers rather than the raw category counts.
top_of_deaths = latest_death_cases
top_of_discharged = top_of_deaths + latest_discharge_cases
top_of_active = top_of_discharged + latest_active_cases

ax.annotate(text=f"Latest death cases: {latest_death_cases}", xy=(latest_date, top_of_deaths),
            xycoords='data', xytext=(-160, 5), textcoords='offset points', fontsize=14)

ax.annotate(text=f"Latest discharge cases: {latest_discharge_cases}", xy=(latest_date, top_of_discharged),
            xycoords='data', xytext=(-200, -60), textcoords='offset points', fontsize=14)

ax.annotate(text=f"Latest active cases: {latest_active_cases}", xy=(latest_date, top_of_active),
            xycoords='data', xytext=(-160, 10), textcoords='offset points', fontsize=14)

plt.show()


# Worldwide dataset; it has one row per location and date
df_world_covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')
df_world_covid


# Keep only the Hong Kong rows and the case/vaccination/population columns
is_hong_kong = df_world_covid["location"] == "Hong Kong"
hk_columns = [
    "date",
    "total_cases",
    "new_cases",
    "total_vaccinations",
    "people_vaccinated",
    "people_fully_vaccinated",
    "population",
]
df_hk_vac = df_world_covid.loc[is_hong_kong, hk_columns]
df_hk_vac


# Summarise column dtypes and non-null counts of the Hong Kong subset
df_hk_vac.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 588 entries, 45217 to 45804
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   date                     588 non-null    object 
 1   total_cases              588 non-null    float64
 2   new_cases                588 non-null    float64
 3   total_vaccinations       192 non-null    float64
 4   people_vaccinated        192 non-null    float64
 5   people_fully_vaccinated  183 non-null    float64
 6   population               588 non-null    float64
dtypes: float64(6), object(1)
memory usage: 36.8+ KB


# Parse the ISO-style date strings and make sure rows are in chronological
# order, since the forward fill below relies on that order.
df_hk_vac = df_hk_vac.assign(
    date=pd.to_datetime(df_hk_vac['date'], yearfirst=True)
).sort_values('date')

# The vaccination columns are running totals that are not reported on every
# day. Carry the last reported total forward over gaps instead of zeroing
# them, otherwise a day without a report (including the latest day) would
# look like nobody had been vaccinated.
vaccination_columns = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]
df_hk_vac[vaccination_columns] = df_hk_vac[vaccination_columns].ffill()

# Anything still missing precedes the first report, so 0 is the right value
df_hk_vac = df_hk_vac.fillna(0)
df_hk_vac


# Derived vaccination metrics (the pct_* columns are fractions in [0, 1])
df_hk_vac["people_unvaccinated"] = df_hk_vac["population"] - df_hk_vac["people_vaccinated"]
df_hk_vac["pct_vaccinated"] = df_hk_vac["people_vaccinated"] / df_hk_vac["population"]
df_hk_vac["pct_fully_vaccinated"] = df_hk_vac["people_fully_vaccinated"] / df_hk_vac["population"]


# Latest vaccination ratios. The scalar is extracted with .iloc before
# converting because float() on a one-element Series is deprecated in recent
# pandas releases.
idx_latest_date_vac = df_hk_vac['date'] == df_hk_vac['date'].max()
pct_vac = float(df_hk_vac.loc[idx_latest_date_vac, 'pct_vaccinated'].iloc[-1])
pct_com_vac = float(df_hk_vac.loc[idx_latest_date_vac, 'pct_fully_vaccinated'].iloc[-1])


# Configure figure style and size
plt.figure(figsize=(12, 8))

# Wedge sizes: fully vaccinated, first dose only, and no dose at all
slices = [pct_com_vac, pct_vac - pct_com_vac, 1 - pct_vac]
labels = ['Two doses', 'First dose', 'Unvaccinated']
pie_options = dict(
    labels=labels,
    wedgeprops={'edgecolor': 'black'},
    shadow=True,
    explode=(0.2, 0.2, 0),  # pull the two vaccinated wedges out of the pie
    autopct='%.2f%%',
    textprops={'fontsize': 14},
)
plt.pie(slices, **pie_options)
plt.suptitle('SARS-CoV-2 Vaccine Dose Status in Hong Kong', fontsize=20)
plt.show()


# Join the two datasets on the calendar date, keeping only dates that appear
# in both of them (inner join).
df_hk_covid_all = pd.merge(
    df_hk_vac,
    df_hk_covid,
    how="inner",
    left_on="date",
    right_on='As of date',
)


# Inspect the combined columns and their non-null counts
df_hk_covid_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 588 entries, 0 to 587
Data columns (total 22 columns):
 #   Column                                                Non-Null Count  Dtype         
---  ------                                                --------------  -----         
 0   date                                                  588 non-null    datetime64[ns]
 1   total_cases                                           588 non-null    float64       
 2   new_cases                                             588 non-null    float64       
 3   total_vaccinations                                    588 non-null    float64       
 4   people_vaccinated                                     588 non-null    float64       
 5   people_fully_vaccinated                               588 non-null    float64       
 6   population                                            588 non-null    float64       
 7   people_unvaccinated                                   588 non-null    float64       
 8   pct_vaccinated                                        588 non-null    float64       
 9   pct_fully_vaccinated                                  588 non-null    float64       
 10  As of date                                            588 non-null    datetime64[ns]
 11  As of time                                            179 non-null    object        
 12  Number of confirmed cases                             588 non-null    int64         
 13  Number of ruled out cases                             74 non-null     float64       
 14  Number of cases still hospitalised for investigation  74 non-null     float64       
 15  Number of cases fulfilling the reporting criteria     74 non-null     float64       
 16  Number of death cases                                 588 non-null    int64         
 17  Number of discharge cases                             588 non-null    int64         
 18  Number of probable cases                              588 non-null    int64         
 19  Number of hospitalised cases in critical condition    514 non-null    float64       
 20  Number of daily confirmed cases                       588 non-null    int32         
 21  Number of active cases                                588 non-null    int64         
dtypes: datetime64[ns](2), float64(13), int32(1), int64(5), object(1)
memory usage: 103.4+ KB


# Name each series after the dataset it actually comes from:
# the 'Number of ...' columns originate from the Hong Kong government CSV,
# while 'total_cases' / 'new_cases' originate from the worldwide dataset.
# Total cases
cases_gov = df_hk_covid_all['Number of confirmed cases']
cases_world = df_hk_covid_all['total_cases']
# Daily cases
daily_gov = df_hk_covid_all['Number of daily confirmed cases']
daily_world = df_hk_covid_all['new_cases']


# Fraction of dates on which both datasets report the same cumulative total
(cases_gov == cases_world).sum() / len(df_hk_covid_all)

0.9030612244897959


# Fraction of dates on which both datasets report the same daily count
(daily_gov == daily_world).sum() / len(df_hk_covid_all)

0.8673469387755102


# Configure figure style and size
plt.figure(figsize=(8, 8))
# Plot the figure. The columns are read straight from the merged frame so each
# axis is guaranteed to match its label: the 'Number of daily confirmed cases'
# column comes from the Hong Kong government CSV, 'new_cases' from the
# worldwide dataset.
plt.scatter(
    df_hk_covid_all['Number of daily confirmed cases'],
    df_hk_covid_all['new_cases'],
    edgecolor='black',
    alpha=.3,
)
# Plot legends, axis and titles
plt.xlabel('Cases from DATA.GOV.HK', fontsize=16)
plt.ylabel('Cases from Our World in Data', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=13)
# The plotted quantity is daily confirmed cases, not active cases
plt.suptitle('Daily SARS-CoV-2 Confirmed Cases in Hong Kong from two datasets', fontsize=20)
plt.show()

	As of date	As of time	Number of confirmed cases	Number of ruled out cases	Number of cases still hospitalised for investigation	Number of cases fulfilling the reporting criteria	Number of death cases	Number of discharge cases	Number of probable cases	Number of hospitalised cases in critical condition
0	08/01/2020	12:00	0	21.0	17.0	38.0	0	0	0	NaN
1	09/01/2020	12:00	0	25.0	23.0	48.0	0	0	0	NaN
2	10/01/2020	12:00	0	31.0	23.0	54.0	0	0	0	NaN
3	11/01/2020	12:00	0	46.0	15.0	61.0	0	0	0	NaN
4	12/01/2020	12:00	0	51.0	16.0	67.0	0	0	0	NaN
...	...	...	...	...	...	...	...	...	...	...
599	29/08/2021	NaN	12107	NaN	NaN	NaN	212	11783	1	0.0
600	30/08/2021	NaN	12110	NaN	NaN	NaN	212	11786	1	0.0
601	31/08/2021	NaN	12112	NaN	NaN	NaN	212	11791	1	0.0
602	01/09/2021	NaN	12113	NaN	NaN	NaN	212	11798	1	0.0
603	02/09/2021	NaN	12113	NaN	NaN	NaN	212	11799	1	0.0

	iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	new_deaths_smoothed	...	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index	excess_mortality
0	AFG	Asia	Afghanistan	2020-02-24	1.0	1.0	NaN	NaN	NaN	NaN	...	NaN	597.029	9.59	NaN	NaN	37.746	0.5	64.83	0.511	NaN
1	AFG	Asia	Afghanistan	2020-02-25	1.0	0.0	NaN	NaN	NaN	NaN	...	NaN	597.029	9.59	NaN	NaN	37.746	0.5	64.83	0.511	NaN
2	AFG	Asia	Afghanistan	2020-02-26	1.0	0.0	NaN	NaN	NaN	NaN	...	NaN	597.029	9.59	NaN	NaN	37.746	0.5	64.83	0.511	NaN
3	AFG	Asia	Afghanistan	2020-02-27	1.0	0.0	NaN	NaN	NaN	NaN	...	NaN	597.029	9.59	NaN	NaN	37.746	0.5	64.83	0.511	NaN
4	AFG	Asia	Afghanistan	2020-02-28	1.0	0.0	NaN	NaN	NaN	NaN	...	NaN	597.029	9.59	NaN	NaN	37.746	0.5	64.83	0.511	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
113624	ZWE	Africa	Zimbabwe	2021-08-28	124367.0	131.0	268.571	4390.0	16.0	22.000	...	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571	NaN
113625	ZWE	Africa	Zimbabwe	2021-08-29	124437.0	70.0	255.000	4401.0	11.0	21.714	...	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571	NaN
113626	ZWE	Africa	Zimbabwe	2021-08-30	124581.0	144.0	225.714	4416.0	15.0	17.571	...	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571	NaN
113627	ZWE	Africa	Zimbabwe	2021-08-31	124773.0	192.0	207.571	4419.0	3.0	14.143	...	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571	NaN
113628	ZWE	Africa	Zimbabwe	2021-09-01	124960.0	187.0	178.429	4438.0	19.0	14.286	...	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571	NaN

	date	total_cases	new_cases	total_vaccinations	people_vaccinated	people_fully_vaccinated	population
45217	2020-01-23	2.0	2.0	NaN	NaN	NaN	7552800.0
45218	2020-01-24	2.0	0.0	NaN	NaN	NaN	7552800.0
45219	2020-01-25	5.0	3.0	NaN	NaN	NaN	7552800.0
45220	2020-01-26	8.0	3.0	NaN	NaN	NaN	7552800.0
45221	2020-01-27	8.0	0.0	NaN	NaN	NaN	7552800.0
...	...	...	...	...	...	...	...
45800	2021-08-28	12100.0	6.0	7411981.0	4077707.0	3334274.0	7552800.0
45801	2021-08-29	12107.0	7.0	7463078.0	4096452.0	3366626.0	7552800.0
45802	2021-08-30	12110.0	3.0	7527021.0	4119787.0	3407234.0	7552800.0
45803	2021-08-31	12112.0	2.0	7588397.0	4140055.0	3448342.0	7552800.0
45804	2021-09-01	12113.0	1.0	7637640.0	4162074.0	3475566.0	7552800.0

	date	total_cases	new_cases	total_vaccinations	people_vaccinated	people_fully_vaccinated	population
45217	2020-01-23	2.0	2.0	0.0	0.0	0.0	7552800.0
45218	2020-01-24	2.0	0.0	0.0	0.0	0.0	7552800.0
45219	2020-01-25	5.0	3.0	0.0	0.0	0.0	7552800.0
45220	2020-01-26	8.0	3.0	0.0	0.0	0.0	7552800.0
45221	2020-01-27	8.0	0.0	0.0	0.0	0.0	7552800.0
...	...	...	...	...	...	...	...
45800	2021-08-28	12100.0	6.0	7411981.0	4077707.0	3334274.0	7552800.0
45801	2021-08-29	12107.0	7.0	7463078.0	4096452.0	3366626.0	7552800.0
45802	2021-08-30	12110.0	3.0	7527021.0	4119787.0	3407234.0	7552800.0
45803	2021-08-31	12112.0	2.0	7588397.0	4140055.0	3448342.0	7552800.0
45804	2021-09-01	12113.0	1.0	7637640.0	4162074.0	3475566.0	7552800.0

Hong Kong SARS-CoV-2 Pandemic Trend Analysis¶

About¶

Preprocessing¶

Visual analysis of the number of SARS-CoV-2 cases over time¶

Visual analysis of the daily cases of SARS-CoV-2¶

Visual analysis of the active cases of SARS-CoV-2¶

Analysis of Vaccination¶

Cross-validation of multiple datasets¶