import sqlite3
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
= "notebook_connected" pio.renderers.default
The NOAA has climate data from thousands of weather stations located around the world.
In this post, we will visualize how climate varies over time and by region in stations across the world.
Setting Up
We will begin with our import statements. For this project, we will use sqlite3, pandas, numpy, and plotly.
In order to increase the efficiency of our program, we will convert the csv files to an SQL database. We will access the queries using a connection from sqlite3.
= sqlite3.connect("temps.db")
conn
= pd.read_csv("temps_stacked.csv")
df "temperatures", conn, index=False)
df.to_sql(
= pd.read_csv("countries.csv")
df "countries", conn, index=False)
df.to_sql(
= pd.read_csv("station-metadata.csv")
df "stations", conn, index=False) df.to_sql(
Querying the Data
Next, we will create a function to query our data. We will allow the user to input the country, month, and year range they would like to look at.
def query_climate_database(country, year_begin, year_end, month):
"""
queries climate data for a country in a given month for a given period of years
Parameters
----------
country, name of country of interest
year_begin, first year in range
year_end, last year in range,
month, month to be looked at
Return
----------
pandas dataframe with query result
"""
# SQL command
= """SELECT S.name NAME, S.latitude, S.longitude, C.Name COUNTRY, T.year YEAR,
cmd T.month MONTH, ROUND(AVG(T.temp),2) mean_temp
FROM temperatures T
LEFT JOIN stations S ON T.id = S.id
LEFT JOIN countries C ON SUBSTRING(T.id,1,2) == C.[FIPS 10-4]
WHERE T.year >= {1} AND T.year <= {2} AND T.month == {3} AND C.Name == \'{0}\'
GROUP BY T.id, T.year;""".format(country, year_begin, year_end, month)
return pd.read_sql_query(cmd, conn)
'Peru',2018,2020,1) query_climate_database(
NAME | LATITUDE | LONGITUDE | COUNTRY | YEAR | MONTH | mean_temp | |
---|---|---|---|---|---|---|---|
0 | IQUITOS | -3.783 | -73.300 | Peru | 2018 | 1 | 25.75 |
1 | IQUITOS | -3.783 | -73.300 | Peru | 2019 | 1 | 25.85 |
2 | IQUITOS | -3.783 | -73.300 | Peru | 2020 | 1 | 26.54 |
3 | CHACHAPOYAS | -6.200 | -77.850 | Peru | 2020 | 1 | 16.59 |
4 | TARAPOTO | -6.500 | -76.367 | Peru | 2018 | 1 | 27.46 |
5 | TARAPOTO | -6.500 | -76.367 | Peru | 2019 | 1 | 27.28 |
6 | TARAPOTO | -6.500 | -76.367 | Peru | 2020 | 1 | 28.45 |
7 | AREQUIPA | -16.332 | -71.567 | Peru | 2018 | 1 | 14.72 |
8 | AREQUIPA | -16.332 | -71.567 | Peru | 2019 | 1 | 15.24 |
9 | AREQUIPA | -16.332 | -71.567 | Peru | 2020 | 1 | 15.28 |
10 | PEDRO_CANGA | -3.553 | -80.381 | Peru | 2018 | 1 | 26.47 |
11 | PEDRO_CANGA | -3.553 | -80.381 | Peru | 2019 | 1 | 27.15 |
12 | PEDRO_CANGA | -3.553 | -80.381 | Peru | 2020 | 1 | 27.75 |
13 | CAPITAN_FAP_GUILLERMO_CONCHA | -5.206 | -80.616 | Peru | 2018 | 1 | 27.16 |
14 | CAPITAN_FAP_GUILLERMO_CONCHA | -5.206 | -80.616 | Peru | 2019 | 1 | 27.12 |
15 | CAPITAN_FAP_GUILLERMO_CONCHA | -5.206 | -80.616 | Peru | 2020 | 1 | 27.00 |
16 | MOISES_BENZAQUEN_RENGIFO | -5.894 | -76.118 | Peru | 2018 | 1 | 27.08 |
17 | MOISES_BENZAQUEN_RENGIFO | -5.894 | -76.118 | Peru | 2019 | 1 | 27.09 |
18 | MOISES_BENZAQUEN_RENGIFO | -5.894 | -76.118 | Peru | 2020 | 1 | 28.14 |
19 | CAPT_JOSE_AQUINONES_GONZALE | -6.787 | -79.828 | Peru | 2018 | 1 | 23.05 |
20 | CAPT_JOSE_AQUINONES_GONZALE | -6.787 | -79.828 | Peru | 2019 | 1 | 24.93 |
21 | CAPT_JOSE_AQUINONES_GONZALE | -6.787 | -79.828 | Peru | 2020 | 1 | 24.41 |
22 | JUANJUI | -7.169 | -76.729 | Peru | 2018 | 1 | 27.99 |
23 | JUANJUI | -7.169 | -76.729 | Peru | 2019 | 1 | 28.27 |
24 | JUANJUI | -7.169 | -76.729 | Peru | 2020 | 1 | 29.41 |
25 | CAPITAN_CARLOS_MARTINEZ_DE_PI | -8.081 | -79.109 | Peru | 2018 | 1 | 20.52 |
26 | CAPITAN_CARLOS_MARTINEZ_DE_PI | -8.081 | -79.109 | Peru | 2019 | 1 | 22.99 |
27 | CAPITAN_CARLOS_MARTINEZ_DE_PI | -8.081 | -79.109 | Peru | 2020 | 1 | 21.86 |
28 | CAP_FAP_DAVID_ABENZUR_RENGIFO | -8.378 | -74.574 | Peru | 2018 | 1 | 26.17 |
29 | CAP_FAP_DAVID_ABENZUR_RENGIFO | -8.378 | -74.574 | Peru | 2019 | 1 | 25.98 |
30 | CAP_FAP_DAVID_ABENZUR_RENGIFO | -8.378 | -74.574 | Peru | 2020 | 1 | 27.11 |
31 | TENIENTE_JAIME_A_DE_MONTREUIL | -9.150 | -78.524 | Peru | 2020 | 1 | 23.19 |
32 | TINGO_MARIA | -9.133 | -75.950 | Peru | 2018 | 1 | 25.56 |
33 | TINGO_MARIA | -9.133 | -75.950 | Peru | 2019 | 1 | 25.97 |
34 | TINGO_MARIA | -9.133 | -75.950 | Peru | 2020 | 1 | 26.32 |
35 | JORGE_CHAVEZ_INTL | -12.022 | -77.114 | Peru | 2018 | 1 | 22.04 |
36 | JORGE_CHAVEZ_INTL | -12.022 | -77.114 | Peru | 2019 | 1 | 23.73 |
37 | JORGE_CHAVEZ_INTL | -12.022 | -77.114 | Peru | 2020 | 1 | 23.22 |
38 | PADRE_ALDAMIZ | -12.614 | -69.229 | Peru | 2018 | 1 | 26.74 |
39 | PADRE_ALDAMIZ | -12.614 | -69.229 | Peru | 2019 | 1 | 27.18 |
40 | PADRE_ALDAMIZ | -12.614 | -69.229 | Peru | 2020 | 1 | 28.24 |
41 | CORONEL_FAP_ALFREDO_MENDIVIL | -13.155 | -74.204 | Peru | 2018 | 1 | 16.65 |
42 | CORONEL_FAP_ALFREDO_MENDIVIL | -13.155 | -74.204 | Peru | 2019 | 1 | 17.19 |
43 | CORONEL_FAP_ALFREDO_MENDIVIL | -13.155 | -74.204 | Peru | 2020 | 1 | 18.03 |
44 | TENIENTE_ALEJANDRO_VELASCO_AS | -13.536 | -71.939 | Peru | 2018 | 1 | 12.56 |
45 | TENIENTE_ALEJANDRO_VELASCO_AS | -13.536 | -71.939 | Peru | 2019 | 1 | 13.26 |
46 | TENIENTE_ALEJANDRO_VELASCO_AS | -13.536 | -71.939 | Peru | 2020 | 1 | 13.06 |
47 | PISCO_INTL | -13.745 | -76.220 | Peru | 2018 | 1 | 22.32 |
48 | PISCO_INTL | -13.745 | -76.220 | Peru | 2019 | 1 | 22.98 |
49 | PISCO_INTL | -13.745 | -76.220 | Peru | 2020 | 1 | 22.72 |
50 | JULIACA | -15.467 | -70.158 | Peru | 2018 | 1 | 11.34 |
51 | JULIACA | -15.467 | -70.158 | Peru | 2019 | 1 | 12.20 |
52 | JULIACA | -15.467 | -70.158 | Peru | 2020 | 1 | 12.74 |
53 | CORONEL_CARLOS_CIRIANI_SANTA | -18.053 | -70.276 | Peru | 2018 | 1 | 21.26 |
54 | CORONEL_CARLOS_CIRIANI_SANTA | -18.053 | -70.276 | Peru | 2019 | 1 | 21.58 |
55 | CORONEL_CARLOS_CIRIANI_SANTA | -18.053 | -70.276 | Peru | 2020 | 1 | 22.37 |
Above, we can see the mean temperatures from stations in Peru in January, years 2018-2020.
Plotting Yearly Temperature Changes
Now that we have the query function defined, we will use this to create a new function that plots the average change in temperature over a given year range for a given month. To accomplish this, we will put the mean temperatures across the years in a linear classifier and extract the coefficient from it to find the average yearly increase.
from sklearn.linear_model import LinearRegression
= {1:'January', 2:'February', 3:'March', 4:'April', 5:'May', 6:'June',
monthDict 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', 12:'December'}
def temperature_coefficient_plot(country, year_begin, year_end, month, min_obs, **kwargs):
"""
displays yearly change in temperature for a country in a given month for a given period of years
Parameters
----------
country, name of country of interest
year_begin, first year in range
year_end, last year in range,
month, month to be looked at
min_obs, min required number of years of data for a given station
Return
----------
map with interactive points representing stations overlayed
"""
# dict to convert month number to month name
= {1:'January', 2:'February', 3:'March', 4:'April', 5:'May', 6:'June',
monthDict 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', 12:'December'}
# get climate data
= query_climate_database(country, year_begin, year_end, month)
db
# filter out stations with less than min_obs
"total_obs"] = db.groupby("NAME")["YEAR"].transform("size")
db[= db[db["total_obs"] > min_obs]
db
# create new column for yearly increase
"Estimated Yearly Increase (°C)"] = 0
db[
# determine yearly increase
for station in db["NAME"].unique():
= db[db["NAME"]==station]["YEAR"].to_numpy()
X = db[db["NAME"]==station]["mean_temp"]
y
# create linear model
= LinearRegression().fit(X.reshape(-1,1),y)
reg "NAME"]==station,"Estimated Yearly Increase (°C)"] = np.round(reg.coef_[0],4)
db.loc[db[
# plot on map
= px.scatter_mapbox(db,
fig = "LATITUDE",
lat= "LONGITUDE",
lon="NAME",
hover_name= "Yearly Temperature Changes in {0} for stations in {1}, years {2}-{3}".format(monthDict[month],
title
country,
year_begin,
year_end),="Estimated Yearly Increase (°C)",
color=0, **kwargs)
color_continuous_midpointreturn fig
# test function
= px.colors.diverging.RdGy_r # choose a colormap
color_map
= temperature_coefficient_plot("India", 1980, 2020, 1,
fig = 10,
min_obs = 2,
zoom ="carto-positron",
mapbox_style=color_map)
color_continuous_scale
= "notebook") fig.show(renderer
Above we can see the changes in temperatures from stations in India in January, years 1980-2020. Let’s try another country to see how the changes differ in other parts of the world. I will use Australia for the next analysis.
= px.colors.diverging.RdGy_r # choose a colormap
color_map
"Australia", 1980, 2020, 1,
temperature_coefficient_plot(= 10,
min_obs = 2,
zoom ="carto-positron",
mapbox_style=color_map) color_continuous_scale
Here, we can see that in Australia, most weather stations reported a fairly notable yearly increase in temperature between 1980 and 2020.
Plotting Temperature Variations across Latitudes
Next, we will look at how average temperatures can vary across latitudes. We will use a function that queries the necessary data and then plots it on a scatter plot. I will use Chile for this example, since it spans a large range of latitudes.
def plot_latitude_temps(country, year_begin, year_end, month, **kwargs):
"""
Plots mean temperature vs latitude for a given country, month, and year range
Parameters
----------
country, name of country of interest
year_begin, first year in range
year_end, last year in range,
month, month to be looked at
Return
----------
map with interactive points representing stations overlayed
"""
# SQL query
= """SELECT S.name NAME, S.latitude, C.Name COUNTRY,
cmd T.month MONTH, ROUND(AVG(T.temp),2) mean_temp
FROM temperatures T
LEFT JOIN stations S ON T.id = S.id
LEFT JOIN countries C ON SUBSTRING(T.id,1,2) == C.[FIPS 10-4]
WHERE T.year >= {1} AND T.year <= {2} AND T.month == {3} AND C.Name == \'{0}\'
GROUP BY T.id;""".format(country, year_begin, year_end, month)
# read query
= pd.read_sql_query(cmd, conn)
db
# create scatter plot
= px.scatter(db,
fig ="LATITUDE",
x="mean_temp",
y="NAME",
hover_name={
labels"LATITUDE" : "Latitude",
"mean_temp" : "Mean Temperature"},
= "Variations of Temperatures in %s from stations in %s, Years %s-%s" %(monthDict[month],
title
country,
year_begin,**kwargs)
year_end), return fig
# test function
"Chile", 2000,2020,6) plot_latitude_temps(
This plot shows a trend that as the latitude comes closer to the equator, the mean temperature fairly consistently becomes higher.
Plotting Yearly Temperature Variations by Station
This is an interesting observation, and opens the questions of how much the temperatures vary in each station throughout the year. Next, we will look at a histogram of the temperatures recorded at each station, broken down by year. I will also use Chile in this example, but we will look at the years 2017-2018.
def query_station_data(country, year_begin, year_end):
"""
queries climate data for a country in a given month for a given period of years
Parameters
----------
country, name of country of interest
year_begin, first year in range
year_end, last year in range,
Return
----------
pandas dataframe with query result
"""
# SQL command
= """SELECT S.name NAME, S.latitude, C.Name COUNTRY,
cmd T.month MONTH, ROUND(T.temp,2) temp, T.year YEAR
FROM temperatures T
LEFT JOIN stations S ON T.id = S.id
LEFT JOIN countries C ON SUBSTRING(T.id,1,2) == C.[FIPS 10-4]
WHERE T.year >= {1} AND T.year <= {2} AND C.Name == \'{0}\';""".format(country, year_begin, year_end)
return pd.read_sql_query(cmd, conn)
def plot_by_station(country, year_begin, year_end, **kwargs):
"""
Plots histogram with temperature data by station
Parameters
----------
country, name of country of interest
year_begin, first year in range
year_end, last year in range,
Return
----------
histogram with temperature data by station, faceted by year
"""
# SQL query
= query_station_data(country, year_begin, year_end)
db
# create histogram
= px.box(db,
fig ="NAME",
x="temp",
y="NAME",
hover_name= "YEAR",
facet_row ={
labels"NAME" : "Station",
"temp" : "Temperature"},
= "Variation in Temperatures by Station in %s, Years %s-%s" %(country,
title
year_begin,
year_end), **kwargs)
return fig
"Chile", 2017,2018)
plot_by_station(
conn.close()
Here, we can see that stations in Chile not only have great temperature variations when compared to each other, but also compared to itself within the same year.