"""
@author: ruben calje
"""
import warnings
from numpy import ndarray
from pandas import (DataFrame, Timedelta, Timestamp, infer_freq, read_csv,
to_datetime, to_timedelta)
from ..timeseries import TimeSeries
def read_knmi(fname, variables='RD'):
    """Import KNMI data from a file as Pastas TimeSeries.

    Parameters
    ----------
    fname: str
        Filename and path to a KNMI file.
    variables: str, list of str or None, optional
        Name(s) of the variable(s) to extract. When None, every variable
        that is described in the file header and that is present as a
        column in the data is extracted.

    Returns
    -------
    ts: pastas.TimeSeries or list of pastas.TimeSeries
        A single TimeSeries when exactly one station/variable combination
        is found, otherwise a list with one TimeSeries per combination.

    Raises
    ------
    ValueError
        If a requested variable is not a column of the data in the file.

    """
    warnings.warn("The read module of pastas is deprecated please use "
                  "hydropandas instead -> https://hydropandas.readthedocs.io",
                  DeprecationWarning)
    knmi = KnmiStation.fromfile(fname)

    if variables is None:
        # The header also describes columns that are not available as
        # series (e.g. YYYYMMDD, which is turned into the index), so only
        # keep the described variables that are actual data columns.
        variables = [name for name in knmi.variables
                     if name in knmi.data.columns]
    elif isinstance(variables, str):
        variables = [variables]

    # Pastas settings-name for the variables with a known kind.
    kinds = {'RD': 'prec', 'RH': 'prec', 'EV24': 'evap'}
    has_stations = knmi.stations is not None and not knmi.stations.empty

    ts = []
    for code in knmi.data['STN'].unique():
        for variable in variables:
            if variable not in knmi.data.keys():
                raise ValueError(
                    "variable %s is not in this dataset. Please use one of "
                    "the following keys: %s" % (variable, knmi.data.keys()))

            series = knmi.data.loc[knmi.data['STN'] == code, variable]
            # get rid of the hours when data is daily
            if infer_freq(series.index) == 'D':
                series.index = series.index.normalize()

            metadata = {}
            if has_stations:
                station = knmi.stations.loc[code, :]
                metadata['x'] = station.LON_east
                metadata['y'] = station.LAT_north
                metadata['z'] = station.ALT_m
                metadata['projection'] = 'epsg:4326'
                stationname = station.NAME
            else:
                # No station table in the file: fall back to the code.
                stationname = str(code)
            metadata['description'] = knmi.variables[variable]

            ts.append(TimeSeries(series, name=variable + ' ' + stationname,
                                 metadata=metadata,
                                 settings=kinds.get(variable)))

    if len(ts) == 1:
        ts = ts[0]
    return ts
class KnmiStation:
    """Read daily or hourly meteorological data from stations of the KNMI.

    Data can be read from a file:

        knmi = KnmiStation.fromfile(filename)

    Files can be downloaded for the meteorological stations at:
    https://www.knmi.nl/nederland-nu/klimatologie/daggegevens
    or
    http://projects.knmi.nl/klimatologie/daggegevens/selectie.cgi
    For the rainfall stations data is available at:
    https://www.knmi.nl/nederland-nu/klimatologie/monv/reeksen

    Data from the meteorological stations can also be downloaded directly,
    for example with (260 = De Bilt):

        knmi = KnmiStation.download(stns=260, start=datetime(1970, 1, 1),
                                    end=datetime(1971, 1, 1))

    Passing the download arguments to the constructor still works, but
    emits a FutureWarning.

    Hourly data can be downloaded with the 'interval' keyword set to 'hour'
    or 'hourly':

        knmi = KnmiStation.download(stns=260, start='2017', end='2018',
                                    interval='hourly')

    Data from rainfall-stations can be downloaded by asking for the variable
    'RD' (the stns variable now describes codes for rainfall-stations, 550
    is the rainfall-station in De Bilt):

        knmi = KnmiStation.download(stns=550, start='2018', end='2019',
                                    vars='RD')

    Times are recalculated to UT+1 (standard-time in the Netherlands), from
    UT. Also the datetime-index of the data is set at the end of the period
    that the data describes. So the rainfall between 2018-01-01 09:00:00
    (08:00:00 UT) and 2018-01-02 09:00:00 (08:00:00 UT) gets the timestamp
    of 2018-01-02 09:00:00.

    Units in the data of the KNMI are recalculated to more basic SI-units.
    So mm are transformed to m, and a factor of 0.1 is transformed to 1.

    A description of the variables is found in knmi.variables.
    Information about the measurement-station(s) is found in knmi.stations.
    The measurement-data itself is found in knmi.data.

    """

    def __init__(self, *args, **kwargs):
        warnings.warn("The read module of pastas is deprecated please use "
                      "hydropandas instead -> "
                      "https://hydropandas.readthedocs.io",
                      DeprecationWarning)
        self.stations = DataFrame()
        self.variables = dict()
        self.data = DataFrame()
        if len(args) > 0 or len(kwargs) > 0:
            warnings.warn("In the future use KnmiStation.download(**kwargs) "
                          "instead of KnmiStation(**kwargs)", FutureWarning)
            self._download(*args, **kwargs)
            # disable download method, as old code will call this again
            self.download = lambda *args, **kwargs: None
        else:
            # change download method to the instance-method
            self.download = self._download

    # Construct KnmiStation from file
    @classmethod
    def fromfile(cls, fname):
        """Read data from a KNMI-file.

        Parameters
        ----------
        fname : str
            Filename and path of the KNMI-file.

        Returns
        -------
        KnmiStation
            A new instance with stations, variables and data filled.

        """
        self = cls()
        # readdata closes the file itself when it is done; the
        # with-statement also closes it when readdata raises.
        with open(fname, 'r') as f:
            self.readdata(f)
        return self

    # Construct KnmiStation from download
    @classmethod
    def download(cls, start=None, end=None, inseason=False, vars='ALL',
                 stns=260, interval='daily'):
        """Downloads data from the KNMI-server.

        Parameters
        ----------
        start : str or pandas Timestamp
            The start-date of the data to be downloaded
            (defaults to start of current year)
        end : str or pandas Timestamp
            The end-date of the data to be downloaded
            (defaults to today)
        inseason : bool
            Only download the data in the season between start and end
            (defaults to False)
        vars : str or list/tuple/ndarray
            The variable(s) to be downloaded
            Use 'RD' to download from rainfall stations
            (defaults to 'ALL')
        stns : int, str or list/tuple/ndarray
            station number(s) to be downloaded
            (defaults to 260 : 'De Bilt')
        interval : str
            The required data interval: 'daily' or 'hourly'
            (defaults to 'daily')

        Returns
        -------
        KnmiStation
            A new instance filled with the downloaded data.

        Notes
        -----
        For more information see
        https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script

        """
        self = cls()
        self._download(start=start, end=end, inseason=inseason, vars=vars,
                       stns=stns, interval=interval)
        return self

    @staticmethod
    def _as_list(value):
        """Return value as a list; a scalar becomes a one-element list."""
        if isinstance(value, (list, tuple, ndarray)):
            return list(value)
        return [value]

    @staticmethod
    def _maybe_float(text):
        """Return text converted to float, or text itself if that fails."""
        try:
            return float(text)
        except (ValueError, TypeError):
            return text

    def _download(self, start=None, end=None, inseason=False, vars='ALL',
                  stns=260, interval='daily'):
        """Request data from the KNMI-server and parse it with readdata.

        The parameters are described in the download-method.

        Raises
        ------
        ImportError
            If the optional module requests is not installed.
        ValueError
            If 'RD' is combined with an hourly interval or with other
            variables.

        """
        # Import the necessary modules (optional and not included in the
        # installation of pastas).
        try:
            import requests
        except ImportError as err:
            raise ImportError(
                'The module requests could not be imported. '
                'Please install through:\n'
                '>>> pip install requests\n'
                'or:\n'
                '>>> conda install requests') from err
        from io import StringIO

        if start is None:
            start = Timestamp(Timestamp.today().year, 1, 1)
        else:
            start = to_datetime(start)
        if end is None:
            end = Timestamp.today()
        else:
            end = to_datetime(end)

        vars = self._as_list(vars)
        # convert possible integers to string
        stns = [str(i) for i in self._as_list(stns)]

        hourly = interval.startswith('hour')
        if hourly and 'RD' in vars:
            message = 'Interval can not be hourly for rainfall-stations'
            raise ValueError(message)
        if 'RD' in vars and len(vars) > 1:
            message = 'Only daily precipitation can be downloaded from ' \
                      'rainfall-stations'
            raise ValueError(message)

        if hourly:
            # hourly data from meteorological stations
            url = 'https://www.daggegevens.knmi.nl/klimatologie/uurgegevens'
        elif 'RD' in vars:
            # daily data from rainfall-stations
            url = 'https://www.daggegevens.knmi.nl/klimatologie/monv/reeksen'
        else:
            # daily data from meteorological stations
            url = 'https://www.daggegevens.knmi.nl/klimatologie/daggegevens'

        vars = ":".join(vars)
        stns = ":".join(stns)
        if hourly:
            # the hourly request takes the hour as two extra digits
            data = {
                'start': start.strftime('%Y%m%d') + '01',
                'end': end.strftime('%Y%m%d') + '24',
                'vars': vars,
                'stns': stns,
            }
        else:
            data = {
                'start': start.strftime('%Y%m%d'),
                'end': end.strftime('%Y%m%d'),
                'inseason': str(int(inseason)),
                'vars': vars,
                'stns': stns,
            }

        result = requests.get(url, params=data).text
        f = StringIO(result)
        self.readdata(f)

    def _add_station(self, line):
        """Parse one line of the station table and add it to self.stations.

        The first field is the station number; the last column takes the
        remainder of the line, so a name that contains spaces stays intact.
        """
        line = line.strip().replace(':', '')
        # Split on any run of whitespace, at most once per station column.
        fields = line.split(None, len(self.stations.columns))
        stn = int(fields[0])
        self.stations.loc[stn] = [self._maybe_float(v) for v in fields[1:]]

    def readdata(self, f):
        """Parse the contents of a KNMI-file.

        Fills self.stations (only for files of meteorological stations),
        self.variables (name -> description, with adjusted units) and
        self.data (measurements with a datetime-index in UT+1, set at the
        end of the period that each record describes).

        Parameters
        ----------
        f : file-like object
            Opened text file or StringIO that supports readline, tell and
            seek. It is closed after the datablock is read.

        Raises
        ------
        NameError
            If a variable that is described in the header is not a column
            of the data.

        """
        self.stations = DataFrame()
        self.variables = dict()

        in_station_table = False
        line = f.readline()
        is_meteo = line.startswith('# ')

        # Process the header information (Everything < 'STN,')
        while 'STN,' not in line and line != "":
            # Pre-format the line
            line = line.strip('\n')
            line = line.lstrip('# ')

            if line.strip() == '':
                # If line is empty, skip line
                pass
            elif is_meteo and line.startswith('STN '):
                # Header of the station table (can only happen for
                # meteorological stations)
                in_station_table = True
                titles = line.strip().split()
                titles = [x.replace('(', '_').replace(')', '')
                          for x in titles]
                # Create pd.DataFrame for station data
                self.stations = DataFrame(columns=titles)
                self.stations.set_index(['STN'], inplace=True)
            elif ' = ' in line or ' : ' in line:
                # Line contains the description of a variable
                in_station_table = False
                separator = ' = ' if ' = ' in line else ' : '
                description = line.split(separator)
                self.variables[description[0].strip()] = \
                    description[1].strip()
            elif in_station_table:
                # Line follows the header of the station table
                self._add_station(line)

            # Read in a new line and start over
            line = f.readline()

        # The header information of the datablock
        line = line.strip('\n')
        line = line.lstrip('# ')
        header = [item.strip() for item in line.split(',')]

        pos = f.tell()
        line = f.readline()  # Skip empty line after header
        if line not in ("\n", "\r\n", "# \n", "# \r\n"):
            # sometimes there is no empty line between the header and the
            # data
            f.seek(pos)

        # Process the datablock
        data = read_csv(f, header=None, names=header, na_values=' ')

        # Close file
        f.close()

        if data.empty:
            warnings.warn('No KNMI data found')
            self.data = data
            return

        data.set_index(to_datetime(data.YYYYMMDD, format='%Y%m%d'),
                       inplace=True)
        data = data.drop('YYYYMMDD', axis=1)

        # convert the hours if provided
        if 'HH' in data.keys():
            # hourly data, Hourly division 05 runs from 04.00 UT to 5.00 UT
            data.index = data.index + to_timedelta(data['HH'], unit='h')
            data.pop('HH')
        elif 'H' in data.keys():
            # hourly data, Hourly division 05 runs from 04.00 UT to 5.00 UT
            data.index = data.index + to_timedelta(data['H'], unit='h')
            data.pop('H')
        else:
            # daily data
            if 'RD' in data.keys():
                # daily precipitation amount in 0.1 mm over the period 08.00
                # preceding day - 08.00 UTC present day
                data.index = data.index + Timedelta(8, unit='h')
            else:
                # add a full day for meteorological data, so that the
                # timestamp is at the end of the period in the data
                data.index = data.index + Timedelta(1, unit='d')

        # from UT to UT+1 (standard-time in the Netherlands)
        data.index = data.index + Timedelta(1, unit='h')

        # Delete empty columns
        if '' in data.columns:
            data.drop('', axis=1, inplace=True)

        # Adjust the unit of the measurements. Iterate over a snapshot, as
        # the T10N-key is renamed in self.variables inside the loop.
        trace_markers = (' (-1 for <0.05 mm)', ' (-1 voor <0.05 mm)')
        for key, value in list(self.variables.items()):
            # test if key exists in data
            if key not in data.columns:
                if key in ('YYYYMMDD', 'HH', 'H'):
                    # date and hour are in the index, nothing to adjust
                    continue
                if key == 'T10N':
                    # described as T10N, but the column is named T10
                    self.variables.pop(key)
                    key = 'T10'
                else:
                    raise NameError(key + ' does not exist in data')

            if any(marker in value for marker in trace_markers):
                # set 0.025 mm where data == -1
                is_trace = data[key] == -1
                if is_trace.any():
                    # cast first, 0.25 can not be stored in integers
                    data[key] = data[key].astype(float)
                    data.loc[is_trace, key] = 0.25  # unit is still 0.1 mm
                for marker in trace_markers:
                    value = value.replace(marker, '')

            # transform 0.1 to 1
            for marker, replacement in (('0.1 ', ''), (' tiende ', ' ')):
                if marker in value:
                    data[key] = data[key] * 0.1
                    value = value.replace(marker, replacement)

            # transform mm to m
            for marker in (' mm', ' millimeters'):
                if marker in value:
                    data[key] = data[key] * 0.001
                    value = value.replace(marker, ' m')

            # Values in percents, hPa and J/cm2 are not adjusted (yet)

            # Store new variable
            self.variables[key] = value

        self.data = data