Source code for pastas.read.knmi

"""
@author: ruben calje

"""

import warnings

from numpy import ndarray
from pandas import (DataFrame, Timedelta, Timestamp, infer_freq, read_csv,
                    to_datetime, to_timedelta)

from ..timeseries import TimeSeries


def read_knmi(fname, variables='RD'):
    """Import KNMI data from a file as Pastas TimeSeries.

    Parameters
    ----------
    fname: str
        Filename and path to a KNMI file.
    variables: str or list of str, optional
        Name(s) of the variable(s) to extract. When None, every variable
        described in the file header that also has a data column is
        extracted.

    Returns
    -------
    ts: pastas.TimeSeries or list of pastas.TimeSeries
        A single TimeSeries when exactly one station/variable combination
        is found, otherwise a list with one TimeSeries per combination.

    Raises
    ------
    ValueError
        If a requested variable is not a column of the data in the file.

    """
    warnings.warn("The read module of pastas is deprecated please use "
                  "hydropandas instead -> "
                  "https://hydropandas.readthedocs.io", DeprecationWarning)
    knmi = KnmiStation.fromfile(fname)

    if variables is None:
        # The header also describes keys such as YYYYMMDD (and HH) that end
        # up in the index instead of a column; requesting those would always
        # fail the column check below, so only keep real data columns.
        variables = [var for var in knmi.variables
                     if var in knmi.data.columns]
    elif isinstance(variables, str):
        variables = [variables]

    # Maps a KNMI variable onto the TimeSeries settings to apply
    kinds = {'RD': 'prec', 'RH': 'prec', 'EV24': 'evap'}
    has_stations = knmi.stations is not None and not knmi.stations.empty

    ts = []
    for code in knmi.data['STN'].unique():
        for variable in variables:
            if variable not in knmi.data.keys():
                raise ValueError(
                    "variable %s is not in this dataset. Please use one of "
                    "the following keys: %s" % (variable, knmi.data.keys()))

            series = knmi.data.loc[knmi.data['STN'] == code, variable]

            # get rid of the hours when data is daily
            try:
                is_daily = infer_freq(series.index) == 'D'
            except ValueError:
                # infer_freq needs at least three timestamps; for shorter
                # series the frequency is unknown and the index is kept.
                is_daily = False
            if is_daily:
                series.index = series.index.normalize()

            metadata = {}
            if has_stations:
                station = knmi.stations.loc[code, :]
                metadata['x'] = station.LON_east
                metadata['y'] = station.LAT_north
                metadata['z'] = station.ALT_m
                metadata['projection'] = 'epsg:4326'
                stationname = station.NAME
            else:
                stationname = str(code)
            metadata['description'] = knmi.variables[variable]

            ts.append(TimeSeries(series, name=variable + ' ' + stationname,
                                 metadata=metadata,
                                 settings=kinds.get(variable)))

    if len(ts) == 1:
        ts = ts[0]
    return ts
class KnmiStation:
    """Reads daily meteorological data in a file from stations of the KNMI:

        knmi = KnmiStation.fromfile(filename)

    Data can be downloaded for the meteorological stations at:
    https://www.knmi.nl/nederland-nu/klimatologie/daggegevens
    or
    http://projects.knmi.nl/klimatologie/daggegevens/selectie.cgi

    For the rainfall stations data is available at:
    https://www.knmi.nl/nederland-nu/klimatologie/monv/reeksen

    Also, data from the meteorological stations can be downloaded directly,
    for example with

        knmi = KnmiStation(stns=260, start=datetime(1970, 1, 1),
                           end=datetime(1971, 1, 1))  # 260 = de bilt
        knmi.download()

    Hourly data can be downloaded with the 'interval' keyword set to 'hour'
    or 'hourly':

        knmi = KnmiStation(stns=260, start='2017', end='2018',
                           interval='hourly')

    Data from rainfall-stations can be downloaded by asking for the variable
    'RD' (the stns variable now describes codes for rainfall-stations):

        knmi = KnmiStation(stns=550, start='2018', end='2019',
                           vars='RD')  # rainfall-station in de bilt

    Times are recalculated to UT+1 (standard-time in the Netherlands), from
    UT. Also the datetime-index of the data is set at the end of the period
    that the data describes. So the rainfall between 2018-01-01 09:00:00
    (08:00:00 UT) and 2018-01-02 09:00:00 (08:00:00 UT) gets the timestamp of
    2018-01-02 09:00:00

    Units in the data of the knmi are recalculated to more basic SI-units.
    So mm are transformed to m, and a factor of 0,1 is transformed to 1.

    A description of the variables is found in knmi.variables.
    Information about the measurement-station(s) is found in knmi.stations.
    The measurement-data itself is found in knmi.data

    """

    def __init__(self, *args, **kwargs):
        warnings.warn("The read module of pastas is deprecated please use "
                      "hydropandas instead -> "
                      "https://hydropandas.readthedocs.io",
                      DeprecationWarning)
        self.stations = DataFrame()
        self.variables = dict()
        self.data = DataFrame()
        if len(args) > 0 or len(kwargs) > 0:
            warnings.warn("In the future use KnmiStation.download(**kwargs) "
                          "instead of KnmiStation(**kwargs)", FutureWarning)
            self._download(*args, **kwargs)
            # disable download method, as old code will call this again
            self.download = lambda *args, **kwargs: None
        else:
            # change download method to the instance-method
            self.download = self._download

    # Construct KnmiStation from file
    @classmethod
    def fromfile(cls, fname):
        """Reads data from a KNMI-file.

        Parameters
        ----------
        fname : str
            Path of the file to read.

        Returns
        -------
        KnmiStation
            A new instance with stations, variables and data filled in.

        """
        self = cls()
        # readdata closes the handle itself; the with-block additionally
        # guarantees the file is closed when parsing fails halfway.
        with open(fname, 'r') as f:
            self.readdata(f)
        return self

    # Construct KnmiStation from download
    @classmethod
    def download(cls, start=None, end=None, inseason=False, vars='ALL',
                 stns=260, interval='daily'):
        """Downloads data from the KNMI-server.

        Parameters
        ----------
        start : str or pandas Timestamp
            The start-date of the data to be downloaded (defaults to start
            of current year)
        end : str or pandas Timestamp
            The end-date of the data to be downloaded (defaults to today)
        inseason : bool
            Only download the data in the season between start and end
            (defaults to False)
        vars : str or list/ndarray
            The variable(s) to be downloaded. Use 'RD' to download from
            rainfall stations (defaults to 'ALL')
        stns : int, str or list/ndarray
            station number(s) to be downloaded (defaults to 260 : 'De Bilt')
        interval : str
            The required data interval: 'daily' or 'hourly' (defaults to
            'daily')

        Returns
        -------
        KnmiStation
            A new instance holding the downloaded data.

        Notes
        -----
        For more information see
        https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script

        """
        self = cls()
        self._download(start=start, end=end, inseason=inseason, vars=vars,
                       stns=stns, interval=interval)
        return self

    def _download(self, start=None, end=None, inseason=False, vars='ALL',
                  stns=260, interval='daily'):
        """Request data from the KNMI-server and parse it into this instance.

        Takes the same parameters as `download`.

        Raises
        ------
        ImportError
            If the optional `requests` package is not installed.
        ValueError
            If 'RD' is combined with an hourly interval or with other
            variables.

        """
        # Import the necessary modules (optional and not included in the
        # installation of pastas).
        try:
            import requests
        except ImportError:
            raise ImportError(
                'The module requests could not be imported. '
                'Please install through:\n'
                '>>> pip install requests\n'
                'or:\n'
                '>>> conda install requests')
        from io import StringIO

        if start is None:
            start = Timestamp(Timestamp.today().year, 1, 1)
        else:
            start = to_datetime(start)
        if end is None:
            end = Timestamp.today()
        else:
            end = to_datetime(end)

        # Accept a scalar, list or ndarray for both vars and stns
        if not isinstance(vars, list):
            vars = list(vars) if isinstance(vars, ndarray) else [vars]
        if not isinstance(stns, list):
            stns = list(stns) if isinstance(stns, ndarray) else [stns]
        # convert possible integers to string
        stns = [str(i) for i in stns]

        hourly = interval.startswith('hour')
        if hourly and 'RD' in vars:
            raise ValueError(
                'Interval can not be hourly for rainfall-stations')
        if 'RD' in vars and len(vars) > 1:
            raise ValueError('Only daily precipitation can be downloaded '
                             'from rainfall-stations')

        if hourly:
            # hourly data from meteorological stations
            url = 'https://www.daggegevens.knmi.nl/klimatologie/uurgegevens'
        elif 'RD' in vars:
            # daily data from rainfall-stations
            url = 'https://www.daggegevens.knmi.nl/klimatologie/monv/reeksen'
        else:
            # daily data from meteorological stations
            url = 'https://www.daggegevens.knmi.nl/klimatologie/daggegevens'

        vars = ":".join(vars)
        stns = ":".join(stns)
        if hourly:
            # the hour (01 - 24) is appended to the date
            data = {
                'start': start.strftime('%Y%m%d') + '01',
                'end': end.strftime('%Y%m%d') + '24',
                'vars': vars,
                'stns': stns,
            }
        else:
            data = {
                'start': start.strftime('%Y%m%d'),
                'end': end.strftime('%Y%m%d'),
                'inseason': str(int(inseason)),
                'vars': vars,
                'stns': stns,
            }

        result = requests.get(url, params=data).text
        self.readdata(StringIO(result))

    @staticmethod
    def _maybe_float(s):
        """Return s converted to float, or s itself when that fails."""
        try:
            return float(s)
        except (ValueError, TypeError):
            return s

    def _read_header(self, f):
        """Parse the comment header of a KNMI stream.

        Fills self.stations and self.variables and returns the first line
        that contains 'STN,' (the column header of the data block), or an
        empty string when the end of the stream is reached first.
        """
        isLocations = False
        line = f.readline()
        isMeteo = line.startswith('# ')

        # Process the header information (Everything < 'STN,')
        while 'STN,' not in line and line != "":
            # Pre-format the line
            line = line.strip('\n')
            line = line.lstrip('# ')

            if line.strip() == '':
                # If line is empty, skip line
                pass
            elif isMeteo and line.startswith('STN '):
                # Line contains the titles of the station info (can only
                # happen for meteorological stations)
                isLocations = True
                titels = line.strip().split()
                titels = [x.replace('(', '_') for x in titels]
                titels = [x.replace(r')', '') for x in titels]

                # Create pd.DataFrame for station data
                self.stations = DataFrame(columns=titels)
                self.stations.set_index(['STN'], inplace=True)
            elif ' = ' in line or ' : ' in line:
                # Line contains the description of a variable
                isLocations = False
                if ' = ' in line:
                    varDes = line.split(' = ')
                else:
                    varDes = line.split(' : ')
                self.variables[varDes[0].strip()] = varDes[1].strip()
            elif isLocations:
                # Location data follows the title line. Columns are
                # separated by two or more spaces, while a station name may
                # contain a single space, so split on double spaces and
                # drop the empty pieces that longer runs of padding produce.
                line = line.strip().replace(':', '')
                fields = [part.strip() for part in line.split('  ')
                          if part.strip()]
                stn = int(fields[0])
                self.stations.loc[stn] = [self._maybe_float(v)
                                          for v in fields[1:]]

            # Read in a new line and start over
            line = f.readline()
        return line

    def _convert_units(self, data):
        """Rescale the columns of data according to the unit descriptions.

        The descriptions in self.variables are rewritten to match the new
        units. data is modified in place.

        Raises
        ------
        NameError
            If a described variable has no column in data.
        """
        # Iterate over a snapshot: keys are renamed/updated inside the loop.
        for key, value in list(self.variables.items()):
            # test if key exists in data
            if key not in data.keys():
                if key == 'YYYYMMDD' or key == 'HH':
                    # these ended up in the index, nothing to convert
                    continue
                elif key == 'T10N':
                    # described as T10N in the header, but the data column
                    # is expected under the name T10
                    self.variables.pop(key)
                    key = 'T10'
                else:
                    raise NameError(key + ' does not exist in data')

            if ' (-1 for <0.05 mm)' in value or ' (-1 voor <0.05 mm)' in value:
                # set 0.025 mm where data == -1
                below_detection = data[key] == -1
                if below_detection.any():
                    # the column is parsed as integers; make it float first
                    # so that assigning 0.25 does not need an implicit
                    # dtype change
                    data[key] = data[key].astype(float)
                    data.loc[below_detection, key] = 0.25  # still 0.1 mm
                value = value.replace(' (-1 for <0.05 mm)', '')
                value = value.replace(' (-1 voor <0.05 mm)', '')
            if '0.1 ' in value:
                # transform 0.1 to 1
                data[key] = data[key] * 0.1
                value = value.replace('0.1 ', '')
            if ' tiende ' in value:
                # transform 0.1 to 1
                data[key] = data[key] * 0.1
                value = value.replace(' tiende ', ' ')
            if ' mm' in value:
                # transform mm to m
                data[key] = data[key] * 0.001
                value = value.replace(' mm', ' m')
            if ' millimeters' in value:
                # transform mm to m
                data[key] = data[key] * 0.001
                value = value.replace(' millimeters', ' m')
            # '(in percents)', 'hPa' and 'J/cm2' are not adjusted (yet)

            # Store new variable
            self.variables[key] = value

    def readdata(self, f):
        """Parse a KNMI text stream into stations, variables and data.

        Parameters
        ----------
        f : file-like
            Text stream supporting readline, tell and seek. It is closed
            once the data block has been read.

        Notes
        -----
        The result is stored in self.stations, self.variables and
        self.data; nothing is returned. When the stream holds no data rows
        a warning is issued and self.data is left empty.

        """
        # Local import, only needed here
        from pandas.errors import EmptyDataError

        self.stations = DataFrame()
        self.variables = dict()

        line = self._read_header(f)

        # The header information of the datablock
        line = line.strip('\n')
        line = line.lstrip('# ')
        header = [item.strip() for item in line.split(',')]

        # Skip the empty (or bare '#') line after the header; sometimes
        # there is no such line between the header and the data, in which
        # case the stream is rewound so that the row is not lost.
        pos = f.tell()
        line = f.readline()
        if line.strip('# \r\n') != '':
            f.seek(pos)

        # Process the datablock
        try:
            data = read_csv(f, header=None, names=header, na_values=' ')
        except EmptyDataError:
            # nothing follows the header
            data = DataFrame(columns=header)
        finally:
            f.close()

        if data.empty:
            warnings.warn('No KNMI data found')
            self.data = data
            return

        dates = to_datetime(data['YYYYMMDD'], format='%Y%m%d')
        data = data.set_index(dates).drop('YYYYMMDD', axis=1)

        # convert the hours if provided
        if 'HH' in data.keys():
            # hourly data, Hourly division 05 runs from 04.00 UT to 5.00 UT
            data.index = data.index + to_timedelta(data['HH'], unit='h')
            data.pop('HH')
        elif 'H' in data.keys():
            # hourly data, Hourly division 05 runs from 04.00 UT to 5.00 UT
            data.index = data.index + to_timedelta(data['H'], unit='h')
            data.pop('H')
        else:
            # daily data
            if 'RD' in data.keys():
                # daily precipitation amount in 0.1 mm over the period 08.00
                # preceding day - 08.00 UTC present day
                data.index = data.index + Timedelta(8, unit='h')
            else:
                # add a full day for meteorological data, so that the
                # timestamp is at the end of the period in the data
                data.index = data.index + Timedelta(1, unit='d')

        # from UT to UT+1 (standard-time in the Netherlands)
        data.index = data.index + Timedelta(1, unit='h')

        # Delete empty columns
        if '' in data.columns:
            data.drop('', axis=1, inplace=True)

        # Adjust the unit of the measurements
        self._convert_units(data)

        self.data = data