The default behavior for adding and solving with noisemodels has changed as of Pastas 1.5. Find more information in the Pastas documentation.

Source code for pastas.dataset

"""This module contains functions to load datasets from the pastas-data repository on
GitHub. The datasets are used for testing and examples in the documentation. The
load_dataset function can be used to load a single csv file or multiple csv files from
a subfolder in the pastas-data repository.

"""

from typing import Dict, List, Union

from pandas import DataFrame, read_csv

# Base URL of the GitHub contents API for the pastas-data repository. Note that the
# URL already ends with a slash.
GITHUB_URL = "https://api.github.com/repos/pastas/pastas-data/contents/"


def load_dataset(
    name: str, *, timeout: float = 30.0
) -> Union[DataFrame, Dict[str, DataFrame]]:
    """Load csv-files from a subfolder in the pastas dataset repository on GitHub.

    Parameters
    ----------
    name : str
        The name of the subfolder, i.e., collenteur_2023. For a list of available
        datasets, see the pastas-data repository on GitHub
        (www.github.com/pastas/pastas-data).
    timeout : float, optional
        Number of seconds to wait for the GitHub API before the request is aborted.
        Default is 30 seconds. This only applies to the request that lists the
        folder content, not to the download of the individual csv files.

    Returns
    -------
    Union[pd.DataFrame, Dict[str, pd.DataFrame]]
        The loaded dataset(s). If one csv file is found, returns a pandas DataFrame.
        If multiple csv files are found, returns a dictionary with file names
        (without the ".csv" extension) as keys and dataframes as values.

    Raises
    ------
    ImportError
        If the requests package is not installed.
    Exception
        If the request status code is not 200 (OK). This is likely due to an invalid
        folder name. Also raised if `name` does not refer to a folder or if the
        folder does not contain any csv files. Check the pastas-data repository on
        GitHub for available datasets.

    Examples
    --------
    >>> ps.load_dataset("collenteur_2021")
    Returns the dataset from the "collenteur_2021" subfolder as a pandas DataFrame.

    >>> ps.load_dataset("collenteur_2023")
    Returns a dictionary with datasets from the "collenteur_2023" subfolder. The keys
    are the file names and the values are pandas DataFrames.

    """
    # requests is imported lazily so that it is only needed when datasets are loaded
    try:
        import requests
    except ImportError as err:
        raise ImportError(
            "The requests package is required to load datasets from the pastas-data "
            "repository. Install requests using 'pip install requests'."
        ) from err

    # GITHUB_URL already ends with a slash; strip the slashes on both sides of the
    # join so the request path never contains an empty segment ("contents//name/").
    url = f"{GITHUB_URL.rstrip('/')}/{name.strip('/')}"

    # Without a timeout the call could block forever on an unresponsive connection
    response = requests.get(url, timeout=timeout)

    # Check if requests status is okay, otherwise raise error and return status code
    if response.status_code != 200:
        raise Exception(
            f"Error: {response.status_code}. Reason: {response.reason}. "
        )

    # A folder is returned as a list of entries. Anything else (e.g., a single file)
    # is returned as a JSON object, which cannot be looped over as file entries.
    entries = response.json()
    if not isinstance(entries, list):
        raise Exception(
            f"'{name}' is not a folder in the pastas-data repository. Check the "
            "pastas-data repository on GitHub for available datasets."
        )

    # Read every csv file in the folder
    data = {}
    for entry in entries:
        filename = entry["name"]
        if not filename.endswith(".csv"):
            continue
        # Only strip the extension, so files such as "a.x.csv" and "a.y.csv" keep
        # distinct keys and do not overwrite each other.
        key = filename.rsplit(".", 1)[0]
        data[key] = read_csv(entry["download_url"], index_col=0, parse_dates=True)

    if not data:
        raise Exception(
            f"No csv files found in the folder {name}. Check the pastas-data repository "
            "on GitHub for available datasets."
        )

    # Return the dataframe itself if only one file is found, otherwise the dictionary
    if len(data) == 1:
        return next(iter(data.values()))
    return data
def list_datasets(*, timeout: float = 30.0) -> List[str]:
    """Print and return the available datasets in the pastas-data repository on GitHub.

    Parameters
    ----------
    timeout : float, optional
        Number of seconds to wait for the GitHub API before the request is aborted.
        Default is 30 seconds.

    Returns
    -------
    list[str]
        The names of the folders found in the root of the pastas-data repository on
        GitHub. The same names are also printed.

    Raises
    ------
    ImportError
        If the requests package is not installed.
    Exception
        If the request status code is not 200 (OK).

    Examples
    --------
    >>> ps.list_datasets()
    Prints a list of available datasets in the pastas-data repository on GitHub.

    """
    # requests is imported lazily so that it is only needed when datasets are listed
    try:
        import requests
    except ImportError as err:
        raise ImportError(
            "The requests package is required to load datasets from the pastas-data "
            "repository. Install requests using 'pip install requests'."
        ) from err

    # Without a timeout the call could block forever on an unresponsive connection
    response = requests.get(GITHUB_URL, timeout=timeout)

    # Check if requests status is okay, otherwise raise error and return status code
    if response.status_code != 200:
        raise Exception(
            f"Error: {response.status_code}. Reason: {response.reason}. "
        )

    # Every directory in the root of the repository is a dataset
    folders = [entry["name"] for entry in response.json() if entry["type"] == "dir"]

    # Print the list of datasets
    print("Available datasets in the pastas-data repository on GitHub:")
    for folder in folders:
        print(f" - {folder}")
    print("Use ps.load_dataset('folder_name') to load a dataset from the repository.")

    return folders