Source code for icenet.data.cli

import argparse
import collections
import datetime as dt
import logging
import re

import pandas as pd

from icenet.utils import setup_logging
"""

"""


def date_arg(string: str) -> dt.date:
    """Parse a YYYY-MM-DD string into a date.

    :param string: a date string, e.g. "2020-1-31" (zero padding optional)
    :return: the corresponding datetime.date
    """
    date_match = re.search(r"(\d{4})-(\d{1,2})-(\d{1,2})", string)

    if date_match is None:
        raise argparse.ArgumentTypeError(
            "No date found for supplied argument {}".format(string))

    return dt.date(*[int(s) for s in date_match.groups()])

def dates_arg(string: str) -> list:
    """Parse a string containing one or more YYYY-MM-DD dates into a list.

    :param string: a string of dates, or "none" for an empty list
    :return: a list of datetime.date objects
    """
    if string == "none":
        return []

    date_match = re.findall(r"(\d{4})-(\d{1,2})-(\d{1,2})", string)

    if len(date_match) < 1:
        # argparse.ArgumentError requires the offending argument object;
        # ArgumentTypeError is the correct exception for a type= callable
        raise argparse.ArgumentTypeError(
            "No dates found for supplied argument {}".format(string))

    return [dt.date(*[int(s) for s in date_tuple])
            for date_tuple in date_match]

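Both parsers are intended for use as argparse type= callables. A quick
sketch of their behaviour (the values follow directly from the regular
expressions above):

# date_arg accepts unpadded months and days
assert date_arg("2020-1-5") == dt.date(2020, 1, 5)

# dates_arg collects every date it finds, and maps the literal
# string "none" to an empty list
assert dates_arg("none") == []
assert dates_arg("2020-1-1,2020-1-3") == [dt.date(2020, 1, 1),
                                          dt.date(2020, 1, 3)]
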
def csv_arg(string: str) -> list:
    """Parse a comma separated string into a list.

    :param string: comma separated values, optionally single-quoted
    :return: a list of strings, with None for zero-length items
    """
    csv_items = []
    string = re.sub(r"^'(.*)'$", r"\1", string)

    for el in string.split(","):
        if len(el) == 0:
            csv_items.append(None)
        else:
            csv_items.append(el)

    return csv_items

def csv_of_csv_arg(string: str) -> list:
    """Parse a comma separated string whose items are pipe separated lists.

    :param string: comma separated values, with "|" delimiting sub-items
    :return: a list of lists of strings, with None for zero-length items
    """
    csv_items = []
    string = re.sub(r"^'(.*)'$", r"\1", string)

    for el in string.split(","):
        if len(el) == 0:
            csv_items.append(None)
        else:
            csv_items.append(el.split("|"))

    return csv_items

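These two parsers implement the convention described in the --levels help
text below: zero-length items mean "no value" and pipes separate multiple
values per variable. A short sketch (the variable names are illustrative):

# zero-length items become None
assert csv_arg("uas,vas") == ["uas", "vas"]
assert csv_arg(",,500,,") == [None, None, "500", None, None]

# csv_of_csv_arg additionally splits each non-empty item on "|"
assert csv_of_csv_arg(",,250|500,,") == \
    [None, None, ["250", "500"], None, None]
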
def int_or_list_arg(string: str) -> object:
    """Parse a string as an int, falling back to a comma separated list.

    :param string: an integer, or comma separated values
    :return: an int, or a list of strings
    """
    try:
        val = int(string)
    except ValueError:
        val = string.split(",")
    return val

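Note that the fallback path leaves the list elements as strings, so callers
are expected to convert them as needed. For example:

# a plain integer parses as int...
assert int_or_list_arg("93") == 93
# ...while a comma separated string yields a list of strings
assert int_or_list_arg("30,60,93") == ["30", "60", "93"]
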
@setup_logging
def download_args(choices: object = None,
                  dates: bool = True,
                  dates_optional: bool = False,
                  var_specs: bool = True,
                  workers: bool = False,
                  extra_args: object = ()) -> object:
    """Build and parse the argument set common to the download commands.

    :param choices: optional list of downloader choices for -c/--choice
    :param dates: add start/end date arguments
    :param dates_optional: make the date arguments optional flags
    :param var_specs: add the --vars/--levels variable specification arguments
    :param workers: add the -w/--workers argument
    :param extra_args: iterable of (names, kwargs) pairs passed to add_argument
    :return: the parsed argument namespace
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("hemisphere", choices=("north", "south"))

    if choices and isinstance(choices, list):
        ap.add_argument("-c", "--choice", choices=choices, default=choices[0])

    if dates:
        pos_args = [["start_date"], ["end_date"]] if not dates_optional else \
            [["-sd", "--start-date"], ["-ed", "--end-date"]]
        ap.add_argument(*pos_args[0], type=date_arg, default=None)
        ap.add_argument(*pos_args[1], type=date_arg, default=None)

    if workers:
        ap.add_argument("-w", "--workers", default=8, type=int)

    ap.add_argument("-po", "--parallel-opens",
                    default=False, action="store_true",
                    help="Allow xarray mfdataset to work with parallel opens")
    ap.add_argument("-d", "--dont-delete", dest="delete",
                    action="store_false", default=True)
    ap.add_argument("-v", "--verbose", action="store_true", default=False)

    if var_specs:
        ap.add_argument("--vars",
                        help="Comma separated list of vars",
                        type=csv_arg,
                        default=[])
        ap.add_argument("--levels",
                        help="Comma separated list of pressures/depths as "
                             "needed, use zero length string if None "
                             "(e.g. ',,500,,,') and pipes for multiple per "
                             "var (e.g. ',,250|500,,')",
                        type=csv_of_csv_arg,
                        default=[])

    for arg in extra_args:
        ap.add_argument(*arg[0], **arg[1])

    args = ap.parse_args()
    return args

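download_args is typically invoked from a downloader's entry point. A
minimal sketch of how the extra_args pairs map onto add_argument calls;
the "cds" choice and --frequency flag here are hypothetical, purely
illustrative:

# with sys.argv like: cmd north 2020-1-1 2020-3-31 --frequency 6
# each extra_args element is a (names, kwargs) pair for add_argument
args = download_args(
    choices=["cds"],    # hypothetical: adds -c/--choice, default "cds"
    workers=True,       # adds -w/--workers
    extra_args=(
        (("--frequency",), dict(type=int, default=24)),  # hypothetical flag
    ))
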
@setup_logging
def process_args(dates: bool = True,
                 ref_option: bool = True,
                 extra_args: object = ()) -> object:
    """Build and parse the argument set common to the processing commands.

    :param dates: add the train/val/test date range arguments
    :param ref_option: add the -r/--ref reference loader argument
    :param extra_args: iterable of (names, kwargs) pairs passed to add_argument
    :return: the parsed argument namespace
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("name", type=str)
    ap.add_argument("hemisphere", choices=("north", "south"))

    if dates:
        add_date_args(ap)

    ap.add_argument("-l", "--lag", type=int, default=2)
    ap.add_argument("-f", "--forecast", type=int, default=93)

    ap.add_argument("-po", "--parallel-opens",
                    default=False, action="store_true",
                    help="Allow xarray mfdataset to work with parallel opens")

    ap.add_argument("--abs",
                    help="Comma separated list of abs vars",
                    type=csv_arg,
                    default=[])
    ap.add_argument("--anom",
                    help="Comma separated list of anomaly vars",
                    type=csv_arg,
                    default=[])
    ap.add_argument("--trends",
                    help="Comma separated list of trend vars",
                    type=csv_arg,
                    default=[])
    ap.add_argument("--trend-lead",
                    help="Time steps in the future for linear trends",
                    type=int_or_list_arg,
                    default=93)

    for arg in extra_args:
        ap.add_argument(*arg[0], **arg[1])

    if ref_option:
        ap.add_argument("-r", "--ref",
                        help="Reference loader for normalisations etc",
                        default=None, type=str)

    ap.add_argument("-v", "--verbose", action="store_true", default=False)
    ap.add_argument("-u", "--update-key",
                    default=None,
                    help="Add update key to processor to avoid overwriting "
                         "default entries in the loader configuration",
                    type=str)

    args = ap.parse_args()
    return args

def add_date_args(arg_parser: object):
    """Add train/val/test start and end date arguments to a parser.

    :param arg_parser: the argparse.ArgumentParser to extend
    """
    arg_parser.add_argument("-ns", "--train_start",
                            type=dates_arg, required=False, default=[])
    arg_parser.add_argument("-ne", "--train_end",
                            type=dates_arg, required=False, default=[])
    arg_parser.add_argument("-vs", "--val_start",
                            type=dates_arg, required=False, default=[])
    arg_parser.add_argument("-ve", "--val_end",
                            type=dates_arg, required=False, default=[])
    arg_parser.add_argument("-ts", "--test-start",
                            type=dates_arg, required=False, default=[])
    arg_parser.add_argument("-te", "--test-end", dest="test_end",
                            type=dates_arg, required=False, default=[])

def process_date_args(args: object) -> dict:
    """Expand the train/val/test start and end arguments into date lists.

    :param args: a parsed argument namespace containing the date ranges
    :return: a dict mapping train/val/test to sorted lists of dates
    """
    dates = dict(train=[], val=[], test=[])

    for dataset in ("train", "val", "test"):
        dataset_dates = collections.deque()

        for i, period_start in \
                enumerate(getattr(args, "{}_start".format(dataset))):
            period_end = getattr(args, "{}_end".format(dataset))[i]
            dataset_dates += [
                pd.to_datetime(date).date()
                for date in pd.date_range(period_start, period_end, freq="D")
            ]
        logging.info("Got {} dates for {}".format(len(dataset_dates),
                                                  dataset))

        dates[dataset] = sorted(list(dataset_dates))

    return dates
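
Putting the pieces together, a minimal sketch of how parsed date ranges
expand into daily date lists (parsing an explicit argv here rather than
the real command line):

ap = argparse.ArgumentParser()
add_date_args(ap)
args = ap.parse_args(["--train_start", "2020-1-1",
                      "--train_end", "2020-1-3"])

dates = process_date_args(args)

# each (start, end) pair expands to an inclusive daily range;
# datasets with no supplied ranges stay empty
assert dates["train"] == [dt.date(2020, 1, 1), dt.date(2020, 1, 2),
                          dt.date(2020, 1, 3)]
assert dates["val"] == [] and dates["test"] == []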