Source code for icenet.data.loader

import argparse
import logging
import os

import numpy as np

from icenet.data.loaders import IceNetDataLoaderFactory
from icenet.data.cli import add_date_args, process_date_args
from icenet.utils import setup_logging
"""

"""


@setup_logging
def create_get_args() -> object:
    """Converts input data creation argument strings to objects, and assigns
    them as attributes to the namespace.

    The args added in this function relate to the dataloader creation process.

    Returns:
        An argparse.Namespace object with all arguments added via
        `add_argument` accessible as object attributes.
    """
    implementations = list(IceNetDataLoaderFactory().loader_map)

    ap = argparse.ArgumentParser()
    ap.add_argument("name", type=str)
    ap.add_argument("hemisphere", choices=("north", "south"))

    ap.add_argument("-c", "--cfg-only",
                    help="Do not generate data, only config",
                    default=False,
                    action="store_true",
                    dest="cfg")
    ap.add_argument("-d", "--dry",
                    help="Don't output files, just generate data",
                    default=False,
                    action="store_true")
    ap.add_argument("-dt", "--dask-timeouts", type=int, default=120)
    ap.add_argument("-dp", "--dask-port", type=int, default=8888)
    ap.add_argument("-f", "--futures-per-worker",
                    type=float, default=2., dest="futures")
    ap.add_argument("-fn", "--forecast-name",
                    dest="forecast_name", default=None, type=str)
    ap.add_argument("-fd", "--forecast-days",
                    dest="forecast_days", default=93, type=int)
    ap.add_argument("-i", "--implementation",
                    type=str, choices=implementations,
                    default=implementations[0])
    ap.add_argument("-l", "--lag", type=int, default=2)
    ap.add_argument("-ob", "--output-batch-size",
                    dest="batch_size", type=int, default=8)
    ap.add_argument("-p", "--pickup",
                    help="Skip existing tfrecords",
                    default=False, action="store_true")
    ap.add_argument("-t", "--tmp-dir",
                    help="Temporary directory",
                    default="/local/tmp", dest="tmp_dir", type=str)
    ap.add_argument("-v", "--verbose", action="store_true", default=False)
    ap.add_argument("-w", "--workers",
                    help="Number of workers to use generating sets",
                    type=int, default=2)

    add_date_args(ap)

    args = ap.parse_args()
    return args
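
# Illustrative sketch (not part of the original module): create_get_args()
# reads sys.argv via parse_args(), so a command line along the lines of
#
#     icenet_dataset_create example_loader north -l 3 -fd 93 -w 4
#
# (the console-script name is an assumption) would yield a namespace where
# args.name == "example_loader", args.hemisphere == "north", args.lag == 3,
# args.forecast_days == 93 and args.workers == 4, plus whatever date
# arguments add_date_args() registers.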
def create():
    """Entry point for dataset creation: builds a data loader from the CLI
    arguments and either writes only its dataset configuration or generates
    the data."""
    args = create_get_args()
    dates = process_date_args(args)

    dl = IceNetDataLoaderFactory().create_data_loader(
        args.implementation,
        "loader.{}.json".format(args.name),
        args.forecast_name if args.forecast_name else args.name,
        args.lag,
        dates_override=dates
        if sum([len(v) for v in dates.values()]) > 0 else None,
        dry=args.dry,
        n_forecast_days=args.forecast_days,
        north=args.hemisphere == "north",
        south=args.hemisphere == "south",
        output_batch_size=args.batch_size,
        pickup=args.pickup,
        generate_workers=args.workers,
        dask_port=args.dask_port,
        futures_per_worker=args.futures)

    if args.cfg:
        dl.write_dataset_config_only()
    else:
        dl.generate()
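
# Illustrative sketch of the dates_override guard above, assuming
# process_date_args() returns a dict of date lists keyed by split name
# (the key names here are hypothetical; the dict-of-lists shape is what
# the guard itself relies on):
#
#     dates = {"train": [], "val": [], "test": []}
#     override = dates if sum([len(v) for v in dates.values()]) > 0 else None
#     # override is None here, so the loader keeps its configured dates;
#     # any non-empty list would pass the full dict through instead.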
def save_sample(output_folder: str, date: object, sample: tuple):
    """Saves the input, output and weight arrays of a single sample to disk.

    :param output_folder: root directory to write the arrays under
    :param date: the forecast date the sample corresponds to
    :param sample: a (net_input, net_output, sample_weights) tuple
    """
    net_input, net_output, sample_weights = sample

    if os.path.exists(output_folder):
        logging.warning("{} output already exists".format(output_folder))
    os.makedirs(output_folder, exist_ok=True)

    for date, output, directory in ((date, net_input, "input"),
                                    (date, net_output, "outputs"),
                                    (date, sample_weights, "weights")):
        output_directory = os.path.join(output_folder, "loader", directory)
        os.makedirs(output_directory, exist_ok=True)
        loader_output_path = os.path.join(output_directory,
                                          date.strftime("%Y_%m_%d.npy"))

        logging.info("Saving {} - generated {} {}".format(
            date, directory, output.shape))
        np.save(loader_output_path, output)
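
# Illustrative sketch (hypothetical folder and date): a call such as
#
#     save_sample("./sample", datetime.date(2020, 1, 1), sample)
#
# writes the three arrays of the sample tuple under a "loader" subdirectory:
#
#     ./sample/loader/input/2020_01_01.npy
#     ./sample/loader/outputs/2020_01_01.npy
#     ./sample/loader/weights/2020_01_01.npy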