Module nowcastlib.pipeline.process.postprocess.cli
Command-Line interface functionality for preprocessing
Expand source code
"""
Command-Line interface functionality for preprocessing
"""
import logging
import json
from typing import Union
import argparse
import cattr
from nowcastlib.pipeline.structs import config
from nowcastlib.pipeline import utils
from nowcastlib.pipeline.process import postprocess
from nowcastlib.pipeline import features
logger = logging.getLogger(__name__)
def configure_parser(action_object):
"""Configures the subparser for our preprocess command"""
pparser = action_object.add_parser(
"postprocess",
description="Postprocess dataset",
help="Run `nowcastlib postprocess -h` for further help",
formatter_class=argparse.HelpFormatter,
)
pparser.add(
"-c",
"--config",
required=True,
help="path to JSON file following the DataSet format. See docs for available fields",
)
def run(args):
"""runs appropriate function based on provided cli args"""
with open(args.config) as json_file:
options = json.load(json_file)
cattr_cnvrtr = cattr.GenConverter(forbid_extra_keys=True)
cattr_cnvrtr.register_structure_hook(
Union[int, float, str], utils.disambiguate_intfloatstr
)
dataset_config = cattr_cnvrtr.structure(options, config.DataSet)
proc_df = postprocess.postprocess_dataset(dataset_config)
# add generated fields if necessary
if dataset_config.generated_fields is not None:
proc_df = features.generate_fields(dataset_config, proc_df)
if dataset_config.postprocessing_output is not None:
logger.info("Serializing postprocessing results...")
utils.handle_serialization(proc_df, dataset_config.postprocessing_output)
logger.info("Serialization complete.")
return proc_df
Functions
def configure_parser(action_object)
-
Configures the subparser for our preprocess command
Expand source code
def configure_parser(action_object): """Configures the subparser for our preprocess command""" pparser = action_object.add_parser( "postprocess", description="Postprocess dataset", help="Run `nowcastlib postprocess -h` for further help", formatter_class=argparse.HelpFormatter, ) pparser.add( "-c", "--config", required=True, help="path to JSON file following the DataSet format. See docs for available fields", )
def run(args)
-
runs appropriate function based on provided cli args
Expand source code
def run(args): """runs appropriate function based on provided cli args""" with open(args.config) as json_file: options = json.load(json_file) cattr_cnvrtr = cattr.GenConverter(forbid_extra_keys=True) cattr_cnvrtr.register_structure_hook( Union[int, float, str], utils.disambiguate_intfloatstr ) dataset_config = cattr_cnvrtr.structure(options, config.DataSet) proc_df = postprocess.postprocess_dataset(dataset_config) # add generated fields if necessary if dataset_config.generated_fields is not None: proc_df = features.generate_fields(dataset_config, proc_df) if dataset_config.postprocessing_output is not None: logger.info("Serializing postprocessing results...") utils.handle_serialization(proc_df, dataset_config.postprocessing_output) logger.info("Serialization complete.") return proc_df