# Source code for mlcompare.pipelines
from __future__ import annotations as _annotations
import logging
from pathlib import Path
from typing import Literal
from .params_reader import ParamsInput
from .processing import process_datasets, process_models
from .results_writer import ResultsWriter
logger = logging.getLogger(__name__)
[docs]
def data_exploration_pipeline():
    """
    Placeholder for a data exploration pipeline.

    Currently unimplemented: takes no arguments, performs no work and
    returns None.
    """
    return None
[docs]
def data_pipeline(
    dataset_params: ParamsInput,
    save_original: bool = True,
    save_processed: bool = True,
    save_directory: str | Path | None = None,
) -> None:
    """
    Run only the dataset stage: retrieve and/or process datasets, train nothing.

    Args:
    -----
        dataset_params (ParamsInput): Parameters for loading and processing datasets.
        save_original (bool, optional): Save original datasets. Defaults to True.
        save_processed (bool, optional): Save processed datasets. Defaults to True.
        save_directory (str | Path, optional): Directory to save results to. Passed
            unchanged to ResultsWriter; when None the writer picks the location
            (presumably a timestamped default directory - confirm in ResultsWriter).
    """
    results_writer = ResultsWriter(save_directory)
    results_writer.create_directory()

    # The yielded items are not used here. The loop exists to walk the whole
    # iterable returned by process_datasets (presumably lazy, so iterating is
    # what triggers the per-dataset processing and saving - confirm).
    for _ in process_datasets(
        dataset_params, results_writer, save_original, save_processed
    ):
        pass
[docs]
def full_pipeline(
    dataset_params: ParamsInput,
    model_params: ParamsInput,
    task_type: Literal["classification", "regression"],
    save_models: Literal["all", "best", "none"] = "none",
    save_original: bool = True,
    save_processed: bool = True,
    save_directory: str | Path | None = None,
) -> None:
    """
    Run the end-to-end pipeline: dataset retrieval/processing, then model
    processing for every dataset split that is produced.

    Args:
    -----
        dataset_params (ParamsInput): List containing dataset information.
        model_params (ParamsInput): List containing model information.
        task_type (Literal["classification", "regression"]): Type of machine learning
            task to be performed.
        save_models (Literal["all", "best", "none"], optional): Save all models, only
            the best model, or no models. Defaults to "none".
        save_original (bool, optional): Save original datasets. Defaults to True.
        save_processed (bool, optional): Save processed datasets. Defaults to True.
        save_directory (str | Path, optional): Directory to save data, models, and
            results to. Passed unchanged to ResultsWriter; when None the writer picks
            the location (presumably a timestamped default directory - confirm in
            ResultsWriter).
    """
    results_writer = ResultsWriter(save_directory)
    results_writer.create_directory()

    # Each item produced by process_datasets is handed, together with the shared
    # writer, to process_models.
    for dataset_split in process_datasets(
        dataset_params, results_writer, save_original, save_processed
    ):
        process_models(
            model_params,
            dataset_split,
            results_writer,
            task_type,
            save_models,
        )