API Reference

DataPipeline

class DataPipeline:
    """Composable pipeline of data sources, transformers, and validators.

    Components may be supplied at construction or attached fluently via the
    ``add_*`` methods, which are declared to return the pipeline for chaining.
    """

    def __init__(
        self,
        sources: Optional[List[DataSource]] = None,        # data providers, queried by run()
        transformers: Optional[List[Transformer]] = None,  # applied to the fetched frame
        validators: Optional[List[Validator]] = None,      # checks on the resulting frame
    ): ...

    def add_source(self, source: DataSource) -> "DataPipeline": ...

    def add_transformer(self, transformer: Transformer) -> "DataPipeline": ...

    def add_validator(self, validator: Validator) -> "DataPipeline": ...

    def run(
        self,
        start: Union[str, pd.Timestamp],  # Start date
        end: Union[str, pd.Timestamp],    # End date
        cache: Union[bool, str] = False,  # Caching option
    ) -> pd.DataFrame: ...

    def save(self, path: Union[str, Path]) -> None: ...  # Save to YAML

    @classmethod
    def load(cls, path: Union[str, Path]) -> "DataPipeline": ...  # Load from YAML

ModelPipeline

class ModelPipeline:
    """Pipeline that evaluates models on a DataFrame and exports the results.

    Models, evaluators, and exporters are attached via ``add_*`` methods,
    which are declared to return the pipeline for chaining.
    """

    def __init__(self): ...

    def add_model(self, model: BaseModel) -> "ModelPipeline": ...

    def add_evaluator(self, evaluator: Evaluator) -> "ModelPipeline": ...

    def add_exporter(self, exporter: Exporter) -> "ModelPipeline": ...

    def run(
        self,
        data: pd.DataFrame,              # Input DataFrame
        test_start: str,                 # Test period start
        test_end: str,                   # Test period end
        target: str = "price",           # Target column
        horizon: int = 7,                # Max horizon
        save_dir: Optional[str] = None,  # Result cache directory
    ) -> EvaluationReport: ...

    def save(self, path: Union[str, Path]) -> None: ...  # Save to YAML

    @classmethod
    def load(cls, path: Union[str, Path]) -> "ModelPipeline": ...  # Load from YAML

Workflow

class Workflow:
    """End-to-end runner combining a data pipeline with a model pipeline.

    Pipelines are referenced by paths to their YAML definitions; ``run()``
    executes the full flow and returns an :class:`EvaluationReport`.
    """

    def __init__(
        self,
        data_start: str,        # Data fetch start date
        data_end: str,          # Data fetch end date
        model_test_start: str,  # Test period start
        model_test_end: str,    # Test period end
        data_pipeline: Optional[Union[str, Path]] = None,   # Path to data_pipeline.yaml
        model_pipeline: Optional[Union[str, Path]] = None,  # Path to model_pipeline.yaml
        data_cache: Union[bool, str] = False,               # Data caching option
        model_target: str = "price",                        # Target column
        model_horizon: int = 7,                             # Max forecast horizon
        max_processes: Optional[int] = None,                # Worker process count
        threads_per_process: Optional[int] = None,          # Threads per process
        cache_path: Optional[str] = None,                   # Cache directory (overrides data_cache)
        model_index: Optional[int] = None,                  # Run only models[model_index]; all models if None
    ): ...

    def run(self) -> EvaluationReport: ...

    def save(self, path: Union[str, Path]) -> None: ...

    @classmethod
    def load(cls, path: Union[str, Path]) -> "Workflow": ...

Workflow.run() always sets OMP_NUM_THREADS=1 and PYTHON_GIL=0 before spawning workers. Pipeline paths are resolved relative to the workflow YAML’s directory. $VAR in paths is expanded at runtime.


EvaluationReport

class EvaluationReport:
    """Evaluation results exposed as DataFrames, sliced by hour, forecast
    horizon, year, and their pairwise combinations."""

    def summary(self) -> pd.DataFrame: ...

    def by_hour(self) -> pd.DataFrame: ...

    def by_horizon(self) -> pd.DataFrame: ...

    def by_hour_horizon(self) -> pd.DataFrame: ...

    def by_year(self) -> pd.DataFrame: ...

    def by_year_horizon(self) -> pd.DataFrame: ...

Data Sources

class EntsoeSource(DataSource):
    """ENTSO-E data source for a country code; requires an API key.

    NOTE(review): the ``type`` parameter shadows the builtin but is kept
    for API compatibility; presumably it selects the data series to fetch
    — confirm against the implementation.
    """

    def __init__(self, country_code: str, api_key: str, type: List[str]): ...

    def fetch(self, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame: ...
class OpenMeteoSource(DataSource):
    """Open-Meteo weather source for a latitude/longitude pair."""

    def __init__(
        self,
        latitude: float,
        longitude: float,
        horizon: int = 7,
        model: str = "jma_seamless",           # weather model identifier
        columns: Optional[List[str]] = None,   # implicit-Optional annotation made explicit (PEP 484)
        prefix: str = "",                      # presumably prepended to output column names — confirm
    ): ...

    def fetch(self, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame: ...
class CalendarSource(DataSource):
    """Calendar-feature source (holidays, weekday, hour, month, daylight).

    Feature flags accept either a bool or a string encoding mode
    (e.g. ``holidays="binary"``, ``weekday="number"``).
    """

    def __init__(
        self,
        country: str,
        timezone: Optional[str] = None,         # implicit-Optional annotation made explicit (PEP 484)
        holidays: Union[str, bool] = "binary",
        weekday: Union[str, bool] = "number",
        hour: Union[str, bool] = False,
        month: Union[str, bool] = False,
        daylight: bool = False,
        prefix: str = "",
    ): ...

    def fetch(self, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame: ...
class CsvSource(DataSource):
    """Data source reading from a CSV file keyed on a datetime column."""

    def __init__(
        self,
        file_path: str,
        datetime_column: str = "datetime",
        columns: Optional[List[str]] = None,     # implicit-Optional annotation made explicit (PEP 484)
        prefix: str = "",
        datetime_format: Optional[str] = None,   # implicit-Optional annotation made explicit (PEP 484)
        separator: str = ",",
    ): ...

    def fetch(self, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame: ...

Transformers

class ResampleTransformer(Transformer):
    """Resamples a frame to ``freq``; ``method`` is presumably a pandas
    interpolation method (default "linear") — confirm against implementation."""

    def __init__(self, freq: str = "1h", method: str = "linear"): ...

    def transform(self, df: pd.DataFrame) -> pd.DataFrame: ...
class LagTransformer(Transformer):
    """Adds lagged versions of the selected columns (all columns if None)."""

    def __init__(
        self,
        columns: Union[str, List[str], None] = None,
        lags: Union[int, List[int], range] = 1,  # range and lists both accepted
        freq: str = "1h",
    ): ...

    def transform(self, df: pd.DataFrame) -> pd.DataFrame: ...
class TimezoneTransformer(Transformer):
    """Converts the frame's timestamps to ``target_tz``.

    NOTE(review): whether this converts the index, datetime columns, or
    both is not visible here — confirm against the implementation.
    """

    def __init__(self, target_tz: str): ...

    def transform(self, df: pd.DataFrame) -> pd.DataFrame: ...

Validators

class NullCheckValidator(Validator):
    """Validates the selected columns for null values; ``allow_nulls``
    relaxes the check."""

    def __init__(
        self,
        columns: Optional[List[str]] = None,  # implicit-Optional annotation made explicit (PEP 484)
        allow_nulls: bool = False,
    ): ...

    def validate(self, df: pd.DataFrame) -> ValidationResult: ...
class ContinuityValidator(Validator):
    """Validates that timestamps form a continuous series at ``freq``."""

    def __init__(self, freq: str = "1h"): ...

    def validate(self, df: pd.DataFrame) -> ValidationResult: ...
class EdaValidator(Validator):
    """Exploratory-data-analysis validator over the selected columns
    (all columns if None) — confirm exact checks against implementation."""

    def __init__(
        self,
        columns: Optional[List[str]] = None,  # implicit-Optional annotation made explicit (PEP 484)
    ): ...

    def validate(self, df: pd.DataFrame) -> ValidationResult: ...

Models

class OLSModel(BaseModel):
    """Ordinary-least-squares model over the given predictors."""

    def __init__(
        self,
        predictors: List,
        training_window: int = 365,  # presumably days of training history — confirm
        name: str = "Model",
    ): ...
class LassoCVModel(BaseModel):
    """Lasso model with cross-validated regularization."""

    def __init__(
        self,
        predictors: List,
        training_window: int = 365,  # presumably days of training history — confirm
        cv: int = 5,                 # presumably cross-validation fold count — confirm
        max_iter: int = 10000,
        name: str = "Model",
    ): ...

Evaluators

class Evaluator(ABC):
    """Abstract metric: subclasses set ``name`` and provide ``compute``."""

    name: str  # metric label (e.g. "MAE", "RMSE")

    def compute(self, df: pd.DataFrame, **kwargs) -> float: ...
class MAEEvaluator(Evaluator):
    """Evaluator reported under the name "MAE"."""

    name = "MAE"
class RMSEEvaluator(Evaluator):
    """Evaluator reported under the name "RMSE"."""

    name = "RMSE"
class rMAEEvaluator(Evaluator):
    """Relative-MAE evaluator computed against a benchmark model."""

    name = "rMAE"

    def __init__(self, base_model: str): ...  # Name of the benchmark model

Exporters

class TerminalExporter(Exporter):
    """Exports a report to the terminal; ``show`` presumably selects
    which tables are printed — confirm against implementation."""

    def __init__(
        self,
        show: Optional[List[str]] = None,  # implicit-Optional annotation made explicit (PEP 484)
    ): ...

    def export(self, report: EvaluationReport) -> None: ...
class ExcelExporter(Exporter):
    """Exports a report to an Excel workbook at ``path``."""

    def __init__(
        self,
        path: str,
        sheets: Optional[List[str]] = None,  # implicit-Optional annotation made explicit (PEP 484)
    ): ...

    def export(self, report: EvaluationReport) -> None: ...
class CsvExporter(Exporter):
    """Exports a report to a CSV file at ``path``."""

    def __init__(
        self,
        path: str,
        extra_columns: Optional[List[str]] = None,  # implicit-Optional annotation made explicit (PEP 484)
    ): ...

    def export(self, report: EvaluationReport) -> None: ...