Source code for lpspline.spline.factor
import numpy as np
import cvxpy as cp
from typing import List, Optional
from .base import Spline
[docs]
class Factor(Spline):
"""
Categorical Factor mapping utilizing a one-hot-encoded basis.
"""
def __init__(self, term: str, tag: Optional[str] = 'factor', n_classes: Optional[int] = None):
"""
Initialize the Factor.
Parameters
----------
term : str
The column name representing the categorical features.
tag : Optional[str], default='factor'
The descriptive tag denoting spline implementation type.
n_classes : Optional[int], default=None
The number of explicitly known classes or categories natively included in the data array `x`.
If not populated, it uses the length of exactly uniquely present feature subsets.
"""
super().__init__(term=term, tag=tag)
self._n_classes = n_classes
self._variables = []
@property
def n_classes(self) -> Optional[int]:
"""
Returns the number of unique identified categorical classes.
Returns
-------
Optional[int]
The number of classes evaluated in the factor basis system.
"""
return self._n_classes
[docs]
def init_spline(self, x: np.ndarray, by: np.ndarray = None):
"""
Introspectively determines the number of categorical instances locally contained in `x`
assuming none were globally established at initialization.
Parameters
----------
x : np.ndarray
The evaluation group.
by : np.ndarray, default=None
The grouped indexing column if modeling interactions.
"""
super().init_spline(x, by)
self._classes = np.unique(x)
self._int_map = {c: i for i, c in enumerate(self._classes)}
if self._n_classes is None:
self._n_classes = len(self._classes)
def _build_basis(self, x: np.ndarray) -> np.ndarray:
"""
Generates the one-hot-encoded transformation matrix for evaluated inputs.
Parameters
----------
x : np.ndarray
Categorical representation list implicitly structured as indexed zero-based classes.
Returns
-------
np.ndarray
A 2D binary matrix of shape `(n_samples, n_classes)`.
"""
# One-hot encoding
x_flat = np.array(x).flatten()
if getattr(self, '_int_map', None) is not None:
x_mapped = np.array([self._int_map.get(v, -1) for v in x_flat])
else:
x_mapped = x_flat.astype(int)
n = len(x_mapped)
basis = np.zeros((n, self.n_classes))
mask = (x_mapped >= 0) & (x_mapped < self.n_classes)
basis[np.arange(n)[mask], x_mapped[mask]] = 1.0
return basis
def _build_variables(self) -> cp.Variable:
"""
Create the respective individual mapping variables corresponding cleanly to individual elements encoded.
Returns
-------
cp.Variable
A 1D dimensional vector tracking factor biases sized `(n_classes,)`.
"""
if not self._variables:
dim = self.n_classes
self._variables = cp.Variable(shape=(dim,), name=f"{self.term}_factor")
return self._variables
def __repr__(self):
return f"Factor(term='{self.term}', n_classes={self.n_classes})"