From 2dc1603a13a9366e8c2fb48ed997660a717f4fa6 Mon Sep 17 00:00:00 2001 From: ankitlade12 Date: Sat, 28 Feb 2026 20:22:30 -0600 Subject: [PATCH 1/3] docs: fix DatetimeOrdinal user guide and api documentation --- docs/api_doc/datetime/DatetimeOrdinal.rst | 2 +- docs/user_guide/datetime/DatetimeOrdinal.rst | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/api_doc/datetime/DatetimeOrdinal.rst b/docs/api_doc/datetime/DatetimeOrdinal.rst index f5960c8e0..274232c95 100644 --- a/docs/api_doc/datetime/DatetimeOrdinal.rst +++ b/docs/api_doc/datetime/DatetimeOrdinal.rst @@ -1,6 +1,6 @@ DatetimeOrdinal =============== -.. automodule:: feature_engine.datetime.datetime_ordinal +.. autoclass:: feature_engine.datetime.DatetimeOrdinal :members: diff --git a/docs/user_guide/datetime/DatetimeOrdinal.rst b/docs/user_guide/datetime/DatetimeOrdinal.rst index 7e150c984..393083c87 100644 --- a/docs/user_guide/datetime/DatetimeOrdinal.rst +++ b/docs/user_guide/datetime/DatetimeOrdinal.rst @@ -7,6 +7,10 @@ DatetimeOrdinal :class:`DatetimeOrdinal()` converts datetime variables into ordinal numbers, that is, a numerical representation of the date. +Datetime variables cannot be used directly by machine learning algorithms because they are not numerical. However, they contain valuable information about sequences of events or elapsed time. + +By converting datetime variables into ordinal numbers, we can capture this information while discarding the complexities of raw datetime formats. Ordinal numbers preserve the relative distances between dates (e.g., the number of days between events), allowing algorithms to capture linear trends, calculate temporal distances naturally, and handle time consistently without needing to parse or split the datetime into multiple separate features like year, month, or day. + By default, :class:`DatetimeOrdinal()` returns the proleptic Gregorian ordinal, where January 1 of year 1 has ordinal 1. 
Optionally, :class:`DatetimeOrdinal()` can compute the number of days relative to a user-defined `start_date`. @@ -121,12 +125,6 @@ If `missing_values="raise"`, the transformer will raise an error if NaT values a If `missing_values="ignore"`, the transformer will ignore NaT values, and the resulting ordinal feature will contain `NaN` (or `pd.NA`) in their place. -.. autoclass:: DatetimeOrdinal - :members: - :undoc-members: - :show-inheritance: - - Additional resources -------------------- From 43b46046c5c344f7e0849461d18687da8fe571d2 Mon Sep 17 00:00:00 2001 From: solegalli Date: Mon, 2 Mar 2026 23:18:35 -0500 Subject: [PATCH 2/3] update docstrings and user guide --- docs/user_guide/datetime/DatetimeOrdinal.rst | 62 ++++++++++++++------ feature_engine/datetime/datetime_ordinal.py | 39 ++++++------ 2 files changed, 64 insertions(+), 37 deletions(-) diff --git a/docs/user_guide/datetime/DatetimeOrdinal.rst b/docs/user_guide/datetime/DatetimeOrdinal.rst index 393083c87..4ca543e06 100644 --- a/docs/user_guide/datetime/DatetimeOrdinal.rst +++ b/docs/user_guide/datetime/DatetimeOrdinal.rst @@ -2,23 +2,30 @@ .. currentmodule:: feature_engine.datetime + DatetimeOrdinal -================ +=============== -:class:`DatetimeOrdinal()` converts datetime variables into ordinal numbers, that is, a numerical representation of the date. +:class:`DatetimeOrdinal()` converts datetime variables into ordinal numbers, thereby +providing a numerical representation of the date. By default, it returns the proleptic +Gregorian ordinal of the date, where 1st January of year 1 has ordinal 1. -Datetime variables cannot be used directly by machine learning algorithms because they are not numerical. However, they contain valuable information about sequences of events or elapsed time. +If 1st January of year 1 has ordinal number 1, then 2nd January of year 1 will have ordinal +number 2, and so on. 
-By converting datetime variables into ordinal numbers, we can capture this information while discarding the complexities of raw datetime formats. Ordinal numbers preserve the relative distances between dates (e.g., the number of days between events), allowing algorithms to capture linear trends, calculate temporal distances naturally, and handle time consistently without needing to parse or split the datetime into multiple separate features like year, month, or day. +Optionally, :class:`DatetimeOrdinal()` can compute the number of days relative to a +user-defined `start_date`. This can be useful for reducing the magnitude of the ordinal +values and for aligning them to a specific project timeline. -By default, :class:`DatetimeOrdinal()` returns the proleptic Gregorian ordinal, where January 1 of year 1 has ordinal 1. +Ordinal numbers preserve the relative distances between dates (e.g., the number of days +between events), allowing algorithms to capture linear trends and temporal distances. -Optionally, :class:`DatetimeOrdinal()` can compute the number of days relative to a user-defined `start_date`. Datetime ordinals with pandas ----------------------------- -In Python, we can get the Gregorian ordinal of a date using the `toordinal()` method from a datetime object. +In Python, we can get the Gregorian ordinal of a date using the `toordinal()` method +from a datetime object as follows: .. code:: python @@ -38,13 +45,18 @@ The output shows the new ordinal feature: 0 2023-01-01 738521 1 2023-01-10 738530 +In the variable `ordinal`, the value `738521` means that `2023-01-01` is the 738521st +day counting from the 1st of January of the year 1, which is day 1. -Datetime ordinals with Feature-engine -------------------------------------- +Datetime ordinal with Feature-engine +------------------------------------ -:class:`DatetimeOrdinal()` automatically converts one or more datetime variables into ordinal numbers. 
It works with variables whose dtype is datetime, as well as with object-type variables, provided that they can be parsed into datetime format. +:class:`DatetimeOrdinal()` automatically converts one or more datetime variables into +ordinal numbers. It works with variables whose dtype is datetime, as well as with +object-type variables, provided that they can be parsed into datetime format. -:class:`DatetimeOrdinal()` uses pandas `toordinal()` under the hood. The main functionalities are: +:class:`DatetimeOrdinal()` uses pandas `toordinal()` under the hood. The main +functionalities are: - It can convert multiple datetime variables at once. - It can compute the ordinal number relative to a `start_date`. @@ -86,12 +98,14 @@ We see the new ordinal feature in the output: 2 Jan-1999 3 733987 3 Feb-2002 4 737729 -By default, :class:`DatetimeOrdinal()` drops the original datetime variable. To keep it, you can set `drop_original=False`. +By default, :class:`DatetimeOrdinal()` drops the original datetime variable. To keep +it, you can set `drop_original=False`. Calculate days from a start date ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -:class:`DatetimeOrdinal()` can also calculate the number of days elapsed since a specific `start_date`. +:class:`DatetimeOrdinal()` can also calculate the number of days elapsed since a +specific `start_date`. .. code:: python @@ -104,7 +118,8 @@ Calculate days from a start date df_transf -The new feature now represents the number of days between `var_date2` and January 1st, 2010. Note that dates before the `start_date` will result in negative numbers. +The new feature now represents the number of days between `var_date2` and January 1st, +2010. Note that dates before the `start_date` will result in negative numbers. .. 
code:: python @@ -118,11 +133,14 @@ The new feature now represents the number of days between `var_date2` and Januar Missing timestamps ------------------ -:class:`DatetimeOrdinal()` handles missing values (NaT) in datetime variables through the `missing_values` parameter, which can be set to `"raise"` or `"ignore"`. +:class:`DatetimeOrdinal()` handles missing values (NaT) in datetime variables through +the `missing_values` parameter, which can be set to `"raise"` or `"ignore"`. -If `missing_values="raise"`, the transformer will raise an error if NaT values are found in the datetime variables during `fit()` or `transform()`. +If `missing_values="raise"`, the transformer will raise an error if NaT values are +found in the datetime variables during `fit()` or `transform()`. -If `missing_values="ignore"`, the transformer will ignore NaT values, and the resulting ordinal feature will contain `NaN` (or `pd.NA`) in their place. +If `missing_values="ignore"`, the transformer will ignore NaT values, and the resulting +ordinal feature will contain `NaN` (or `pd.NA`) in their place. Additional resources @@ -141,7 +159,7 @@ For tutorials on how to create and use features from datetime columns, check the .. figure:: ../../images/fetsf.png :width: 300 :figclass: align-center - :align: right + :align: left :target: https://www.trainindata.com/p/feature-engineering-for-forecasting Feature Engineering for Time Series Forecasting @@ -156,6 +174,14 @@ For tutorials on how to create and use features from datetime columns, check the | | | +| +| +| +| +| +| +| +| Or read our book: diff --git a/feature_engine/datetime/datetime_ordinal.py b/feature_engine/datetime/datetime_ordinal.py index 28fed0436..ccd313a40 100644 --- a/feature_engine/datetime/datetime_ordinal.py +++ b/feature_engine/datetime/datetime_ordinal.py @@ -40,38 +40,38 @@ class DatetimeOrdinal(TransformerMixin, BaseEstimator, GetFeatureNamesOutMixin): since January 1, 0001 in the Gregorian calendar. 
Optionally, a `start_date` can be provided to set a custom reference point, - making the ordinal values relative to this date (starting from 1). This can be - useful for reducing the magnitude of the ordinal values and for aligning them - to a specific project timeline. + making the ordinal values relative to this date (starting from 1). + + More details in the :ref:`User Guide <datetime_ordinal>`. Parameters ---------- variables: str, list, default=None - List with the variables from which date and time information will be extracted. - If None, the transformer will find and select all datetime variables, - including variables of type object that can be converted to datetime. + List of the variables to convert into ordinal. If None, the transformer will + find and select all datetime variables, including variables of type object that + can be converted to datetime. missing_values: string, default='raise' Indicates if missing values should be ignored or raised. If 'raise' the - transformer will return an error if the datasets to `fit` or `transform` + transformer will return an error if the datasets passed to `fit` or `transform` contain missing values. If 'ignore', missing data will be ignored when - performing the feature extraction. + performing the transformation. start_date: str, datetime.datetime, default=None A reference date from which the ordinal values will be calculated. - If provided, the ordinal value of `start_date` will be subtracted from - each datetime variable's ordinal value, and 1 will be added, so the - `start_date` itself corresponds to an ordinal value of 1. - If None, the standard `datetime.toordinal()` value will be used. - The `start_date` can be a string (e.g., "YYYY-MM-DD") or a datetime object. + If provided, the ordinal value of `start_date` will be 1, the day after will be + 2, and so on. Days before `start_date` will take negative values. + If None, the transformation will represent the number of days since January 1, 0001. 
+ `start_date` can be a string (e.g., "YYYY-MM-DD") or a datetime object. drop_original: bool, default=True - If True, the original datetime variables will be dropped from the dataframe. + If True, the original datetime variables will be dropped from the dataframe + after the transformation. Attributes ---------- variables_: - List of variables from which date and time features will be extracted. + List of variables to convert into ordinals. start_date_ordinal_: The ordinal value of the provided `start_date`, if applicable. @@ -91,8 +91,8 @@ class DatetimeOrdinal(TransformerMixin, BaseEstimator, GetFeatureNamesOutMixin): See also -------- - pandas.to_datetime - datetime.toordinal + feature_engine.datetime.DatetimeFeatures + feature_engine.datetime.DatetimeSubtraction Examples -------- @@ -186,7 +186,8 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None): def transform(self, X: pd.DataFrame) -> pd.DataFrame: """ - Extract the ordinal datetime features and add them to the dataframe. + Calculate ordinal representation of datetime features and add them to the + dataframe. Parameters ---------- @@ -196,7 +197,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: Returns ------- X_new: Pandas dataframe, shape = [n_samples, n_features x n_df_features] - The dataframe with the original variables plus the new variables. + The dataframe with the original variables plus the new features. 
""" # Check method fit has been called From bab23af92384b04da180f3f798432f2edc1c6bf3 Mon Sep 17 00:00:00 2001 From: ankitlade12 Date: Mon, 2 Mar 2026 23:08:04 -0600 Subject: [PATCH 3/3] Fix flake8 E501 line too long in DatetimeOrdinal --- feature_engine/datetime/datetime_ordinal.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/feature_engine/datetime/datetime_ordinal.py b/feature_engine/datetime/datetime_ordinal.py index ccd313a40..981251488 100644 --- a/feature_engine/datetime/datetime_ordinal.py +++ b/feature_engine/datetime/datetime_ordinal.py @@ -61,8 +61,9 @@ class DatetimeOrdinal(TransformerMixin, BaseEstimator, GetFeatureNamesOutMixin): A reference date from which the ordinal values will be calculated. If provided, the ordinal value of `start_date` will be 1, the day after will be 2, and so on. Days before `start_date` will take negative values. - If None, the transformation will represent the number of days since January 1, 0001. - `start_date` can be a string (e.g., "YYYY-MM-DD") or a datetime object. + If None, the transformation will represent the number of days since + January 1, 0001. `start_date` can be a string (e.g., "YYYY-MM-DD") + or a datetime object. drop_original: bool, default=True If True, the original datetime variables will be dropped from the dataframe