openml · JATAYU000 · Dec 29, 2025 · Dec 31, 2025 · Jan 1, 2026 · Jan 1, 2026
diff --git a/openml/base.py b/openml/base.py
@@ -1,26 +1,22 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
-import re
 import webbrowser
 from abc import ABC, abstractmethod
-from collections.abc import Iterable, Sequence
+from collections.abc import Sequence
 
 import xmltodict
 
 import openml._api_calls
 import openml.config
+from openml.utils import ReprMixin
 
 from .utils import _get_rest_api_type_alias, _tag_openml_base
 
 
-class OpenMLBase(ABC):
+class OpenMLBase(ReprMixin, ABC):
     """Base object for functionality that is shared across entities."""
 
-    def __repr__(self) -> str:
-        body_fields = self._get_repr_body_fields()
-        return self._apply_repr_template(body_fields)
-
     @property
     @abstractmethod
     def id(self) -> int | None:
@@ -60,34 +56,6 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | N
         """
         # Should be implemented in the base class.
 
-    def _apply_repr_template(
-        self,
-        body_fields: Iterable[tuple[str, str | int | list[str] | None]],
-    ) -> str:
-        """Generates the header and formats the body for string representation of the object.
-
-        Parameters
-        ----------
-        body_fields: List[Tuple[str, str]]
-           A list of (name, value) pairs to display in the body of the __repr__.
-        """
-        # We add spaces between capitals, e.g. ClassificationTask -> Classification Task
-        name_with_spaces = re.sub(
-            r"(\w)([A-Z])",
-            r"\1 \2",
-            self.__class__.__name__[len("OpenML") :],
-        )
-        header_text = f"OpenML {name_with_spaces}"
-        header = f"{header_text}\n{'=' * len(header_text)}\n"
-
-        _body_fields: list[tuple[str, str | int | list[str]]] = [
-            (k, "None" if v is None else v) for k, v in body_fields
-        ]
-        longest_field_name_length = max(len(name) for name, _ in _body_fields)
-        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
-        body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
-        return header + body
-
     @abstractmethod
     def _to_dict(self) -> dict[str, dict]:
         """Creates a dictionary representation of self.

diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py
@@ -7,8 +7,10 @@
 if TYPE_CHECKING:
     from IPython.lib import pretty
 
+from openml.utils import ReprMixin
 
-class OpenMLDataFeature:  # noqa: PLW1641
+
+class OpenMLDataFeature(ReprMixin):
     """
     Data Feature (a.k.a. Attribute) object.
 
@@ -74,11 +76,35 @@ def __init__(  # noqa: PLR0913
         self.number_missing_values = number_missing_values
         self.ontologies = ontologies
 
-    def __repr__(self) -> str:
-        return f"[{self.index} - {self.name} ({self.data_type})]"
+    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+        """Return the (label, value) rows shown in the body of ``__repr__``.
+
+        Returns
+        -------
+        list of (str, value) tuples
+            The feature's index, name and data type, in that display order.
+        """
+        # Rows are listed in the order in which they are printed.
+        return [
+            ("Index", self.index),
+            ("Name", self.name),
+            ("Data Type", self.data_type),
+        ]
 
     def __eq__(self, other: Any) -> bool:
         return isinstance(other, OpenMLDataFeature) and self.__dict__ == other.__dict__
 
+    def __hash__(self) -> int:
+        """Hash the feature from its descriptive attributes.
+
+        Covers index, name, data type, nominal values, number of missing
+        values and ontologies; the two optional collections are converted to
+        tuples first, while ``None`` is hashed as ``None``.
+        """
+        nominal = None if self.nominal_values is None else tuple(self.nominal_values)
+        onto = None if self.ontologies is None else tuple(self.ontologies)
+        key = (self.index, self.name, self.data_type, nominal, self.number_missing_values, onto)
+        return hash(key)
+
     def _repr_pretty_(self, pp: pretty.PrettyPrinter, cycle: bool) -> None:  # noqa: ARG002
         pp.text(str(self))
diff --git a/openml/setups/setup.py b/openml/setups/setup.py
@@ -1,13 +1,15 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+from collections.abc import Sequence
 from typing import Any
 
 import openml.config
 import openml.flows
+from openml.utils import ReprMixin
 
 
-class OpenMLSetup:
+class OpenMLSetup(ReprMixin):
     """Setup object (a.k.a. Configuration).
 
     Parameters
@@ -43,30 +45,21 @@ def _to_dict(self) -> dict[str, Any]:
             else None,
         }
 
-    def __repr__(self) -> str:
-        header = "OpenML Setup"
-        header = f"{header}\n{'=' * len(header)}\n"
-
-        fields = {
+    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+        """Collect the setup ID, flow ID, flow URL and parameter count for the __repr__ body."""
+        fields: dict[str, int | str | None] = {
             "Setup ID": self.setup_id,
             "Flow ID": self.flow_id,
             "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id),
-            "# of Parameters": (
-                len(self.parameters) if self.parameters is not None else float("nan")
-            ),
+            "# of Parameters": (len(self.parameters) if self.parameters is not None else "nan"),
         }
 
         # determines the order in which the information will be printed
         order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
-        _fields = [(key, fields[key]) for key in order if key in fields]
-
-        longest_field_name_length = max(len(name) for name, _ in _fields)
-        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
-        body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
-        return header + body
+        return [(key, fields[key]) for key in order if key in fields]
 
 
-class OpenMLParameter:
+class OpenMLParameter(ReprMixin):
     """Parameter object (used in setup).
 
     Parameters
@@ -123,11 +116,9 @@ def _to_dict(self) -> dict[str, Any]:
             "value": self.value,
         }
 
-    def __repr__(self) -> str:
-        header = "OpenML Parameter"
-        header = f"{header}\n{'=' * len(header)}\n"
-
-        fields = {
+    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+        """Collect all information to display in the __repr__ body."""
+        fields: dict[str, int | str | None] = {
             "ID": self.id,
             "Flow ID": self.flow_id,
             # "Flow Name": self.flow_name,
@@ -156,9 +147,4 @@ def __repr__(self) -> str:
             parameter_default,
             parameter_value,
         ]
-        _fields = [(key, fields[key]) for key in order if key in fields]
-
-        longest_field_name_length = max(len(name) for name, _ in _fields)
-        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
-        body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
-        return header + body
+        return [(key, fields[key]) for key in order if key in fields]
diff --git a/openml/tasks/split.py b/openml/tasks/split.py
@@ -3,13 +3,16 @@
 
 import pickle
 from collections import OrderedDict
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 from typing_extensions import NamedTuple
 
 import arff  # type: ignore
 import numpy as np
 
+from openml.utils import ReprMixin
+
 
 class Split(NamedTuple):
     """A single split of a dataset."""
@@ -18,7 +21,7 @@ class Split(NamedTuple):
     test: np.ndarray
 
 
-class OpenMLSplit:  # noqa: PLW1641
+class OpenMLSplit(ReprMixin):
     """OpenML Split object.
 
     This class manages train-test splits for a dataset across multiple
@@ -63,6 +66,22 @@ def __init__(
         self.folds = len(self.split[0])
         self.samples = len(self.split[0][0])
 
+    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+        """Return the (label, value) rows shown in the body of ``__repr__``.
+
+        Descriptions over 80 characters are cut to 77 characters plus "...".
+        """
+        summary = self.description
+        if len(summary) > 80:
+            summary = summary[:77] + "..."
+        return [
+            ("Name", self.name),
+            ("Description", summary),
+            ("Repeats", self.repeats),
+            ("Folds", self.folds),
+            ("Samples", self.samples),
+        ]
+
     def __eq__(self, other: Any) -> bool:
         if (
             (not isinstance(self, type(other)))
@@ -90,6 +109,29 @@ def __eq__(self, other: Any) -> bool:
                 return False
         return True
 
+    def __hash__(self) -> int:
+        """Hash the split from its name, description and train/test index arrays.
+
+        Each index array contributes the hash of its ``tobytes()`` output, and
+        every entry is tagged with its (repetition, fold, sample) keys before
+        the collected entries are hashed together with ``name`` and
+        ``description``.
+        """
+        # Keys are visited in sorted order at every nesting level of
+        # ``self.split``.
+        digests = []
+        for rep_id in sorted(self.split):
+            folds = self.split[rep_id]
+            for fold_id in sorted(folds):
+                samples = folds[fold_id]
+                for sample_id in sorted(samples):
+                    train_idx, test_idx = samples[sample_id]
+                    train_digest = hash(train_idx.tobytes())
+                    test_digest = hash(test_idx.tobytes())
+                    digests.append((rep_id, fold_id, sample_id, train_digest, test_digest))
+
+        return hash((self.name, self.description, tuple(digests)))
+
     @classmethod
     def _from_arff_file(cls, filename: Path) -> OpenMLSplit:  # noqa: C901, PLR0912
         repetitions = None

diff --git a/openml/utils.py b/openml/utils.py
@@ -2,9 +2,11 @@
 from __future__ import annotations
 
 import contextlib
+import re
 import shutil
 import warnings
-from collections.abc import Callable, Mapping, Sized
+from abc import ABC, abstractmethod
+from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
 from functools import wraps
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload
@@ -470,3 +472,57 @@ def update(self, length: int) -> None:
         self._progress_bar.update(length)
         if self._progress_bar.total <= self._progress_bar.n:
             self._progress_bar.close()
+
+
+class ReprMixin(ABC):
+    """A mixin class that provides a customizable string representation for OpenML objects.
+
+    This mixin standardizes the __repr__ output format across OpenML classes.
+    Classes inheriting from this mixin should implement the
+    _get_repr_body_fields method to specify which fields to display.
+    """
+
+    def __repr__(self) -> str:
+        body_fields = self._get_repr_body_fields()
+        return self._apply_repr_template(body_fields)
+
+    @abstractmethod
+    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+        """Collect all information to display in the __repr__ body.
+
+        Returns
+        -------
+        body_fields : Sequence[tuple[str, str | int | list[str] | None]]
+            A list of (name, value) pairs to display in the body of the __repr__.
+            E.g.: [('metric', 'accuracy'), ('dataset', 'iris')]
+            Values are rendered with str.format; None is displayed as the string "None".
+        """
+        # Must be implemented by each concrete subclass.
+
+    def _apply_repr_template(
+        self,
+        body_fields: Iterable[tuple[str, str | int | list[str] | None]],
+    ) -> str:
+        """Generates the header and formats the body for string representation of the object.
+
+        Parameters
+        ----------
+        body_fields: Iterable[tuple[str, str | int | list[str] | None]]
+           (name, value) pairs to display in the body of the __repr__. May be empty,
+           in which case only the header is returned.
+        """
+        # Drop the "OpenML" prefix only when the class name actually starts with it.
+        short_name = re.sub(r"^OpenML", "", self.__class__.__name__)
+        # We add spaces between capitals, e.g. ClassificationTask -> Classification Task
+        name_with_spaces = re.sub(r"(\w)([A-Z])", r"\1 \2", short_name)
+        header_text = f"OpenML {name_with_spaces}"
+        header = f"{header_text}\n{'=' * len(header_text)}\n"
+
+        _body_fields: list[tuple[str, str | int | list[str]]] = [
+            (k, "None" if v is None else v) for k, v in body_fields
+        ]
+        # default=0 keeps an empty field list from raising ValueError in max().
+        longest_field_name_length = max((len(name) for name, _ in _body_fields), default=0)
+        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
+        body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
+        return header + body