from __future__ import annotations

import importlib
import inspect
import pathlib
import pkgutil

import pytest

import pandas as pd
from pandas import api
import pandas._testing as tm
from pandas.api import (
    executors as api_executors,
    extensions as api_extensions,
    indexers as api_indexers,
    interchange as api_interchange,
    types as api_types,
    typing as api_typing,
)
from pandas.api.typing import aliases as api_aliases


class Base:
    """Shared helper for tests that pin the exact contents of a namespace."""

    def check(self, namespace, expected, ignored=None):
        """
        Assert that the visible names of ``namespace`` equal ``expected``.

        Names starting with a double underscore and the name
        ``"annotations"`` are never considered. Names listed in ``ignored``
        (if given) are removed before comparing. Both sides are sorted, so
        the order of ``expected`` does not matter.
        """
        visible = [
            attr
            for attr in dir(namespace)
            if not (attr.startswith("__") or attr == "annotations")
        ]
        if ignored is not None:
            visible = set(visible).difference(ignored)
        tm.assert_almost_equal(sorted(visible), sorted(expected))


class TestPDApi(Base):
    """Pin the names exposed by the top-level ``pandas`` namespace."""

    # names that only show up depending on how the tests are run;
    # they are dropped before comparing
    ignored = ["tests", "locale", "conftest", "_version_meson"]

    # public sub-packages reachable from the top level
    public_lib = [
        "api",
        "arrays",
        "options",
        "test",
        "testing",
        "errors",
        "plotting",
        "io",
        "tseries",
    ]
    # names present at the top level that are not part of ``__all__``
    private_lib = ["compat", "core", "pandas", "util", "_built_with_meson"]

    # miscellaneous singletons / helpers
    misc = ["IndexSlice", "NaT", "NA"]

    # classes exposed at the top level
    classes = [
        "ArrowDtype",
        "Categorical",
        "CategoricalIndex",
        "DataFrame",
        "DateOffset",
        "DatetimeIndex",
        "ExcelFile",
        "ExcelWriter",
        "Flags",
        "Grouper",
        "HDFStore",
        "Index",
        "MultiIndex",
        "Period",
        "PeriodIndex",
        "RangeIndex",
        "Series",
        "SparseDtype",
        "StringDtype",
        "Timedelta",
        "TimedeltaIndex",
        "Timestamp",
        "Interval",
        "IntervalIndex",
        "CategoricalDtype",
        "PeriodDtype",
        "IntervalDtype",
        "DatetimeTZDtype",
        "BooleanDtype",
        "Int8Dtype",
        "Int16Dtype",
        "Int32Dtype",
        "Int64Dtype",
        "UInt8Dtype",
        "UInt16Dtype",
        "UInt32Dtype",
        "UInt64Dtype",
        "Float32Dtype",
        "Float64Dtype",
        "NamedAgg",
    ]

    # classes that are deprecated and waiting to be removed
    deprecated_classes: list[str] = []

    # third-party modules re-exported from the pandas namespace
    modules: list[str] = []

    # general-purpose functions exposed at the top level
    funcs = [
        "array",
        "bdate_range",
        "col",
        "concat",
        "crosstab",
        "cut",
        "date_range",
        "interval_range",
        "eval",
        "factorize",
        "get_dummies",
        "from_dummies",
        "infer_freq",
        "isna",
        "isnull",
        "lreshape",
        "melt",
        "notna",
        "notnull",
        "offsets",
        "merge",
        "merge_ordered",
        "merge_asof",
        "period_range",
        "pivot",
        "pivot_table",
        "qcut",
        "show_versions",
        "timedelta_range",
        "unique",
        "wide_to_long",
    ]

    # option-handling functions
    funcs_option = [
        "reset_option",
        "describe_option",
        "get_option",
        "option_context",
        "set_option",
        "set_eng_float_format",
    ]

    # read_* functions
    funcs_read = [
        "read_clipboard",
        "read_csv",
        "read_excel",
        "read_fwf",
        "read_hdf",
        "read_html",
        "read_xml",
        "read_json",
        "read_pickle",
        "read_sas",
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        "read_stata",
        "read_table",
        "read_feather",
        "read_parquet",
        "read_orc",
        "read_spss",
        "read_iceberg",
    ]

    # json helpers
    funcs_json = ["json_normalize"]

    # to_* functions
    funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"]

    # functions slated for deprecation at some later point
    deprecated_funcs_in_future: list[str] = []

    # functions that are deprecated and waiting to be removed
    deprecated_funcs: list[str] = []

    # underscore-prefixed modules visible in the pandas namespace
    private_modules = [
        "_config",
        "_libs",
        "_is_numpy_dev",
        "_pandas_datetime_CAPI",
        "_pandas_parser_CAPI",
        "_testing",
        "_typing",
    ]
    # "_version" is only expected when pandas was not built with meson
    if not pd._built_with_meson:
        private_modules.append("_version")

    def test_api(self):
        """``dir(pd)`` (minus ``ignored``) matches the union of the lists above."""
        everything = [
            *self.public_lib,
            *self.private_lib,
            *self.misc,
            *self.modules,
            *self.classes,
            *self.funcs,
            *self.funcs_option,
            *self.funcs_read,
            *self.funcs_json,
            *self.funcs_to,
            *self.private_modules,
        ]
        self.check(namespace=pd, expected=everything, ignored=self.ignored)

    def test_api_all(self):
        """``pd.__all__`` holds exactly the public, non-deprecated names."""
        public_names = {
            *self.public_lib,
            *self.misc,
            *self.modules,
            *self.classes,
            *self.funcs,
            *self.funcs_option,
            *self.funcs_read,
            *self.funcs_json,
            *self.funcs_to,
        }
        expected = public_names - set(self.deprecated_classes)
        actual = set(pd.__all__)

        # names in __all__ that are not listed here
        extraneous = actual - expected
        assert not extraneous

        # names listed here that __all__ does not export
        missing = expected - actual
        assert not missing

    def test_depr(self):
        """Accessing any deprecated top-level name emits a FutureWarning."""
        deprecated_names = [
            *self.deprecated_classes,
            *self.deprecated_funcs,
            *self.deprecated_funcs_in_future,
        ]
        for deprecated_name in deprecated_names:
            with tm.assert_produces_warning(FutureWarning):
                _ = getattr(pd, deprecated_name)


class TestApi(Base):
    """Pin the names exposed by ``pandas.api`` and its sub-namespaces."""

    allowed_api_dirs = [
        "executors",
        "types",
        "extensions",
        "indexers",
        "interchange",
        "typing",
        "internals",
    ]
    allowed_typing = [
        "DataFrameGroupBy",
        "DatetimeIndexResamplerGroupby",
        "Expanding",
        "ExpandingGroupby",
        "ExponentialMovingWindow",
        "ExponentialMovingWindowGroupby",
        "Expression",
        "FrozenList",
        "JsonReader",
        "NaTType",
        "NAType",
        "NoDefault",
        "PeriodIndexResamplerGroupby",
        "Resampler",
        "Rolling",
        "RollingGroupby",
        "SeriesGroupBy",
        "StataReader",
        "SASReader",
        "TimedeltaIndexResamplerGroupby",
        "TimeGrouper",
        "Window",
        "aliases",
    ]
    allowed_api_types = [
        "is_any_real_numeric_dtype",
        "is_array_like",
        "is_bool",
        "is_bool_dtype",
        "is_categorical_dtype",
        "is_complex",
        "is_complex_dtype",
        "is_datetime64_any_dtype",
        "is_datetime64_dtype",
        "is_datetime64_ns_dtype",
        "is_datetime64tz_dtype",
        "is_dict_like",
        "is_dtype_equal",
        "is_extension_array_dtype",
        "is_file_like",
        "is_float",
        "is_float_dtype",
        "is_hashable",
        "is_int64_dtype",
        "is_integer",
        "is_integer_dtype",
        "is_interval_dtype",
        "is_iterator",
        "is_list_like",
        "is_named_tuple",
        "is_number",
        "is_numeric_dtype",
        "is_object_dtype",
        "is_period_dtype",
        "is_re",
        "is_re_compilable",
        "is_scalar",
        "is_signed_integer_dtype",
        "is_sparse",
        "is_string_dtype",
        "is_timedelta64_dtype",
        "is_timedelta64_ns_dtype",
        "is_unsigned_integer_dtype",
        "pandas_dtype",
        "infer_dtype",
        "union_categoricals",
        "CategoricalDtype",
        "DatetimeTZDtype",
        "IntervalDtype",
        "PeriodDtype",
    ]
    allowed_api_interchange = ["from_dataframe", "DataFrame"]
    allowed_api_indexers = [
        "check_array_indexer",
        "BaseIndexer",
        "FixedForwardWindowIndexer",
        "VariableOffsetWindowIndexer",
    ]
    allowed_api_extensions = [
        "no_default",
        "ExtensionDtype",
        "register_extension_dtype",
        "register_dataframe_accessor",
        "register_index_accessor",
        "register_series_accessor",
        "take",
        "ExtensionArray",
        "ExtensionScalarOpsMixin",
    ]
    allowed_api_executors = ["BaseExecutionEngine"]
    allowed_api_aliases = [
        "AggFuncType",
        "AlignJoin",
        "AnyAll",
        "AnyArrayLike",
        "ArrayLike",
        "AstypeArg",
        "Axes",
        "Axis",
        "CSVEngine",
        "ColspaceArgType",
        "CompressionOptions",
        "CorrelationMethod",
        "DropKeep",
        "Dtype",
        "DtypeArg",
        "DtypeBackend",
        "DtypeObj",
        "ExcelWriterIfSheetExists",
        "ExcelWriterMergeCells",
        "FilePath",
        "FillnaOptions",
        "FloatFormatType",
        "FormattersType",
        "FromDictOrient",
        "HTMLFlavors",
        "IgnoreRaise",
        "IndexLabel",
        "InterpolateOptions",
        "JSONEngine",
        "JSONSerializable",
        "JoinHow",
        "JoinValidate",
        "MergeHow",
        "MergeValidate",
        "NaPosition",
        "NsmallestNlargestKeep",
        "OpenFileErrors",
        "Ordered",
        "ParquetCompressionOptions",
        "QuantileInterpolation",
        "ReadBuffer",
        "ReadCsvBuffer",
        "ReadPickleBuffer",
        "ReindexMethod",
        "Scalar",
        "SequenceNotStr",
        "SliceType",
        "SortKind",
        "StorageOptions",
        "Suffixes",
        "TakeIndexer",
        "TimeAmbiguous",
        "TimeGrouperOrigin",
        "TimeNonexistent",
        "TimeUnit",
        "TimedeltaConvertibleTypes",
        "TimestampConvertibleTypes",
        "ToStataByteorder",
        "ToTimestampHow",
        "UpdateJoin",
        "UsecolsArgType",
        "WindowingRankType",
        "WriteBuffer",
        "WriteExcelBuffer",
        "XMLParsers",
    ]

    def test_api(self):
        self.check(api, self.allowed_api_dirs)

    def test_api_typing(self):
        self.check(api_typing, self.allowed_typing)

    def test_api_types(self):
        self.check(api_types, self.allowed_api_types)

    def test_api_interchange(self):
        self.check(api_interchange, self.allowed_api_interchange)

    def test_api_indexers(self):
        self.check(api_indexers, self.allowed_api_indexers)

    def test_api_extensions(self):
        self.check(api_extensions, self.allowed_api_extensions)

    def test_api_executors(self):
        self.check(api_executors, self.allowed_api_executors)

    def test_api_typing_aliases(self):
        self.check(api_aliases, self.allowed_api_aliases)


class TestErrors(Base):
    def test_errors(self):
        """``pd.errors`` exposes its ``__all__`` plus a few ignored names."""
        not_in_all = ["_CurrentDeprecationWarning", "abc", "ctypes", "cow"]
        self.check(pd.errors, pd.errors.__all__, ignored=not_in_all)


class TestUtil(Base):
    def test_util(self):
        """``pd.util`` exposes the two hashing functions plus ignored names."""
        hashing_funcs = ["hash_array", "hash_pandas_object"]
        non_public = [
            "_decorators",
            "_test_decorators",
            "_exceptions",
            "_validators",
            "capitalize_first_letter",
            "version",
            "_print_versions",
            "_tester",
        ]
        self.check(pd.util, hashing_funcs, ignored=non_public)


class TestTesting(Base):
    funcs = [
        "assert_frame_equal",
        "assert_series_equal",
        "assert_index_equal",
        "assert_extension_array_equal",
    ]

    def test_testing(self):
        from pandas import testing

        self.check(testing, self.funcs)

    def test_util_in_top_level(self):
        # an unknown attribute on pd.util raises AttributeError naming it
        with pytest.raises(AttributeError, match="foo"):
            pd.util.foo


def _defined_in_pandas(obj: object) -> bool:
    """Return True if ``obj.__module__`` is a string starting with "pandas"."""
    owner = getattr(obj, "__module__", None)
    return isinstance(owner, str) and owner.startswith("pandas")


def get_pandas_objects(
    module_name: str, recurse: bool
) -> list[tuple[str, str, object]]:
    """
    Collect the members of a module that belong to pandas.

    A member counts as belonging to pandas when its ``__module__`` attribute
    is a string beginning with ``"pandas"``.

    Parameters
    ----------
    module_name : str
        Dotted name of the module to import and inspect.
    recurse : bool
        If True, also inspect the submodules found next to the module's
        file, skipping those whose name starts with an underscore or is
        ``"internals"``. Sub-packages are searched recursively; plain
        submodules are not.

    Returns
    -------
    list of tuple
        ``(module_name, member_name, member)`` for every matching member,
        where ``module_name`` is the module the member was found in.
    """
    module = importlib.import_module(module_name)
    collected = [
        (module_name, member_name, member)
        for member_name, member in inspect.getmembers(module)
        if _defined_in_pandas(member)
    ]

    if recurse:
        # __file__ can, but shouldn't, be None
        module_file = module.__file__
        assert isinstance(module_file, str)
        search_dirs = [pathlib.Path(module_file).parent]

        for info in pkgutil.walk_packages(search_dirs):
            submodule = info.name
            if submodule.startswith("_") or submodule == "internals":
                continue
            collected.extend(
                get_pandas_objects(
                    f"{module.__name__}.{submodule}", recurse=info.ispkg
                )
            )

    return collected


@pytest.mark.slow
@pytest.mark.parametrize(
    "module_name",
    [
        "pandas",
        "pandas.api",
        "pandas.arrays",
        "pandas.errors",
        pytest.param("pandas.io", marks=pytest.mark.xfail(reason="Private imports")),
        "pandas.plotting",
        "pandas.testing",
    ],
)
def test_attributes_module(module_name):
    """
    Check that every public object reports the module it was found in as
    its ``__module__``, and that it can be fetched from that module.
    """
    # "pandas" and "pandas.testing" are inspected without descending
    recurse = module_name not in ["pandas", "pandas.testing"]
    objs = get_pandas_objects(module_name, recurse=recurse)

    # Pass 1: __module__ must equal the module the object was found in
    misplaced = []
    for found_in, name, obj in objs:
        owner = obj.__module__
        if owner == found_in:
            continue
        # Explicit exceptions: dtypes and Categorical may report "pandas"
        if owner == "pandas" and ("Dtype" in name or name == "Categorical"):
            continue
        misplaced.append((found_in, name, type(obj), owner))
    assert len(misplaced) == 0, "\n".join(str(e) for e in misplaced)

    # Pass 2: each object must be retrievable from its declared __module__
    unreachable = []
    for found_in, name, obj in objs:
        declared = importlib.import_module(obj.__module__)
        try:
            getattr(declared, name)
        except Exception:
            unreachable.append((found_in, name, type(obj), obj.__module__))
    assert len(unreachable) == 0, "\n".join(str(e) for e in unreachable)