"""
Utility functions and objects for implementing the interchange API.
"""

from __future__ import annotations

import typing

import numpy as np

from pandas._libs import lib

from pandas.core.dtypes.dtypes import (
    ArrowDtype,
    CategoricalDtype,
    DatetimeTZDtype,
)

if typing.TYPE_CHECKING:
    from pandas._typing import DtypeObj


# Maps str(pyarrow.DataType) = C type format string
# Currently, no pyarrow API for this
PYARROW_CTYPES = {
    "null": "n",
    "bool": "b",
    "uint8": "C",
    "uint16": "S",
    "uint32": "I",
    "uint64": "L",
    "int8": "c",
    "int16": "S",
    "int32": "i",
    "int64": "l",
    "halffloat": "e",  # float16
    "float": "f",  # float32
    "double": "g",  # float64
    "string": "u",
    "binary": "z",
    "time32[s]": "tts",
    "time32[ms]": "ttm",
    "time64[us]": "ttu",
    "time64[ns]": "ttn",
    "date32[day]": "tdD",
    "date64[ms]": "tdm",
    "timestamp[s]": "tss:",
    "timestamp[ms]": "tsm:",
    "timestamp[us]": "tsu:",
    "timestamp[ns]": "tsn:",
    "duration[s]": "tDs",
    "duration[ms]": "tDm",
    "duration[us]": "tDu",
    "duration[ns]": "tDn",
}


class ArrowCTypes:
    """
    Enum for Apache Arrow C type format strings.

    The Arrow C data interface:
    https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
    """

    NULL = "n"
    BOOL = "b"
    INT8 = "c"
    UINT8 = "C"
    INT16 = "s"
    UINT16 = "S"
    INT32 = "i"
    UINT32 = "I"
    INT64 = "l"
    UINT64 = "L"
    FLOAT16 = "e"
    FLOAT32 = "f"
    FLOAT64 = "g"
    STRING = "u"  # utf-8
    LARGE_STRING = "U"  # utf-8
    DATE32 = "tdD"
    DATE64 = "tdm"
    # Resoulution:
    #   - seconds -> 's'
    #   - milliseconds -> 'm'
    #   - microseconds -> 'u'
    #   - nanoseconds -> 'n'
    TIMESTAMP = "ts{resolution}:{tz}"
    TIME = "tt{resolution}"


class Endianness:
    """Enum indicating the byte-order of a data-type."""

    LITTLE = "<"
    BIG = ">"
    NATIVE = "="
    NA = "|"


def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
    """
    Represent pandas `dtype` as a format string in Apache Arrow C notation.

    Parameters
    ----------
    dtype : np.dtype
        Datatype of pandas DataFrame to represent.

    Returns
    -------
    str
        Format string in Apache Arrow C notation of the given `dtype`.
    """
    if isinstance(dtype, CategoricalDtype):
        return ArrowCTypes.INT64
    elif dtype == np.dtype("O"):
        return ArrowCTypes.STRING
    elif isinstance(dtype, ArrowDtype):
        import pyarrow as pa

        pa_type = dtype.pyarrow_dtype
        if pa.types.is_decimal(pa_type):
            return f"d:{pa_type.precision},{pa_type.scale}"
        elif pa.types.is_timestamp(pa_type) and pa_type.tz is not None:
            return f"ts{pa_type.unit[0]}:{pa_type.tz}"
        format_str = PYARROW_CTYPES.get(str(pa_type), None)
        if format_str is not None:
            return format_str

    format_str = getattr(ArrowCTypes, dtype.name.upper(), None)
    if format_str is not None:
        return format_str

    if lib.is_np_dtype(dtype, "M"):
        # Selecting the first char of resolution string:
        # dtype.str -> '<M8[ns]' -> 'n'
        resolution = np.datetime_data(dtype)[0][0]
        return ArrowCTypes.TIMESTAMP.format(resolution=resolution, tz="")

    elif isinstance(dtype, DatetimeTZDtype):
        return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz)

    raise NotImplementedError(
        f"Conversion of {dtype} to Arrow C format string is not implemented."
    )
