import datetime
from datetime import timedelta
from decimal import Decimal
from io import StringIO
import json
import os
import sys
import time

import numpy as np
import pytest

from pandas.compat import IS64
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    NA,
    DataFrame,
    DatetimeIndex,
    Series,
    Timestamp,
    read_json,
)
import pandas._testing as tm
from pandas.core.arrays import (
    ArrowStringArray,
    StringArray,
)


def test_literal_json_deprecation():
    # PR 53409
    expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])

    jsonl = """{"a": 1, "b": 2}
        {"a": 3, "b": 4}
        {"a": 5, "b": 6}
        {"a": 7, "b": 8}"""

    msg = (
        "Passing literal json to 'read_json' is deprecated and "
        "will be removed in a future version. To read from a "
        "literal string, wrap it in a 'StringIO' object."
    )

    with tm.assert_produces_warning(FutureWarning, match=msg):
        try:
            read_json(jsonl, lines=False)
        except ValueError:
            pass

    with tm.assert_produces_warning(FutureWarning, match=msg):
        read_json(expected.to_json(), lines=False)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        tm.assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        try:
            result = read_json(
                '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n',
                lines=False,
            )
        except ValueError:
            pass

    with tm.assert_produces_warning(FutureWarning, match=msg):
        try:
            result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=False)
        except ValueError:
            pass
        tm.assert_frame_equal(result, expected)


def assert_json_roundtrip_equal(result, expected, orient):
    if orient in ("records", "values"):
        expected = expected.reset_index(drop=True)
    if orient == "values":
        expected.columns = range(len(expected.columns))
    tm.assert_frame_equal(result, expected)


class TestPandasContainer:
    @pytest.fixture
    def categorical_frame(self):
        _seriesd = tm.getSeriesData()

        _cat_frame = DataFrame(_seriesd)

        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
        _cat_frame.index = pd.CategoricalIndex(cat, name="E")
        _cat_frame["E"] = list(reversed(cat))
        _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
        return _cat_frame

    @pytest.fixture
    def datetime_series(self):
        # Same as usual datetime_series, but with index freq set to None,
        #  since that doesn't round-trip, see GH#33711
        ser = tm.makeTimeSeries()
        ser.name = "ts"
        ser.index = ser.index._with_freq(None)
        return ser

    @pytest.fixture
    def datetime_frame(self):
        # Same as usual datetime_frame, but with index freq set to None,
        #  since that doesn't round-trip, see GH#33711
        df = DataFrame(tm.getTimeSeriesData())
        df.index = df.index._with_freq(None)
        return df

    def test_frame_double_encoded_labels(self, orient):
        df = DataFrame(
            [["a", "b"], ["c", "d"]],
            index=['index " 1', "index / 2"],
            columns=["a \\ b", "y / z"],
        )

        data = StringIO(df.to_json(orient=orient))
        result = read_json(data, orient=orient)
        expected = df.copy()
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("orient", ["split", "records", "values"])
    def test_frame_non_unique_index(self, orient):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
        data = StringIO(df.to_json(orient=orient))
        result = read_json(data, orient=orient)
        expected = df.copy()

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("orient", ["index", "columns"])
    def test_frame_non_unique_index_raises(self, orient):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
        msg = f"DataFrame index must be unique for orient='{orient}'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient)

    @pytest.mark.parametrize("orient", ["split", "values"])
    @pytest.mark.parametrize(
        "data",
        [
            [["a", "b"], ["c", "d"]],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
        ],
    )
    def test_frame_non_unique_columns(self, orient, data):
        df = DataFrame(data, index=[1, 2], columns=["x", "x"])

        result = read_json(
            StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"]
        )
        if orient == "values":
            expected = DataFrame(data)
            if expected.iloc[:, 0].dtype == "datetime64[ns]":
                # orient == "values" by default will write Timestamp objects out
                # in milliseconds; these are internally stored in nanosecond,
                # so divide to get where we need
                # TODO: a to_epoch method would also solve; see GH 14772
                expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000
        elif orient == "split":
            expected = df
            expected.columns = ["x", "x.1"]

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("orient", ["index", "columns", "records"])
    def test_frame_non_unique_columns_raises(self, orient):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

        msg = f"DataFrame columns must be unique for orient='{orient}'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient)

    def test_frame_default_orient(self, float_frame):
        assert float_frame.to_json() == float_frame.to_json(orient="columns")

    @pytest.mark.parametrize("dtype", [False, float])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame):
        data = StringIO(float_frame.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)

        expected = float_frame

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("dtype", [False, np.int64])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame):
        data = StringIO(int_frame.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
        expected = int_frame
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("dtype", [None, np.float64, int, "U3"])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_str_axes(self, orient, convert_axes, dtype):
        df = DataFrame(
            np.zeros((200, 4)),
            columns=[str(i) for i in range(4)],
            index=[str(i) for i in range(200)],
            dtype=dtype,
        )

        data = StringIO(df.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)

        expected = df.copy()
        if not dtype:
            expected = expected.astype(np.int64)

        # index columns, and records orients cannot fully preserve the string
        # dtype for axes as the index and column labels are used as keys in
        # JSON objects. JSON keys are by definition strings, so there's no way
        # to disambiguate whether those keys actually were strings or numeric
        # beforehand and numeric wins out.
        if convert_axes and (orient in ("index", "columns")):
            expected.columns = expected.columns.astype(np.int64)
            expected.index = expected.index.astype(np.int64)
        elif orient == "records" and convert_axes:
            expected.columns = expected.columns.astype(np.int64)
        elif convert_axes and orient == "split":
            expected.columns = expected.columns.astype(np.int64)

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_categorical(
        self, request, orient, categorical_frame, convert_axes
    ):
        # TODO: create a better frame to test with and improve coverage
        if orient in ("index", "columns"):
            request.node.add_marker(
                pytest.mark.xfail(
                    reason=f"Can't have duplicate index values for orient '{orient}')"
                )
            )

        data = StringIO(categorical_frame.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes)

        expected = categorical_frame.copy()
        expected.index = expected.index.astype(str)  # Categorical not preserved
        expected.index.name = None  # index names aren't preserved in JSON
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_empty(self, orient, convert_axes):
        empty_frame = DataFrame()
        data = StringIO(empty_frame.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes)
        if orient == "split":
            idx = pd.Index([], dtype=(float if convert_axes else object))
            expected = DataFrame(index=idx, columns=idx)
        elif orient in ["index", "columns"]:
            expected = DataFrame()
        else:
            expected = empty_frame.copy()

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
        # TODO: improve coverage with date_format parameter
        data = StringIO(datetime_frame.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes)
        expected = datetime_frame.copy()

        if not convert_axes:  # one off for ts handling
            # DTI gets converted to epoch values
            idx = expected.index.view(np.int64) // 1000000
            if orient != "split":  # TODO: handle consistently across orients
                idx = idx.astype(str)

            expected.index = idx

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_mixed(self, orient, convert_axes):
        index = pd.Index(["a", "b", "c", "d", "e"])
        values = {
            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
            "D": [True, False, True, False, True],
        }

        df = DataFrame(data=values, index=index)

        data = StringIO(df.to_json(orient=orient))
        result = read_json(data, orient=orient, convert_axes=convert_axes)

        expected = df.copy()
        expected = expected.assign(**expected.select_dtypes("number").astype(np.int64))

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.xfail(
        reason="#50456 Column multiindex is stored and loaded differently",
        raises=AssertionError,
    )
    @pytest.mark.parametrize(
        "columns",
        [
            [["2022", "2022"], ["JAN", "FEB"]],
            [["2022", "2023"], ["JAN", "JAN"]],
            [["2022", "2022"], ["JAN", "JAN"]],
        ],
    )
    def test_roundtrip_multiindex(self, columns):
        df = DataFrame(
            [[1, 2], [3, 4]],
            columns=pd.MultiIndex.from_arrays(columns),
        )
        data = StringIO(df.to_json(orient="split"))
        result = read_json(data, orient="split")
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize(
        "data,msg,orient",
        [
            ('{"key":b:a:d}', "Expected object or value", "columns"),
            # too few indices
            (
                '{"columns":["A","B"],'
                '"index":["2","3"],'
                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
                "|".join(
                    [
                        r"Length of values \(3\) does not match length of index \(2\)",
                    ]
                ),
                "split",
            ),
            # too many columns
            (
                '{"columns":["A","B","C"],'
                '"index":["1","2","3"],'
                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
                "3 columns passed, passed data had 2 columns",
                "split",
            ),
            # bad key
            (
                '{"badkey":["A","B"],'
                '"index":["2","3"],'
                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
                r"unexpected key\(s\): badkey",
                "split",
            ),
        ],
    )
    def test_frame_from_json_bad_data_raises(self, data, msg, orient):
        with pytest.raises(ValueError, match=msg):
            read_json(StringIO(data), orient=orient)

    @pytest.mark.parametrize("dtype", [True, False])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_frame_from_json_missing_data(self, orient, convert_axes, dtype):
        num_df = DataFrame([[1, 2], [4, 5, 6]])

        result = read_json(
            StringIO(num_df.to_json(orient=orient)),
            orient=orient,
            convert_axes=convert_axes,
            dtype=dtype,
        )
        assert np.isnan(result.iloc[0, 2])

        obj_df = DataFrame([["1", "2"], ["4", "5", "6"]])
        result = read_json(
            StringIO(obj_df.to_json(orient=orient)),
            orient=orient,
            convert_axes=convert_axes,
            dtype=dtype,
        )
        assert np.isnan(result.iloc[0, 2])

    @pytest.mark.parametrize("dtype", [True, False])
    def test_frame_read_json_dtype_missing_value(self, dtype):
        # GH28501 Parse missing values using read_json with dtype=False
        # to NaN instead of None
        result = read_json(StringIO("[null]"), dtype=dtype)
        expected = DataFrame([np.nan])

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("inf", [np.inf, -np.inf])
    @pytest.mark.parametrize("dtype", [True, False])
    def test_frame_infinity(self, inf, dtype):
        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = inf

        data = StringIO(df.to_json())
        result = read_json(data, dtype=dtype)
        assert np.isnan(result.iloc[0, 2])

    @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
    @pytest.mark.parametrize(
        "value,precision,expected_val",
        [
            (0.95, 1, 1.0),
            (1.95, 1, 2.0),
            (-1.95, 1, -2.0),
            (0.995, 2, 1.0),
            (0.9995, 3, 1.0),
            (0.99999999999999944, 15, 1.0),
        ],
    )
    def test_frame_to_json_float_precision(self, value, precision, expected_val):
        df = DataFrame([{"a_float": value}])
        encoded = df.to_json(double_precision=precision)
        assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}'

    def test_frame_to_json_except(self):
        df = DataFrame([1, 2, 3])
        msg = "Invalid value 'garbage' for option 'orient'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient="garbage")

    def test_frame_empty(self):
        df = DataFrame(columns=["jim", "joe"])
        assert not df._is_mixed_type

        data = StringIO(df.to_json())
        result = read_json(data, dtype=dict(df.dtypes))
        tm.assert_frame_equal(result, df, check_index_type=False)

    def test_frame_empty_to_json(self):
        # GH 7445
        df = DataFrame({"test": []}, index=[])
        result = df.to_json(orient="columns")
        expected = '{"test":{}}'
        assert result == expected

    def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=["jim", "joe"])
        df["joe"] = df["joe"].astype("i8")
        assert df._is_mixed_type
        data = df.to_json()
        tm.assert_frame_equal(
            read_json(StringIO(data), dtype=dict(df.dtypes)),
            df,
            check_index_type=False,
        )

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [
            [10, 1, "foo", 0.1, 0.01],
            [20, 2, "bar", 0.2, 0.02],
            [30, 3, "baz", 0.3, 0.03],
            [40, 4, "qux", 0.4, 0.04],
        ]

        df = DataFrame(
            vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"]
        )

        assert df._is_mixed_type
        right = df.copy()

        for orient in ["split", "index", "columns"]:
            inp = StringIO(df.to_json(orient=orient))
            left = read_json(inp, orient=orient, convert_axes=False)
            tm.assert_frame_equal(left, right)

        right.index = pd.RangeIndex(len(df))
        inp = StringIO(df.to_json(orient="records"))
        left = read_json(inp, orient="records", convert_axes=False)
        tm.assert_frame_equal(left, right)

        right.columns = pd.RangeIndex(df.shape[1])
        inp = StringIO(df.to_json(orient="values"))
        left = read_json(inp, orient="values", convert_axes=False)
        tm.assert_frame_equal(left, right)

    def test_v12_compat(self, datapath):
        dti = pd.date_range("2000-01-03", "2000-01-07")
        # freq doesn't roundtrip
        dti = DatetimeIndex(np.asarray(dti), freq=None)
        df = DataFrame(
            [
                [1.56808523, 0.65727391, 1.81021139, -0.17251653],
                [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
                [1.51493992, 0.11805825, 1.629455, -1.31506612],
                [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
                [0.05951614, -2.69652057, 1.28163262, 0.34703478],
            ],
            columns=["A", "B", "C", "D"],
            index=dti,
        )
        df["date"] = Timestamp("19920106 18:21:32.12").as_unit("ns")
        df.iloc[3, df.columns.get_loc("date")] = Timestamp("20130101")
        df["modified"] = df["date"]
        df.iloc[1, df.columns.get_loc("modified")] = pd.NaT

        dirpath = datapath("io", "json", "data")
        v12_json = os.path.join(dirpath, "tsframe_v012.json")
        df_unser = read_json(v12_json)
        tm.assert_frame_equal(df, df_unser)

        df_iso = df.drop(["modified"], axis=1)
        v12_iso_json = os.path.join(dirpath, "tsframe_iso_v012.json")
        df_unser_iso = read_json(v12_iso_json)
        tm.assert_frame_equal(df_iso, df_unser_iso)

    def test_blocks_compat_GH9037(self):
        index = pd.date_range("20000101", periods=10, freq="H")
        # freq doesn't round-trip
        index = DatetimeIndex(list(index), freq=None)

        df_mixed = DataFrame(
            {
                "float_1": [
                    -0.92077639,
                    0.77434435,
                    1.25234727,
                    0.61485564,
                    -0.60316077,
                    0.24653374,
                    0.28668979,
                    -2.51969012,
                    0.95748401,
                    -1.02970536,
                ],
                "int_1": [
                    19680418,
                    75337055,
                    99973684,
                    65103179,
                    79373900,
                    40314334,
                    21290235,
                    4991321,
                    41903419,
                    16008365,
                ],
                "str_1": [
                    "78c608f1",
                    "64a99743",
                    "13d2ff52",
                    "ca7f4af2",
                    "97236474",
                    "bde7e214",
                    "1a6bde47",
                    "b1190be5",
                    "7a669144",
                    "8d64d068",
                ],
                "float_2": [
                    -0.0428278,
                    -1.80872357,
                    3.36042349,
                    -0.7573685,
                    -0.48217572,
                    0.86229683,
                    1.08935819,
                    0.93898739,
                    -0.03030452,
                    1.43366348,
                ],
                "str_2": [
                    "14f04af9",
                    "d085da90",
                    "4bcfac83",
                    "81504caf",
                    "2ffef4a9",
                    "08e2f5c4",
                    "07e1af03",
                    "addbd4a7",
                    "1f6a09ba",
                    "4bfc4d87",
                ],
                "int_2": [
                    86967717,
                    98098830,
                    51927505,
                    20372254,
                    12601730,
                    20884027,
                    34193846,
                    10561746,
                    24867120,
                    76131025,
                ],
            },
            index=index,
        )

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype(np.str_)
        data = StringIO(df_mixed.to_json(orient="split"))
        df_roundtrip = read_json(data, orient="split")
        tm.assert_frame_equal(
            df_mixed,
            df_roundtrip,
            check_index_type=True,
            check_column_type=True,
            by_blocks=True,
            check_exact=True,
        )

    def test_frame_nonprintable_bytes(self):
        # GH14256: failing column caused segfaults, if it is not the last one

        class BinaryThing:
            def __init__(self, hexed) -> None:
                self.hexed = hexed
                self.binary = bytes.fromhex(hexed)

            def __str__(self) -> str:
                return self.hexed

        hexed = "574b4454ba8c5eb4f98a8f45"
        binthing = BinaryThing(hexed)

        # verify the proper conversion of printable content
        df_printable = DataFrame({"A": [binthing.hexed]})
        assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}'

        # check if non-printable content throws appropriate Exception
        df_nonprintable = DataFrame({"A": [binthing]})
        msg = "Unsupported UTF-8 sequence length when encoding string"
        with pytest.raises(OverflowError, match=msg):
            df_nonprintable.to_json()

        # the same with multiple columns threw segfaults
        df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"])
        with pytest.raises(OverflowError, match=msg):
            df_mixed.to_json()

        # default_handler should resolve exceptions for non-string types
        result = df_nonprintable.to_json(default_handler=str)
        expected = f'{{"A":{{"0":"{hexed}"}}}}'
        assert result == expected
        assert (
            df_mixed.to_json(default_handler=str)
            == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
        )

    def test_label_overflow(self):
        # GH14256: buffer length not checked when writing label
        result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json()
        expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'
        assert result == expected

    def test_series_non_unique_index(self):
        s = Series(["a", "b"], index=[1, 1])

        msg = "Series index must be unique for orient='index'"
        with pytest.raises(ValueError, match=msg):
            s.to_json(orient="index")

        tm.assert_series_equal(
            s,
            read_json(
                StringIO(s.to_json(orient="split")), orient="split", typ="series"
            ),
        )
        unserialized = read_json(
            StringIO(s.to_json(orient="records")), orient="records", typ="series"
        )
        tm.assert_numpy_array_equal(s.values, unserialized.values)

    def test_series_default_orient(self, string_series):
        assert string_series.to_json() == string_series.to_json(orient="index")

    def test_series_roundtrip_simple(self, orient, string_series):
        data = StringIO(string_series.to_json(orient=orient))
        result = read_json(data, typ="series", orient=orient)

        expected = string_series
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)
        if orient != "split":
            expected.name = None

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [False, None])
    def test_series_roundtrip_object(self, orient, dtype, object_series):
        data = StringIO(object_series.to_json(orient=orient))
        result = read_json(data, typ="series", orient=orient, dtype=dtype)

        expected = object_series
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)
        if orient != "split":
            expected.name = None

        tm.assert_series_equal(result, expected)

    def test_series_roundtrip_empty(self, orient):
        empty_series = Series([], index=[], dtype=np.float64)
        data = StringIO(empty_series.to_json(orient=orient))
        result = read_json(data, typ="series", orient=orient)

        expected = empty_series.reset_index(drop=True)
        if orient in ("split"):
            expected.index = expected.index.astype(np.float64)

        tm.assert_series_equal(result, expected)

    def test_series_roundtrip_timeseries(self, orient, datetime_series):
        data = StringIO(datetime_series.to_json(orient=orient))
        result = read_json(data, typ="series", orient=orient)

        expected = datetime_series
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)
        if orient != "split":
            expected.name = None

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [np.float64, int])
    def test_series_roundtrip_numeric(self, orient, dtype):
        s = Series(range(6), index=["a", "b", "c", "d", "e", "f"])
        data = StringIO(s.to_json(orient=orient))
        result = read_json(data, typ="series", orient=orient)

        expected = s.copy()
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)

        tm.assert_series_equal(result, expected)

    def test_series_to_json_except(self):
        s = Series([1, 2, 3])
        msg = "Invalid value 'garbage' for option 'orient'"
        with pytest.raises(ValueError, match=msg):
            s.to_json(orient="garbage")

    def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(StringIO(s.to_json()), typ="series", precise_float=True)
        tm.assert_series_equal(result, s, check_index_type=False)

    def test_series_with_dtype(self):
        # GH 21986
        s = Series([4.56, 4.56, 4.56])
        result = read_json(StringIO(s.to_json()), typ="series", dtype=np.int64)
        expected = Series([4] * 3)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (True, Series(["2000-01-01"], dtype="datetime64[ns]")),
            (False, Series([946684800000])),
        ],
    )
    def test_series_with_dtype_datetime(self, dtype, expected):
        s = Series(["2000-01-01"], dtype="datetime64[ns]")
        data = StringIO(s.to_json())
        result = read_json(data, typ="series", dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_frame_from_json_precise_float(self):
        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
        result = read_json(StringIO(df.to_json()), precise_float=True)
        tm.assert_frame_equal(result, df)

    def test_typ(self):
        s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64")
        result = read_json(StringIO(s.to_json()), typ=None)
        tm.assert_series_equal(result, s)

    def test_reconstruction_index(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(StringIO(df.to_json()))
        tm.assert_frame_equal(result, df)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(StringIO(df.to_json()))
        tm.assert_frame_equal(result, df)

    def test_path(self, float_frame, int_frame, datetime_frame):
        with tm.ensure_clean("test.json") as path:
            for df in [float_frame, int_frame, datetime_frame]:
                df.to_json(path)
                read_json(path)

    def test_axis_dates(self, datetime_series, datetime_frame):
        # frame
        json = StringIO(datetime_frame.to_json())
        result = read_json(json)
        tm.assert_frame_equal(result, datetime_frame)

        # series
        json = StringIO(datetime_series.to_json())
        result = read_json(json, typ="series")
        tm.assert_series_equal(result, datetime_series, check_names=False)
        assert result.name is None

    def test_convert_dates(self, datetime_series, datetime_frame):
        # frame
        df = datetime_frame
        df["date"] = Timestamp("20130101").as_unit("ns")

        json = StringIO(df.to_json())
        result = read_json(json)
        tm.assert_frame_equal(result, df)

        df["foo"] = 1.0
        json = StringIO(df.to_json(date_unit="ns"))

        result = read_json(json, convert_dates=False)
        expected = df.copy()
        expected["date"] = expected["date"].values.view("i8")
        expected["foo"] = expected["foo"].astype("int64")
        tm.assert_frame_equal(result, expected)

        # series
        ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index)
        json = StringIO(ts.to_json())
        result = read_json(json, typ="series")
        tm.assert_series_equal(result, ts)

    @pytest.mark.parametrize("date_format", ["epoch", "iso"])
    @pytest.mark.parametrize("as_object", [True, False])
    @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, Timestamp])
    def test_date_index_and_values(self, date_format, as_object, date_typ):
        data = [date_typ(year=2020, month=1, day=1), pd.NaT]
        if as_object:
            data.append("a")

        ser = Series(data, index=data)
        result = ser.to_json(date_format=date_format)

        if date_format == "epoch":
            expected = '{"1577836800000":1577836800000,"null":null}'
        else:
            expected = (
                '{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}'
            )

        if as_object:
            expected = expected.replace("}", ',"a":"a"}')

        assert result == expected

    @pytest.mark.parametrize(
        "infer_word",
        [
            "trade_time",
            "date",
            "datetime",
            "sold_at",
            "modified",
            "timestamp",
            "timestamps",
        ],
    )
    def test_convert_dates_infer(self, infer_word):
        # GH10747
        from pandas.io.json import dumps

        data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
        expected = DataFrame(
            [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
        )

        result = read_json(StringIO(dumps(data)))[["id", infer_word]]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "date,date_unit",
        [
            ("20130101 20:43:42.123", None),
            ("20130101 20:43:42", "s"),
            ("20130101 20:43:42.123", "ms"),
            ("20130101 20:43:42.123456", "us"),
            ("20130101 20:43:42.123456789", "ns"),
        ],
    )
    def test_date_format_frame(self, date, date_unit, datetime_frame):
        df = datetime_frame

        df["date"] = Timestamp(date).as_unit("ns")
        df.iloc[1, df.columns.get_loc("date")] = pd.NaT
        df.iloc[5, df.columns.get_loc("date")] = pd.NaT
        if date_unit:
            json = df.to_json(date_format="iso", date_unit=date_unit)
        else:
            json = df.to_json(date_format="iso")

        result = read_json(StringIO(json))
        expected = df.copy()
        tm.assert_frame_equal(result, expected)

    def test_date_format_frame_raises(self, datetime_frame):
        df = datetime_frame
        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(date_format="iso", date_unit="foo")

    @pytest.mark.parametrize(
        "date,date_unit",
        [
            ("20130101 20:43:42.123", None),
            ("20130101 20:43:42", "s"),
            ("20130101 20:43:42.123", "ms"),
            ("20130101 20:43:42.123456", "us"),
            ("20130101 20:43:42.123456789", "ns"),
        ],
    )
    def test_date_format_series(self, date, date_unit, datetime_series):
        ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
        ts.iloc[1] = pd.NaT
        ts.iloc[5] = pd.NaT
        if date_unit:
            json = ts.to_json(date_format="iso", date_unit=date_unit)
        else:
            json = ts.to_json(date_format="iso")

        result = read_json(StringIO(json), typ="series")
        expected = ts.copy()
        tm.assert_series_equal(result, expected)

    def test_date_format_series_raises(self, datetime_series):
        ts = Series(Timestamp("20130101 20:43:42.123"), index=datetime_series.index)
        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ts.to_json(date_format="iso", date_unit="foo")

    @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
    def test_date_unit(self, unit, datetime_frame):
        df = datetime_frame
        df["date"] = Timestamp("20130101 20:43:42").as_unit("ns")
        dl = df.columns.get_loc("date")
        df.iloc[1, dl] = Timestamp("19710101 20:43:42")
        df.iloc[2, dl] = Timestamp("21460101 20:43:42")
        df.iloc[4, dl] = pd.NaT

        json = df.to_json(date_format="epoch", date_unit=unit)

        # force date unit
        result = read_json(StringIO(json), date_unit=unit)
        tm.assert_frame_equal(result, df)

        # detect date unit
        result = read_json(StringIO(json), date_unit=None)
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize("unit", ["s", "ms", "us"])
    def test_iso_non_nano_datetimes(self, unit):
        # Test that numpy datetimes
        # in an Index or a column with non-nano resolution can be serialized
        # correctly
        # GH53686
        index = DatetimeIndex(
            [np.datetime64("2023-01-01T11:22:33.123456", unit)],
            dtype=f"datetime64[{unit}]",
        )
        df = DataFrame(
            {
                "date": Series(
                    [np.datetime64("2022-01-01T11:22:33.123456", unit)],
                    dtype=f"datetime64[{unit}]",
                    index=index,
                ),
                "date_obj": Series(
                    [np.datetime64("2023-01-01T11:22:33.123456", unit)],
                    dtype=object,
                    index=index,
                ),
            },
        )

        buf = StringIO()
        df.to_json(buf, date_format="iso", date_unit=unit)
        buf.seek(0)

        # read_json always reads datetimes in nanosecond resolution
        # TODO: check_dtype/check_index_type should be removable
        # once read_json gets non-nano support
        tm.assert_frame_equal(
            read_json(buf, convert_dates=["date", "date_obj"]),
            df,
            check_index_type=False,
            check_dtype=False,
        )

    def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r"""{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }"""
        read_json(StringIO(s))

    def test_doc_example(self):
        dfj2 = DataFrame(
            np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
        )
        dfj2["date"] = Timestamp("20130101")
        dfj2["ints"] = range(5)
        dfj2["bools"] = True
        dfj2.index = pd.date_range("20130101", periods=5)

        json = StringIO(dfj2.to_json())
        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
        tm.assert_frame_equal(result, result)

    def test_round_trip_exception(self, datapath):
        # GH 3867
        path = datapath("io", "json", "data", "teams.csv")
        df = pd.read_csv(path)
        s = df.to_json()

        result = read_json(StringIO(s))
        res = result.reindex(index=df.index, columns=df.columns)
        msg = "The 'downcast' keyword in fillna is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = res.fillna(np.nan, downcast=False)
        tm.assert_frame_equal(res, df)

    @pytest.mark.network
    @pytest.mark.single_cpu
    @pytest.mark.parametrize(
        "field,dtype",
        [
            ["created_at", pd.DatetimeTZDtype(tz="UTC")],
            ["closed_at", "datetime64[ns]"],
            ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
        ],
    )
    def test_url(self, field, dtype, httpserver):
        data = '{"created_at": ["2023-06-23T18:21:36Z"], "closed_at": ["2023-06-23T18:21:36"], "updated_at": ["2023-06-23T18:21:36Z"]}\n'  # noqa: E501
        httpserver.serve_content(content=data)
        result = read_json(httpserver.url, convert_dates=True)
        assert result[field].dtype == dtype

    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit="ms")

        ser = Series([timedelta(23), timedelta(seconds=5)])
        assert ser.dtype == "timedelta64[ns]"

        result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
        tm.assert_series_equal(result, ser)

        ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1]))
        assert ser.dtype == "timedelta64[ns]"
        result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
        tm.assert_series_equal(result, ser)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        assert frame[0].dtype == "timedelta64[ns]"
        tm.assert_frame_equal(
            frame, read_json(StringIO(frame.to_json())).apply(converter)
        )

    def test_timedelta2(self):
        frame = DataFrame(
            {
                "a": [timedelta(days=23), timedelta(seconds=5)],
                "b": [1, 2],
                "c": pd.date_range(start="20130101", periods=2),
            }
        )
        data = StringIO(frame.to_json(date_unit="ns"))
        result = read_json(data)
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        tm.assert_frame_equal(frame, result)

    def test_mixed_timedelta_datetime(self):
        td = timedelta(23)
        ts = Timestamp("20130101")
        frame = DataFrame({"a": [td, ts]}, dtype=object)

        expected = DataFrame(
            {"a": [pd.Timedelta(td).as_unit("ns")._value, ts.as_unit("ns")._value]}
        )
        data = StringIO(frame.to_json(date_unit="ns"))
        result = read_json(data, dtype={"a": "int64"})
        tm.assert_frame_equal(result, expected, check_index_type=False)

    @pytest.mark.parametrize("as_object", [True, False])
    @pytest.mark.parametrize("date_format", ["iso", "epoch"])
    @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
    def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
        # GH28156: to_json not correctly formatting Timedelta
        data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
        if as_object:
            data.append("a")

        ser = Series(data, index=data)
        if date_format == "iso":
            expected = (
                '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
            )
        else:
            expected = '{"86400000":86400000,"172800000":172800000,"null":null}'

        if as_object:
            expected = expected.replace("}", ',"a":"a"}')

        result = ser.to_json(date_format=date_format)
        assert result == expected

    def test_default_handler(self):
        value = object()
        frame = DataFrame({"a": [7, value]})
        expected = DataFrame({"a": [7, str(value)]})
        result = read_json(StringIO(frame.to_json(default_handler=str)))
        tm.assert_frame_equal(expected, result, check_index_type=False)

    def test_default_handler_indirect(self):
        from pandas.io.json import dumps

        def default(obj):
            if isinstance(obj, complex):
                return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
            return str(obj)

        df_list = [
            9,
            DataFrame(
                {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]},
                columns=["a", "b"],
            ),
        ]
        expected = (
            '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
            '["re",4.0],["im",-5.0]],"N\\/A"]]]'
        )
        assert dumps(df_list, default_handler=default, orient="values") == expected

    def test_default_handler_numpy_unsupported_dtype(self):
        # GH12554 to_json raises 'Unhandled numpy dtype 15'
        df = DataFrame(
            {"a": [1, 2.3, complex(4, -5)], "b": [float("nan"), None, complex(1.2, 0)]},
            columns=["a", "b"],
        )
        expected = (
            '[["(1+0j)","(nan+0j)"],'
            '["(2.3+0j)","(nan+0j)"],'
            '["(4-5j)","(1.2+0j)"]]'
        )
        assert df.to_json(default_handler=str, orient="values") == expected

    def test_default_handler_raises(self):
        msg = "raisin"

        def my_handler_raises(obj):
            raise TypeError(msg)

        with pytest.raises(TypeError, match=msg):
            DataFrame({"a": [1, 2, object()]}).to_json(
                default_handler=my_handler_raises
            )
        with pytest.raises(TypeError, match=msg):
            DataFrame({"a": [1, 2, complex(4, -5)]}).to_json(
                default_handler=my_handler_raises
            )

    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype("category")
        assert expected == df.to_json()

        s = df["A"]
        sc = df["B"]
        assert s.to_json() == sc.to_json()

    def test_datetime_tz(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        tz_range = pd.date_range("20130101", periods=3, tz="US/Eastern")
        tz_naive = tz_range.tz_convert("utc").tz_localize(None)

        df = DataFrame({"A": tz_range, "B": pd.date_range("20130101", periods=3)})

        df_naive = df.copy()
        df_naive["A"] = tz_naive
        expected = df_naive.to_json()
        assert expected == df.to_json()

        stz = Series(tz_range)
        s_naive = Series(tz_naive)
        assert stz.to_json() == s_naive.to_json()

    def test_sparse(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
        df.loc[:8] = np.nan

        sdf = df.astype("Sparse")
        expected = df.to_json()
        assert expected == sdf.to_json()

        s = Series(np.random.default_rng(2).standard_normal(10))
        s.loc[:8] = np.nan
        ss = s.astype("Sparse")

        expected = s.to_json()
        assert expected == ss.to_json()

    @pytest.mark.parametrize(
        "ts",
        [
            Timestamp("2013-01-10 05:00:00Z"),
            Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
            Timestamp("2013-01-10 00:00:00-0500"),
        ],
    )
    def test_tz_is_utc(self, ts):
        from pandas.io.json import dumps

        exp = '"2013-01-10T05:00:00.000Z"'

        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

    def test_tz_is_naive(self):
        from pandas.io.json import dumps

        ts = Timestamp("2013-01-10 05:00:00")
        exp = '"2013-01-10T05:00:00.000"'

        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

    @pytest.mark.parametrize(
        "tz_range",
        [
            pd.date_range("2013-01-01 05:00:00Z", periods=2),
            pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
            pd.date_range("2013-01-01 00:00:00-0500", periods=2),
        ],
    )
    def test_tz_range_is_utc(self, tz_range):
        from pandas.io.json import dumps

        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
        dfexp = (
            '{"DT":{'
            '"0":"2013-01-01T05:00:00.000Z",'
            '"1":"2013-01-02T05:00:00.000Z"}}'
        )

        assert dumps(tz_range, iso_dates=True) == exp
        dti = DatetimeIndex(tz_range)
        # Ensure datetimes in object array are serialized correctly
        # in addition to the normal DTI case
        assert dumps(dti, iso_dates=True) == exp
        assert dumps(dti.astype(object), iso_dates=True) == exp
        df = DataFrame({"DT": dti})
        result = dumps(df, iso_dates=True)
        assert result == dfexp
        assert dumps(df.astype({"DT": object}), iso_dates=True)

    def test_tz_range_is_naive(self):
        from pandas.io.json import dumps

        dti = pd.date_range("2013-01-01 05:00:00", periods=2)

        exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
        dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'

        # Ensure datetimes in object array are serialized correctly
        # in addition to the normal DTI case
        assert dumps(dti, iso_dates=True) == exp
        assert dumps(dti.astype(object), iso_dates=True) == exp
        df = DataFrame({"DT": dti})
        result = dumps(df, iso_dates=True)
        assert result == dfexp
        assert dumps(df.astype({"DT": object}), iso_dates=True)

    def test_read_inline_jsonl(self):
        # GH9180

        result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.single_cpu
    @td.skip_if_not_us_locale
    def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
        # GH17200

        result = read_json(
            f"s3n://{s3_public_bucket_with_data.name}/items.jsonl",
            lines=True,
            storage_options=s3so,
        )
        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with tm.ensure_clean("tmp_items.json") as path:
            with open(path, "w", encoding="utf-8") as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
            tm.assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

        # simulate string
        json = StringIO('{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n')
        result = read_json(json, lines=True)
        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
    def test_to_json_large_numbers(self, bigNum):
        # GH34473
        series = Series(bigNum, dtype=object, index=["articleId"])
        json = series.to_json()
        expected = '{"articleId":' + str(bigNum) + "}"
        assert json == expected

        df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
        json = df.to_json()
        expected = '{"0":{"articleId":' + str(bigNum) + "}}"
        assert json == expected

    @pytest.mark.parametrize("bigNum", [-(2**63) - 1, 2**64])
    def test_read_json_large_numbers(self, bigNum):
        # GH20599, 26068
        json = StringIO('{"articleId":' + str(bigNum) + "}")
        msg = r"Value is too small|Value is too big"
        with pytest.raises(ValueError, match=msg):
            read_json(json)

        json = StringIO('{"0":{"articleId":' + str(bigNum) + "}}")
        with pytest.raises(ValueError, match=msg):
            read_json(json)

    def test_read_json_large_numbers2(self):
        # GH18842
        json = '{"articleId": "1404366058080022500245"}'
        json = StringIO(json)
        result = read_json(json, typ="series")
        expected = Series(1.404366e21, index=["articleId"])
        tm.assert_series_equal(result, expected)

        json = '{"0": {"articleId": "1404366058080022500245"}}'
        json = StringIO(json)
        result = read_json(json)
        expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
        tm.assert_frame_equal(result, expected)

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
        assert result == expected

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
        assert result == expected
        tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
        assert result == expected

        tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)

    # TODO: there is a near-identical test for pytables; can we share?
    @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
    @pytest.mark.parametrize(
        "val",
        [
            [b"E\xc9, 17", b"", b"a", b"b", b"c"],
            [b"E\xc9, 17", b"a", b"b", b"c"],
            [b"EE, 17", b"", b"a", b"b", b"c"],
            [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
            [b"", b"a", b"b", b"c"],
            [b"\xf8\xfc", b"a", b"b", b"c"],
            [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
            [np.nan, b"", b"b", b"c"],
            [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
        ],
    )
    @pytest.mark.parametrize("dtype", ["category", object])
    def test_latin_encoding(self, dtype, val):
        # GH 13774
        ser = Series(
            [x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
            dtype=dtype,
        )
        encoding = "latin-1"
        with tm.ensure_clean("test.json") as path:
            ser.to_json(path, encoding=encoding)
            retr = read_json(StringIO(path), encoding=encoding)
            tm.assert_series_equal(ser, retr, check_categorical=False)

    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({"a": [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        assert size_before == size_after

    @pytest.mark.parametrize(
        "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
    )
    @pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]])
    def test_from_json_to_json_table_index_and_columns(self, index, columns):
        # GH25433 GH25435
        expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
        dfjson = expected.to_json(orient="table")

        result = read_json(StringIO(dfjson), orient="table")
        tm.assert_frame_equal(result, expected)

    def test_from_json_to_json_table_dtypes(self):
        # GH21345
        expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
        dfjson = expected.to_json(orient="table")
        result = read_json(StringIO(dfjson), orient="table")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
    def test_to_json_from_json_columns_dtypes(self, orient):
        # GH21892 GH33205
        expected = DataFrame.from_dict(
            {
                "Integer": Series([1, 2, 3], dtype="int64"),
                "Float": Series([None, 2.0, 3.0], dtype="float64"),
                "Object": Series([None, "", "c"], dtype="object"),
                "Bool": Series([True, False, True], dtype="bool"),
                "Category": Series(["a", "b", None], dtype="category"),
                "Datetime": Series(
                    ["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]"
                ),
            }
        )
        dfjson = expected.to_json(orient=orient)

        result = read_json(
            StringIO(dfjson),
            orient=orient,
            dtype={
                "Integer": "int64",
                "Float": "float64",
                "Object": "object",
                "Bool": "bool",
                "Category": "category",
                "Datetime": "datetime64[ns]",
            },
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
    def test_read_json_table_dtype_raises(self, dtype):
        # GH21345
        df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
        dfjson = df.to_json(orient="table")
        msg = "cannot pass both dtype and orient='table'"
        with pytest.raises(ValueError, match=msg):
            read_json(dfjson, orient="table", dtype=dtype)

    @pytest.mark.parametrize("orient", ["index", "columns", "records", "values"])
    def test_read_json_table_empty_axes_dtype(self, orient):
        # GH28558

        expected = DataFrame()
        result = read_json(StringIO("{}"), orient=orient, convert_axes=True)
        tm.assert_index_equal(result.index, expected.index)
        tm.assert_index_equal(result.columns, expected.columns)

    def test_read_json_table_convert_axes_raises(self):
        # GH25433 GH25435
        df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."])
        dfjson = df.to_json(orient="table")
        msg = "cannot pass both convert_axes and orient='table'"
        with pytest.raises(ValueError, match=msg):
            read_json(dfjson, orient="table", convert_axes=True)

    @pytest.mark.parametrize(
        "data, expected",
        [
            (
                DataFrame([[1, 2], [4, 5]], columns=["a", "b"]),
                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
            ),
            (
                DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"),
                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
            ),
            (
                DataFrame(
                    [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
                ),
                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
            ),
            (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}),
            (
                Series([1, 2, 3], name="A").rename_axis("foo"),
                {"name": "A", "data": [1, 2, 3]},
            ),
            (
                Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]),
                {"name": "A", "data": [1, 2]},
            ),
        ],
    )
    def test_index_false_to_json_split(self, data, expected):
        # GH 17394
        # Testing index=False in to_json with orient='split'

        result = data.to_json(orient="split", index=False)
        result = json.loads(result)

        assert result == expected

    @pytest.mark.parametrize(
        "data",
        [
            (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])),
            (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")),
            (
                DataFrame(
                    [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
                )
            ),
            (Series([1, 2, 3], name="A")),
            (Series([1, 2, 3], name="A").rename_axis("foo")),
            (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])),
        ],
    )
    def test_index_false_to_json_table(self, data):
        # GH 17394
        # Testing index=False in to_json with orient='table'

        result = data.to_json(orient="table", index=False)
        result = json.loads(result)

        expected = {
            "schema": pd.io.json.build_table_schema(data, index=False),
            "data": DataFrame(data).to_dict(orient="records"),
        }

        assert result == expected

    @pytest.mark.parametrize("orient", ["index", "columns"])
    def test_index_false_error_to_json(self, orient):
        # GH 17394, 25513
        # Testing error message from to_json with index=False

        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])

        msg = (
            "'index=False' is only valid when 'orient' is 'split', "
            "'table', 'records', or 'values'"
        )
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient, index=False)

    @pytest.mark.parametrize("orient", ["records", "values"])
    def test_index_true_error_to_json(self, orient):
        # GH 25513
        # Testing error message from to_json with index=True

        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])

        msg = (
            "'index=True' is only valid when 'orient' is 'split', "
            "'table', 'index', or 'columns'"
        )
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient, index=True)

    @pytest.mark.parametrize("orient", ["split", "table"])
    @pytest.mark.parametrize("index", [True, False])
    def test_index_false_from_json_to_json(self, orient, index):
        # GH25170
        # Test index=False in from_json to_json
        expected = DataFrame({"a": [1, 2], "b": [3, 4]})
        dfjson = expected.to_json(orient=orient, index=index)
        result = read_json(StringIO(dfjson), orient=orient)
        tm.assert_frame_equal(result, expected)

    def test_read_timezone_information(self):
        # GH 25546
        result = read_json(
            StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
        )
        expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC"))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "url",
        [
            "s3://example-fsspec/",
            "gcs://another-fsspec/file.json",
            "https://example-site.com/data",
            "some-protocol://data.txt",
        ],
    )
    def test_read_json_with_url_value(self, url):
        # GH 36271
        result = read_json(StringIO(f'{{"url":{{"0":"{url}"}}}}'))
        expected = DataFrame({"url": [url]})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "compression",
        ["", ".gz", ".bz2", ".tar"],
    )
    def test_read_json_with_very_long_file_path(self, compression):
        # GH 46718
        long_json_path = f'{"a" * 1000}.json{compression}'
        with pytest.raises(
            FileNotFoundError, match=f"File {long_json_path} does not exist"
        ):
            # path too long for Windows is handled in file_exists() but raises in
            # _get_data_from_filepath()
            read_json(long_json_path)

    @pytest.mark.parametrize(
        "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
    )
    def test_timedelta_as_label(self, date_format, key):
        df = DataFrame([[1]], columns=[pd.Timedelta("1D")])
        expected = f'{{"{key}":{{"0":1}}}}'
        result = df.to_json(date_format=date_format)

        assert result == expected

    @pytest.mark.parametrize(
        "orient,expected",
        [
            ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"),
            ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"),
            # TODO: the below have separate encoding procedures
            pytest.param(
                "split",
                "",
                marks=pytest.mark.xfail(
                    reason="Produces JSON but not in a consistent manner"
                ),
            ),
            pytest.param(
                "table",
                "",
                marks=pytest.mark.xfail(
                    reason="Produces JSON but not in a consistent manner"
                ),
            ),
        ],
    )
    def test_tuple_labels(self, orient, expected):
        # GH 20500
        df = DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")])
        result = df.to_json(orient=orient)
        assert result == expected

    @pytest.mark.parametrize("indent", [1, 2, 4])
    def test_to_json_indent(self, indent):
        # GH 12004
        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])

        result = df.to_json(indent=indent)
        spaces = " " * indent
        expected = f"""{{
{spaces}"a":{{
{spaces}{spaces}"0":"foo",
{spaces}{spaces}"1":"baz"
{spaces}}},
{spaces}"b":{{
{spaces}{spaces}"0":"bar",
{spaces}{spaces}"1":"qux"
{spaces}}}
}}"""

        assert result == expected

    @pytest.mark.parametrize(
        "orient,expected",
        [
            (
                "split",
                """{
    "columns":[
        "a",
        "b"
    ],
    "index":[
        0,
        1
    ],
    "data":[
        [
            "foo",
            "bar"
        ],
        [
            "baz",
            "qux"
        ]
    ]
}""",
            ),
            (
                "records",
                """[
    {
        "a":"foo",
        "b":"bar"
    },
    {
        "a":"baz",
        "b":"qux"
    }
]""",
            ),
            (
                "index",
                """{
    "0":{
        "a":"foo",
        "b":"bar"
    },
    "1":{
        "a":"baz",
        "b":"qux"
    }
}""",
            ),
            (
                "columns",
                """{
    "a":{
        "0":"foo",
        "1":"baz"
    },
    "b":{
        "0":"bar",
        "1":"qux"
    }
}""",
            ),
            (
                "values",
                """[
    [
        "foo",
        "bar"
    ],
    [
        "baz",
        "qux"
    ]
]""",
            ),
            (
                "table",
                """{
    "schema":{
        "fields":[
            {
                "name":"index",
                "type":"integer"
            },
            {
                "name":"a",
                "type":"string"
            },
            {
                "name":"b",
                "type":"string"
            }
        ],
        "primaryKey":[
            "index"
        ],
        "pandas_version":"1.4.0"
    },
    "data":[
        {
            "index":0,
            "a":"foo",
            "b":"bar"
        },
        {
            "index":1,
            "a":"baz",
            "b":"qux"
        }
    ]
}""",
            ),
        ],
    )
    def test_json_indent_all_orients(self, orient, expected):
        # GH 12004
        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
        result = df.to_json(orient=orient, indent=4)
        assert result == expected

    def test_json_negative_indent_raises(self):
        with pytest.raises(ValueError, match="must be a nonnegative integer"):
            DataFrame().to_json(indent=-1)

    def test_emca_262_nan_inf_support(self):
        # GH 12213
        data = StringIO(
            '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
        )
        result = read_json(data)
        expected = DataFrame(
            ["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
        )
        tm.assert_frame_equal(result, expected)

    def test_frame_int_overflow(self):
        # GH 30320
        encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
        expected = DataFrame({"col": ["31900441201190696999", "Text"]})
        result = read_json(StringIO(encoded_json))
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "dataframe,expected",
        [
            (
                DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
                '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
                '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
            )
        ],
    )
    def test_json_multiindex(self, dataframe, expected):
        series = dataframe.stack(future_stack=True)
        result = series.to_json(orient="index")
        assert result == expected

    @pytest.mark.single_cpu
    def test_to_s3(self, s3_public_bucket, s3so):
        # GH 28375
        mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
        df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
        df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
        timeout = 5
        while True:
            if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
                break
            time.sleep(0.1)
            timeout -= 0.1
            assert timeout > 0, "Timed out waiting for file to appear on moto"

    def test_json_pandas_nulls(self, nulls_fixture, request):
        # GH 31615
        if isinstance(nulls_fixture, Decimal):
            mark = pytest.mark.xfail(reason="not implemented")
            request.node.add_marker(mark)

        result = DataFrame([[nulls_fixture]]).to_json()
        assert result == '{"0":{"0":null}}'

    def test_readjson_bool_series(self):
        # GH31464
        result = read_json(StringIO("[true, true, false]"), typ="series")
        expected = Series([True, True, False])
        tm.assert_series_equal(result, expected)

    def test_to_json_multiindex_escape(self):
        # GH 15273
        df = DataFrame(
            True,
            index=pd.date_range("2017-01-20", "2017-01-23"),
            columns=["foo", "bar"],
        ).stack(future_stack=True)
        result = df.to_json()
        expected = (
            "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true,"
            "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true,"
            "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true,"
            "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}"
        )
        assert result == expected

    def test_to_json_series_of_objects(self):
        class _TestObject:
            def __init__(self, a, b, _c, d) -> None:
                self.a = a
                self.b = b
                self._c = _c
                self.d = d

            def e(self):
                return 5

        # JSON keys should be all non-callable non-underscore attributes, see GH-42768
        series = Series([_TestObject(a=1, b=2, _c=3, d=4)])
        assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}}

    @pytest.mark.parametrize(
        "data,expected",
        [
            (
                Series({0: -6 + 8j, 1: 0 + 1j, 2: 9 - 5j}),
                '{"0":{"imag":8.0,"real":-6.0},'
                '"1":{"imag":1.0,"real":0.0},'
                '"2":{"imag":-5.0,"real":9.0}}',
            ),
            (
                Series({0: -9.39 + 0.66j, 1: 3.95 + 9.32j, 2: 4.03 - 0.17j}),
                '{"0":{"imag":0.66,"real":-9.39},'
                '"1":{"imag":9.32,"real":3.95},'
                '"2":{"imag":-0.17,"real":4.03}}',
            ),
            (
                DataFrame([[-2 + 3j, -1 - 0j], [4 - 3j, -0 - 10j]]),
                '{"0":{"0":{"imag":3.0,"real":-2.0},'
                '"1":{"imag":-3.0,"real":4.0}},'
                '"1":{"0":{"imag":0.0,"real":-1.0},'
                '"1":{"imag":-10.0,"real":0.0}}}',
            ),
            (
                DataFrame(
                    [[-0.28 + 0.34j, -1.08 - 0.39j], [0.41 - 0.34j, -0.78 - 1.35j]]
                ),
                '{"0":{"0":{"imag":0.34,"real":-0.28},'
                '"1":{"imag":-0.34,"real":0.41}},'
                '"1":{"0":{"imag":-0.39,"real":-1.08},'
                '"1":{"imag":-1.35,"real":-0.78}}}',
            ),
        ],
    )
    def test_complex_data_tojson(self, data, expected):
        # GH41174
        result = data.to_json()
        assert result == expected

    def test_json_uint64(self):
        # GH21073
        expected = (
            '{"columns":["col1"],"index":[0,1],'
            '"data":[[13342205958987758245],[12388075603347835679]]}'
        )
        df = DataFrame(data={"col1": [13342205958987758245, 12388075603347835679]})
        result = df.to_json(orient="split")
        assert result == expected

    @pytest.mark.parametrize(
        "orient", ["split", "records", "values", "index", "columns"]
    )
    def test_read_json_dtype_backend(self, string_storage, dtype_backend, orient):
        # GH#50750
        pa = pytest.importorskip("pyarrow")
        df = DataFrame(
            {
                "a": Series([1, np.nan, 3], dtype="Int64"),
                "b": Series([1, 2, 3], dtype="Int64"),
                "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
                "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
                "e": [True, False, None],
                "f": [True, False, True],
                "g": ["a", "b", "c"],
                "h": ["a", "b", None],
            }
        )

        if string_storage == "python":
            string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_))
            string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_))

        else:
            string_array = ArrowStringArray(pa.array(["a", "b", "c"]))
            string_array_na = ArrowStringArray(pa.array(["a", "b", None]))

        out = df.to_json(orient=orient)
        with pd.option_context("mode.string_storage", string_storage):
            result = read_json(
                StringIO(out), dtype_backend=dtype_backend, orient=orient
            )

        expected = DataFrame(
            {
                "a": Series([1, np.nan, 3], dtype="Int64"),
                "b": Series([1, 2, 3], dtype="Int64"),
                "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
                "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
                "e": Series([True, False, NA], dtype="boolean"),
                "f": Series([True, False, True], dtype="boolean"),
                "g": string_array,
                "h": string_array_na,
            }
        )

        if dtype_backend == "pyarrow":
            from pandas.arrays import ArrowExtensionArray

            expected = DataFrame(
                {
                    col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
                    for col in expected.columns
                }
            )

        if orient == "values":
            expected.columns = list(range(0, 8))

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("orient", ["split", "records", "index"])
    def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
        # GH#50750
        pa = pytest.importorskip("pyarrow")
        ser = Series([1, np.nan, 3], dtype="Int64")

        out = ser.to_json(orient=orient)
        with pd.option_context("mode.string_storage", string_storage):
            result = read_json(
                StringIO(out), dtype_backend=dtype_backend, orient=orient, typ="series"
            )

        expected = Series([1, np.nan, 3], dtype="Int64")

        if dtype_backend == "pyarrow":
            from pandas.arrays import ArrowExtensionArray

            expected = Series(ArrowExtensionArray(pa.array(expected, from_pandas=True)))

        tm.assert_series_equal(result, expected)

    def test_invalid_dtype_backend(self):
        msg = (
            "dtype_backend numpy is invalid, only 'numpy_nullable' and "
            "'pyarrow' are allowed."
        )
        with pytest.raises(ValueError, match=msg):
            read_json("test", dtype_backend="numpy")


def test_invalid_engine():
    # GH 48893
    ser = Series(range(1))
    out = ser.to_json()
    with pytest.raises(ValueError, match="The engine type foo"):
        read_json(out, engine="foo")


def test_pyarrow_engine_lines_false():
    # GH 48893
    ser = Series(range(1))
    out = ser.to_json()
    with pytest.raises(ValueError, match="currently pyarrow engine only supports"):
        read_json(out, engine="pyarrow", lines=False)


def test_json_roundtrip_string_inference(orient):
    pytest.importorskip("pyarrow")
    df = DataFrame(
        [["a", "b"], ["c", "d"]], index=["row 1", "row 2"], columns=["col 1", "col 2"]
    )
    out = df.to_json()
    with pd.option_context("future.infer_string", True):
        result = read_json(StringIO(out))
    expected = DataFrame(
        [["a", "b"], ["c", "d"]],
        dtype="string[pyarrow_numpy]",
        index=pd.Index(["row 1", "row 2"], dtype="string[pyarrow_numpy]"),
        columns=pd.Index(["col 1", "col 2"], dtype="string[pyarrow_numpy]"),
    )
    tm.assert_frame_equal(result, expected)
