From 1e212d428b5ced9d789fbb8e304012e6fa954ed7 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 19 Feb 2026 17:31:46 +0100 Subject: [PATCH 1/7] BUG: fix error writing a column with mix of datetimes with and without timezone without arrow --- CHANGES.md | 11 ++++++++-- pyogrio/_io.pyx | 2 ++ pyogrio/tests/test_geopandas_io.py | 32 ++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d1547230..0540044e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,12 +1,19 @@ # CHANGELOG +## 0.12.2 (????-??-??) + +### Bug fixes + +- Fix error in `write_dataframe` with `use_arrow=False` when writing a column with a + mix of datetimes with and without time zones (#). + ## 0.12.1 (2025-11-28) ### Bug fixes -- Fix regression in reading date columns (#616) +- Fix regression in reading date columns (#616). - Fix regression in `read_dataframe` when `use_arrow=True` and `columns` is used to filter - out columns of some specific types (#611) + out columns of some specific types (#611). ## 0.12.0 (2025-11-26) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index c236f652..abc22fb7 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -2786,6 +2786,8 @@ def ogr_write( tz_array = gdal_tz_offsets.get(fields[field_idx], None) if tz_array is None: gdal_tz = 0 + elif np.isnan(tz_array[i]): + gdal_tz = 0 else: gdal_tz = tz_array[i] OGR_F_SetFieldDateTimeEx( diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index ddc2e322..3c76f839 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1001,6 +1001,38 @@ def test_write_read_datetime_tz_mixed_offsets( assert_geodataframe_equal(result, df) +@pytest.mark.requires_arrow_write_api +def test_write_read_datetime_tz_mixed_offsets_None(tmp_path, use_arrow): + """Test with dates with mixed time zone offsets where some offsets are None. + + When datetimes with a mix of having a time zone offsets and some without an offset + were written without arrow, this gave the following error: + `ValueError: cannot convert float NaN to integer` + """ + # Pandas datetime64 column types doesn't support mixed time zone offsets, so + # it needs to be a list of pandas.Timestamp objects instead. + dates = [ + pd.Timestamp("2023-01-01 11:00:01.111+01:00"), + pd.Timestamp("2023-06-01 10:00:01.111+05:00"), + pd.Timestamp("2023-06-01 10:00:01.111"), + np.nan, + ] + + df = gp.GeoDataFrame( + {"dates": dates, "geometry": [Point(1, 1)] * len(dates)}, crs="EPSG:4326" + ) + fpath = tmp_path / "test.gpkg" + write_dataframe(df, fpath, use_arrow=use_arrow) + result = read_dataframe( + fpath, + use_arrow=use_arrow, + datetime_as_string=False, + mixed_offsets_as_utc=False, + ) + + assert_geodataframe_equal(result, df) + + @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"]) @pytest.mark.parametrize( "dates_raw", From 14051cb4b0d7bc128fdcb3f02377a1cbec6b0119 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 19 Feb 2026 19:31:46 +0100 Subject: [PATCH 2/7] Several improvements --- CHANGES.md | 5 +-- pyogrio/geopandas.py | 8 ++++- pyogrio/tests/test_geopandas_io.py | 52 +++++++++++++++++++----------- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0540044e..c8066de5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,8 +4,9 @@ ### Bug fixes -- Fix error in `write_dataframe` with `use_arrow=False` when writing a column with a - mix of datetimes with and without time zones (#). +- Fix error in `write_dataframe` with `use_arrow=False` when writing a column with + datetimes without any time zone offsets or with a mix of offsets and no offsets + (#634). ## 0.12.1 (2025-11-28) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index e4f3790d..420b05ab 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -893,7 +893,13 @@ def write_dataframe( # Column of Timestamp/datetime objects, split in naive datetime and tz. if pd.api.types.infer_dtype(df[name]) == "datetime": tz_offset = col.map(lambda x: x.utcoffset(), na_action="ignore") - gdal_offset_repr = tz_offset // pd.Timedelta("15m") + 100 + gdal_offset_repr = tz_offset.map( + lambda x: x // pd.Timedelta("15m") + 100, na_action="ignore" + ) + # Replace pd.NA with np.nan because the cython code expects numpy arrays + gdal_offset_repr = gdal_offset_repr.where( + ~pd.isna(gdal_offset_repr), np.nan + ) gdal_tz_offsets[name] = gdal_offset_repr.values naive = col.map(lambda x: x.replace(tzinfo=None), na_action="ignore") values = naive.values diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 3c76f839..af3f9b5e 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -3,7 +3,7 @@ import os import re import warnings -from datetime import datetime +from datetime import datetime, timedelta, timezone from io import BytesIO from zipfile import ZipFile @@ -1001,25 +1001,35 @@ def test_write_read_datetime_tz_mixed_offsets( assert_geodataframe_equal(result, df) +@pytest.mark.parametrize( + "dates", + [ + [ + datetime(2023, 1, 1, 11, 0, 1, 111000), + datetime(2023, 6, 1, 10, 0, 1, 111000), + np.nan, + ], + [ + datetime(2023, 1, 1, 11, 0, 1, 111000, tzinfo=timezone(timedelta(hours=1))), + datetime(2023, 6, 1, 10, 0, 1, 111000), + np.nan, + ], + ], +) @pytest.mark.requires_arrow_write_api -def test_write_read_datetime_tz_mixed_offsets_None(tmp_path, use_arrow): - """Test with dates with mixed time zone offsets where some offsets are None. - - When datetimes with a mix of having a time zone offsets and some without an offset - were written without arrow, this gave the following error: - `ValueError: cannot convert float NaN to integer` +def test_write_read_datetime_tz_offsets_None(tmp_path, dates, use_arrow): + """Test writing a column with datetimes with and without time zone offsets. + + Two types of errors occured: + - For datetimes without any offset, when written without arrow: + `TypeError: Invalid dtype float64 for __floordiv__` + - For datetimes with a mix of having a time zone offset and without, when written + without arrow: `ValueError: cannot convert float NaN to integer` """ - # Pandas datetime64 column types doesn't support mixed time zone offsets, so - # it needs to be a list of pandas.Timestamp objects instead. - dates = [ - pd.Timestamp("2023-01-01 11:00:01.111+01:00"), - pd.Timestamp("2023-06-01 10:00:01.111+05:00"), - pd.Timestamp("2023-06-01 10:00:01.111"), - np.nan, - ] - df = gp.GeoDataFrame( - {"dates": dates, "geometry": [Point(1, 1)] * len(dates)}, crs="EPSG:4326" + {"dates": dates, "geometry": [Point(1, 1)] * len(dates)}, + crs="EPSG:4326", + dtype=object, ) fpath = tmp_path / "test.gpkg" write_dataframe(df, fpath, use_arrow=use_arrow) @@ -1030,7 +1040,13 @@ def test_write_read_datetime_tz_mixed_offsets_None(tmp_path, use_arrow): mixed_offsets_as_utc=False, ) - assert_geodataframe_equal(result, df) + exp_df = df.copy() + if dates[0].tzinfo is None: + exp_df.dates = pd.to_datetime(exp_df.dates, utc=False) + if PANDAS_GE_20: + exp_df.dates = exp_df.dates.dt.as_unit("ms") + + assert_geodataframe_equal(result, exp_df) @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"]) From 862954ccbd79b50f6a3f297687d760fdd68a6095 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 19 Feb 2026 19:44:27 +0100 Subject: [PATCH 3/7] Fixes in tests for older GDAL and pandas versions --- pyogrio/tests/test_geopandas_io.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index af3f9b5e..e095bf40 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1045,6 +1045,12 @@ def test_write_read_datetime_tz_offsets_None(tmp_path, dates, use_arrow): exp_df.dates = pd.to_datetime(exp_df.dates, utc=False) if PANDAS_GE_20: exp_df.dates = exp_df.dates.dt.as_unit("ms") + if not PANDAS_GE_30: + exp_df.loc[2, "dates"] = None + + if not GDAL_GE_311 and use_arrow: + # Older versions of GDAL with arrow didn't handle datetimes properly + exp_df.dates = exp_df.dates.astype("str") assert_geodataframe_equal(result, exp_df) From b0af6477f87ee6bc0d939a6bfb198829c33529c3 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 19 Feb 2026 20:07:06 +0100 Subject: [PATCH 4/7] fixes to tests for old versions --- pyogrio/tests/test_geopandas_io.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index e095bf40..c551f06c 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1045,12 +1045,13 @@ def test_write_read_datetime_tz_offsets_None(tmp_path, dates, use_arrow): exp_df.dates = pd.to_datetime(exp_df.dates, utc=False) if PANDAS_GE_20: exp_df.dates = exp_df.dates.dt.as_unit("ms") - if not PANDAS_GE_30: - exp_df.loc[2, "dates"] = None + if not GDAL_GE_311 and use_arrow: + # Older versions of GDAL with arrow didn't handle datetimes properly + exp_df.dates = exp_df.dates.astype("str") + result.dates = result.dates.str.slice(0, -3) - if not GDAL_GE_311 and use_arrow: - # Older versions of GDAL with arrow didn't handle datetimes properly - exp_df.dates = exp_df.dates.astype("str") + if not PANDAS_GE_30: + exp_df.loc[2, "dates"] = None assert_geodataframe_equal(result, exp_df) From 38d3dea44ae82d8755b8f690df159c0b30c7904e Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 19 Feb 2026 20:12:10 +0100 Subject: [PATCH 5/7] Update test_geopandas_io.py --- pyogrio/tests/test_geopandas_io.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index c551f06c..a56cb71f 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1017,6 +1017,10 @@ def test_write_read_datetime_tz_mixed_offsets( ], ) @pytest.mark.requires_arrow_write_api +@pytest.skipif( + not GDAL_GE_311, + reason="before GDAL 3.11, datetimes weren't handled as well", +) def test_write_read_datetime_tz_offsets_None(tmp_path, dates, use_arrow): """Test writing a column with datetimes with and without time zone offsets. @@ -1045,10 +1049,6 @@ def test_write_read_datetime_tz_offsets_None(tmp_path, dates, use_arrow): exp_df.dates = pd.to_datetime(exp_df.dates, utc=False) if PANDAS_GE_20: exp_df.dates = exp_df.dates.dt.as_unit("ms") - if not GDAL_GE_311 and use_arrow: - # Older versions of GDAL with arrow didn't handle datetimes properly - exp_df.dates = exp_df.dates.astype("str") - result.dates = result.dates.str.slice(0, -3) if not PANDAS_GE_30: exp_df.loc[2, "dates"] = None From 35495cbe6feae7f0da743e1e462d294f1271437f Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 19 Feb 2026 20:15:43 +0100 Subject: [PATCH 6/7] Update test_geopandas_io.py --- pyogrio/tests/test_geopandas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index a56cb71f..f47b4751 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1017,7 +1017,7 @@ def test_write_read_datetime_tz_mixed_offsets( ], ) @pytest.mark.requires_arrow_write_api -@pytest.skipif( +@pytest.mark.skipif( not GDAL_GE_311, reason="before GDAL 3.11, datetimes weren't handled as well", ) From cfa0864bcd4b8f6f85281d2e0af08fa98feb4b46 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Thu, 4 Jun 2026 13:23:35 +0200 Subject: [PATCH 7/7] Apply feedback --- pyogrio/geopandas.py | 10 +++------- pyogrio/tests/test_geopandas_io.py | 9 +-------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index b768adea..dd8568aa 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -921,14 +921,10 @@ def write_dataframe( elif col.dtype == "object": # Column of Timestamp/datetime objects, split in naive datetime and tz. if pd.api.types.infer_dtype(df[name]) == "datetime": - tz_offset = col.map(lambda x: x.utcoffset(), na_action="ignore") - gdal_offset_repr = tz_offset.map( - lambda x: x // pd.Timedelta("15m") + 100, na_action="ignore" - ) - # Replace pd.NA with np.nan because the cython code expects numpy arrays - gdal_offset_repr = gdal_offset_repr.where( - ~pd.isna(gdal_offset_repr), np.nan + tz_offset = col.map(lambda x: x.utcoffset(), na_action="ignore").astype( + "timedelta64[us]" if PANDAS_GE_30 else "timedelta64[ns]" ) + gdal_offset_repr = tz_offset // pd.Timedelta("15m") + 100 gdal_tz_offsets[name] = gdal_offset_repr.values naive = col.map(lambda x: x.replace(tzinfo=None), na_action="ignore") values = naive.values diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 15425f5c..6331bdf8 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1026,14 +1026,7 @@ def test_write_read_datetime_tz_mixed_offsets( reason="before GDAL 3.11, datetimes weren't handled as well", ) def test_write_read_datetime_tz_offsets_None(tmp_path, dates, use_arrow): - """Test writing a column with datetimes with and without time zone offsets. - - Two types of errors occured: - - For datetimes without any offset, when written without arrow: - `TypeError: Invalid dtype float64 for __floordiv__` - - For datetimes with a mix of having a time zone offset and without, when written - without arrow: `ValueError: cannot convert float NaN to integer` - """ + """Test writing a column with datetimes with and without time zone offsets.""" df = gp.GeoDataFrame( {"dates": dates, "geometry": [Point(1, 1)] * len(dates)}, crs="EPSG:4326",