When trying to load a Parquet file into a DataFrame with:
g34i9 = pd.read_parquet('G34I9.snappy.parquet')
I am getting the error below:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[46], line 1
----> 1 g34i9 = pd.read_parquet('G34I9.snappy.parquet')
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pandas\io\parquet.py:670, in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)
667 use_nullable_dtypes = False
668 check_dtype_backend(dtype_backend)
--> 670 return impl.read(
671 path,
672 columns=columns,
673 filters=filters,
674 storage_options=storage_options,
675 use_nullable_dtypes=use_nullable_dtypes,
676 dtype_backend=dtype_backend,
677 filesystem=filesystem,
678 **kwargs,
679 )
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pandas\io\parquet.py:279, in PyArrowImpl.read(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)
271 try:
272 pa_table = self.api.parquet.read_table(
273 path_or_handle,
274 columns=columns,
(...)
277 **kwargs,
278 )
--> 279 result = pa_table.to_pandas(**to_pandas_kwargs)
281 if manager == "array":
282 result = result._as_manager("array", copy=False)
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\array.pxi:884, in pyarrow.lib._PandasConvertible.to_pandas()
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\table.pxi:4192, in pyarrow.lib.Table._to_pandas()
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\pandas_compat.py:776, in table_to_dataframe(options, table, categories, ignore_metadata, types_mapper)
774 _check_data_column_metadata_consistency(all_columns)
775 columns = _deserialize_column_index(table, all_columns, column_indexes)
--> 776 blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
778 axes = [columns, index]
779 mgr = BlockManager(blocks, axes)
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\pandas_compat.py:1129, in _table_to_blocks(options, block_table, categories, extension_columns)
1124 def _table_to_blocks(options, block_table, categories, extension_columns):
1125 # Part of table_to_blockmanager
1126
1127 # Convert an arrow table to Block from the internal pandas API
1128 columns = block_table.column_names
-> 1129 result = pa.lib.table_to_blocks(options, block_table, categories,
1130 list(extension_columns.keys()))
1131 return [_reconstruct_block(item, columns, extension_columns)
1132 for item in result]
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\table.pxi:3115, in pyarrow.lib.table_to_blocks()
File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\types.pxi:88, in pyarrow.lib._datatype_to_pep3118()
ValueError: year 0 is out of range
I would like to successfully load the Parquet file into a DataFrame.
This error occurs when pandas encounters a date value outside its supported range (here, year 0) while converting the Parquet data to a DataFrame. Try the approach below and see if it resolves the issue.