Open zarr with xarray_tensorstore: deep copy is not working after opening the file

101 views Asked by At

I opened a file using xarray_tensorstore.open_zarr. Opening the file succeeds, but calling copy(deep=True) on the resulting dataset fails. In contrast, copy(deep=True) works correctly on the original dataset, and also when I open the file with xarray.open_dataset(path, engine='zarr'). The original code can be found here: https://github.com/google/xarray-tensorstore/blob/main/xarray_tensorstore_test.py

My modified code:

def test_open_zarr(self, transform):
    """Reproduce the deep-copy failure on a dataset opened with xarray_tensorstore.

    A small dataset is written to a temporary Zarr store and then opened twice:
    once with ``xarray.open_dataset(..., engine='zarr')`` and once with
    ``xarray_tensorstore.open_zarr``. Both results are deep-copied; only the
    copy of the xarray_tensorstore-backed dataset raises.

    Args:
        transform: Not referenced anywhere in this reduced reproduction.
    """
    # Minimal dataset: one variable 'foo' along dimension 'x', with both
    # variable-level and dataset-level attributes.
    source = xarray.Dataset(
        {
            'foo': (('x',), np.arange(2), {'local': 'local metadata'}),
        },
        coords={
            'x': [1, 2],
        },
        attrs={'global': 'global metadata'},
    )
    # Write the dataset into a fresh temporary directory as a Zarr store.
    path = self.create_tempdir().full_path
    source.chunk().to_zarr(path)
    # Open the same store through both code paths so they can be compared.
    openned1 = xarray.open_dataset(path, engine='zarr')
    openned2 = xarray_tensorstore.open_zarr(path)
    # Deep copy of the dataset opened by xarray's own zarr engine succeeds.
    openned11 = openned1.copy(deep=True)
    # Deep copy of the xarray_tensorstore-backed dataset raises
    # ValueError: Error opening "zarr" driver ... Invalid key: ".../foo/".
    # NOTE(review): the spec in the error message has a kvstore path ending in
    # "foo/" together with an empty "metadata_key" -- possibly related to the
    # failure; confirm against the tensorstore spec rules.
    openned22 = openned2.copy(deep=True) # this line fails

Running it produces this error:

xarray_tensorstore_test.py:24 (XarrayTensorstoreTest.test_open_zarr_base)
self = <xarray_tensorstore_test.XarrayTensorstoreTest testMethod=test_open_zarr_base>
transform = <function XarrayTensorstoreTest.<lambda> at 0x10c9eb2e0>

    @parameterized.named_parameters(
        # TODO(shoyer): consider using hypothesis to convert these into
        # property-based tests
        {
            'testcase_name': 'base',
            'transform': lambda ds: ds,
        },
    
    )
    def test_open_zarr(self, transform):
      source = xarray.Dataset(
          {
              'foo': (('x',), np.arange(2), {'local': 'local metadata'}),
          },
          coords={
              'x': [1, 2],
          },
          attrs={'global': 'global metadata'},
      )
      path = self.create_tempdir().full_path
      source.chunk().to_zarr(path)
  openned1 = xarray.open_dataset(path, engine='zarr')
      openned2 = xarray_tensorstore.open_zarr(path)
      openned11 = openned1.copy(deep=True)
>     openned22 = openned2.copy(deep=True)

xarray_tensorstore_test.py:49: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/xarray/core/dataset.py:1282: in copy
    return self._copy(deep=deep, data=data)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/xarray/core/dataset.py:1318: in _copy
    variables[k] = v._copy(deep=deep, data=data.get(k), memo=memo)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/xarray/core/variable.py:1082: in _copy
    ndata = copy.deepcopy(ndata, memo)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/copy.py:172: in deepcopy
    y = _reconstruct(x, memo, *rv)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/copy.py:271: in _reconstruct
    state = deepcopy(state, memo)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/copy.py:146: in deepcopy
    y = copier(x, memo)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/copy.py:231: in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/copy.py:172: in deepcopy
    y = _reconstruct(x, memo, *rv)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

x = TensorStore({
  'context': {
    'cache_pool': {},
    'data_copy_concurrency': {},
    'file_io_concurrency': {},
   ... 'shape': [2],
    'zarr_format': 2,
  },
  'transform': {'input_exclusive_max': [[2]], 'input_inclusive_min': [0]},
})
memo = {4628834944: [b'\x00\x00\x01\x0cfile_io_sync'], 4632967376: <[AttributeError("'_TensorStoreAdapter' object has no attr... in repr()] _TensorStoreAdapter object at 0x116012d50>, 4667144384: {}, 4785223744: [b'\x00\x00\x01\ncache_pool'], ...}
func = <tensorstore._GlobalPicklableFunction object at 0x11422f700>
args = <generator object _reconstruct.<locals>.<genexpr> at 0x11d2ffd30>
state = None, listiter = None, dictiter = None

    def _reconstruct(x, memo, func, args,
                     state=None, listiter=None, dictiter=None,
                     *, deepcopy=deepcopy):
        deep = memo is not None
        if deep and args:
            args = (deepcopy(arg, memo) for arg in args)
>       y = func(*args)
E       ValueError: Error opening "zarr" driver: Error reading local file "/var/folders/j9/3y1whvhj1ll0xb6p0p1xy4t00000gn/T/absl_testing/XarrayTensorstoreTest/test_open_zarr_base/tmpf9s5vbjh/foo/": Invalid key: "/var/folders/j9/3y1whvhj1ll0xb6p0p1xy4t00000gn/T/absl_testing/XarrayTensorstoreTest/test_open_zarr_base/tmpf9s5vbjh/foo/"; at byte 51 [tensorstore_spec='{\"context\":{\"cache_pool\":{},\"data_copy_concurrency\":{},\"file_io_concurrency\":{},\"file_io_sync\":true},\"driver\":\"zarr\",\"dtype\":\"int64\",\"kvstore\":{\"driver\":\"file\",\"path\":\"/var/folders/j9/3y1whvhj1ll0xb6p0p1xy4t00000gn/T/absl_testing/XarrayTensorstoreTest/test_open_zarr_base/tmpf9s5vbjh/foo/\"},\"metadata\":{\"chunks\":[2],\"compressor\":{\"blocksize\":0,\"clevel\":5,\"cname\":\"lz4\",\"id\":\"blosc\",\"shuffle\":1},\"dimension_separator\":\".\",\"dtype\":\"<i8\",\"fill_value\":null,\"filters\":null,\"order\":\"C\",\"shape\":[2],\"zarr_format\":2},\"metadata_key\":\"\",\"transform\":{\"input_exclusive_max\":[[2]],\"input_inclusive_min\":[0]}}'] [source locations='tensorstore/kvstore/file/file_key_value_store.cc:660\ntensorstore/kvstore/kvstore.cc:267\ntensorstore/driver/driver.cc:109\ntensorstore/driver/driver.cc:313']

/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/copy.py:265: ValueError

I tried to debug the issue, but couldn't find the cause of the problem.

0

There are 0 answers