TileDBError: [TileDB::S3] error storing h5ad with SOMA format

Hello,
I am trying to create a SOMA file from h5ad.
I was able to generate the SOMA file on the local hard disk, but when I try to generate the file on S3 I get the following error:

START Experiment.from_h5ad input/pbmc3k.h5ad
START READING input/pbmc3k.h5ad
/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].

File "tiledb/libtiledb.pyx", line 353, in tiledb.libtiledb._raise_tiledb_error
tiledb.cc.TileDBError: [TileDB::S3] Error: Error while listing with prefix 's3://…/pbmc3k/ms/RNA/uns/draw_graph/params/random_state/__schema/' and delimiter '/'
Exception: PermanentRedirect
Error message: Unable to parse ExceptionName: PermanentRedirect Message: The bucket you are attempting to access must be addressed using the specified endpoint. Please send all future requests to this endpoint.

The output is partially created but something went wrong.

I am using
python 3.10
tiledb==0.22.0
tiledbsoma==1.3.0

import tiledb
import tiledbsoma
import tiledbsoma.io
import tiledbsoma.logging

from tiledbsoma.options import SOMATileDBContext

tiledbsoma.logging.debug()

config = tiledb.Config()
config["vfs.s3.scheme"] = "https" 
config["vfs.s3.region"] = "us-east-2"
config["vfs.s3.endpoint_override"] = ""
config["vfs.s3.use_virtual_addressing"] = "true"
config["vfs.s3.use_multipart_upload"] = "true"


tdb_ctx = tiledb.Ctx(config=config)
soma_ctx = SOMATileDBContext(tiledb_ctx=tdb_ctx)
pbmc3k_uri = tiledbsoma.io.from_h5ad("s3://.../pbmc3k", input_path = "input/pbmc3k.h5ad", measurement_name = "RNA", context=soma_ctx)

The debug output for the config parameters:

Default settings:
"config.env_var_prefix" : "TILEDB_"
"config.logging_format" : "DEFAULT"
"config.logging_level" : "0"
"filestore.buffer_size" : "104857600"
"rest.curl.buffer_size" : "524288"
"rest.curl.verbose" : "false"
"rest.http_compressor" : "any"
"rest.load_metadata_on_array_open" : "true"
"rest.load_non_empty_domain_on_array_open" : "true"
"rest.retry_count" : "25"
"rest.retry_delay_factor" : "1.25"
"rest.retry_http_codes" : "503"
"rest.retry_initial_delay_ms" : "500"
"rest.server_address" : "https://api.tiledb.com"
"rest.server_serialization_format" : "CAPNP"
"rest.use_refactored_array_open" : "false"
"rest.use_refactored_array_open_and_query_submit" : "false"
"sm.allow_separate_attribute_writes" : "false"
"sm.allow_updates_experimental" : "false"
"sm.check_coord_dups" : "true"
"sm.check_coord_oob" : "true"
"sm.check_global_order" : "true"
"sm.compute_concurrency_level" : "8"
"sm.consolidation.amplification" : "1.0"
"sm.consolidation.buffer_size" : "50000000"
"sm.consolidation.max_fragment_size" : "18446744073709551615"
"sm.consolidation.mode" : "fragments"
"sm.consolidation.purge_deleted_cells" : "false"
"sm.consolidation.step_max_frags" : "4294967295"
"sm.consolidation.step_min_frags" : "4294967295"
"sm.consolidation.step_size_ratio" : "0.0"
"sm.consolidation.steps" : "4294967295"
"sm.consolidation.timestamp_end" : "18446744073709551615"
"sm.consolidation.timestamp_start" : "0"
"sm.dedup_coords" : "false"
"sm.enable_signal_handlers" : "true"
"sm.encryption_key" : ""
"sm.encryption_type" : "NO_ENCRYPTION"
"sm.fragment_info.preload_mbrs" : "false"
"sm.group.timestamp_end" : "18446744073709551615"
"sm.group.timestamp_start" : "0"
"sm.io_concurrency_level" : "8"
"sm.max_tile_overlap_size" : "314572800"
"sm.mem.malloc_trim" : "true"
"sm.mem.reader.sparse_global_order.ratio_array_data" : "0.1"
"sm.mem.reader.sparse_global_order.ratio_coords" : "0.5"
"sm.mem.reader.sparse_global_order.ratio_tile_ranges" : "0.1"
"sm.mem.reader.sparse_unordered_with_dups.ratio_array_data" : "0.1"
"sm.mem.reader.sparse_unordered_with_dups.ratio_coords" : "0.5"
"sm.mem.reader.sparse_unordered_with_dups.ratio_tile_ranges" : "0.1"
"sm.mem.tile_upper_memory_limit" : "1073741824"
"sm.mem.total_budget" : "10737418240"
"sm.memory_budget" : "5368709120"
"sm.memory_budget_var" : "10737418240"
"sm.partial_tile_offsets_loading" : "false"
"sm.query.dense.qc_coords_mode" : "false"
"sm.query.dense.reader" : "refactored"
"sm.query.sparse_global_order.reader" : "refactored"
"sm.query.sparse_unordered_with_dups.reader" : "refactored"
"sm.read_range_oob" : "warn"
"sm.skip_checksum_validation" : "false"
"sm.skip_est_size_partitioning" : "false"
"sm.skip_unary_partitioning_budget_check" : "false"
"sm.vacuum.mode" : "fragments"
"sm.var_offsets.bitsize" : "64"
"sm.var_offsets.extra_element" : "false"
"sm.var_offsets.mode" : "bytes"
"vfs.azure.blob_endpoint" : ""
"vfs.azure.block_list_block_size" : "5242880"
"vfs.azure.max_parallel_ops" : "8"
"vfs.azure.max_retries" : "5"
"vfs.azure.max_retry_delay_ms" : "60000"
"vfs.azure.retry_delay_ms" : "800"
"vfs.azure.storage_account_key" : ""
"vfs.azure.storage_account_name" : ""
"vfs.azure.storage_sas_token" : ""
"vfs.azure.use_block_list_upload" : "true"
"vfs.file.max_parallel_ops" : "1"
"vfs.file.posix_directory_permissions" : "755"
"vfs.file.posix_file_permissions" : "644"
"vfs.gcs.max_parallel_ops" : "8"
"vfs.gcs.multi_part_size" : "5242880"
"vfs.gcs.project_id" : ""
"vfs.gcs.request_timeout_ms" : "3000"
"vfs.gcs.use_multi_part_upload" : "true"
"vfs.hdfs.kerb_ticket_cache_path" : ""
"vfs.hdfs.name_node_uri" : ""
"vfs.hdfs.username" : ""
"vfs.max_batch_size" : "104857600"
"vfs.min_batch_gap" : "512000"
"vfs.min_batch_size" : "20971520"
"vfs.min_parallel_size" : "10485760"
"vfs.read_ahead_cache_size" : "10485760"
"vfs.read_ahead_size" : "102400"
"vfs.s3.aws_access_key_id" : ""
"vfs.s3.aws_external_id" : ""
"vfs.s3.aws_load_frequency" : ""
"vfs.s3.aws_role_arn" : ""
"vfs.s3.aws_secret_access_key" : ""
"vfs.s3.aws_session_name" : ""
"vfs.s3.aws_session_token" : ""
"vfs.s3.bucket_canned_acl" : "NOT_SET"
"vfs.s3.ca_file" : ""
"vfs.s3.ca_path" : ""
"vfs.s3.config_source" : "auto"
"vfs.s3.connect_max_tries" : "5"
"vfs.s3.connect_scale_factor" : "25"
"vfs.s3.connect_timeout_ms" : "60800"
"vfs.s3.endpoint_override" : ""
"vfs.s3.logging_level" : "Off"
"vfs.s3.max_parallel_ops" : "8"
"vfs.s3.multipart_part_size" : "5242880"
"vfs.s3.no_sign_request" : "false"
"vfs.s3.object_canned_acl" : "NOT_SET"
"vfs.s3.proxy_host" : ""
"vfs.s3.proxy_password" : ""
"vfs.s3.proxy_port" : "0"
"vfs.s3.proxy_scheme" : "http"
"vfs.s3.proxy_username" : ""
"vfs.s3.region" : "us-east-2"
"vfs.s3.request_timeout_ms" : "3000"
"vfs.s3.requester_pays" : "false"
"vfs.s3.scheme" : "https"
"vfs.s3.skip_init" : "false"
"vfs.s3.sse" : ""
"vfs.s3.sse_kms_key_id" : ""
"vfs.s3.use_multipart_upload" : "true"
"vfs.s3.use_virtual_addressing" : "true"
"vfs.s3.verify_ssl" : "true"

Any idea about how to fix this issue, please?

Cinzia

Hello @cinzia ! This message from S3 usually means the region is either not set or set incorrectly. I see from your configuration that "vfs.s3.region" has been set to us-east-2. Is this the region of the bucket? If not, can you modify the config to set the correct region?

config["vfs.s3.region"] = "REGION"

Hi @seth
thank you for your reply.

The region is correct. Before this attempt I used the wrong region and got a different error.
So the region must be correct; otherwise I could not explain the partial creation of the files on S3 (obs is created properly):

FINISH READING input/pbmc3k.h5ad TIME 0.042 seconds
START DECATEGORICALIZING
FINISH DECATEGORICALIZING TIME 0.000 seconds
START WRITING s3://…-us-east-2/output/pbmc3k
START WRITING s3://…-us-east-2/output/pbmc3k/obs
FINISH WRITING s3://…-us-east-2/output/pbmc3k/obs TIME 1.674 seconds

These files are created properly on S3, but for some reason the path
's3://…/pbmc3k/ms/RNA/uns/draw_graph/params/random_state/__schema/'
cannot be created properly.

I tried with another big h5ad file (tabula_immune.h5ad) and got similar behaviour, this time failing at:
s3://…-us-east-2/output/tabula/ms/RNA/uns/_scvi/categorical_mappings/_scvi_labels/mapping/__schema/

Any idea, please ?

Another interesting thing.
I manually copied the SOMA files to S3 and tried to read them using the same TileDB context defined above, and it worked.

pbmc3k_uri = "s3://...-us-east-2/output/pbmc3k"
with tiledbsoma.open(pbmc3k_uri, context=soma_ctx) as pbmc3k_soma:
    pbmc3k_obs_slice = pbmc3k_soma.obs.read(
        value_filter="n_genes >500 and louvain in ['Megakaryocytes', 'CD4 T cells']"
    )
    
    # Concatenate iterator to pyarrow.Table
    print("Output from S3")
    print(pbmc3k_obs_slice.concat())

@cinzia I’ll be in touch soon – thank you for reporting!

Hello @cinzia – I cannot reproduce this with my own data on a cross-region write. Can you please show the output from the following, so I can compare it to the data I’m using?

import anndata as ad
adata = ad.read_h5ad('input/pbmc3k.h5ad')
type(adata.uns['draw_graph']['params']['random_state'])
adata.uns['draw_graph']['params']['random_state'].dtype

@cinzia also, in your original report

/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].
…

File "tiledb/libtiledb.pyx", line 353, in tiledb.libtiledb._raise_tiledb_error

can you please send all the lines that were omitted with ...?

Here we go:

The file was downloaded from
https://github.com/chanzuckerberg/cellxgene/raw/main/example-dataset/pbmc3k.h5ad

/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].

This is where adjacency matrices should go now.
warn(
/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['connectivities'] to .obsp['connectivities'].

This is where adjacency matrices should go now.
warn(

The message is truncated as you see.

The file was obtained from
https://github.com/chanzuckerberg/cellxgene/raw/main/example-dataset/pbmc3k.h5ad
This is the output.

/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].

This is where adjacency matrices should go now.
  warn(
/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['connectivities'] to .obsp['connectivities'].

This is where adjacency matrices should go now.
  warn(
AnnData object with n_obs × n_vars = 2638 × 1838
    obs: 'n_genes', 'percent_mito', 'n_counts', 'louvain'
    var: 'n_cells'
    uns: 'draw_graph', 'louvain', 'louvain_colors', 'neighbors', 'pca', 'rank_genes_groups'
    obsm: 'X_pca', 'X_tsne', 'X_umap', 'X_draw_graph_fr'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
<class 'numpy.ndarray'>
int64

@cinzia thank you!

What were the lines above

File "tiledb/libtiledb.pyx", line 353, in tiledb.libtiledb._raise_tiledb_error

? Did you see literally on your screen ... on the line just above the libtiledb._raise_tiledb_error line?

(@cinzia I suspect this is fixed by [python] Adding missing context in ingest uns ndarray by johnkerl · Pull Request #1552 · single-cell-data/TileDB-SOMA · GitHub – however, I’m puzzled by my inability to reproduce the error myself – hence my questions about the stack trace you saw)

....
FINISH WRITING s3://....-us-east-2/output/pbmc3k_test/obs TIME 2.012 seconds
Traceback (most recent call last):
  File "/home/ubuntu/soma/create_soma_aws.py", line 58, in <module>
    pbmc3k_uri = tiledbsoma.io.from_h5ad("s3://....-us-east-2/output/pbmc3k_test", input_path = "input/pbmc3k.h5ad", measurement_name = "RNA", context=soma_ctx)
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 169, in from_h5ad
    uri = from_anndata(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 319, in from_anndata
    _maybe_ingest_uns(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1298, in _maybe_ingest_uns
    _ingest_uns_dict(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1329, in _ingest_uns_dict
    _ingest_uns_node(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1366, in _ingest_uns_node
    _ingest_uns_dict(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1329, in _ingest_uns_dict
    _ingest_uns_node(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1366, in _ingest_uns_node
    _ingest_uns_dict(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1329, in _ingest_uns_dict
    _ingest_uns_node(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1411, in _ingest_uns_node
    _ingest_uns_ndarray(
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/io/ingest.py", line 1501, in _ingest_uns_ndarray
    soma_arr = _factory.open(arr_uri, "w", soma_type=DenseNDArray)
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/_factory.py", line 114, in open
    obj = _open_internal(_tdb_handles.open, uri, mode, context, tiledb_timestamp)
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/_factory.py", line 143, in _open_internal
    handle = opener(uri, mode, context, timestamp)
  File "/home/ubuntu/soma/venv/lib/python3.10/site-packages/tiledbsoma/_tdb_handles.py", line 48, in open
    obj_type = tiledb.object_type(uri, ctx=context.tiledb_ctx)
  File "tiledb/libtiledb.pyx", line 3171, in tiledb.libtiledb.object_type
  File "tiledb/libtiledb.pyx", line 374, in tiledb.libtiledb.check_error
  File "tiledb/libtiledb.pyx", line 368, in tiledb.libtiledb._raise_ctx_err
  File "tiledb/libtiledb.pyx", line 353, in tiledb.libtiledb._raise_tiledb_error
tiledb.cc.TileDBError: [TileDB::S3] Error: Error while listing with prefix 's3://....-us-east-2/output/pbmc3k_test/ms/RNA/uns/draw_graph/params/random_state/__schema/' and delimiter '/'
Exception:  PermanentRedirect
Error message:  Unable to parse ExceptionName: PermanentRedirect Message: The bucket you are attempting to access must be addressed using the specified endpoint. Please send all future requests to this endpoint.

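Thank you @cinzia ! This indeed points to the missing-context issue addressed by Pull Request #1552: in your traceback, `_ingest_uns_ndarray` calls `_factory.open(arr_uri, "w", soma_type=DenseNDArray)` without passing the context you supplied, so your S3 region setting is not applied to that call.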

We’ll have a TileDB-SOMA 1.3.1 release out very soon – I’ll be in touch!

Perfect. I am happy to try the new release.
Thank you for the help

@cinzia fantastic, thank you!

Dear @johnkerl
I tried to run the code above with
tiledb==0.22.3
tiledbsoma==1.4.3
and it worked.

Many thanks
Cinzia

@cinzia fantastic – thank you! :slight_smile: