Hello,
I am trying to create a SOMA file from h5ad.
I was able to generate the SOMA file on the local hard disk, but when I try to generate it on S3 I get the following error:
START Experiment.from_h5ad input/pbmc3k.h5ad
START READING input/pbmc3k.h5ad
/home/ubuntu/soma/venv/lib/python3.10/site-packages/anndata/compat/__init__.py:229: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].
…
File "tiledb/libtiledb.pyx", line 353, in tiledb.libtiledb._raise_tiledb_error
tiledb.cc.TileDBError: [TileDB::S3] Error: Error while listing with prefix 's3://…/pbmc3k/ms/RNA/uns/draw_graph/params/random_state/__schema/' and delimiter '/'
Exception: PermanentRedirect
Error message: Unable to parse ExceptionName: PermanentRedirect Message: The bucket you are attempting to access must be addressed using the specified endpoint. Please send all future requests to this endpoint.
The output is partially created but something went wrong.
I am using:
python 3.10
tiledb==0.22.0
tiledbsoma==1.3.0
import tiledb
import tiledbsoma
import tiledbsoma.io
import tiledbsoma.logging
from tiledbsoma.options import SOMATileDBContext
tiledbsoma.logging.debug()
config = tiledb.Config()
config["vfs.s3.scheme"] = "https"
config["vfs.s3.region"] = "us-east-2"
config["vfs.s3.endpoint_override"] = ""
config["vfs.s3.use_virtual_addressing"] = "true"
config["vfs.s3.use_multipart_upload"] = "true"
tdb_ctx = tiledb.Ctx(config=config)
soma_ctx = SOMATileDBContext(tiledb_ctx=tdb_ctx)
pbmc3k_uri = tiledbsoma.io.from_h5ad("s3://.../pbmc3k", input_path = "input/pbmc3k.h5ad", measurement_name = "RNA", context=soma_ctx)
The debug output for the config parameters:
Default settings:
"config.env_var_prefix" : "TILEDB_"
"config.logging_format" : "DEFAULT"
"config.logging_level" : "0"
"filestore.buffer_size" : "104857600"
"rest.curl.buffer_size" : "524288"
"rest.curl.verbose" : "false"
"rest.http_compressor" : "any"
"rest.load_metadata_on_array_open" : "true"
"rest.load_non_empty_domain_on_array_open" : "true"
"rest.retry_count" : "25"
"rest.retry_delay_factor" : "1.25"
"rest.retry_http_codes" : "503"
"rest.retry_initial_delay_ms" : "500"
"rest.server_address" : "https://api.tiledb.com"
"rest.server_serialization_format" : "CAPNP"
"rest.use_refactored_array_open" : "false"
"rest.use_refactored_array_open_and_query_submit" : "false"
"sm.allow_separate_attribute_writes" : "false"
"sm.allow_updates_experimental" : "false"
"sm.check_coord_dups" : "true"
"sm.check_coord_oob" : "true"
"sm.check_global_order" : "true"
"sm.compute_concurrency_level" : "8"
"sm.consolidation.amplification" : "1.0"
"sm.consolidation.buffer_size" : "50000000"
"sm.consolidation.max_fragment_size" : "18446744073709551615"
"sm.consolidation.mode" : "fragments"
"sm.consolidation.purge_deleted_cells" : "false"
"sm.consolidation.step_max_frags" : "4294967295"
"sm.consolidation.step_min_frags" : "4294967295"
"sm.consolidation.step_size_ratio" : "0.0"
"sm.consolidation.steps" : "4294967295"
"sm.consolidation.timestamp_end" : "18446744073709551615"
"sm.consolidation.timestamp_start" : "0"
"sm.dedup_coords" : "false"
"sm.enable_signal_handlers" : "true"
"sm.encryption_key" : ""
"sm.encryption_type" : "NO_ENCRYPTION"
"sm.fragment_info.preload_mbrs" : "false"
"sm.group.timestamp_end" : "18446744073709551615"
"sm.group.timestamp_start" : "0"
"sm.io_concurrency_level" : "8"
"sm.max_tile_overlap_size" : "314572800"
"sm.mem.malloc_trim" : "true"
"sm.mem.reader.sparse_global_order.ratio_array_data" : "0.1"
"sm.mem.reader.sparse_global_order.ratio_coords" : "0.5"
"sm.mem.reader.sparse_global_order.ratio_tile_ranges" : "0.1"
"sm.mem.reader.sparse_unordered_with_dups.ratio_array_data" : "0.1"
"sm.mem.reader.sparse_unordered_with_dups.ratio_coords" : "0.5"
"sm.mem.reader.sparse_unordered_with_dups.ratio_tile_ranges" : "0.1"
"sm.mem.tile_upper_memory_limit" : "1073741824"
"sm.mem.total_budget" : "10737418240"
"sm.memory_budget" : "5368709120"
"sm.memory_budget_var" : "10737418240"
"sm.partial_tile_offsets_loading" : "false"
"sm.query.dense.qc_coords_mode" : "false"
"sm.query.dense.reader" : "refactored"
"sm.query.sparse_global_order.reader" : "refactored"
"sm.query.sparse_unordered_with_dups.reader" : "refactored"
"sm.read_range_oob" : "warn"
"sm.skip_checksum_validation" : "false"
"sm.skip_est_size_partitioning" : "false"
"sm.skip_unary_partitioning_budget_check" : "false"
"sm.vacuum.mode" : "fragments"
"sm.var_offsets.bitsize" : "64"
"sm.var_offsets.extra_element" : "false"
"sm.var_offsets.mode" : "bytes"
"vfs.azure.blob_endpoint" : ""
"vfs.azure.block_list_block_size" : "5242880"
"vfs.azure.max_parallel_ops" : "8"
"vfs.azure.max_retries" : "5"
"vfs.azure.max_retry_delay_ms" : "60000"
"vfs.azure.retry_delay_ms" : "800"
"vfs.azure.storage_account_key" : ""
"vfs.azure.storage_account_name" : ""
"vfs.azure.storage_sas_token" : ""
"vfs.azure.use_block_list_upload" : "true"
"vfs.file.max_parallel_ops" : "1"
"vfs.file.posix_directory_permissions" : "755"
"vfs.file.posix_file_permissions" : "644"
"vfs.gcs.max_parallel_ops" : "8"
"vfs.gcs.multi_part_size" : "5242880"
"vfs.gcs.project_id" : ""
"vfs.gcs.request_timeout_ms" : "3000"
"vfs.gcs.use_multi_part_upload" : "true"
"vfs.hdfs.kerb_ticket_cache_path" : ""
"vfs.hdfs.name_node_uri" : ""
"vfs.hdfs.username" : ""
"vfs.max_batch_size" : "104857600"
"vfs.min_batch_gap" : "512000"
"vfs.min_batch_size" : "20971520"
"vfs.min_parallel_size" : "10485760"
"vfs.read_ahead_cache_size" : "10485760"
"vfs.read_ahead_size" : "102400"
"vfs.s3.aws_access_key_id" : ""
"vfs.s3.aws_external_id" : ""
"vfs.s3.aws_load_frequency" : ""
"vfs.s3.aws_role_arn" : ""
"vfs.s3.aws_secret_access_key" : ""
"vfs.s3.aws_session_name" : ""
"vfs.s3.aws_session_token" : ""
"vfs.s3.bucket_canned_acl" : "NOT_SET"
"vfs.s3.ca_file" : ""
"vfs.s3.ca_path" : ""
"vfs.s3.config_source" : "auto"
"vfs.s3.connect_max_tries" : "5"
"vfs.s3.connect_scale_factor" : "25"
"vfs.s3.connect_timeout_ms" : "60800"
"vfs.s3.endpoint_override" : ""
"vfs.s3.logging_level" : "Off"
"vfs.s3.max_parallel_ops" : "8"
"vfs.s3.multipart_part_size" : "5242880"
"vfs.s3.no_sign_request" : "false"
"vfs.s3.object_canned_acl" : "NOT_SET"
"vfs.s3.proxy_host" : ""
"vfs.s3.proxy_password" : ""
"vfs.s3.proxy_port" : "0"
"vfs.s3.proxy_scheme" : "http"
"vfs.s3.proxy_username" : ""
"vfs.s3.region" : "us-east-2"
"vfs.s3.request_timeout_ms" : "3000"
"vfs.s3.requester_pays" : "false"
"vfs.s3.scheme" : "https"
"vfs.s3.skip_init" : "false"
"vfs.s3.sse" : ""
"vfs.s3.sse_kms_key_id" : ""
"vfs.s3.use_multipart_upload" : "true"
"vfs.s3.use_virtual_addressing" : "true"
"vfs.s3.verify_ssl" : "true"
Any idea how to fix this issue, please?
Cinzia