I am trying to build a TileDB array to store sparse lidar data (GEDI data).
I managed to create the array and write the data to it, but I cannot read the data back using spatial and temporal indexing. Either the result comes back empty when I index only on the time dimension, or I get the error below when I also index spatially:
TileDBError: TileDB internal: Lower range bound 61.000000 cannot be larger than the higher bound 30.000000
I provide a code example below if someone can help me.
import tiledb
import numpy as np
# Bounds of the synthetic coordinate space (degrees for latitude/longitude).
lat_min, lat_max = -90.0, 90.0
lon_min, lon_max = -180.0, 180.0
# Timestamps are microseconds since the Unix epoch
# (2018-01-01 through 2030-12-31 UTC).
time_min, time_max = 1514764800000000, 1924991999000000

# One dimension per coordinate axis; the spatial axes use a tile extent of 1,
# the time axis leaves the tile extent unset.
lat_dim = tiledb.Dim(
    name="latitude", domain=(lat_min, lat_max), tile=1, dtype="float64"
)
lon_dim = tiledb.Dim(
    name="longitude", domain=(lon_min, lon_max), tile=1, dtype="float64"
)
time_dim = tiledb.Dim(
    name="time", domain=(time_min, time_max), tile=None, dtype="int64"
)
domain = tiledb.Domain(lat_dim, lon_dim, time_dim)

# Sparse schema carrying a single float32 attribute per point.
attr = tiledb.Attr(name="intensity", dtype="float32")
schema = tiledb.ArraySchema(domain=domain, attrs=[attr], sparse=True)

# Start from a clean slate: drop any array left over from a previous run,
# then create the empty array on disk.
array_uri = "test_array"
if tiledb.array_exists(array_uri):
    tiledb.remove(array_uri)
tiledb.Array.create(array_uri, schema)
# Write 100 synthetic points to the array.
with tiledb.open(array_uri, mode="w") as array:
    # Fixed seed so every run writes the same points.
    np.random.seed(0)
    # Draw coordinates from the same bounds the dimensions were declared with,
    # so the data can never drift outside the array domain.
    lat_data = np.random.uniform(lat_min, lat_max, size=100)
    lon_data = np.random.uniform(lon_min, lon_max, size=100)
    # Request int64 explicitly: the microsecond bounds do not fit in 32 bits,
    # and on platforms where NumPy's default integer is 32-bit randint would
    # otherwise raise "high is out of bounds". It also matches the dtype of
    # the "time" dimension.
    time_data = np.random.randint(time_min, time_max, size=100, dtype=np.int64)
    intensity_data = np.random.rand(100).astype("float32")
    # Sparse write: one coordinate triple per attribute value.
    array[lat_data, lon_data, time_data] = {"intensity": intensity_data}
# Read back a spatio-temporal window of the array.
with tiledb.open(array_uri, mode="r") as array:
    # Query window, expressed in real coordinate values.
    lat_slice_min, lat_slice_max = -30.0, 30.0
    lon_slice_min, lon_slice_max = -150.0, 150.0
    # NOTE: this window is only 100 seconds wide, while the written timestamps
    # are spread uniformly over the whole time domain, so an empty result is
    # expected unless a point happens to fall inside it. Widen the window
    # (e.g. up to time_max) to get points back.
    time_slice_min, time_slice_max = 1514764800000000, 1514764900000000

    # Use multi_index rather than plain array[...] indexing. Plain [] follows
    # NumPy conventions, including wrap-around of negative indices: a start of
    # -30.0 is remapped to -30 + int(90) + 1 = 61.0, which produced
    # "Lower range bound 61.000000 cannot be larger than the higher bound
    # 30.000000". multi_index takes the bounds as coordinate values, with both
    # ends of each range inclusive.
    sliced_data = array.multi_index[
        lat_slice_min:lat_slice_max,
        lon_slice_min:lon_slice_max,
        time_slice_min:time_slice_max,
    ]
    print("Sliced data:")
    print(sliced_data)
Thanks a lot for your help.
Simon.