Query Conditions | TileDB Embedded Docs does not provide examples of querying for missing (NaN) values.
How does one perform the equivalent of `isna()` or `== None` when querying in TileDB?
1 Like
We don’t support that directly in our core engine. Since I suspect this is SOMA related, you can do something like the following to get the coords from obs, which can then be passed into a subsequent ExperimentAxisQuery:
from typing import List
import tiledbsoma
import tiledb
def find_nonnull_obs(
    soma_uri: str,
    attr: str,
) -> List[int]:
    """Return the ``soma_joinid`` of every obs row whose ``attr`` is not null.

    The obs array of the SOMA experiment at ``soma_uri`` is opened directly
    with ``tiledb`` and read into a pandas DataFrame; the null check is then
    done in pandas rather than in a TileDB query.

    Args:
        soma_uri: URI of the SOMA experiment.
        attr: Name of the obs column to test for missing values.

    Returns:
        The ``soma_joinid`` values of the rows where ``attr`` is present.
    """
    # Only the obs URI is needed from the experiment.
    with tiledbsoma.Experiment.open(soma_uri) as exp:
        obs_uri = exp.obs.uri

    with tiledb.open(obs_uri) as arr:
        # Reads the whole obs array. Other query conditions can be applied
        # here for a smaller return size, e.g. arr.query().df[:]
        obs_df = arr.df[:]

    has_value = obs_df[attr].notna()
    non_null_joinids = obs_df.loc[has_value, "soma_joinid"]
    return non_null_joinids.tolist()
if __name__ == "__main__":
    import sys

    # `uri` was never defined in the original snippet (NameError at runtime);
    # take the experiment URI from the command line instead.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} SOMA_EXPERIMENT_URI")
    uri = sys.argv[1]

    # Join ids of the cells whose "tissue" value is present.
    non_null_joinids = find_nonnull_obs(
        uri,
        "tissue",
    )

    with tiledbsoma.Experiment.open(uri) as exp:
        n_all = exp.obs.count
        print(f"n cells all: {n_all}")

        # Restrict the obs axis to the previously collected join ids.
        query = exp.axis_query(
            measurement_name="RNA",
            obs_query=tiledbsoma.AxisQuery(coords=(non_null_joinids,)),
        )
        n_non_null = query.n_obs
        print(f"n cells non-null: {n_non_null}")

        # Guard the ratio: an empty obs would otherwise divide by zero.
        if n_all:
            print(f"proportion null: {(n_all - n_non_null) / n_all}")
        else:
            print("proportion null: n/a (obs is empty)")
1 Like