I am trying to follow the tutorial on TileDB youtube https://youtu.be/u3QlXGwFqak?t=1508
Goal is to import ais csv, query it by given extents, and display the results on a map. The following code works fine, and I see the output map.
However the issue is the data imported into tileDB array has LAT LON dimensions as WGS84 coordinates. So when I try to display them on map, the points print but base map is wrong obviously. This is because points are expected to be in Web Mercator ( meters )
Like you can see I tried to convert, the dataframe like so, but it doesn’t work because arr.query
does not bring in LAT and LON as attributes. I can probably do that and the conversion will work, however my question is since LAT LON dimensions are available on Array and on Dataframe, whats the most performant ( TileDB idiomatic way) to convert the array dimensions to Web Mercator? Either when querying or opening the dataset?
This is a prototype example so dataset is quite small., however eventually I might be dealing with a much larger set of points.
df.loc[:, "LON"], df.loc[:, "LAT"] = lnglat_to_meters(df.LON, df.LAT) #<<<<< PROBLEM HERE
So is there a better way to do this than what am trying above by adding lat, lon to dataframe as attributes?
Full Code
import tiledb
import numpy as np
import pandas
from pandas_geojson import to_geojson
from pandas_geojson import write_geojson
from datashader.utils import lnglat_to_meters
import holoviews as hv
import holoviews.operation.datashader as hd
from holoviews.element import tiles
from matplotlib import cm
import subprocess
hv.extension("bokeh")
print(tiledb.libtiledb.version())
print("Querying data by bounds:")
xmin, ymin, xmax, ymax = [-81.41, 30.24, -80.85, 30.54]
with tiledb.SparseArray("data/ais.tldb", mode="r") as arr:
print(f"Array non-empty domain: {arr.nonempty_domain()}")
df = arr.query(attrs=["MMSI", "VesselName"], dims=["LAT", "LON"]).df[
ymin:ymax, xmin:xmax
]
print("------Dataframe columns-------")
print(df.columns)
print("------Array schema-------")
print(arr.schema)
print("------Convert dataframe to web mercator-------")
# df.loc[:, "LON"], df.loc[:, "LAT"] = lnglat_to_meters(df.LON, df.LAT) #<<<<< PROBLEM HERE
print(f"Retrived {len(df)} vessels in the area of interest")
hv.output(backend="bokeh")
background = tiles.CartoDark().opts(
alpha=1, width=600, height=600, xaxis=None, yaxis=None
)
points = hv.Points(df, ["LON", "LAT"])
obj = background * hd.datashade(points, cmap=cm.plasma)
hv.save(obj, "out.html")
subprocess.Popen("open http://127.0.0.1:5500/out.html", shell=True)
Console Output
(2, 17, 1)
Querying data by bounds:
Array non-empty domain: ((4.62448, 71.89011), (-159.61578, 145.46594), (numpy.datetime64('2023-04-20T00:00:00.000000000'), numpy.datetime64('2023-04-20T23:59:59.000000000')))
------Dataframe columns-------
Index(['MMSI', 'VesselName'], dtype='object')
------Array schema-------
ArraySchema(
domain=Domain(*[
Dim(name='LAT', domain=(4.62448, 71.89011), tile=68.0, dtype='float64', filters=FilterList([ZstdFilter(level=-1), ])),
Dim(name='LON', domain=(-159.61578, 145.46594), tile=100.0, dtype='float64', filters=FilterList([ZstdFilter(level=-1), ])),
Dim(name='BaseDateTime', domain=(numpy.datetime64('2023-04-20T00:00:00.000000000'), numpy.datetime64('2023-04-20T23:59:59.000000000')), tile=numpy.timedelta64(100,'ns'), dtype='datetime64[ns]', filters=FilterList([ZstdFilter(level=-1), ])),
]),
attrs=[
Attr(name='MMSI', dtype='int64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='SOG', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='COG', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='Heading', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='VesselName', dtype='<U0', var=True, nullable=True, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='IMO', dtype='<U0', var=True, nullable=True, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='CallSign', dtype='<U0', var=True, nullable=True, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='VesselType', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='Status', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='Length', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='Width', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='Draft', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='Cargo', dtype='float64', var=False, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
Attr(name='TransceiverClass', dtype='<U0', var=True, nullable=False, enum_label=None, filters=FilterList([ZstdFilter(level=-1), ])),
],
cell_order='row-major',
tile_order='row-major',
capacity=10000,
sparse=True,
allows_duplicates=True,
)
------Convert dataframe to web mercator-------
Retrived 8300 vessels in the area of interest
Image Output