I am trying to read SQL tables and perform a merge in Dask. I am using Dask version 2.8.0. Here is a snippet of my code:
tdf = dd.read_sql_table('comments', conn_url, index_col='author', divisions=list('1234567890'))
adf = dd.read_sql_table('users', conn_url, index_col='id', divisions=list('1234567890'))
dd.merge(tdf, adf, how='left', left_index=True, right_index=True)
The dtype of both indexes is 'O' (object). However, I get the following error:
...
...
~/continual/venv/lib/python3.8/site-packages/dask/dataframe/core.py in repartition(self, divisions, npartitions, partition_size, freq, force)
1120 return repartition_npartitions(self, npartitions)
1121 elif divisions is not None:
-> 1122 return repartition(self, divisions, force=force)
1123 elif freq is not None:
1124 return repartition_freq(self, freq=freq)
~/continual/venv/lib/python3.8/site-packages/dask/dataframe/core.py in repartition(df, divisions, force)
5656 tmp = "repartition-split-" + token
5657 out = "repartition-merge-" + token
-> 5658 dsk = repartition_divisions(
5659 df.divisions, divisions, df._name, tmp, out, force=force
5660 )
~/continual/venv/lib/python3.8/site-packages/dask/dataframe/core.py in repartition_divisions(a, b, name, out1, out2, force)
5314 ('c', 2): ('b', 3)}
5315 """
-> 5316 check_divisions(b)
5317
5318 if len(b) < 2:
~/continual/venv/lib/python3.8/site-packages/dask/dataframe/core.py in check_divisions(divisions)
5276 divisions = list(divisions)
5277 if divisions != sorted(divisions):
-> 5278 raise ValueError("New division must be sorted")
5279 if len(divisions[:-1]) != len(list(unique(divisions[:-1]))):
5280 msg = "New division must be unique, except for the last element"
ValueError: New division must be sorted
How can I achieve this join?