from pandas.api.types import is_string_dtype, is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype
import pandas as pd
import datetime as dt
import numpy as np
import json
import random
def cast_var_to_string(x):
    """Replace nested ``Configurations``/``Properties`` values with JSON text.

    When ``x`` is a list, each element is updated in place: the values stored
    under the ``"Configurations"`` and ``"Properties"`` keys are replaced by
    their ``json.dumps`` serialization. Any other input is left untouched.

    Args:
        x: A list of dict-like records, or any other value.

    Returns:
        The same object that was passed in (lists are mutated, not copied).

    Raises:
        KeyError: If a list element lacks one of the two keys.
        TypeError: If a value is not JSON serializable.
    """
    # isinstance also accepts list subclasses, unlike an exact type comparison.
    if isinstance(x, list):
        for record in x:
            # `record` is the very object stored in the list, so assigning to
            # its keys updates the caller's data without re-indexing `x`.
            record["Configurations"] = json.dumps(record["Configurations"])
            record["Properties"] = json.dumps(record["Properties"])
    return x
def recurse_cols(df: pd.DataFrame, name1=None, islist=False):
    """Describe every column of ``df`` as a schema field dictionary.

    Each field carries the keys ``name``, ``nullable``, ``dict_id``,
    ``dict_is_ordered``, ``metadata`` and ``data_type``. The data type is
    chosen from the column dtype:

    * boolean columns map to ``"Boolean"``
    * datetime64 columns map to ``{"Timestamp": ["Nanosecond", "UTC"]}``
    * other numeric columns map to ``"Float64"``
    * string/object columns map to ``"Utf8"``, unless they hold lists or
      dicts, in which case the field becomes ``{"List": <field>}`` or
      ``{"Struct": [<field>, ...]}`` built by recursing into the values.

    Columns of any other dtype get no ``data_type`` key.

    Args:
        df: Frame whose columns are described.
        name1: Optional name that overrides the column label in the emitted
            field (used for list elements, which are named ``"item"``).
        islist: When true, return the field dictionary of the first column
            instead of a list of fields.

    Returns:
        A list of field dictionaries, or a single field dictionary when
        ``islist`` is true and ``df`` has at least one column.
    """
    fields = []
    for item in df.columns:
        column = df[item]
        item_dict = {
            "name": item if name1 is None else name1,
            "nullable": True,
            "dict_id": 0,
            "dict_is_ordered": False,
            "metadata": {},
        }
        # Order matters: bool columns also count as numeric, so test them first.
        if is_bool_dtype(column):
            item_dict["data_type"] = "Boolean"
        elif is_datetime64_any_dtype(column):
            item_dict["data_type"] = {"Timestamp": ["Nanosecond", "UTC"]}
        elif is_numeric_dtype(column):
            item_dict["data_type"] = "Float64"
        elif is_string_dtype(column) or column.dtype == object:
            # Newer pandas reports object columns holding lists/dicts as
            # non-string, so object dtype is accepted explicitly here.
            item_dict["data_type"] = "Utf8"
            # The first list/dict value decides the nested type. Missing
            # values are neither, so no separate NA check (and no label-based
            # lookup that breaks on a non-default index) is needed.
            nested = next(
                (value for value in column if isinstance(value, (list, dict))),
                None,
            )
            if isinstance(nested, list):
                exploded = column.explode().reset_index(drop=True).to_frame()
                item_dict["data_type"] = {
                    "List": recurse_cols(exploded, name1="item", islist=True)
                }
            elif isinstance(nested, dict):
                # Only dict entries can be normalized; missing rows would
                # make json_normalize fail.
                records = [value for value in column if isinstance(value, dict)]
                item_dict["data_type"] = {
                    "Struct": recurse_cols(
                        pd.json_normalize(records, max_level=0)
                    )
                }
        if islist:
            return item_dict
        fields.append(item_dict)
    return fields
# Build the schema document for `df` and print it as a single JSON line.
# NOTE(review): `df` is not defined in the visible source — presumably it is
# created before this point (e.g. in an earlier notebook cell); confirm.
fields = recurse_cols(df)
struct = dict(fields=fields, metadata={})
print(json.dumps(struct))