# load libraries
import yaml
import lightgbm
import matplotlib.pyplot as plt
import pandas as pd
import geopandas
import numpy as np
import os
import pandas as pd
import pickle
# Work from the project root; the config path below is resolved against it.
os.chdir('/home/desktop3/itu')
path = os.getcwd()
print(path)
# Read in the (yaml) model configuration. The body of the `with` must be
# indented, otherwise the file does not even parse.
with open(os.path.join(path, 'conf', 'model_config.yaml'), 'r') as conf:
    model_config = yaml.safe_load(conf)
# Import data: the CSV location is the configured directory prefix ('loc')
# concatenated with the configured file name ('file').
dataset_path = model_config['model']['loc'] + model_config['model']['file']
dataset = pd.read_csv(dataset_path)
# Column names fed to the model (predictors) and the column name of the
# value being predicted (target). Order of the predictors is preserved.
predictor = [
    'avg_d_kbps',
    'avg_u_kbps',
    'mean_ghm',
    'avg_rad_mean',
    'cf_cvg_mean',
    'slope_avg_rad',
    'slope_cf_cvg',
    'slope_month_avg_rad',
    'slope_monthcf_cvg',
    'estimate_mau',
    'value_norm',
]
target = ['A4A_right']
# NOTE: The current issue is that the predictors are named differently in the Thailand dataset than in the Brazil dataset; exactly the same predictors must be used for both.
# Notebook-style inspection of the loaded frame; these bare expressions only
# display something in an interactive session.
dataset.columns
dataset
# Prepare data: keep only the predictor columns as the model input.
X = dataset.loc[:, predictor]
# y = dataset[target]
print('X Shape:', X.shape)
# print('y Shape:', y.shape)
import mlflow

# URI of the logged MLflow model artifact to score with.
logged_model = (
    'file:///files/mlruns/78/add3053875954bc494facaa833b20746/artifacts/model'
)
# Load the artifact through MLflow's generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(logged_model)
# Run the model on the predictor DataFrame.
y_pred = loaded_model.predict(X)
# Bare expression: displays the predictions in an interactive session.
y_pred
# NOTE: The error metrics below cannot be computed for this dataset because no ground truth is available at the same level as the predictions.
def _summarize_errors(predictions, truth, threshold):
    """Return absolute-error statistics overall and for the low tail.

    Args:
        predictions: Model outputs (array-like); flattened to 1-D.
        truth: Ground-truth values aligned element-wise with ``predictions``
            (array-like, Series or single-column DataFrame); flattened to 1-D.
        threshold: Values strictly below this are treated as the low tail.

    Returns:
        A tuple ``(errors, avg_error, errors_low, errors_low_ytest,
        avg_error_low, avg_error_low_ytest, stan_dev_low)``:
        all absolute errors and their mean; the errors where the *prediction*
        is below ``threshold``; the errors where the *ground truth* is below
        ``threshold``; the means of those two subsets; and the standard
        deviation of the prediction-based subset. Statistics of an empty
        subset are NaN.

    Raises:
        ValueError: If ``predictions`` and ``truth`` differ in length.
    """
    predicted = np.asarray(predictions, dtype=float).ravel()
    actual = np.asarray(truth, dtype=float).ravel()
    if predicted.shape != actual.shape:
        raise ValueError(
            'predictions and truth must have the same number of values, got '
            f'{predicted.size} and {actual.size}'
        )

    def _or_nan(stat, values):
        # np.mean / np.std warn on empty input; report NaN quietly instead.
        return stat(values) if values.size else np.nan

    errors = np.abs(predicted - actual)
    # Low tail selected by the predicted value.
    errors_low = errors[predicted < threshold]
    # Low tail selected by the ground-truth value.
    errors_low_ytest = errors[actual < threshold]
    return (
        errors,
        _or_nan(np.mean, errors),
        errors_low,
        errors_low_ytest,
        _or_nan(np.mean, errors_low),
        # Previously this was computed from errors_low by mistake.
        _or_nan(np.mean, errors_low_ytest),
        _or_nan(np.std, errors_low),
    )


# The metrics need ground truth for the same rows as the predictions. Only
# compute them when the target column(s) are actually present in the dataset;
# otherwise skip instead of failing on an undefined ``y``.
if set(target).issubset(dataset.columns):
    y = dataset[target]
    # Same report for both low-tail cut-offs.
    for threshold in (0.3, 0.5):
        (errors, avg_error, errors_low, errors_low_ytest,
         avg_error_low, avg_error_low_ytest, stan_dev_low) = _summarize_errors(
            y_pred, y.iloc[:, 0], threshold)
        print('Low-tail threshold: ', threshold)
        print('errors: ', errors)
        print('avg error: ', avg_error)
        # print('Just the lower errors: ', errors_low)
        print('Mean lower error: ', avg_error_low)
        print('Mean ytest lower error: ', avg_error_low_ytest)
        # print('y test error: ', errors_low_ytest)
        print('Standard Dev of Low Error: ', stan_dev_low)
else:
    print('Skipping error metrics; ground-truth column(s) not in dataset:',
          target)
from shapely import wkt

# Store the model output next to the rows it was computed from.
dataset['Predictions'] = y_pred
# Parse the WKT text held in the 'School' column into geometry objects, then
# wrap the frame as a GeoDataFrame that uses 'School' as its geometry column.
dataset['School'] = geopandas.GeoSeries.from_wkt(
    dataset['School']
)
gdf = geopandas.GeoDataFrame(dataset, geometry='School')
# Bare expression: displays the shape in an interactive session.
gdf.shape
def _plot_predictions(frame, heading):
    """Plot the geometries of ``frame`` colored by their 'Predictions' value.

    Args:
        frame: GeoDataFrame with a 'Predictions' column.
        heading: Text for the figure-level title.

    Returns:
        The ``(fig, ax)`` pair the plot was drawn on.
    """
    # Set the figure size
    fig, ax = plt.subplots(1, figsize=(15, 10))
    frame.plot(column="Predictions", cmap='viridis', legend=True, ax=ax)
    # Title the figure and axes explicitly rather than via pyplot's
    # "current" figure/axes.
    fig.suptitle(heading, fontsize=18, fontweight=3)
    # Derive the count from the data so the subtitle cannot go stale when
    # the dataset or the threshold changes.
    ax.set_title(f'{len(frame)} schools', fontsize=13)
    # show the plot
    plt.show()
    return fig, ax


# All schools, each colored by its prediction value.
fig, ax = _plot_predictions(gdf, 'Predictions for all Thailand schools')
# Only the schools whose prediction is below 0.5.
Low_pred = gdf.loc[gdf['Predictions'] < .5]
Low_pred.shape
fig, ax = _plot_predictions(
    Low_pred, 'Predictions for Schools below 50% internet connectivity')