# %%
# load libraries
# Standard library
import os
import pickle
import warnings

# Third-party
import geopandas
import lightgbm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml

# Project root that all relative paths in this notebook are resolved against.
# The original absolute path remains the default; set ITU_PROJECT_DIR to run
# the notebook on another machine without editing this cell.
PROJECT_DIR = os.environ.get('ITU_PROJECT_DIR', '/home/desktop3/itu')
os.chdir(PROJECT_DIR)
path = os.getcwd()
print(path)

# %% [markdown]
# ## Loading the Thailand Data

# %%
# read in (yaml) configs
config_path = os.path.join(path, 'conf', 'model_config.yaml')
with open(config_path, 'r') as conf:
    model_config = yaml.safe_load(conf)

# import data
# Keep the file location under its own name so `dataset` is only ever the
# loaded DataFrame (the original rebound one name from a str to a DataFrame).
dataset_path = model_config['model']['loc'] + model_config['model']['file']
dataset = pd.read_csv(dataset_path)

# define predictors and target
predictor = [
    'avg_d_kbps',
    'avg_u_kbps',
    'mean_ghm',
    'avg_rad_mean',
    'cf_cvg_mean',
    'slope_avg_rad',
    'slope_cf_cvg',
    'slope_month_avg_rad',
    'slope_monthcf_cvg',
    'estimate_mau',
    'value_norm',
]
target = ['A4A_right']

# Surface naming mismatches between this dataset and the predictor/target
# lists right away (as a warning, so the notebook keeps running) instead of
# as a KeyError in a later cell.
missing_columns = [col for col in predictor + target if col not in dataset.columns]
if missing_columns:
    warnings.warn(
        f"Columns not found in {dataset_path}: {missing_columns}"
    )

# %% [markdown]
# Current issue: the predictors are named differently in the Thailand dataset
# than in the Brazil dataset, and the exact same predictors need to be used
# for both.
# %%
# Display the column labels of the loaded `dataset` frame.
# NOTE(review): presumably here to compare the available column names against
# the `predictor` / `target` lists defined earlier -- confirm.
dataset.columns
\n", " | Unnamed: 0 | \n", "Unnamed: 0.1 | \n", "School | \n", "source_school_id | \n", "ENUM | \n", "H107 | \n", "lon | \n", "lat | \n", "range | \n", "samples | \n", "... | \n", "slope_month_avg_rad | \n", "change_month_avg_rad | \n", "slope_NDVI | \n", "change_NDVI | \n", "mean_ghm | \n", "estimate_dau | \n", "estimate_mau | \n", "estimate_ready | \n", "value | \n", "value_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "POINT (98.2554367 8.6658159) | \n", "255542593 | \n", "11.0 | \n", "1.0 | \n", "98.257309 | \n", "8.666424 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "0.008125 | \n", "0.008125 | \n", "-63.107123 | \n", "-63.107123 | \n", "0.556310 | \n", "4835 | \n", "5900 | \n", "True | \n", "254.46342 | \n", "0.804133 | \n", "
1 | \n", "1 | \n", "1 | \n", "POINT (100.5637277 13.7281689) | \n", "280389453 | \n", "11.0 | \n", "1.0 | \n", "100.563824 | \n", "13.728090 | \n", "1.0 | \n", "12.0 | \n", "... | \n", "0.154787 | \n", "0.154787 | \n", "-124.015608 | \n", "-124.015608 | \n", "0.891303 | \n", "1040046 | \n", "1600000 | \n", "True | \n", "13295.45700 | \n", "0.908735 | \n", "
2 | \n", "2 | \n", "2 | \n", "POINT (99.3090438 6.4918845) | \n", "321664554 | \n", "11.0 | \n", "1.0 | \n", "99.309902 | \n", "6.491847 | \n", "1.0 | \n", "4.0 | \n", "... | \n", "0.003737 | \n", "0.003737 | \n", "132.490023 | \n", "132.490023 | \n", "0.348923 | \n", "0 | \n", "0 | \n", "False | \n", "109.93822 | \n", "0.802974 | \n", "
3 | \n", "3 | \n", "3 | \n", "POINT (98.99020710000001 18.8422541) | \n", "336232077 | \n", "14.0 | \n", "1.0 | \n", "98.990578 | \n", "18.842300 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "0.029212 | \n", "0.029212 | \n", "-52.833499 | \n", "-52.833499 | \n", "0.773788 | \n", "237917 | \n", "330000 | \n", "True | \n", "1407.96750 | \n", "0.813386 | \n", "
4 | \n", "4 | \n", "4 | \n", "POINT (102.773318 16.4357572) | \n", "373156854 | \n", "14.0 | \n", "1.0 | \n", "102.774895 | \n", "16.435555 | \n", "1.0 | \n", "22.0 | \n", "... | \n", "0.023249 | \n", "0.023249 | \n", "19.408091 | \n", "19.408091 | \n", "0.819569 | \n", "42591 | \n", "58000 | \n", "True | \n", "548.45380 | \n", "0.806492 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
4718 | \n", "4718 | \n", "4718 | \n", "POINT (101.2758538 14.2842555) | \n", "8879142203 | \n", "11.0 | \n", "1.0 | \n", "101.275375 | \n", "14.284611 | \n", "1.0 | \n", "3.0 | \n", "... | \n", "0.012734 | \n", "0.012734 | \n", "43.746437 | \n", "43.746437 | \n", "0.599212 | \n", "10540 | \n", "14000 | \n", "True | \n", "145.64415 | \n", "0.803261 | \n", "
4719 | \n", "4719 | \n", "4719 | \n", "POINT (105.4027044 14.9497545) | \n", "8905212408 | \n", "11.0 | \n", "1.0 | \n", "105.407876 | \n", "14.950656 | \n", "1.0 | \n", "7.0 | \n", "... | \n", "0.001070 | \n", "0.001070 | \n", "22.923611 | \n", "22.923611 | \n", "0.477034 | \n", "1486 | \n", "1700 | \n", "True | \n", "37.91742 | \n", "0.802397 | \n", "
4720 | \n", "4720 | \n", "4720 | \n", "POINT (100.2320763 16.8519458) | \n", "8908633917 | \n", "14.0 | \n", "1.0 | \n", "100.231919 | \n", "16.851753 | \n", "1.0 | \n", "11.0 | \n", "... | \n", "0.005698 | \n", "0.005698 | \n", "27.090730 | \n", "27.090730 | \n", "0.799692 | \n", "89652 | \n", "120000 | \n", "True | \n", "1028.61450 | \n", "0.810343 | \n", "
4721 | \n", "4721 | \n", "4721 | \n", "POINT (99.82662910000001 19.904528) | \n", "8931699084 | \n", "14.0 | \n", "1.0 | \n", "99.826927 | \n", "19.904137 | \n", "1.0 | \n", "4.0 | \n", "... | \n", "0.048839 | \n", "0.048839 | \n", "-143.544324 | \n", "-143.544324 | \n", "0.772304 | \n", "97046 | \n", "130000 | \n", "True | \n", "2506.85130 | \n", "0.822200 | \n", "
4722 | \n", "4722 | \n", "4722 | \n", "POINT (99.8260198 19.9058235) | \n", "8931701767 | \n", "14.0 | \n", "1.0 | \n", "99.825790 | \n", "19.905846 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "0.050350 | \n", "0.050350 | \n", "-144.775981 | \n", "-144.775981 | \n", "0.771019 | \n", "97944 | \n", "130000 | \n", "True | \n", "2071.77270 | \n", "0.818710 | \n", "
4723 rows × 32 columns
\n", "