The Algorithms logo
算法
关于我们捐赠

支持向量机

H
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the HR employee attrition CSV.\n",
    "# NOTE(review): this is an absolute path at the filesystem root; adjust it if the file lives elsewhere.\n",
    "DATA_PATH = '/WA_Fn-UseC_-HR-Employee-Attrition.csv'\n",
    "\n",
    "ibm = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show every column when a DataFrame is rendered instead of eliding the middle ones.\n",
    "pd.options.display.max_columns = None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1470, 35)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EmployeeCount</th>\n",
       "      <th>EmployeeNumber</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StandardHours</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.0</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.0</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>36.923810</td>\n",
       "      <td>802.485714</td>\n",
       "      <td>9.192517</td>\n",
       "      <td>2.912925</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1024.865306</td>\n",
       "      <td>2.721769</td>\n",
       "      <td>65.891156</td>\n",
       "      <td>2.729932</td>\n",
       "      <td>2.063946</td>\n",
       "      <td>2.728571</td>\n",
       "      <td>6502.931293</td>\n",
       "      <td>14313.103401</td>\n",
       "      <td>2.693197</td>\n",
       "      <td>15.209524</td>\n",
       "      <td>3.153741</td>\n",
       "      <td>2.712245</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.793878</td>\n",
       "      <td>11.279592</td>\n",
       "      <td>2.799320</td>\n",
       "      <td>2.761224</td>\n",
       "      <td>7.008163</td>\n",
       "      <td>4.229252</td>\n",
       "      <td>2.187755</td>\n",
       "      <td>4.123129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>9.135373</td>\n",
       "      <td>403.509100</td>\n",
       "      <td>8.106864</td>\n",
       "      <td>1.024165</td>\n",
       "      <td>0.0</td>\n",
       "      <td>602.024335</td>\n",
       "      <td>1.093082</td>\n",
       "      <td>20.329428</td>\n",
       "      <td>0.711561</td>\n",
       "      <td>1.106940</td>\n",
       "      <td>1.102846</td>\n",
       "      <td>4707.956783</td>\n",
       "      <td>7117.786044</td>\n",
       "      <td>2.498009</td>\n",
       "      <td>3.659938</td>\n",
       "      <td>0.360824</td>\n",
       "      <td>1.081209</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.852077</td>\n",
       "      <td>7.780782</td>\n",
       "      <td>1.289271</td>\n",
       "      <td>0.706476</td>\n",
       "      <td>6.126525</td>\n",
       "      <td>3.623137</td>\n",
       "      <td>3.222430</td>\n",
       "      <td>3.568136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>18.000000</td>\n",
       "      <td>102.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1009.000000</td>\n",
       "      <td>2094.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>30.000000</td>\n",
       "      <td>465.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>491.250000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2911.000000</td>\n",
       "      <td>8047.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>36.000000</td>\n",
       "      <td>802.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1020.500000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>66.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4919.000000</td>\n",
       "      <td>14235.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "      <td>1157.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1555.750000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>83.750000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>8379.000000</td>\n",
       "      <td>20461.500000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>7.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>60.000000</td>\n",
       "      <td>1499.000000</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2068.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>19999.000000</td>\n",
       "      <td>26999.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>17.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Age    DailyRate  DistanceFromHome    Education  EmployeeCount  \\\n",
       "count  1470.000000  1470.000000       1470.000000  1470.000000         1470.0   \n",
       "mean     36.923810   802.485714          9.192517     2.912925            1.0   \n",
       "std       9.135373   403.509100          8.106864     1.024165            0.0   \n",
       "min      18.000000   102.000000          1.000000     1.000000            1.0   \n",
       "25%      30.000000   465.000000          2.000000     2.000000            1.0   \n",
       "50%      36.000000   802.000000          7.000000     3.000000            1.0   \n",
       "75%      43.000000  1157.000000         14.000000     4.000000            1.0   \n",
       "max      60.000000  1499.000000         29.000000     5.000000            1.0   \n",
       "\n",
       "       EmployeeNumber  EnvironmentSatisfaction   HourlyRate  JobInvolvement  \\\n",
       "count     1470.000000              1470.000000  1470.000000     1470.000000   \n",
       "mean      1024.865306                 2.721769    65.891156        2.729932   \n",
       "std        602.024335                 1.093082    20.329428        0.711561   \n",
       "min          1.000000                 1.000000    30.000000        1.000000   \n",
       "25%        491.250000                 2.000000    48.000000        2.000000   \n",
       "50%       1020.500000                 3.000000    66.000000        3.000000   \n",
       "75%       1555.750000                 4.000000    83.750000        3.000000   \n",
       "max       2068.000000                 4.000000   100.000000        4.000000   \n",
       "\n",
       "          JobLevel  JobSatisfaction  MonthlyIncome   MonthlyRate  \\\n",
       "count  1470.000000      1470.000000    1470.000000   1470.000000   \n",
       "mean      2.063946         2.728571    6502.931293  14313.103401   \n",
       "std       1.106940         1.102846    4707.956783   7117.786044   \n",
       "min       1.000000         1.000000    1009.000000   2094.000000   \n",
       "25%       1.000000         2.000000    2911.000000   8047.000000   \n",
       "50%       2.000000         3.000000    4919.000000  14235.500000   \n",
       "75%       3.000000         4.000000    8379.000000  20461.500000   \n",
       "max       5.000000         4.000000   19999.000000  26999.000000   \n",
       "\n",
       "       NumCompaniesWorked  PercentSalaryHike  PerformanceRating  \\\n",
       "count         1470.000000        1470.000000        1470.000000   \n",
       "mean             2.693197          15.209524           3.153741   \n",
       "std              2.498009           3.659938           0.360824   \n",
       "min              0.000000          11.000000           3.000000   \n",
       "25%              1.000000          12.000000           3.000000   \n",
       "50%              2.000000          14.000000           3.000000   \n",
       "75%              4.000000          18.000000           3.000000   \n",
       "max              9.000000          25.000000           4.000000   \n",
       "\n",
       "       RelationshipSatisfaction  StandardHours  StockOptionLevel  \\\n",
       "count               1470.000000         1470.0       1470.000000   \n",
       "mean                   2.712245           80.0          0.793878   \n",
       "std                    1.081209            0.0          0.852077   \n",
       "min                    1.000000           80.0          0.000000   \n",
       "25%                    2.000000           80.0          0.000000   \n",
       "50%                    3.000000           80.0          1.000000   \n",
       "75%                    4.000000           80.0          1.000000   \n",
       "max                    4.000000           80.0          3.000000   \n",
       "\n",
       "       TotalWorkingYears  TrainingTimesLastYear  WorkLifeBalance  \\\n",
       "count        1470.000000            1470.000000      1470.000000   \n",
       "mean           11.279592               2.799320         2.761224   \n",
       "std             7.780782               1.289271         0.706476   \n",
       "min             0.000000               0.000000         1.000000   \n",
       "25%             6.000000               2.000000         2.000000   \n",
       "50%            10.000000               3.000000         3.000000   \n",
       "75%            15.000000               3.000000         3.000000   \n",
       "max            40.000000               6.000000         4.000000   \n",
       "\n",
       "       YearsAtCompany  YearsInCurrentRole  YearsSinceLastPromotion  \\\n",
       "count     1470.000000         1470.000000              1470.000000   \n",
       "mean         7.008163            4.229252                 2.187755   \n",
       "std          6.126525            3.623137                 3.222430   \n",
       "min          0.000000            0.000000                 0.000000   \n",
       "25%          3.000000            2.000000                 0.000000   \n",
       "50%          5.000000            3.000000                 1.000000   \n",
       "75%          9.000000            7.000000                 3.000000   \n",
       "max         40.000000           18.000000                15.000000   \n",
       "\n",
       "       YearsWithCurrManager  \n",
       "count           1470.000000  \n",
       "mean               4.123129  \n",
       "std                3.568136  \n",
       "min                0.000000  \n",
       "25%                2.000000  \n",
       "50%                3.000000  \n",
       "75%                7.000000  \n",
       "max               17.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age  mode:  35\n",
      "Attrition  mode:  No\n",
      "BusinessTravel  mode:  Travel_Rarely\n",
      "DailyRate  mode:  691\n",
      "Department  mode:  Research & Development\n",
      "DistanceFromHome  mode:  2\n",
      "Education  mode:  3\n",
      "EducationField  mode:  Life Sciences\n",
      "EmployeeCount  mode:  1\n",
      "EmployeeNumber  mode:  1\n",
      "EnvironmentSatisfaction  mode:  3\n",
      "Gender  mode:  Male\n",
      "HourlyRate  mode:  66\n",
      "JobInvolvement  mode:  3\n",
      "JobLevel  mode:  1\n",
      "JobRole  mode:  Sales Executive\n",
      "JobSatisfaction  mode:  4\n",
      "MaritalStatus  mode:  Married\n",
      "MonthlyIncome  mode:  2342\n",
      "MonthlyRate  mode:  9150\n",
      "NumCompaniesWorked  mode:  1\n",
      "Over18  mode:  Y\n",
      "OverTime  mode:  No\n",
      "PercentSalaryHike  mode:  11\n",
      "PerformanceRating  mode:  3\n",
      "RelationshipSatisfaction  mode:  3\n",
      "StandardHours  mode:  80\n",
      "StockOptionLevel  mode:  0\n",
      "TotalWorkingYears  mode:  10\n",
      "TrainingTimesLastYear  mode:  2\n",
      "WorkLifeBalance  mode:  3\n",
      "YearsAtCompany  mode:  5\n",
      "YearsInCurrentRole  mode:  2\n",
      "YearsSinceLastPromotion  mode:  0\n",
      "YearsWithCurrManager  mode:  2\n"
     ]
    }
   ],
   "source": [
    "import statistics\n",
    "for i in ibm.columns:\n",
    "    print(i, \" mode: \", statistics.mode(ibm[i]));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1470 entries, 0 to 1469\n",
      "Data columns (total 35 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Age                       1470 non-null   int64 \n",
      " 1   Attrition                 1470 non-null   object\n",
      " 2   BusinessTravel            1470 non-null   object\n",
      " 3   DailyRate                 1470 non-null   int64 \n",
      " 4   Department                1470 non-null   object\n",
      " 5   DistanceFromHome          1470 non-null   int64 \n",
      " 6   Education                 1470 non-null   int64 \n",
      " 7   EducationField            1470 non-null   object\n",
      " 8   EmployeeCount             1470 non-null   int64 \n",
      " 9   EmployeeNumber            1470 non-null   int64 \n",
      " 10  EnvironmentSatisfaction   1470 non-null   int64 \n",
      " 11  Gender                    1470 non-null   object\n",
      " 12  HourlyRate                1470 non-null   int64 \n",
      " 13  JobInvolvement            1470 non-null   int64 \n",
      " 14  JobLevel                  1470 non-null   int64 \n",
      " 15  JobRole                   1470 non-null   object\n",
      " 16  JobSatisfaction           1470 non-null   int64 \n",
      " 17  MaritalStatus             1470 non-null   object\n",
      " 18  MonthlyIncome             1470 non-null   int64 \n",
      " 19  MonthlyRate               1470 non-null   int64 \n",
      " 20  NumCompaniesWorked        1470 non-null   int64 \n",
      " 21  Over18                    1470 non-null   object\n",
      " 22  OverTime                  1470 non-null   object\n",
      " 23  PercentSalaryHike         1470 non-null   int64 \n",
      " 24  PerformanceRating         1470 non-null   int64 \n",
      " 25  RelationshipSatisfaction  1470 non-null   int64 \n",
      " 26  StandardHours             1470 non-null   int64 \n",
      " 27  StockOptionLevel          1470 non-null   int64 \n",
      " 28  TotalWorkingYears         1470 non-null   int64 \n",
      " 29  TrainingTimesLastYear     1470 non-null   int64 \n",
      " 30  WorkLifeBalance           1470 non-null   int64 \n",
      " 31  YearsAtCompany            1470 non-null   int64 \n",
      " 32  YearsInCurrentRole        1470 non-null   int64 \n",
      " 33  YearsSinceLastPromotion   1470 non-null   int64 \n",
      " 34  YearsWithCurrManager      1470 non-null   int64 \n",
      "dtypes: int64(26), object(9)\n",
      "memory usage: 402.1+ KB\n"
     ]
    }
   ],
   "source": [
    "ibm.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns removed during preprocessing. The describe() output above shows that\n",
    "# EmployeeCount and StandardHours have std == 0, i.e. they are constant.\n",
    "# NOTE(review): EmployeeNumber looks like a row identifier and Over18 like a\n",
    "# constant flag -- confirm before relying on that.\n",
    "UNINFORMATIVE_COLUMNS = ['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours']\n",
    "\n",
    "# Rebind instead of mutating in place, and skip columns that are already gone,\n",
    "# so re-running this cell does not raise KeyError.\n",
    "ibm = ibm.drop(columns=UNINFORMATIVE_COLUMNS, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>Department</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EducationField</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>Gender</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobRole</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>1102</td>\n",
       "      <td>Sales</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>2</td>\n",
       "      <td>Female</td>\n",
       "      <td>94</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>Sales Executive</td>\n",
       "      <td>4</td>\n",
       "      <td>Single</td>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>8</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>279</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>3</td>\n",
       "      <td>Male</td>\n",
       "      <td>61</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Research Scientist</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>1</td>\n",
       "      <td>No</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>1373</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Other</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>92</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>3</td>\n",
       "      <td>Single</td>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>6</td>\n",
       "      <td>Yes</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>1392</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Research Scientist</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>591</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Medical</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>9</td>\n",
       "      <td>No</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>36</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>884</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>Medical</td>\n",
       "      <td>3</td>\n",
       "      <td>Male</td>\n",
       "      <td>41</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>4</td>\n",
       "      <td>Married</td>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>4</td>\n",
       "      <td>No</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>39</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>613</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>Medical</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Healthcare Representative</td>\n",
       "      <td>1</td>\n",
       "      <td>Married</td>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>4</td>\n",
       "      <td>No</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>27</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>155</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>87</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Manufacturing Director</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>49</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>1023</td>\n",
       "      <td>Sales</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>63</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Sales Executive</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>2</td>\n",
       "      <td>No</td>\n",
       "      <td>14</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>34</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>628</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>82</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>2</td>\n",
       "      <td>No</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1470 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Age Attrition     BusinessTravel  DailyRate              Department  \\\n",
       "0      41       Yes      Travel_Rarely       1102                   Sales   \n",
       "1      49        No  Travel_Frequently        279  Research & Development   \n",
       "2      37       Yes      Travel_Rarely       1373  Research & Development   \n",
       "3      33        No  Travel_Frequently       1392  Research & Development   \n",
       "4      27        No      Travel_Rarely        591  Research & Development   \n",
       "...   ...       ...                ...        ...                     ...   \n",
       "1465   36        No  Travel_Frequently        884  Research & Development   \n",
       "1466   39        No      Travel_Rarely        613  Research & Development   \n",
       "1467   27        No      Travel_Rarely        155  Research & Development   \n",
       "1468   49        No  Travel_Frequently       1023                   Sales   \n",
       "1469   34        No      Travel_Rarely        628  Research & Development   \n",
       "\n",
       "      DistanceFromHome  Education EducationField  EnvironmentSatisfaction  \\\n",
       "0                    1          2  Life Sciences                        2   \n",
       "1                    8          1  Life Sciences                        3   \n",
       "2                    2          2          Other                        4   \n",
       "3                    3          4  Life Sciences                        4   \n",
       "4                    2          1        Medical                        1   \n",
       "...                ...        ...            ...                      ...   \n",
       "1465                23          2        Medical                        3   \n",
       "1466                 6          1        Medical                        4   \n",
       "1467                 4          3  Life Sciences                        2   \n",
       "1468                 2          3        Medical                        4   \n",
       "1469                 8          3        Medical                        2   \n",
       "\n",
       "      Gender  HourlyRate  JobInvolvement  JobLevel                    JobRole  \\\n",
       "0     Female          94               3         2            Sales Executive   \n",
       "1       Male          61               2         2         Research Scientist   \n",
       "2       Male          92               2         1      Laboratory Technician   \n",
       "3     Female          56               3         1         Research Scientist   \n",
       "4       Male          40               3         1      Laboratory Technician   \n",
       "...      ...         ...             ...       ...                        ...   \n",
       "1465    Male          41               4         2      Laboratory Technician   \n",
       "1466    Male          42               2         3  Healthcare Representative   \n",
       "1467    Male          87               4         2     Manufacturing Director   \n",
       "1468    Male          63               2         2            Sales Executive   \n",
       "1469    Male          82               4         2      Laboratory Technician   \n",
       "\n",
       "      JobSatisfaction MaritalStatus  MonthlyIncome  MonthlyRate  \\\n",
       "0                   4        Single           5993        19479   \n",
       "1                   2       Married           5130        24907   \n",
       "2                   3        Single           2090         2396   \n",
       "3                   3       Married           2909        23159   \n",
       "4                   2       Married           3468        16632   \n",
       "...               ...           ...            ...          ...   \n",
       "1465                4       Married           2571        12290   \n",
       "1466                1       Married           9991        21457   \n",
       "1467                2       Married           6142         5174   \n",
       "1468                2       Married           5390        13243   \n",
       "1469                3       Married           4404        10228   \n",
       "\n",
       "      NumCompaniesWorked OverTime  PercentSalaryHike  PerformanceRating  \\\n",
       "0                      8      Yes                 11                  3   \n",
       "1                      1       No                 23                  4   \n",
       "2                      6      Yes                 15                  3   \n",
       "3                      1      Yes                 11                  3   \n",
       "4                      9       No                 12                  3   \n",
       "...                  ...      ...                ...                ...   \n",
       "1465                   4       No                 17                  3   \n",
       "1466                   4       No                 15                  3   \n",
       "1467                   1      Yes                 20                  4   \n",
       "1468                   2       No                 14                  3   \n",
       "1469                   2       No                 12                  3   \n",
       "\n",
       "      RelationshipSatisfaction  StockOptionLevel  TotalWorkingYears  \\\n",
       "0                            1                 0                  8   \n",
       "1                            4                 1                 10   \n",
       "2                            2                 0                  7   \n",
       "3                            3                 0                  8   \n",
       "4                            4                 1                  6   \n",
       "...                        ...               ...                ...   \n",
       "1465                         3                 1                 17   \n",
       "1466                         1                 1                  9   \n",
       "1467                         2                 1                  6   \n",
       "1468                         4                 0                 17   \n",
       "1469                         1                 0                  6   \n",
       "\n",
       "      TrainingTimesLastYear  WorkLifeBalance  YearsAtCompany  \\\n",
       "0                         0                1               6   \n",
       "1                         3                3              10   \n",
       "2                         3                3               0   \n",
       "3                         3                3               8   \n",
       "4                         3                3               2   \n",
       "...                     ...              ...             ...   \n",
       "1465                      3                3               5   \n",
       "1466                      5                3               7   \n",
       "1467                      0                3               6   \n",
       "1468                      3                2               9   \n",
       "1469                      3                4               4   \n",
       "\n",
       "      YearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                      4                        0                     5  \n",
       "1                      7                        1                     7  \n",
       "2                      0                        0                     0  \n",
       "3                      7                        3                     0  \n",
       "4                      2                        2                     2  \n",
       "...                  ...                      ...                   ...  \n",
       "1465                   2                        0                     3  \n",
       "1466                   7                        1                     7  \n",
       "1467                   2                        0                     3  \n",
       "1468                   6                        0                     8  \n",
       "1469                   3                        1                     2  \n",
       "\n",
       "[1470 rows x 31 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop_duplicates() returns a new frame; assign it back so the de-duplication is actually kept\n",
    "ibm = ibm.drop_duplicates()\n",
    "ibm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Age                         0\n",
       "Attrition                   0\n",
       "BusinessTravel              0\n",
       "DailyRate                   0\n",
       "Department                  0\n",
       "DistanceFromHome            0\n",
       "Education                   0\n",
       "EducationField              0\n",
       "EnvironmentSatisfaction     0\n",
       "Gender                      0\n",
       "HourlyRate                  0\n",
       "JobInvolvement              0\n",
       "JobLevel                    0\n",
       "JobRole                     0\n",
       "JobSatisfaction             0\n",
       "MaritalStatus               0\n",
       "MonthlyIncome               0\n",
       "MonthlyRate                 0\n",
       "NumCompaniesWorked          0\n",
       "OverTime                    0\n",
       "PercentSalaryHike           0\n",
       "PerformanceRating           0\n",
       "RelationshipSatisfaction    0\n",
       "StockOptionLevel            0\n",
       "TotalWorkingYears           0\n",
       "TrainingTimesLastYear       0\n",
       "WorkLifeBalance             0\n",
       "YearsAtCompany              0\n",
       "YearsInCurrentRole          0\n",
       "YearsSinceLastPromotion     0\n",
       "YearsWithCurrManager        0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of missing values in each column\n",
    "ibm.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode the target column: Attrition 'No' -> 0, 'Yes' -> 1\n",
    "ibm = ibm.replace({'Attrition': {'No': 0, 'Yes': 1}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal encoding of BusinessTravel by travel frequency:\n",
    "# Non-Travel -> 0, Travel_Rarely -> 1, Travel_Frequently -> 2\n",
    "ibm = ibm.replace({\n",
    "    'BusinessTravel': {\n",
    "        'Non-Travel': 0,\n",
    "        'Travel_Rarely': 1,\n",
    "        'Travel_Frequently': 2,\n",
    "    }\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode Department and put the indicator columns where Department used to be.\n",
    "# dtype=int keeps the indicators as 0/1 integers; pandas >= 2.0 would otherwise return booleans.\n",
    "dummy = pd.get_dummies(ibm['Department'], dtype=int)\n",
    "# NOTE(review): 'Dp_Sales&Development' holds the 'Research & Development' indicator.\n",
    "# The label is left unchanged because later cells may refer to it -- confirm before renaming.\n",
    "ibm.insert(5, 'Dp_Sales&Development', dummy['Research & Development'])\n",
    "ibm.insert(6, 'Dp_Sales', dummy['Sales'])\n",
    "ibm.insert(7, 'Dp_HumanResources', dummy['Human Resources'])\n",
    "\n",
    "# The original categorical column is no longer needed once it is encoded\n",
    "ibm.drop(columns='Department', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode EducationField and insert the indicator columns at fixed positions.\n",
    "# dtype=int keeps the indicators as 0/1 integers; pandas >= 2.0 would otherwise return booleans.\n",
    "dummy = pd.get_dummies(ibm['EducationField'], dtype=int)\n",
    "ibm.insert(11, 'EF_Life Sciences', dummy['Life Sciences'])\n",
    "ibm.insert(12, 'EF_Medical', dummy['Medical'])\n",
    "ibm.insert(13, 'EF_Marketing', dummy['Marketing'])\n",
    "ibm.insert(14, 'EF_TechnicalDegree', dummy['Technical Degree'])\n",
    "ibm.insert(15, 'EF_HumanResources', dummy['Human Resources'])\n",
    "ibm.insert(16, 'EF_Other', dummy['Other'])\n",
    "\n",
    "# The original categorical column is no longer needed once it is encoded\n",
    "ibm.drop(columns='EducationField', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary encoding of Gender: Male -> 0, Female -> 1\n",
    "ibm = ibm.replace({'Gender': {'Female': 1, 'Male': 0}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode JobRole and insert the indicator columns at fixed positions.\n",
    "# dtype=int keeps the indicators as 0/1 integers; pandas >= 2.0 would otherwise return booleans.\n",
    "dummy = pd.get_dummies(ibm['JobRole'], dtype=int)\n",
    "# NOTE(review): 'JR_HealthcareRepresentive' is misspelled (missing 'at'); the label is kept\n",
    "# unchanged because later cells may refer to it -- confirm before renaming.\n",
    "ibm.insert(23, 'JR_HealthcareRepresentive', dummy['Healthcare Representative'])\n",
    "ibm.insert(24, 'JR_HumanResource', dummy['Human Resources'])\n",
    "ibm.insert(25, 'JR_LaboratoryTechnician', dummy['Laboratory Technician'])\n",
    "ibm.insert(26, 'JR_Manager', dummy['Manager'])\n",
    "ibm.insert(27, 'JR_ManufacturingDirector', dummy['Manufacturing Director'])\n",
    "ibm.insert(28, 'JR_ResearchDirector', dummy['Research Director'])\n",
    "ibm.insert(29, 'JR_ResearchScientist', dummy['Research Scientist'])\n",
    "ibm.insert(30, 'JR_SalesExecutive', dummy['Sales Executive'])\n",
    "ibm.insert(31, 'JR_SalesRepresentative', dummy['Sales Representative'])\n",
    "\n",
    "# The original categorical column is no longer needed once it is encoded\n",
    "ibm.drop(columns='JobRole', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode MaritalStatus and insert the indicator columns at fixed positions.\n",
    "# dtype=int keeps the indicators as 0/1 integers; pandas >= 2.0 would otherwise return booleans.\n",
    "dummy = pd.get_dummies(ibm['MaritalStatus'], dtype=int)\n",
    "ibm.insert(34, 'MS_Married', dummy['Married'])\n",
    "ibm.insert(35, 'MS_Single', dummy['Single'])\n",
    "ibm.insert(36, 'MS_Divorced', dummy['Divorced'])\n",
    "\n",
    "# The original categorical column is no longer needed once it is encoded\n",
    "ibm.drop(columns='MaritalStatus', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary encoding of OverTime: No -> 0, Yes -> 1\n",
    "ibm = ibm.replace({'OverTime': {'Yes': 1, 'No': 0}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary encoding of Over18: N -> 0, Y -> 1\n",
    "# NOTE(review): 'Over18' is not among the columns listed by the isnull() summary above,\n",
    "# so this replace appears to be a no-op at this point -- confirm and remove if so.\n",
    "ibm = ibm.replace({'Over18': {'Y': 1, 'N': 0}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def iqr_outliers(data, k=1.5, verbose=True):\n",
    "    \"\"\"Return the values of ``data`` that fall outside the IQR fences.\n",
    "\n",
    "    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR``, where Q1 and Q3 are the\n",
    "    0.25 and 0.75 quantiles of ``data`` and ``IQR = Q3 - Q1``.\n",
    "\n",
    "    Note that for a 0/1 indicator column whose quartiles are both 0 the IQR is 0,\n",
    "    so every 1 in that column is reported.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.Series\n",
    "        Numeric values to scan.\n",
    "    k : float, default 1.5\n",
    "        Fence multiplier; the default reproduces the previously hard-coded rule.\n",
    "    verbose : bool, default True\n",
    "        When true, print the outliers and their count (the previous behaviour).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        The outlying values in their original order, duplicates included.\n",
    "    \"\"\"\n",
    "    first_quartile = data.quantile(0.25)\n",
    "    third_quartile = data.quantile(0.75)\n",
    "    iqr = third_quartile - first_quartile\n",
    "\n",
    "    lower_bound = first_quartile - k * iqr\n",
    "    upper_bound = third_quartile + k * iqr\n",
    "\n",
    "    # Vectorised mask instead of a Python-level loop over every element;\n",
    "    # .tolist() yields plain Python scalars, so the printed list looks the same as before.\n",
    "    out = data[(data > upper_bound) | (data < lower_bound)].tolist()\n",
    "\n",
    "    if verbose:\n",
    "        print(\"Outliers:\", out, \"\\nCount: \", len(out), \"\\n\")\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Attrition\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  237 \n",
      "\n",
      "BusinessTravel\n",
      "Outliers: [2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2] \n",
      "Count:  427 \n",
      "\n",
      "DailyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_Sales&Development\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_Sales\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_HumanResources\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  63 \n",
      "\n",
      "DistanceFromHome\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Education\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EnvironmentSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Life Sciences\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Medical\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Marketing\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  159 \n",
      "\n",
      "EF_TechnicalDegree\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  132 \n",
      "\n",
      "EF_HumanResources\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  27 \n",
      "\n",
      "EF_Other\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  82 \n",
      "\n",
      "Gender\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "HourlyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobInvolvement\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobLevel\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JR_HealthcareRepresentive\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  131 \n",
      "\n",
      "JR_HumanResource\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  52 \n",
      "\n",
      "JR_LaboratoryTechnician\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  259 \n",
      "\n",
      "JR_Manager\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  102 \n",
      "\n",
      "JR_ManufacturingDirector\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  145 \n",
      "\n",
      "JR_ResearchDirector\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  80 \n",
      "\n",
      "JR_ResearchScientist\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  292 \n",
      "\n",
      "JR_SalesExecutive\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  326 \n",
      "\n",
      "JR_SalesRepresentative\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  83 \n",
      "\n",
      "MonthlyIncome\n",
      "Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] \n",
      "Count:  114 \n",
      "\n",
      "MonthlyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "NumCompaniesWorked\n",
      "Outliers: [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9] \n",
      "Count:  52 \n",
      "\n",
      "MS_Married\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "MS_Single\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "MS_Divorced\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  327 \n",
      "\n",
      "OverTime\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "PercentSalaryHike\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "PerformanceRating\n",
      "Outliers: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] \n",
      "Count:  226 \n",
      "\n",
      "RelationshipSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "StockOptionLevel\n",
      "Outliers: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] \n",
      "Count:  85 \n",
      "\n",
      "TotalWorkingYears\n",
      "Outliers: [31, 29, 37, 38, 30, 40, 36, 34, 32, 33, 37, 30, 36, 31, 33, 32, 37, 31, 32, 32, 30, 34, 30, 40, 29, 35, 31, 33, 31, 29, 32, 30, 33, 30, 29, 31, 32, 33, 36, 34, 31, 36, 33, 31, 29, 33, 29, 32, 31, 35, 29, 32, 34, 36, 32, 30, 36, 29, 34, 37, 29, 29, 35] \n",
      "Count:  63 \n",
      "\n",
      "TrainingTimesLastYear\n",
      "Outliers: [0, 5, 5, 5, 6, 5, 5, 5, 6, 6, 0, 0, 0, 5, 0, 5, 5, 5, 6, 6, 5, 0, 6, 5, 5, 0, 5, 5, 6, 5, 5, 5, 0, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 0, 0, 5, 5, 5, 6, 6, 5, 0, 5, 0, 5, 5, 0, 6, 0, 5, 5, 6, 6, 5, 6, 5, 0, 5, 5, 5, 5, 0, 6, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 0, 5, 0, 5, 5, 6, 5, 6, 5, 0, 5, 5, 0, 6, 6, 5, 6, 0, 5, 0, 6, 6, 6, 6, 5, 5, 0, 5, 0, 0, 6, 0, 6, 5, 6, 5, 5, 0, 5, 6, 6, 5, 5, 0, 0, 6, 0, 0, 5, 0, 5, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 6, 6, 0, 6, 6, 5, 5, 0, 0, 6, 6, 0, 5, 0, 0, 0, 0, 0, 5, 5, 6, 5, 5, 0, 5, 5, 0, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 0, 0, 5, 5, 5, 5, 6, 0, 0, 6, 6, 6, 6, 5, 5, 5, 6, 5, 0, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 0, 5, 5, 5, 5, 5, 0, 0, 0, 6, 5, 6, 6, 5, 6, 0, 6, 6, 5, 6, 6, 5, 5, 5, 0] \n",
      "Count:  238 \n",
      "\n",
      "WorkLifeBalance\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "YearsAtCompany\n",
      "Outliers: [25, 22, 22, 27, 21, 22, 37, 25, 20, 40, 20, 24, 20, 24, 33, 20, 19, 22, 33, 24, 19, 21, 20, 36, 20, 20, 22, 24, 21, 21, 25, 21, 29, 20, 27, 20, 31, 32, 20, 20, 21, 22, 22, 34, 24, 26, 31, 20, 31, 26, 19, 21, 21, 32, 21, 19, 20, 22, 20, 21, 26, 20, 22, 24, 33, 29, 25, 21, 19, 19, 20, 19, 33, 19, 19, 20, 20, 20, 20, 20, 32, 20, 21, 33, 36, 26, 30, 22, 23, 23, 21, 21, 22, 22, 19, 22, 19, 22, 20, 20, 20, 22, 20, 20] \n",
      "Count:  104 \n",
      "\n",
      "YearsInCurrentRole\n",
      "Outliers: [15, 16, 18, 15, 18, 17, 16, 15, 16, 15, 16, 16, 15, 16, 17, 15, 15, 15, 17, 17, 16] \n",
      "Count:  21 \n",
      "\n",
      "YearsSinceLastPromotion\n",
      "Outliers: [8, 15, 8, 8, 9, 13, 12, 10, 11, 9, 12, 15, 15, 15, 9, 11, 11, 9, 12, 11, 15, 11, 10, 9, 11, 9, 8, 11, 11, 8, 13, 9, 9, 12, 10, 11, 15, 13, 9, 11, 10, 8, 8, 11, 9, 11, 12, 11, 14, 13, 14, 8, 11, 15, 10, 11, 11, 15, 11, 13, 11, 13, 15, 8, 13, 15, 11, 14, 15, 15, 9, 11, 9, 8, 9, 15, 11, 12, 9, 8, 10, 14, 8, 13, 13, 12, 14, 8, 8, 8, 14, 14, 8, 12, 13, 14, 14, 12, 11, 8, 11, 9, 12, 8, 9, 11, 9] \n",
      "Count:  107 \n",
      "\n",
      "YearsWithCurrManager\n",
      "Outliers: [17, 15, 15, 15, 15, 17, 16, 17, 15, 17, 17, 17, 17, 16] \n",
      "Count:  14 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print each column name followed by the report that iqr_outliers emits\n",
    "# for that column (its return value is not used here).\n",
    "# NOTE(review): the recorded output flags whole categories of binary columns\n",
    "# (e.g. every MS_Divorced == 1 row), so read those reports with care.\n",
    "for c_name in ibm.columns:\n",
    "    print(c_name)\n",
    "    iqr_outliers(ibm[c_name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def remove_outliers(c_name):\n",
    "    \"\"\"Drop the outliers of column ``c_name`` from the global ``ibm`` frame, in place.\n",
    "\n",
    "    ``iqr_outliers`` is called again after every pass, since removing rows can\n",
    "    change what it reports (the recorded run needed several passes). The loop\n",
    "    ends once it reports nothing. Each pass removes every row whose ``c_name``\n",
    "    value equals one of the reported outliers. Nothing is returned.\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        flagged = iqr_outliers(ibm[c_name])\n",
    "        if len(flagged) == 0:\n",
    "            break\n",
    "\n",
    "        for value in flagged:\n",
    "            # Index labels of all rows carrying this outlier value.\n",
    "            matching_rows = ibm[ibm[c_name] == value].index\n",
    "            ibm.drop(matching_rows, inplace = True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] \n",
      "Count:  114 \n",
      "\n",
      "Outliers: [15427, 13458, 14756, 13245, 13664, 13503, 13549, 13872, 13734, 13591, 16064, 13675, 13496, 13603, 13525, 16015, 13964, 15992, 14336, 13212, 16555, 14118, 13610, 13237, 16184, 15402, 14814, 13770, 16307, 13826, 14275, 13582, 14852, 13194, 13973, 13726, 13320, 13120, 13499, 13758, 13191, 16124, 13577, 14026, 13142, 13695, 13402, 13247, 14732, 16422, 13757, 16032, 16328, 14411, 16437, 15202, 16413, 13269, 13966, 15972, 15379, 12936, 12965, 13116, 13464, 16291, 15787, 13225, 13348, 13341, 13206, 13744, 13570] \n",
      "Count:  73 \n",
      "\n",
      "Outliers: [11994, 12490, 12185, 11849, 11996, 12061, 11878, 12504, 11935, 12808, 11836, 12742, 11904, 12169, 11916, 11957, 12031] \n",
      "Count:  17 \n",
      "\n",
      "Outliers: [11713, 11691] \n",
      "Count:  2 \n",
      "\n",
      "Outliers: [11631] \n",
      "Count:  1 \n",
      "\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Strip MonthlyIncome outliers from ibm in place; the call repeats its\n",
    "# outlier check until a pass reports none.\n",
    "remove_outliers(c_name='MonthlyIncome')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>Dp_Sales&amp;Development</th>\n",
       "      <th>Dp_Sales</th>\n",
       "      <th>Dp_HumanResources</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>EF_Life Sciences</th>\n",
       "      <th>EF_Medical</th>\n",
       "      <th>EF_Marketing</th>\n",
       "      <th>EF_TechnicalDegree</th>\n",
       "      <th>EF_HumanResources</th>\n",
       "      <th>EF_Other</th>\n",
       "      <th>Gender</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>JR_HealthcareRepresentive</th>\n",
       "      <th>JR_HumanResource</th>\n",
       "      <th>JR_LaboratoryTechnician</th>\n",
       "      <th>JR_Manager</th>\n",
       "      <th>JR_ManufacturingDirector</th>\n",
       "      <th>JR_ResearchDirector</th>\n",
       "      <th>JR_ResearchScientist</th>\n",
       "      <th>JR_SalesExecutive</th>\n",
       "      <th>JR_SalesRepresentative</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>MS_Married</th>\n",
       "      <th>MS_Single</th>\n",
       "      <th>MS_Divorced</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1102</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>94</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>279</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>61</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1373</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>92</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1392</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>591</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>884</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>613</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>155</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>87</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1023</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>628</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1263 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Age  Attrition  BusinessTravel  DailyRate  Dp_Sales&Development  \\\n",
       "0      41          1               1       1102                     0   \n",
       "1      49          0               2        279                     1   \n",
       "2      37          1               1       1373                     1   \n",
       "3      33          0               2       1392                     1   \n",
       "4      27          0               1        591                     1   \n",
       "...   ...        ...             ...        ...                   ...   \n",
       "1465   36          0               2        884                     1   \n",
       "1466   39          0               1        613                     1   \n",
       "1467   27          0               1        155                     1   \n",
       "1468   49          0               2       1023                     0   \n",
       "1469   34          0               1        628                     1   \n",
       "\n",
       "      Dp_Sales  Dp_HumanResources  DistanceFromHome  Education  \\\n",
       "0            1                  0                 1          2   \n",
       "1            0                  0                 8          1   \n",
       "2            0                  0                 2          2   \n",
       "3            0                  0                 3          4   \n",
       "4            0                  0                 2          1   \n",
       "...        ...                ...               ...        ...   \n",
       "1465         0                  0                23          2   \n",
       "1466         0                  0                 6          1   \n",
       "1467         0                  0                 4          3   \n",
       "1468         1                  0                 2          3   \n",
       "1469         0                  0                 8          3   \n",
       "\n",
       "      EnvironmentSatisfaction  EF_Life Sciences  EF_Medical  EF_Marketing  \\\n",
       "0                           2                 1           0             0   \n",
       "1                           3                 1           0             0   \n",
       "2                           4                 0           0             0   \n",
       "3                           4                 1           0             0   \n",
       "4                           1                 0           1             0   \n",
       "...                       ...               ...         ...           ...   \n",
       "1465                        3                 0           1             0   \n",
       "1466                        4                 0           1             0   \n",
       "1467                        2                 1           0             0   \n",
       "1468                        4                 0           1             0   \n",
       "1469                        2                 0           1             0   \n",
       "\n",
       "      EF_TechnicalDegree  EF_HumanResources  EF_Other  Gender  HourlyRate  \\\n",
       "0                      0                  0         0       1          94   \n",
       "1                      0                  0         0       0          61   \n",
       "2                      0                  0         1       0          92   \n",
       "3                      0                  0         0       1          56   \n",
       "4                      0                  0         0       0          40   \n",
       "...                  ...                ...       ...     ...         ...   \n",
       "1465                   0                  0         0       0          41   \n",
       "1466                   0                  0         0       0          42   \n",
       "1467                   0                  0         0       0          87   \n",
       "1468                   0                  0         0       0          63   \n",
       "1469                   0                  0         0       0          82   \n",
       "\n",
       "      JobInvolvement  JobLevel  JobSatisfaction  JR_HealthcareRepresentive  \\\n",
       "0                  3         2                4                          0   \n",
       "1                  2         2                2                          0   \n",
       "2                  2         1                3                          0   \n",
       "3                  3         1                3                          0   \n",
       "4                  3         1                2                          0   \n",
       "...              ...       ...              ...                        ...   \n",
       "1465               4         2                4                          0   \n",
       "1466               2         3                1                          1   \n",
       "1467               4         2                2                          0   \n",
       "1468               2         2                2                          0   \n",
       "1469               4         2                3                          0   \n",
       "\n",
       "      JR_HumanResource  JR_LaboratoryTechnician  JR_Manager  \\\n",
       "0                    0                        0           0   \n",
       "1                    0                        0           0   \n",
       "2                    0                        1           0   \n",
       "3                    0                        0           0   \n",
       "4                    0                        1           0   \n",
       "...                ...                      ...         ...   \n",
       "1465                 0                        1           0   \n",
       "1466                 0                        0           0   \n",
       "1467                 0                        0           0   \n",
       "1468                 0                        0           0   \n",
       "1469                 0                        1           0   \n",
       "\n",
       "      JR_ManufacturingDirector  JR_ResearchDirector  JR_ResearchScientist  \\\n",
       "0                            0                    0                     0   \n",
       "1                            0                    0                     1   \n",
       "2                            0                    0                     0   \n",
       "3                            0                    0                     1   \n",
       "4                            0                    0                     0   \n",
       "...                        ...                  ...                   ...   \n",
       "1465                         0                    0                     0   \n",
       "1466                         0                    0                     0   \n",
       "1467                         1                    0                     0   \n",
       "1468                         0                    0                     0   \n",
       "1469                         0                    0                     0   \n",
       "\n",
       "      JR_SalesExecutive  JR_SalesRepresentative  MonthlyIncome  MonthlyRate  \\\n",
       "0                     1                       0           5993        19479   \n",
       "1                     0                       0           5130        24907   \n",
       "2                     0                       0           2090         2396   \n",
       "3                     0                       0           2909        23159   \n",
       "4                     0                       0           3468        16632   \n",
       "...                 ...                     ...            ...          ...   \n",
       "1465                  0                       0           2571        12290   \n",
       "1466                  0                       0           9991        21457   \n",
       "1467                  0                       0           6142         5174   \n",
       "1468                  1                       0           5390        13243   \n",
       "1469                  0                       0           4404        10228   \n",
       "\n",
       "      NumCompaniesWorked  MS_Married  MS_Single  MS_Divorced  OverTime  \\\n",
       "0                      8           0          1            0         1   \n",
       "1                      1           1          0            0         0   \n",
       "2                      6           0          1            0         1   \n",
       "3                      1           1          0            0         1   \n",
       "4                      9           1          0            0         0   \n",
       "...                  ...         ...        ...          ...       ...   \n",
       "1465                   4           1          0            0         0   \n",
       "1466                   4           1          0            0         0   \n",
       "1467                   1           1          0            0         1   \n",
       "1468                   2           1          0            0         0   \n",
       "1469                   2           1          0            0         0   \n",
       "\n",
       "      PercentSalaryHike  PerformanceRating  RelationshipSatisfaction  \\\n",
       "0                    11                  3                         1   \n",
       "1                    23                  4                         4   \n",
       "2                    15                  3                         2   \n",
       "3                    11                  3                         3   \n",
       "4                    12                  3                         4   \n",
       "...                 ...                ...                       ...   \n",
       "1465                 17                  3                         3   \n",
       "1466                 15                  3                         1   \n",
       "1467                 20                  4                         2   \n",
       "1468                 14                  3                         4   \n",
       "1469                 12                  3                         1   \n",
       "\n",
       "      StockOptionLevel  TotalWorkingYears  TrainingTimesLastYear  \\\n",
       "0                    0                  8                      0   \n",
       "1                    1                 10                      3   \n",
       "2                    0                  7                      3   \n",
       "3                    0                  8                      3   \n",
       "4                    1                  6                      3   \n",
       "...                ...                ...                    ...   \n",
       "1465                 1                 17                      3   \n",
       "1466                 1                  9                      5   \n",
       "1467                 1                  6                      0   \n",
       "1468                 0                 17                      3   \n",
       "1469                 0                  6                      3   \n",
       "\n",
       "      WorkLifeBalance  YearsAtCompany  YearsInCurrentRole  \\\n",
       "0                   1               6                   4   \n",
       "1                   3              10                   7   \n",
       "2                   3               0                   0   \n",
       "3                   3               8                   7   \n",
       "4                   3               2                   2   \n",
       "...               ...             ...                 ...   \n",
       "1465                3               5                   2   \n",
       "1466                3               7                   7   \n",
       "1467                3               6                   2   \n",
       "1468                2               9                   6   \n",
       "1469                4               4                   3   \n",
       "\n",
       "      YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                           0                     5  \n",
       "1                           1                     7  \n",
       "2                           0                     0  \n",
       "3                           3                     0  \n",
       "4                           2                     2  \n",
       "...                       ...                   ...  \n",
       "1465                        0                     3  \n",
       "1466                        1                     7  \n",
       "1467                        0                     3  \n",
       "1468                        0                     8  \n",
       "1469                        1                     2  \n",
       "\n",
       "[1263 rows x 48 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Classification"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Support Vector Machine (prepared by Teh Liang Sean) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import important library to do SVM\n",
    "from sklearn import svm\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the label from the candidate predictors.\n",
    "# Attrition (does the employee stay or leave IBM) is the column the SVM learns to predict;\n",
    "# every other column of ibm is a candidate feature.\n",
    "y_svm = ibm['Attrition']\n",
    "x_svm_find = ibm.drop('Attrition', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                     Features         Score\n",
      "29              MonthlyIncome  26471.159476\n",
      "30                MonthlyRate   1308.443569\n",
      "2                   DailyRate   1111.594737\n",
      "44         YearsInCurrentRole    109.263859\n",
      "43             YearsAtCompany    103.805057\n",
      "46       YearsWithCurrManager    100.636711\n",
      "40          TotalWorkingYears     95.843571\n",
      "35                   OverTime     60.367656\n",
      "6            DistanceFromHome     57.197704\n",
      "0                         Age     46.705340\n",
      "28     JR_SalesRepresentative     27.299127\n",
      "33                  MS_Single     26.251695\n",
      "39           StockOptionLevel     24.376114\n",
      "20  JR_HealthcareRepresentive     10.935616\n",
      "24   JR_ManufacturingDirector      9.987076\n"
     ]
    }
   ],
   "source": [
    "# Rank every candidate feature against Attrition with the chi-squared (chi²) test,\n",
    "# which requires non-negative feature values, and list the 15 highest-scoring features.\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "\n",
    "best_15_features = SelectKBest(score_func=chi2, k=15)\n",
    "fit = best_15_features.fit(x_svm_find, y_svm)\n",
    "\n",
    "# Feature names and their chi² scores, one row per feature in column order\n",
    "dfcolumns = pd.DataFrame(x_svm_find.columns)\n",
    "dfscores = pd.DataFrame(fit.scores_)\n",
    "\n",
    "# Put name and score side by side for easier reading\n",
    "top_15_feature_scores = pd.DataFrame({'Features': dfcolumns[0], 'Score': dfscores[0]})\n",
    "\n",
    "# Show the 15 best features\n",
    "print(top_15_feature_scores.nlargest(15, 'Score'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The 15 features ranked highest by the chi-squared test, best score first\n",
    "top_15_feature_names = [\n",
    "    'MonthlyIncome',\n",
    "    'MonthlyRate',\n",
    "    'DailyRate',\n",
    "    'YearsInCurrentRole',\n",
    "    'YearsAtCompany',\n",
    "    'YearsWithCurrManager',\n",
    "    'TotalWorkingYears',\n",
    "    'OverTime',\n",
    "    'DistanceFromHome',\n",
    "    'Age',\n",
    "    'JR_SalesRepresentative',\n",
    "    'MS_Single',\n",
    "    'StockOptionLevel',\n",
    "    'JR_HealthcareRepresentive',\n",
    "    'JR_ManufacturingDirector',\n",
    "]\n",
    "\n",
    "# Set up the data for the SVM from those columns only.\n",
    "# Selecting by name keeps every column label exactly as it is in ibm (no stray whitespace),\n",
    "# and .copy() makes the new frame independent of ibm.\n",
    "ibm_svm_features_df = ibm[top_15_feature_names].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Age</th>\n",
       "      <th>JR_SalesRepresentative</th>\n",
       "      <th>MS_Single</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>JR_HealthcareRepresentive</th>\n",
       "      <th>JR_ManufacturingDirector</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>1102</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>279</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>1373</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1392</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>33</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>591</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>884</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>613</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>155</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>1023</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>628</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1263 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      MonthlyIncome  MonthlyRate  DailyRate  YearsInCurrentRole  \\\n",
       "0              5993        19479       1102                   4   \n",
       "1              5130        24907        279                   7   \n",
       "2              2090         2396       1373                   0   \n",
       "3              2909        23159       1392                   7   \n",
       "4              3468        16632        591                   2   \n",
       "...             ...          ...        ...                 ...   \n",
       "1465           2571        12290        884                   2   \n",
       "1466           9991        21457        613                   7   \n",
       "1467           6142         5174        155                   2   \n",
       "1468           5390        13243       1023                   6   \n",
       "1469           4404        10228        628                   3   \n",
       "\n",
       "      YearsAtCompany  YearsWithCurrManager  TotalWorkingYears  OverTime  \\\n",
       "0                  6                     5                  8         1   \n",
       "1                 10                     7                 10         0   \n",
       "2                  0                     0                  7         1   \n",
       "3                  8                     0                  8         1   \n",
       "4                  2                     2                  6         0   \n",
       "...              ...                   ...                ...       ...   \n",
       "1465               5                     3                 17         0   \n",
       "1466               7                     7                  9         0   \n",
       "1467               6                     3                  6         1   \n",
       "1468               9                     8                 17         0   \n",
       "1469               4                     2                  6         0   \n",
       "\n",
       "      DistanceFromHome  Age  JR_SalesRepresentative  MS_Single  \\\n",
       "0                    1   41                       0          1   \n",
       "1                    8   49                       0          0   \n",
       "2                    2   37                       0          1   \n",
       "3                    3   33                       0          0   \n",
       "4                    2   27                       0          0   \n",
       "...                ...  ...                     ...        ...   \n",
       "1465                23   36                       0          0   \n",
       "1466                 6   39                       0          0   \n",
       "1467                 4   27                       0          0   \n",
       "1468                 2   49                       0          0   \n",
       "1469                 8   34                       0          0   \n",
       "\n",
       "      StockOptionLevel  JR_HealthcareRepresentive   JR_ManufacturingDirector  \n",
       "0                    0                           0                         0  \n",
       "1                    1                           0                         0  \n",
       "2                    0                           0                         0  \n",
       "3                    0                           0                         0  \n",
       "4                    1                           0                         0  \n",
       "...                ...                         ...                       ...  \n",
       "1465                 1                           0                         0  \n",
       "1466                 1                           1                         0  \n",
       "1467                 1                           0                         1  \n",
       "1468                 0                           0                         0  \n",
       "1469                 0                           0                         0  \n",
       "\n",
       "[1263 rows x 15 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm_svm_features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# x_svm is the feature matrix for the SVM: a second name for ibm_svm_features_df (same object, not a copy)\n",
    "x_svm = ibm_svm_features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardise every selected feature so that the SVM trains more effectively:\n",
    "# first learn the scaling from x_svm, then apply it to the same data\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "s_scaler = StandardScaler().fit(x_svm)\n",
    "x_scaled_svm = s_scaler.transform(x_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attrition has too few 'yes' values, so the classes are imbalanced.\n",
    "# Tomek links under-sampling with sampling_strategy='not minority' removes samples\n",
    "# from every class except the minority one.\n",
    "from imblearn.under_sampling import TomekLinks\n",
    "\n",
    "tl_svm = TomekLinks(sampling_strategy='not minority')\n",
    "\n",
    "# Resample the standardised features (x_scaled_svm), not the raw x_svm, so that the scaling is\n",
    "# actually used by everything downstream. On the raw columns the distances are dominated by\n",
    "# the large-valued features such as MonthlyIncome.\n",
    "# The scaled array is wrapped in a DataFrame to keep the column names and row index of x_svm.\n",
    "x_scaled_svm_df = pd.DataFrame(x_scaled_svm, columns=x_svm.columns, index=x_svm.index)\n",
    "x_tl_svm, y_tl_svm = tl_svm.fit_resample(x_scaled_svm_df, y_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the models on 80% of the resampled data and keep 20% for testing;\n",
    "# the split is stratified on the target and uses a fixed random_state\n",
    "x_train_svm, x_test_svm, y_train_svm, y_test_svm = train_test_split(\n",
    "    x_tl_svm,\n",
    "    y_tl_svm,\n",
    "    test_size=0.2,\n",
    "    stratify=y_tl_svm,\n",
    "    random_state=40,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model 1: SVM whose hyperparameters were tuned by hand\n",
    "model_1_svm = svm.SVC(\n",
    "    kernel='sigmoid',\n",
    "    C=2,\n",
    "    gamma='scale',\n",
    "    coef0=0.6,\n",
    "    probability=True,\n",
    "    random_state=40,\n",
    ").fit(x_train_svm, y_train_svm)\n",
    "\n",
    "# Predicted classes for the held-out test rows\n",
    "y_predict_1_svm = model_1_svm.predict(x_test_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 4 folds for each of 5400 candidates, totalling 21600 fits\n"
     ]
    }
   ],
   "source": [
    "# Model 2 uses GridSearchCV to find the best SVM hyperparameters by 4-fold cross-validation.\n",
    "# Only some hyperparameters are tuned.\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Candidate values (rounded so that e.g. 2.8 is not stored as 2.8000000000000016):\n",
    "#   C     - regularization parameter: 1.0 to 2.9 in steps of 0.1\n",
    "#   coef0 - independent term of the kernel function: 0.0 to 1.4 in steps of 0.1\n",
    "#   gamma - kernel coefficient: 'auto' or 'scale'\n",
    "c_values = np.round(np.arange(1.0, 3.0, 0.1), 1).tolist()\n",
    "coef0_values = np.round(np.arange(0.0, 1.5, 0.1), 1).tolist()\n",
    "gamma_values = ['auto', 'scale']\n",
    "\n",
    "# One sub-grid per kernel, each listing only the parameters that kernel actually uses.\n",
    "# SVC ignores gamma for 'linear', coef0 for 'linear' and 'rbf', and degree for every kernel\n",
    "# except 'poly' (which is not searched), so a single flat grid would refit identical models\n",
    "# many times over.\n",
    "param_grid = [\n",
    "    {'kernel': ['linear'], 'C': c_values},\n",
    "    {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values},\n",
    "    {'kernel': ['sigmoid'], 'C': c_values, 'gamma': gamma_values, 'coef0': coef0_values},\n",
    "]\n",
    "\n",
    "# Set random state to 40\n",
    "find_best_para_model = svm.SVC(random_state=40)\n",
    "Grid_search_svm = GridSearchCV(find_best_para_model, param_grid, n_jobs=-1, verbose=2, cv=4)\n",
    "# This may take some time to run\n",
    "Grid_search_svm.fit(x_train_svm, y_train_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'C': 2.8000000000000016,\n",
       " 'coef0': 0.0,\n",
       " 'degree': 3,\n",
       " 'gamma': 'scale',\n",
       " 'kernel': 'rbf'}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the hyperparameter combination that the grid search selected as best\n",
    "Grid_search_svm.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model 2: SVM built with the hyperparameter values found by the grid search\n",
    "model_2_svm = svm.SVC(\n",
    "    kernel='rbf',\n",
    "    C=2.8000000000000016,\n",
    "    gamma='scale',\n",
    "    degree=3,\n",
    "    coef0=0.0,\n",
    "    probability=True,\n",
    "    random_state=40,\n",
    ").fit(x_train_svm, y_train_svm)\n",
    "\n",
    "# Predicted classes for the held-out test rows\n",
    "y_predict_2_svm = model_2_svm.predict(x_test_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy of prediction classification result for 2 model\n",
      "Hyperparameters that try to tune manually (model 1):  0.7416666666666667\n",
      "Best hyperparameters found using GridSearchCV (model 2):  0.8166666666666667\n"
     ]
    }
   ],
   "source": [
    "# Compare the two models by the accuracy of their predictions on the test set\n",
    "accuracy_model_1 = metrics.accuracy_score(y_test_svm, y_predict_1_svm)\n",
    "accuracy_model_2 = metrics.accuracy_score(y_test_svm, y_predict_2_svm)\n",
    "\n",
    "print('Accuracy of prediction classification result for 2 model')\n",
    "print('Hyperparameters that try to tune manually (model 1): ', accuracy_model_1)\n",
    "print('Best hyperparameters found using GridSearchCV (model 2): ', accuracy_model_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[183  12]\n",
      " [ 32  13]]\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.85      0.94      0.89       195\n",
      "           1       0.52      0.29      0.37        45\n",
      "\n",
      "    accuracy                           0.82       240\n",
      "   macro avg       0.69      0.61      0.63       240\n",
      "weighted avg       0.79      0.82      0.79       240\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:70: FutureWarning: Pass labels=[0, 1] as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error\n",
      "  warnings.warn(f\"Pass {args_msg} as keyword args. From version \"\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the classification result of model 2 with a confusion matrix\n",
    "# (rows are the true classes, columns the predicted classes, both in the order 0, 1)\n",
    "from sklearn.metrics import confusion_matrix\n",
    "# labels is passed by keyword: scikit-learn warns about the positional form and rejects it from version 1.0\n",
    "print(confusion_matrix(y_test_svm, y_predict_2_svm, labels=[0, 1]))\n",
    "\n",
    "# Evaluate the classification result by precision, recall and F1-score\n",
    "from sklearn.metrics import classification_report\n",
    "print(classification_report(y_test_svm, y_predict_2_svm))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAz+UlEQVR4nO3de5xN9frA8c/jfhvkmtyT2wwhg3QKxYlSEerERIkzlK5O/VIicolURIgiOXGcSFIJUS5RSpLLSMepk5RObrlPGM/vj7XG2e3msjFrr71nP+/Xa71mr71uz9ozs579/X7X+n5FVTHGGBO78vgdgDHGGH9ZIjDGmBhnicAYY2KcJQJjjIlxlgiMMSbGWSIwxpgYZ4nAGGNinCUCc4aIrBCRAyJSMIP3ewe910pEdgXMi4jcLyJbROSoiOwSkbkiUj/EYxcUkekickhEfhaR/lms+7iIHAmYjovIaREp4y5/RkR+cPf1vYgMDNr+RjfOIyKyVkTig85juIj8KCIH3XNPCIhxmrvPwyLypYhcF7Tv3iKyw933YhG5KGjfo0Vknzs9IyISsPw/7rmkn9fSoG0HishO97zmiEjxgOWlROSfIrLXnWalLxeRWiLytojsEZH9IrJERGoHffZjReQn9/c/SUTyB+37Lff3+r2IdAtYVk1ENOj3MShgeUkReU1EfnGnIUGfV0MRWe1+1rtEZHBmv3fjIVW1ySaAakAasB+4JWjZCqB30HutgF0B8+OBfwPXAAWBIkASMCDE4z8NrAYuAOoCPwPtQtx2CPBhwHxtoKj7uiKwFejkztcEDgFXAvmAx4AdQD53+a3AT8DFQF43rg3usqLusarhfIm6ATgMVHOXtwR+ARKAAsBkYGVAXH2A7UAlN64UoG/A8v8AbTI5xzuAr4HKQDHgbeC1gOWTgKVAcaAEsAx43l3WFOgFlALyA8OArwO2fdL97EsBZYFPgaEBy/8B/NM97pXAQSAh4O9G0z+/DOJ+FZjr/j1Uc/9GegYsTwFGuJ91DWA3cJPf/w+xNvkegE2RMQGDgTXA88C7QctWkEUicC+uaUDT8zj+j8C1AfPDgDkhbCfuxeWOTJZXBDYD/+fO3wu8F7A8D3AcaO3OPwq8EbA8AUjN4vibgM7u62eBiQHLLnIvkjXc+bVAcsDyXsCnAfNZJYJ5wCMB81cAqUARd/594J6A5f2AJZnsq5QbV2l3fj0ByR/oBvzgvi4KnABqBSz/OzDKfZ1dItgLNAmYfxxYHTB/DIgPmJ8LPOb3/0OsTVY1ZNL1AGa5U1sRKX8W27bGSQqfZbaCiHQTkU2ZLLsA56L5VcDbX+FchLNzFVAeeDNonwNE5AiwC+diNjt9kTsRNF/PnZ8DXOJWp+TH+Sa+OJO4ywO1cEocme2bgH0nkP05znKrcJaKSIMM4gycL4iThAEmAjeIyAXu59kZJzlkpAXws6ruy2LflUSkhHt+aar6TTZxf+9W7byaXkUXtL/A1/UC5scBPUQkv1td1RynNGPCyBKBQUSuBKrifBP+Aucbdrest/qd0jhF+kyp6mxVvTSTxcXcnwcD3jsIxIVw7DuAeap6JOh4o9ztL8P5Bpu+7w+AluK0cRTA+YZaAKfqAvc8VuNU4RwHbgEeCj6omyRm4VTPfO2+vQi4VUQuFZHCOKUsDdh3sQzOsVhAO0ESzjfsqsBHwBIRKekuex/o7dbJl8ApuRCw7w3ueexzpzSc6qLguCvhJI3ANpj3gQdEpKyIXAjcH7Dv4JjT407/3ewFmrgxN3bfnxWw7mJggIjEicglwF0BMQO8C3TB+ay/Bqap6ufBcRtvWSIw4FxMl6rqXnd+tvteulM4dcuB8gMn3df7gArncfz0i3jxgPeK49S/Z8q92N4CvJbRcnV8iXORGeq+9zXOub2Ic9Evg1NPnd7w/STOha0yUMjd7kMROXPxEpE8OMnlBE5VU/rxlrvbvwl8j1PVczhg
30cyOMcj6taJqOoaVT2uqsdU9WngV5wSD8B0nLr6FTglkI/c99P3PRf4BudCXBwnmb8e9HmVxWlHmKSq/whYNAL4EtiIU321AOd3+0sGMafHfdiN+YiqrlfVU6r6X/fzuDagIft+nM//XzjtGv9Ij1lESuEkiqdwPuvKOKXRezBhZYkgxrkX01txviX/LCI/43wDbhBQNbET55tqoOo4FzuA5ThVCYnnEoOqHsC5KAdWhTTgf1UumemE07i9Ipv18uE0RKYfb56q1lPV0jgX7qpA+rfQBsA/VXWXe3GbgdOAHQ/O3TvANJzqqM6qepIAqjpRVWuqajmchJAP2OIu3nqW56i41SqqelpVn1TVaqpayd3uR3dK39cUVT3qlo5eAq5P35FbXbQUWKiqI4JiPq6q96pqRVW9GCexf6GqaTjJJZ+I1AzYJKu407szTo97v6omqeqFqpqAc81Jr0K8GKfaaab7We/CqZq7/g97Nd7yu5HCJn8noCvOxbQKcGHAtAp4zl2nLc63w6Y4/+C1gG38/o6XCTjf+lrhVFEUAm4j9LuGRgErcS66dXASQ5Z3DeFc2J4Kei8Pzt05F7ixNnX3dX/AOo1x7lIpi3M3zOyAZU8CH+Nc6PMA3YGjQEl3+Us4d9UUyyCeQjj13+J+niuAkQHL+7qfW0WcNpGt6Z+hu/6fAj67R4A9/K9BtxROMhOcpLSF3zc8f+T+Dgq70yRgjbusOM7F98VMPsf0eAS4HPiB3zfcz8H5Jl/UjTHwrqFmOHdp5cGpIvwn8FHAtjXc9/MC1+FUJSUExPUrTjVkHpy/u0+AEX7/X8Ta5HsANvn8B+AUzZ/L4P1bcW7hTL+t8i73wnUI53bLAUCegPUFeMBd5xjON9V/BvzTJwFbs4ijIE71xyHgv0D/oOVHgKsC5iviVFldErReHvec9rvbfIPTDiAB63yMU7WxH5iCe6upu6wQTh36bjeWDbgJCafkoDh36xwJmJLc5SVx7iI66n52TwN5gz6jZ9zj7ndfi7ssIWDbfTilrMSAbWvhtFscwymJBX8+1YF33G33u59BTXfZHW7cR4PiruIub4FTjXXMPUZS0L5L4VQXHcUpHXYLWNYV+M5dthuYCVwY9Hf0k7vvjUDboH1fg1MaO+h+Zi/j3gllU/im9D9CY4wxMcraCIwxJsZZIjDGmBhnicAYY2KcJQJjjIlx+fwO4GyVKVNGq1Wr5ncYxhgTVb744ou9qlo2o2VRlwiqVavG+vXr/Q7DGGOiioh8n9kyqxoyxpgYZ4nAGGNinCUCY4yJcZYIjDEmxlkiMMaYGOdZIhBnIPJfRGRLJstFRMaLM9D3JhG5zKtYjDHGZM7LEsEMoF0Wy6/DGWavJpCMM9C3McaYMPPsOQJVXSUi1bJYpQMwU53uTz8VkZIiUkFVsxzy0Bhjwm32up28vfHH7Ff0iKqSmppK4xrlefLGUIbyPjt+thFUxBkAI90u970/EJFkEVkvIuv37NkTluCMMSbd2xt/JGX3IV+OfeTIETZs2MDGjRs5efJk9hucAz+fLJYM3stwcARVnQpMBUhMTLQBFIwxYRdfoTj/7NM8bMdLTU1l6NChjBkzhjJlyjBp0iQ6dWroybH8TAS7cAarTlcJZyQjY4yJeR07dmTJkiX07NmT5557jgsuuMCzY/lZNbQQ6OHePXQ5cNDaB4wxsezw4cOkpqYCMGDAAJYuXcr06dM9TQLgYYlARP6BM5B5GRHZhTMoeH4AVX0JWARcjzP+7TGgp1exGGMik9+NsKFK2X2I+ArFPT3GkiVLSE5O5vbbb2fEiBG0atXK0+MF8vKuoa7ZLFegn1fHN8ZEvvRGWK8vsucrvkJxOjTM8F6W87Z//3769+/Pa6+9Rp06dWjfvr0nx8lK1HVDbYzJXcLdCBtJli9fTlJSEvv27WPgwIE88cQTFCpUKOxxWCIwxhiflCtXjurVq7N48WIaNmzoWxyWCIwxnsusLSAaqoVy
kqry2muvsWHDBsaPH0/9+vVZu3YtIhndTR8+1umcMcZzmT2Q5WXde6T57rvvaNu2LT179mTjxo0cP34cwPckAFYiMMaESay2BaSlpTFx4kQee+wx8uTJw6RJk+jTpw958kTO93BLBMYY46G9e/cyePBgWrZsyUsvvUSVKlX8DukPLBEYY87Kudz7H2ttASdPnmTWrFn06NGD8uXLs2HDBqpXrx4R1UAZiZyyiTEmKpxLB2yx1BbwxRdfkJiYSM+ePfnggw8AuPjiiyM2CYCVCIwx5yBW6/uzcvz4cYYOHcqzzz5LuXLleOutt2jbtq3fYYXEEoExxuSAjh07snTpUnr37s2YMWMoWbKk3yGFzKqGjDHmHB06dOhMJ3GPP/44y5Yt4+WXX46qJABWIjDGcHYNwLHW8JuZRYsW0bdvX26//XZGjhxJy5Yt/Q7pnFmJwBhzVg3AsdTwm5G9e/fSvXt32rdvT1xcHDfddJPfIZ03KxEYYwBrAA7FBx98QFJSEgcOHGDw4ME8/vjjFCxY0O+wzpslAmOMCVGFChWoVasWkydPpn79+n6Hk2MsERgTA7JrA7B6/4ypKtOmTePLL79k4sSJ1KtXj9WrV0f0MwHnwtoIjIkB2bUBxHq9f0a+/fZb2rRpw1//+ldSUlIiqpO4nGYlAmNihLUBhCYtLY3x48czcOBA8uXLx5QpU+jdu3dEdRKX0ywRGGNMgL179zJ06FBat27N5MmTqVSpkt8heS73pjhjjAnRiRMnmD59OqdPn6Z8+fJs3LiRhQsXxkQSACsRGBP1QnkYzBqDM/f5559z1113sWXLFipVqsS1115LtWrV/A4rrKxEYEyUC+VhMGsM/qNjx47x8MMPc/nll3PgwAEWLlzItdde63dYvrASgTG5gDUEn70OHTqwbNkykpOTeeaZZyhRooTfIfnGSgTGmJhx8ODBM53EDRo0iA8//JApU6bEdBIAKxEYEzHOZeQvsPr/UL377rv07duX7t278/TTT9OiRQu/Q4oYViIwJkKcy8hfYPX/2dmzZw/dunXjxhtvpFSpUnTq1MnvkCKOlQiMiSBW15+zli5dSlJSEgcPHmTo0KEMGDCAAgUK+B1WxLFEYIzJtSpWrEjdunWZPHkyCQkJfocTsSwRGOOjwHYBq+s/f6dPn+aVV17hyy+/PHPxX7Vqld9hRTxrIzDGR4HtAlbXf3527NhB69at6dOnD9u3bz/TSZzJnpUIjPGZtQucn7S0NMaNG8egQYPInz8/L7/8Mr169cqVvYR6xdMSgYi0E5HtIrJDRAZksLyEiLwjIl+JyFYR6ellPMaY3Gfv3r0MHz6cP//5z6SkpNC7d29LAmfJs0QgInmBicB1QDzQVUTig1brB6SoagOgFfCciFiTvjEmS7/99hsvv/zy7zqJW7BgARUrWtXaufCyRNAU2KGq36rqCWAO0CFoHQXixEnfxYD9wCkPYzImIsxet5O/TPnknJ4biHXr1q2jcePGJCcns2zZMgCqVq1qpYDz4GUiqAj8EDC/y30v0ItAXeAnYDPwgKqeDt6RiCSLyHoRWb9nzx6v4jUmbNIbia2BOHRHjx6lf//+NG/enIMHD/Lee+/FbCdxOc3LxuKM0rMGzbcFNgLXADWAD0Rktar+7muSqk4FpgIkJiYG78OYqGSNxGenY8eOLFu2jLvvvptRo0ZRvLjdaptTvCwR7AIqB8xXwvnmH6gnMF8dO4DvgDoexmSMiSK//vrrmdtABw8ezMqVK5k0aZIlgRzmZYngc6CmiFQHfgRuA7oFrbMTaA2sFpHyQG3gWw9jMsYXwR3K2cNj2Vu4cCF333033bt3Z9SoUVx11VV+h5RreVYiUNVTwL3AEmAb8IaqbhWRviLS111tGHCFiGwGlgOPquper2Iyxi/BHcpZ20DmfvnlF2677TY6dOhAmTJl6NKli98h5XqePlCmqouARUHvvRTw+ifAWntMTLA2gewtXryYpKQkjhw5
wrBhw3j00UfJnz+/32HlevZksTEmYlSuXJn69eszadIk4uODHzsyXrG+howxvjl9+jSTJ0+mT58+ACQkJLBixQpLAmFmJQJjOPfRwUJljcN/9M0339C7d29Wr17Nn//8Z1JTUylUqJDfYcUkKxEYw7mPDhYqaxz+n1OnTjF69GguvfRSNm/ezKuvvsqSJUssCfjISgTGuKwxNzz27dvH6NGjuf7665k4cSIVKlTwO6SYZyUCY4znfvvtN6ZMmXKmk7ivvvqK+fPnWxKIEFYiMLleKPX/VofvnU8++YRevXqxbds2atSoQZs2bahcuXL2G5qwsRKByfVCqf+3Ovycd+TIER588EH+9Kc/cfToURYvXkybNm38DstkwEoEJiZY/X/4dezYkeXLl3PvvfcycuRI4uLi/A7JZMJKBMaYHHPgwIEzncQNGTKE1atXM2HCBEsCES7kEoGIFFXVo14GY0xGzvcef6v/D4/58+fTr18/evTowejRo7nyyiv9DsmEKNsSgYhcISIpOB3HISINRGSS55EZ4zrfe/yt/t9bP//8M126dKFz585ceOGF3HbbbX6HZM5SKCWCsTgDyCwEUNWvRKSFp1EZE8Tq+CPT+++/T1JSEseOHWPkyJE8/PDD1klcFAqpakhVfwgaDzTNm3CMMdGkatWqNGrUiIkTJ1Knjo0pFa1CaSz+QUSuAFRECojIw7jVRMaY2HL69GlefPFF/vrXvwIQHx/P8uXLLQlEuVBKBH2BF3AGnt8FLAXu8TIok3vkRGdu1tgbGbZv306vXr1Ys2YNbdu2tU7icpFQSgS1VTVJVcurajlVvR2o63VgJnfIic7crLHXXydPnuTpp5+mQYMGpKSkMGPGDN5//31LArlIKCWCCcBlIbxnTIasoTe6HThwgDFjxnDjjTcyYcIELrzwQr9DMjks00QgIs2BK4CyItI/YFFxIK/XgRlj/JOamsr06dPp27cv5cqVY9OmTVSqVMnvsIxHsioRFACKuesEPhZ4CLDRpGPY2dT7W/1+9Pn444/p1asX33zzDbVq1aJNmzaWBHK5TBOBqq4EVorIDFX9PowxmQiXXu8fygXe6vejx+HDh3nssceYOHEi1apVY+nSpdZJXIwIpY3gmIiMARKAM61DqnqNZ1GZiGf1/rlPx44d+eijj3jggQcYPnw4xYoV8zskEyahJIJZwD+BG3BuJb0D2ONlUMaY8Ni/fz+FChWiSJEiDBs2DBGheXNL8LEmlNtHS6vqNOCkqq5U1buAyz2OyxjjsXnz5lG3bl2GDBkCwBVXXGFJIEaFkghOuj93i0h7EWkEWMtRjJq9bifrvtvvdxjmPOzevZtOnTpxyy23ULlyZZKSkvwOyfgslKqh4SJSAvgbzvMDxYEHvQzKRK70u4WsATg6vffee9x+++2kpqYyevRo+vfvT758Nj5VrMv2L0BV33VfHgSuBhCRP3kZlIlszaqXoluzKn6HYc7BxRdfTJMmTXjxxRepVauW3+GYCJFp1ZCI5BWRriLysIjUc9+7QUTWAi+GLUJjzDlLS0vjhRdeoFevXgDUrVuXpUuXWhIwv5NViWAaUBn4DBgvIt8DzYEBqrogDLGZCJL+EJk9IBY9UlJS6N27N5988gnXX3+9dRJnMpVVIkgELlXV0yJSCNgLXKKqP4cnNBNJApOAtQ9EthMnTvDMM88wbNgw4uLieP311+nWrRtBY4oYc0ZWieCEqp4GUNVUEfnmbJOAiLTD6cI6L/CKqo7KYJ1WwDggP7BXVVuezTFM+NhDZNHh119/ZezYsdx8882MHz+ecuXK+R2SiXBZJYI6IrLJfS1ADXdeAFXVS7PasYjkBSYCf8YZx+BzEVmoqikB65QEJgHtVHWniNhfrDHn4Pjx40ybNo177rmHcuXKsXnzZi666CK/wzJRIqtEcL5jDjQFdqjqtwAiMgfoAKQErNMNmK+qOwFU9ZfzPKY5C9Z5XO6watUqevfuzb/+9S/q
1q1L69atLQmYs5LpXUOq+n1WUwj7rgj8EDC/y30vUC3gAhFZISJfiEiPjHYkIskisl5E1u/ZY71b5JSzGTTG2gYiz6FDh7jnnnto2bIlp06dYtmyZbRu3drvsEwU8vJJkoxapjSD4zcGWgOFgU9E5FNV/eZ3G6lOBaYCJCYmBu/DnAer949eHTt2ZMWKFTz00EMMGzaMokWL+h2SiVJeJoJdOLefpqsE/JTBOntV9ShwVERWAQ2AbzDG/MHevXspUqQIRYoUYcSIEYgIl19uXX+Z8xNKX0OISGERqX2W+/4cqCki1UWkAHAbsDBonbeBq0Qkn4gUAZoB287yOMbkeqrKnDlzqFu3Lk8++SQAzZs3tyRgckS2iUBEbgQ2Aovd+YYiEnxB/wNVPQXcCyzBubi/oapbRaSviPR119nm7ncTzoNrr6jqlnM8F2NypR9//JGOHTvStWtXqlevTo8eGTalGXPOQqkaGoJzB9AKAFXdKCLVQtm5qi4CFgW991LQ/BhgTCj7MybWvPvuuyQlJXHy5EmeffZZHnzwQfLmtSHDTc4KJRGcUtWD9lSiMeF3ySWXcMUVVzBhwgQuueQSv8MxuVQobQRbRKQbkFdEaorIBGCtx3EZE5PS0tIYO3Ysd955JwB16tTh/ffftyRgPBVKieA+YCDwGzAbp85/uJdBmXNnD4lFr61bt9KrVy/WrVtH+/btrZM4EzahlAhqq+pAVW3iTk+oaqrnkZlzYg+JRZ8TJ07w1FNP0ahRI/79738ze/Zs3nnnHUsCJmxCKRE8LyIVgLnAHFXd6nFM5jzZQ2LR5ddff2X8+PHccsstjBs3jrJly/odkokx2ZYIVPVqoBWwB5gqIptF5AmvAzMmNzt27BgvvPACaWlpZzqJmzVrliUB44uQnix2u58eLyIfAf8HDMbaCcIulPp/q/ePfB999BG9e/fm22+/pV69erRu3ZoKFSr4HZaJYaE8UFZXRIaIyBacISrX4nQXYcIslPp/q/ePXAcPHqRPnz5cc801iAgfffSRdRJnIkIoJYJXgX8A16pqcF9BJsys/j96dezYkVWrVvHII48wZMgQihQp4ndIxgAhJAJVtc5MjDlHe/bsoWjRohQpUoSnn36avHnz0qRJE7/DMuZ3Mq0aEpE33J+bRWRTwLQ5YOQyY0wGVJXZs2f/rpO4yy+/3JKAiUhZlQgecH/eEI5AjMktdu3axd133827775Ls2bNzjwlbEykymqEst3uy3syGJ3snvCEZ0x0WbhwIfHx8Xz44YeMHTuWNWvWkJCQ4HdYxmQplCeL/5zBe9fldCDG5Aa1atXiyiuvZPPmzdZTqIkamVYNicjdON/8Lw5qE4gD1ngdmDHR4NSpU4wbN45NmzYxc+ZM6tSpw6JFi7Lf0JgIklUbwWzgfeBpYEDA+4dVdb+nUcWo7B4Ys4fFIsumTZvo1asX69evp0OHDtZJnIlaWVUNqar+B+gHHA6YEJFS3ocWe7J7YMweFosMv/32G08++SSNGzdm586dvPHGG7z11luWBEzUyq5EcAPwBaBA4Mg0ClzsYVwxyx4Yi3yHDh1i0qRJdO3albFjx1K6dGm/QzLmvGSaCFT1Bvdn9fCFY0xkOnr0KFOnTuX++++nbNmybNmyhfLly/sdljE5IpS+hv4kIkXd17eLyPMiUsX70IyJDMuXL6d+/fr079+flStXAlgSMLlKKLePTgaOiUgDnJ5Hvwf+7mlUMWb2up38ZconIQ8oY8Lj119/pXfv3rRp04Z8+fKxcuVKrrnmGr/DMibHhZIITqmqAh2AF1T1BZxbSE0OSW8ktsbgyHLzzTczY8YMHn30Ub766itatGjhd0jGeCKU3kcPi8hjQHfgKhHJC+T3NqzYY43EkeG///0vxYoVo2jRoowaNYp8+fLRuHFjv8MyxlOhlAj+gjNw/V3uADUVgTGeRmVMmKkqf//734mPjz/TSVyzZs0sCZiYEMpQlT8Ds4ASInID
kKqqMz2PzJgw2blzJ+3bt6dHjx7Url2bXr16+R2SMWEVyl1DtwKfAbcAtwLrRKSL14EZEw5vv/02CQkJrFq1ivHjx7N69Wrq1q3rd1jGhFUobQQDgSaq+guAiJQFlgHzvAzMGC+pKiJCnTp1aNWqFRMmTKBatWp+h2WML0JpI8iTngRc+0LczpiIc+rUKUaPHk337t0BqF27Nu+8844lARPTQrmgLxaRJSJyp4jcCbwHWPeKJup89dVXNGvWjAEDBnDs2DFSU1P9DsmYiBBKY/EjwBTgUqABMFVVH/U6MGNySmpqKk888QSJiYn8+OOPzJs3j/nz51sncca4shqPoCbwLFAD2Aw8rKqZ95FsTIQ6fPgwU6ZMISkpieeff55SpazzXGMCZVUimA68C3TG6YF0wtnuXETaich2EdkhIgOyWK+JiKTZ3Ugmpxw5coRnn32WtLQ0ypYtS0pKCjNmzLAkYEwGsrprKE5VX3ZfbxeRDWezY/cJ5Ik4Q13uAj4XkYWqmpLBeqOBJWezf2Mys3TpUpKTk9m5cyeNGzfm6quvpmzZsn6HZUzEyqpEUEhEGonIZSJyGVA4aD47TYEdqvqtqp4A5uD0VxTsPuBN4JcMluV6s9ftZN13NuBbTti/fz89e/akbdu2FCpUiNWrV3P11Vf7HZYxES+rEsFu4PmA+Z8D5hXIrhvGisAPAfO7gGaBK4hIReBmd19NMtuRiCQDyQBVquSuHrDTh6a0zubO380338yaNWt4/PHHGTRokDUGGxOirAamOd+vUpLBexo0Pw54VFXTRDJa/UwsU4GpAImJicH7iHrNqpeiW7PcleDC5eeffyYuLo6iRYsyZswYChQoQMOGDf0Oy5io4uWDYbuAygHzlYCfgtZJBOaIyH+ALsAkEenoYUwml1BVZsyYQXx8PIMHDwagadOmlgSMOQehdDFxrj4HaopIdeBH4DagW+AKgcNgisgM4F1VXeBhTGExe93OM1U+2Ukfh8CE7j//+Q99+vRh6dKlXHnllSQnJ/sdkjFRzbMSgaqeAu7FuRtoG/CGqm4Vkb4i0ter40aC9IFmQmGD0Zydt956i3r16rF27VpefPFFVq5cSe3atf0Oy5iolm2JQJzK+yTgYlV9yh2v+EJV/Sy7bVV1EUHdUajqS5mse2dIEUcJG2gmZ6V3EpeQkECbNm144YUXqFq1qt9hGZMrhFIimAQ0B7q684dxng8wxnMnT55k5MiRJCUlAVCrVi0WLFhgScCYHBRKImimqv2AVABVPQAU8DQqY4ANGzbQtGlTBg4cSFpaGr/99pvfIRmTK4XSWHzSffpX4cx4BKc9jSoKBTYQWwPw+Tl+/DhPPfUUY8aMoWzZsrz11lt07NjR77CMybVCKRGMB94CyonICOBjYKSnUUWhwAZiawA+P0ePHmXatGnccccdpKSkWBIwxmPZlghUdZaIfAG0xnlIrKOqbvM8sihkDcTn7vDhw0yePJm//e1vlClThpSUFMqUKeN3WMbEhFDGLK4CHAPeARYCR933jMkRixcvpl69egwYMIDVq1cDWBIwJoxCaSN4D6d9QIBCQHVgO5DgYVwmBuzbt4/+/fszc+ZM6taty5o1a2je3EpUxoRbKFVD9QPn3Z5H+3gWkYkZnTp1Yu3atQwaNIiBAwdSsGBBv0MyJiaddRcTqrpBRDLtKdSYrOzevZu4uDiKFSvGs88+S4ECBWjQoIHfYRkT00J5srh/wGwe4DJgj2cRmVxJVXn11Vfp378/d911F88//zxNmtj3CWMiQSglgriA16dw2gze9Cac6GLPDoTm22+/pU+fPixbtowWLVrQt2+u7mrKmKiTZSJwHyQrpqqPhCmeqJL+7EB8heL27EAm5s+fT/fu3cmbNy+TJ08mOTmZPHm87P3cGHO2Mk0EIpJPVU+FOCxlzLJnBzKW3klc/fr1adeuHePGjaNy5crZb2iMCbusSgSf4bQHbBSRhcBc4Gj6QlWd73Fs
JgqdOHGCZ555hq1btzJ79mxq1qzJm29aTaIxkSyUMnopYB/OuMI3ADe6P435nfXr19OkSRMGDRoEOEnBGBP5sioRlHPvGNrC/x4oS5frxg0OlTUQ/9Hx48d58sknee6557jwwgt5++23uemmm/wOyxgToqxKBHmBYu4UF/A6fYpJ1rncHx09epQZM2bQq1cvtm7daknAmCiTVYlgt6o+FbZIoog1EMOhQ4eYNGkSjzzyCGXKlGHbtm2ULl3a77CMMecgqxKBZLHMxLD33nuPhIQEBg4ceKaTOEsCxkSvrEoErcMWRQQLbBOA2G4X2LNnDw8++CCzZ88mISGBefPm0axZM7/DMsacp0xLBKq6P5yBRKrANgGI7XaBzp07M3fuXIYMGcKGDRssCRiTS5x1p3OxKJbbBH788UdKlChBsWLFGDt2LAULFqRevXp+h2WMyUH2rL/JkKry8ssvEx8fz+DBgwFo3LixJQFjciFLBOYP/v3vf9O6dWuSk5Np3Lgx/fr18zskY4yHLBFkYfa6naz7LraaSubNm0f9+vX54osvmDp1KsuXL6dGjRp+h2WM8ZC1EWQh/W6hWGgcTu8krkGDBrRv356xY8dSqVIlv8MyxoSBlQiy0ax6Kbo1q+J3GJ45ceIEQ4cO5bbbbkNVqVmzJnPnzrUkYEwMsUQQwz777DMaN27MkCFDyJcvn3USZ0yMskQQg44dO8bDDz9M8+bNOXDgAO+88w6zZs2yweONiVGWCGLQ8ePHef3110lOTiYlJYUbbrBexY2JZZ4mAhFpJyLbRWSHiAzIYHmSiGxyp7Ui0sDLeGLZwYMHGTFiBKdOnaJ06dJs27aNyZMnU7x4bHaXYYz5H88SgTve8UTgOiAe6Coi8UGrfQe0VNVLgWHAVK/iiWXvvPPOmQfDPv74YwAuuOACn6MyxkQKL0sETYEdqvqtqp4A5gAdAldQ1bWqesCd/RSIiFtVZq/byV+mfPK7Poai0Z49e+jatSs33XQTpUuXZt26dbRq1crvsIwxEcbLRFAR+CFgfpf7XmZ6Ae9ntEBEkkVkvYis37NnTw6GmLH0juaivYO5zp078+abb/LUU0+xfv16EhMT/Q7JGBOBvHygLKPxDDIc4lJErsZJBFdmtFxVp+JWGyUmJoZlmMxo7Whu165dlCxZkmLFijFu3DgKFixIQkKC32EZYyKYlyWCXUDlgPlKwE/BK4nIpcArQAdV3edhPLna6dOnmTJlCvHx8WcGj7/sssssCRhjsuVlIvgcqCki1UWkAHAbsDBwBRGpAswHuqvqNx7Gkqv961//4pprrqFv3740bdqU++67z++QjDFRxLOqIVU9JSL3AkuAvMB0Vd0qIn3d5S8Bg4HSwCQRATilqr5UZAeORBZNo5DNnTuXHj16ULBgQaZNm0bPnj1xP0tjjAmJp53OqeoiYFHQey8FvO4N9PYyhlAFNhBHQyNxeidxjRo1okOHDjz//PNcdNFFfodljIlC1vtogGhoIP7tt98YMWIE27Zt44033uCSSy5hzpw5fodljIli1sVEFPn000+57LLLGDZsGIULF7ZO4owxOcISQRQ4evQoDz30EFdccQWHDx9m0aJFzJw50zqJM8bkCEsEUSA1NZU5c+Zwzz33sHXrVq677jq/QzLG5CLWRhChfv31VyZMmMBjjz12ppO4kiVL+h2WMSYXshJBBFqwYAHx8fEMHTqUtWvXAlgSMMZ4xhJBBPnvf//Lrbfeys0330y5cuVYt24dLVq08DssY0wuZ1VDEaRLly589tlnDB8+nP/7v/8jf/78fodkjIkBlgh8tnPnTi644ALi4uIYP348BQsWJD4+eNgGY4zxjlUN+eT06dNMnDiRhIQEBg8eDECjRo0sCRhjws4SgQ+2b99Oy5Ytuffee2nevDkPPPCA3yEZY2JYTFcN+dHR3BtvvEGPHj0oXLgwr776KnfccYd1EmeM8VVMlwjSO5oDPO9oTtUZT6dx48Z0
6tSJbdu2ceedd1oSMMb4LqZLBOB9R3OpqakMGzaMr7/+mnnz5lGjRg1mz57t2fGMMeZsxXSJwGtr166lUaNGjBw5kri4OOskzhgTkSwReODIkSPcf//9XHnllRw7dozFixczY8YM6yTOGBORLBF44MSJE8ybN49+/fqxZcsW2rZt63dIxhiTqZhvI8gp+/fvZ/z48TzxxBOUKlWKbdu2UaJECb/DMsaYbFmJIAe8+eabxMfHM3z48DOdxFkSMMZEC0sE52H37t107tyZLl26cNFFF7F+/XrrJM4YE3ViNhHMXreTdd/tP6993Hrrrbz33nuMGjWKzz77jIYNG+ZMcMYYE0Yx20aQ/kTx2T5E9v3331OqVCni4uKYMGEChQsXpnbt2l6EaIwxYRGzJQKAZtVL0a1ZlZDWPX36NBMmTCAhIYFBgwYB0LBhQ0sCxpioF7MlgrPx9ddf07t3b9asWUO7du146KGH/A7JGGNyTEyXCEIxZ84cGjRowLZt25g5cyaLFi2iatWqfodljDE5xhJBJk6fPg1AkyZNuOWWW0hJSaF79+7WSZwxJtexRBDk+PHjDBgwgM6dO6Oq1KhRg9dff53y5cv7HZoxxnjCEkGA1atX07BhQ0aPHk3p0qU5efKk3yEZY4znLBEAhw8fpl+/frRo0YKTJ0/ywQcf8Morr1CgQAG/QzPGGM/FzF1DgaORwe9HJDt58iQLFizgwQcfZPjw4RQtWtSvMI0xJuxiJhGkj0aWfvGvVbYwsnM9p041oVSpUnz99dfExcX5HKUxxoSfp1VDItJORLaLyA4RGZDBchGR8e7yTSJymZfxxFcozpzky+lSahcfP3ULbz7Tn08++QTAkoAxJmZ5lghEJC8wEbgOiAe6ikh80GrXATXdKRmY7FU84IwT0KlTJ2699VYqV67M+vXrueqqq7w8pDHGRDwvSwRNgR2q+q2qngDmAB2C1ukAzFTHp0BJEangVUBbU7ayePFinnnmGT799FMaNGjg1aGMMSZqeNlGUBH4IWB+F9AshHUqArsDVxKRZJwSA1WqhNY3ULD4i4pTLn8C9z30FbVq1TqnfRhjTG7kZSLI6BFcPYd1UNWpwFSAxMTEPywPxZM3JpzLZsYYk+t5WTW0C6gcMF8J+Okc1jHGGOMhLxPB50BNEakuIgWA24CFQessBHq4dw9dDhxU1d3BOzLGGOMdz6qGVPWUiNwLLAHyAtNVdauI9HWXvwQsAq4HdgDHgJ5exWOMMSZjnj5QpqqLcC72ge+9FPBagX5exmCMMSZr1teQMcbEOEsExhgT4ywRGGNMjLNEYIwxMU6c9troISJ7gO/PcfMywN4cDCca2DnHBjvn2HA+51xVVctmtCDqEsH5EJH1qprodxzhZOccG+ycY4NX52xVQ8YYE+MsERhjTIyLtUQw1e8AfGDnHBvsnGODJ+ccU20Exhhj/ijWSgTGGGOCWCIwxpgYlysTgYi0E5HtIrJDRAZksFxEZLy7fJOIXOZHnDkphHNOcs91k4isFZGoH6czu3MOWK+JiKSJSJdwxueFUM5ZRFqJyEYR2SoiK8MdY04L4W+7hIi8IyJfuecc1b0Yi8h0EflFRLZksjznr1+qmqsmnC6v/w1cDBQAvgLig9a5HngfZ4S0y4F1fscdhnO+ArjAfX1dLJxzwHof4vSC28XvuMPwey4JpABV3PlyfscdhnN+HBjtvi4L7AcK+B37eZxzC+AyYEsmy3P8+pUbSwRNgR2q+q2qngDmAB2C1ukAzFTHp0BJEakQ7kBzULbnrKprVfWAO/spzmhw0SyU3zPAfcCbwC/hDM4joZxzN2C+qu4EUNVoP+9QzlmBOBERoBhOIjgV3jBzjqquwjmHzOT49Ss3JoKKwA8B87vc9852nWhytufTC+cbRTTL9pxFpCJwM/ASuUMov+dawAUiskJEvhCRHmGLzhuhnPOLQF2cYW43Aw+o6unwhOeLHL9+eTowjU8kg/eC75EN
ZZ1oEvL5iMjVOIngSk8j8l4o5zwOeFRV05wvi1EvlHPOBzQGWgOFgU9E5FNV/cbr4DwSyjm3BTYC1wA1gA9EZLWqHvI4Nr/k+PUrNyaCXUDlgPlKON8UznadaBLS+YjIpcArwHWqui9MsXkllHNOBOa4SaAMcL2InFLVBWGJMOeF+re9V1WPAkdFZBXQAIjWRBDKOfcERqlTgb5DRL4D6gCfhSfEsMvx61durBr6HKgpItVFpABwG7AwaJ2FQA+39f1y4KCq7g53oDko23MWkSrAfKB7FH87DJTtOatqdVWtpqrVgHnAPVGcBCC0v+23gatEJJ+IFAGaAdvCHGdOCuWcd+KUgBCR8kBt4NuwRhleOX79ynUlAlU9JSL3Aktw7jiYrqpbRaSvu/wlnDtIrgd2AMdwvlFErRDPeTBQGpjkfkM+pVHcc2OI55yrhHLOqrpNRBYDm4DTwCuqmuFtiNEgxN/zMGCGiGzGqTZ5VFWjtntqEfkH0AooIyK7gCeB/ODd9cu6mDDGmBiXG6uGjDHGnAVLBMYYE+MsERhjTIyzRGCMMTHOEoExxsQ4SwQmIrm9hW4MmKplse6RHDjeDBH5zj3WBhFpfg77eEVE4t3XjwctW3u+Mbr7Sf9ctrg9bpbMZv2GInJ9Thzb5F52+6iJSCJyRFWL5fS6WexjBvCuqs4TkWuBZ1X10vPY33nHlN1+ReQ14BtVHZHF+ncCiap6b07HYnIPKxGYqCAixURkufttfbOI/KGnURGpICKrAr4xX+W+f62IfOJuO1dEsrtArwIucbft7+5ri4g86L5XVETec/u/3yIif3HfXyEiiSIyCijsxjHLXXbE/fnPwG/obkmks4jkFZExIvK5OH3M9wnhY/kEt7MxEWkqzjgTX7o/a7tP4j4F/MWN5S9u7NPd43yZ0edoYpDffW/bZFNGE5CG05HYRuAtnKfgi7vLyuA8VZleoj3i/vwbMNB9nReIc9ddBRR1338UGJzB8WbgjlcA3AKsw+m8bTNQFKd7461AI6Az8HLAtiXcnytwvn2fiSlgnfQYbwZec18XwOlFsjCQDDzhvl8QWA9UzyDOIwHnNxdo584XB/K5r9sAb7qv7wReDNh+JHC7+7okTh9ERf3+fdvk75TrupgwucZxVW2YPiMi+YGRItICp+uEikB54OeAbT4HprvrLlDVjSLSEogH1rhdaxTA+SadkTEi8gSwB6eH1tbAW+p04IaIzAeuAhYDz4rIaJzqpNVncV7vA+NFpCDQDlilqsfd6qhL5X+jqJUAagLfBW1fWEQ2AtWAL4APAtZ/TURq4vREmT+T418L3CQiD7vzhYAqRHd/ROY8WSIw0SIJZ/Spxqp6UkT+g3MRO0NVV7mJoj3wdxEZAxwAPlDVriEc4xFVnZc+IyJtMlpJVb8RkcY4/b08LSJLVfWpUE5CVVNFZAVO18l/Af6RfjjgPlVdks0ujqtqQxEpAbwL9APG4/S385Gq3uw2rK/IZHsBOqvq9lDiNbHB2ghMtCgB/OImgauBqsEriEhVd52XgWk4w/19CvxJRNLr/IuISK0Qj7kK6OhuUxSnWme1iFwEHFPV14Fn3eMEO+mWTDIyB6ejsKtwOlPD/Xl3+jYiUss9ZoZU9SBwP/Cwu00J4Ed38Z0Bqx7GqSJLtwS4T9zikYg0yuwYJnZYIjDRYhaQKCLrcUoHX2ewTitgo4h8iVOP/4Kq7sG5MP5DRDbhJIY6oRxQVTfgtB18htNm8IqqfgnUBz5zq2gGAsMz2HwqsCm9sTjIUpxxaZepM/wiOONEpAAbxBm0fArZlNjdWL7C6Zr5GZzSyRqc9oN0HwHx6Y3FOCWH/G5sW9x5E+Ps9lFjjIlxViIwxpgYZ4nAGGNinCUCY4yJcZYIjDEmxlkiMMaYGGeJwBhjYpwlAmOMiXH/D95IOHfpsjmgAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "#Evaluating classification result by ROC curves\n",
    "from sklearn.metrics import roc_curve\n",
    "y_pred_prob_svm = model_2_svm.predict_proba(x_test_svm)[:,1]\n",
    "fpr, tpr, threshold = roc_curve(y_test_svm, y_pred_prob_svm)\n",
    "plt.plot([0, 1], [0, 1], 'k--')\n",
    "plt.plot(fpr,tpr)\n",
    "auc = roc_auc_score(y_test_svm,  y_pred_prob_svm)\n",
    "plt.title(f'AUC: {auc}')\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
关于此算法

导入库

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

读取数据

# Load the HR employee-attrition CSV into the working DataFrame `ibm`.
# NOTE(review): the path is absolute (filesystem root) — adjust it to wherever the CSV actually lives.
ibm = pd.read_csv('/WA_Fn-UseC_-HR-Employee-Attrition.csv')
# Lift the column-count display limit so wide frames are shown in full.
pd.set_option('display.max_columns', None)

数据集信息

ibm.shape
(1470, 35)
ibm.describe()
年龄 日薪 家到公司的距离 教育程度 员工数量 员工编号 工作环境满意度 时薪 工作投入度 职位等级 工作满意度 月收入 月费率 曾就职公司数量 薪资涨幅百分比 绩效评级 人际关系满意度 标准工时 股票期权等级 总工作年限 去年培训次数 工作与生活平衡 在公司年限 在当前职位年限 自上次升职年限 与当前经理共事年限
计数 1470.000000 1470.000000 1470.000000 1470.000000 1470.0 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.0 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000 1470.000000
平均值 36.923810 802.485714 9.192517 2.912925 1.0 1024.865306 2.721769 65.891156 2.729932 2.063946 2.728571 6502.931293 14313.103401 2.693197 15.209524 3.153741 2.712245 80.0 0.793878 11.279592 2.799320 2.761224 7.008163 4.229252 2.187755 4.123129
标准差 9.135373 403.509100 8.106864 1.024165 0.0 602.024335 1.093082 20.329428 0.711561 1.106940 1.102846 4707.956783 7117.786044 2.498009 3.659938 0.360824 1.081209 0.0 0.852077 7.780782 1.289271 0.706476 6.126525 3.623137 3.222430 3.568136
最小值 18.000000 102.000000 1.000000 1.000000 1.0 1.000000 1.000000 30.000000 1.000000 1.000000 1.000000 1009.000000 2094.000000 0.000000 11.000000 3.000000 1.000000 80.0 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
25% 30.000000 465.000000 2.000000 2.000000 1.0 491.250000 2.000000 48.000000 2.000000 1.000000 2.000000 2911.000000 8047.000000 1.000000 12.000000 3.000000 2.000000 80.0 0.000000 6.000000 2.000000 2.000000 3.000000 2.000000 0.000000 2.000000
50% 36.000000 802.000000 7.000000 3.000000 1.0 1020.500000 3.000000 66.000000 3.000000 2.000000 3.000000 4919.000000 14235.500000 2.000000 14.000000 3.000000 3.000000 80.0 1.000000 10.000000 3.000000 3.000000 5.000000 3.000000 1.000000 3.000000
75% 43.000000 1157.000000 14.000000 4.000000 1.0 1555.750000 4.000000 83.750000 3.000000 3.000000 4.000000 8379.000000 20461.500000 4.000000 18.000000 3.000000 4.000000 80.0 1.000000 15.000000 3.000000 3.000000 9.000000 7.000000 3.000000 7.000000
最大值 60.000000 1499.000000 29.000000 5.000000 1.0 2068.000000 4.000000 100.000000 4.000000 5.000000 4.000000 19999.000000 26999.000000 9.000000 25.000000 4.000000 4.000000 80.0 3.000000 40.000000 6.000000 4.000000 40.000000 18.000000 15.000000 17.000000
import statistics

# Report the most frequent value of every column, one line per column.
for column_name in ibm.columns:
    most_common = statistics.mode(ibm[column_name])
    print(column_name, " mode: ", most_common)
Age  mode:  35
Attrition  mode:  No
BusinessTravel  mode:  Travel_Rarely
DailyRate  mode:  691
Department  mode:  Research & Development
DistanceFromHome  mode:  2
Education  mode:  3
EducationField  mode:  Life Sciences
EmployeeCount  mode:  1
EmployeeNumber  mode:  1
EnvironmentSatisfaction  mode:  3
Gender  mode:  Male
HourlyRate  mode:  66
JobInvolvement  mode:  3
JobLevel  mode:  1
JobRole  mode:  Sales Executive
JobSatisfaction  mode:  4
MaritalStatus  mode:  Married
MonthlyIncome  mode:  2342
MonthlyRate  mode:  9150
NumCompaniesWorked  mode:  1
Over18  mode:  Y
OverTime  mode:  No
PercentSalaryHike  mode:  11
PerformanceRating  mode:  3
RelationshipSatisfaction  mode:  3
StandardHours  mode:  80
StockOptionLevel  mode:  0
TotalWorkingYears  mode:  10
TrainingTimesLastYear  mode:  2
WorkLifeBalance  mode:  3
YearsAtCompany  mode:  5
YearsInCurrentRole  mode:  2
YearsSinceLastPromotion  mode:  0
YearsWithCurrManager  mode:  2
ibm.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 35 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Age                       1470 non-null   int64 
 1   Attrition                 1470 non-null   object
 2   BusinessTravel            1470 non-null   object
 3   DailyRate                 1470 non-null   int64 
 4   Department                1470 non-null   object
 5   DistanceFromHome          1470 non-null   int64 
 6   Education                 1470 non-null   int64 
 7   EducationField            1470 non-null   object
 8   EmployeeCount             1470 non-null   int64 
 9   EmployeeNumber            1470 non-null   int64 
 10  EnvironmentSatisfaction   1470 non-null   int64 
 11  Gender                    1470 non-null   object
 12  HourlyRate                1470 non-null   int64 
 13  JobInvolvement            1470 non-null   int64 
 14  JobLevel                  1470 non-null   int64 
 15  JobRole                   1470 non-null   object
 16  JobSatisfaction           1470 non-null   int64 
 17  MaritalStatus             1470 non-null   object
 18  MonthlyIncome             1470 non-null   int64 
 19  MonthlyRate               1470 non-null   int64 
 20  NumCompaniesWorked        1470 non-null   int64 
 21  Over18                    1470 non-null   object
 22  OverTime                  1470 non-null   object
 23  PercentSalaryHike         1470 non-null   int64 
 24  PerformanceRating         1470 non-null   int64 
 25  RelationshipSatisfaction  1470 non-null   int64 
 26  StandardHours             1470 non-null   int64 
 27  StockOptionLevel          1470 non-null   int64 
 28  TotalWorkingYears         1470 non-null   int64 
 29  TrainingTimesLastYear     1470 non-null   int64 
 30  WorkLifeBalance           1470 non-null   int64 
 31  YearsAtCompany            1470 non-null   int64 
 32  YearsInCurrentRole        1470 non-null   int64 
 33  YearsSinceLastPromotion   1470 non-null   int64 
 34  YearsWithCurrManager      1470 non-null   int64 
dtypes: int64(26), object(9)
memory usage: 402.1+ KB

数据预处理

# Drop columns that are not useful as features. The describe() output shows
# EmployeeCount and StandardHours are constant (std 0); EmployeeNumber is
# presumably a row identifier and Over18 presumably single-valued — TODO confirm.
ibm.drop(columns=['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours'], inplace=True)
# drop_duplicates() returns a new frame by default, so a bare call discards its
# result; apply it in place so duplicate rows are actually removed from `ibm`.
ibm.drop_duplicates(inplace=True)
# Display the cleaned frame.
ibm
年龄 离职 出差 日薪 部门 家到公司的距离 教育程度 教育领域 工作环境满意度 性别 时薪 工作投入度 职位等级 职位 工作满意度 婚姻状况 月收入 月费率 曾就职公司数量 加班 薪资涨幅百分比 绩效评级 人际关系满意度 股票期权等级 总工作年限 去年培训次数 工作与生活平衡 在公司年限 在当前职位年限 自上次升职年限 与当前经理共事年限
0 41 很少出差 1102 销售部 1 2 生命科学 2 女性 94 3 2 销售主管 4 单身 5993 19479 8 11 3 1 0 8 0 1 6 4 0 5
1 49 经常出差 279 研发部 8 1 生命科学 3 男性 61 2 2 研究科学家 2 已婚 5130 24907 1 23 4 4 1 10 3 3 10 7 1 7
2 37 很少出差 1373 研发部 2 2 其他 4 男性 92 2 1 实验室技术员 3 单身 2090 2396 6 15 3 2 0 7 3 3 0 0 0 0
3 33 经常出差 1392 研发部 3 4 生命科学 4 女性 56 3 1 研究科学家 3 已婚 2909 23159 1 11 3 3 0 8 3 3 8 7 3 0
4 27 很少出差 591 研发部 2 1 医疗 1 男性 40 3 1 实验室技术员 2 已婚 3468 16632 9 12 3 4 1 6 3 3 2 2 2 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1465 36 经常出差 884 研发部 23 2 医疗 3 男性 41 4 2 实验室技术员 4 已婚 2571 12290 4 17 3 3 1 17 3 3 5 2 0 3
1466 39 很少出差 613 研发部 6 1 医疗 4 男性 42 2 3 医疗保健代表 1 已婚 9991 21457 4 15 3 1 1 9 5 3 7 7 1 7
1467 27 很少出差 155 研发部 4 3 生命科学 2 男性 87 4 2 制造总监 2 已婚 6142 5174 1 20 4 2 1 6 0 3 6 2 0 3
1468 49 经常出差 1023 销售部 2 3 医疗 4 男性 63 2 2 销售主管 2 已婚 5390 13243 2 14 3 4 0 17 3 2 9 6 0 8
1469 34 很少出差 628 研发部 8 3 医疗 2 男性 82 4 2 实验室技术员 3 已婚 4404 10228 2 12 3 1 0 6 3 4 4 3 1 2

1470 行 × 31 列

ibm.isnull().sum()
Age                         0
Attrition                   0
BusinessTravel              0
DailyRate                   0
Department                  0
DistanceFromHome            0
Education                   0
EducationField              0
EnvironmentSatisfaction     0
Gender                      0
HourlyRate                  0
JobInvolvement              0
JobLevel                    0
JobRole                     0
JobSatisfaction             0
MaritalStatus               0
MonthlyIncome               0
MonthlyRate                 0
NumCompaniesWorked          0
OverTime                    0
PercentSalaryHike           0
PerformanceRating           0
RelationshipSatisfaction    0
StockOptionLevel            0
TotalWorkingYears           0
TrainingTimesLastYear       0
WorkLifeBalance             0
YearsAtCompany              0
YearsInCurrentRole          0
YearsSinceLastPromotion     0
YearsWithCurrManager        0
dtype: int64
# Encode two text columns as integers in a single column-wise replace:
#   Attrition:      No -> 0, Yes -> 1
#   BusinessTravel: Non-Travel -> 0, Travel_Rarely -> 1, Travel_Frequently -> 2
label_codes = {
    'Attrition': {'Yes': 1, 'No': 0},
    'BusinessTravel': {'Non-Travel': 0, 'Travel_Rarely': 1, 'Travel_Frequently': 2},
}
ibm.replace(label_codes, inplace=True)
# One-hot encode Department and insert the indicator columns at positions 5-7.
dummy = pd.get_dummies(ibm['Department'])
# NOTE(review): 'Dp_Sales&Development' actually receives the
# 'Research & Development' indicator; the name is kept as-is because other
# cells may refer to it.
department_columns = (
    (5, 'Dp_Sales&Development', 'Research & Development'),
    (6, 'Dp_Sales', 'Sales'),
    (7, 'Dp_HumanResources', 'Human Resources'),
)
for position, new_name, category in department_columns:
    ibm.insert(position, new_name, dummy[category])

# The text column has been replaced by its indicator columns.
ibm.drop(columns='Department', inplace=True)
# One-hot encode EducationField and insert the indicator columns at positions 11-16.
dummy = pd.get_dummies(ibm['EducationField'])
education_columns = (
    (11, 'EF_Life Sciences', 'Life Sciences'),
    (12, 'EF_Medical', 'Medical'),
    (13, 'EF_Marketing', 'Marketing'),
    (14, 'EF_TechnicalDegree', 'Technical Degree'),
    (15, 'EF_HumanResources', 'Human Resources'),
    (16, 'EF_Other', 'Other'),
)
for position, new_name, category in education_columns:
    ibm.insert(position, new_name, dummy[category])

# The text column has been replaced by its indicator columns.
ibm.drop(columns='EducationField', inplace=True)
# Encode Gender as an integer flag: Male -> 0, Female -> 1.
gender_codes = {'Male': 0, 'Female': 1}
ibm.replace({'Gender': gender_codes}, inplace=True)
# One-hot encode JobRole and insert the indicator columns at positions 23-31.
dummy = pd.get_dummies(ibm['JobRole'])
# NOTE(review): 'JR_HealthcareRepresentive' is spelled this way on purpose of
# compatibility — the name is kept unchanged because other cells may refer to it.
job_role_columns = (
    (23, 'JR_HealthcareRepresentive', 'Healthcare Representative'),
    (24, 'JR_HumanResource', 'Human Resources'),
    (25, 'JR_LaboratoryTechnician', 'Laboratory Technician'),
    (26, 'JR_Manager', 'Manager'),
    (27, 'JR_ManufacturingDirector', 'Manufacturing Director'),
    (28, 'JR_ResearchDirector', 'Research Director'),
    (29, 'JR_ResearchScientist', 'Research Scientist'),
    (30, 'JR_SalesExecutive', 'Sales Executive'),
    (31, 'JR_SalesRepresentative', 'Sales Representative'),
)
for position, new_name, category in job_role_columns:
    ibm.insert(position, new_name, dummy[category])

# The text column has been replaced by its indicator columns.
ibm.drop(columns='JobRole', inplace=True)
# One-hot encode MaritalStatus and insert the indicator columns at positions 34-36.
dummy = pd.get_dummies(ibm['MaritalStatus'])
marital_status_columns = (
    (34, 'MS_Married', 'Married'),
    (35, 'MS_Single', 'Single'),
    (36, 'MS_Divorced', 'Divorced'),
)
for position, new_name, category in marital_status_columns:
    ibm.insert(position, new_name, dummy[category])

# The text column has been replaced by its indicator columns.
ibm.drop(columns='MaritalStatus', inplace=True)
# Encode OverTime as an integer flag: No -> 0, Yes -> 1.
ibm.replace({'OverTime': {'No': 0, 'Yes': 1}}, inplace = True)
# The former Over18 encoding step was removed: the Over18 column is dropped
# earlier in preprocessing, so that replace had no column left to act on.
def iqr_outliers(data):
    """Print and return the values of ``data`` outside the 1.5 * IQR fences.

    Parameters
    ----------
    data
        Iterable of numeric values that exposes ``quantile(q)``
        (e.g. a pandas Series).

    Returns
    -------
    list
        Every value strictly greater than ``Q3 + 1.5 * IQR`` or strictly less
        than ``Q1 - 1.5 * IQR``, in iteration order (duplicates are kept).

    Side effects
    ------------
    Prints the outlier list and its length.
    """
    firstQuartile = data.quantile(0.25)
    thirdQuartile = data.quantile(0.75)

    iqr = thirdQuartile - firstQuartile

    Lower_bound = firstQuartile - 1.5 * iqr
    Upper_bound = thirdQuartile + 1.5 * iqr

    # Real comparison operators here: the HTML-escaped forms (&gt; / &lt;)
    # are not valid Python.
    out = [value for value in data if value > Upper_bound or value < Lower_bound]

    print("Outliers:",out , "\nCount: ", len(out), "\n")
    return out
# Run the IQR outlier report on every column of the encoded frame.
# Note: for 0/1 indicator columns the fences can collapse onto the majority
# value, so the minority class is listed as "outliers" (see the Attrition output).
for column_name in ibm.columns:
    print(column_name)
    iqr_outliers(ibm[column_name])
Age
Outliers: [] 
Count:  0 

Attrition
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  237 

BusinessTravel
Outliers: [2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2] 
Count:  427 

DailyRate
Outliers: [] 
Count:  0 

Dp_Sales&Development
Outliers: [] 
Count:  0 

Dp_Sales
Outliers: [] 
Count:  0 

Dp_HumanResources
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  63 

DistanceFromHome
Outliers: [] 
Count:  0 

Education
Outliers: [] 
Count:  0 

EnvironmentSatisfaction
Outliers: [] 
Count:  0 

EF_Life Sciences
Outliers: [] 
Count:  0 

EF_Medical
Outliers: [] 
Count:  0 

EF_Marketing
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  159 

EF_TechnicalDegree
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  132 

EF_HumanResources
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  27 

EF_Other
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  82 

Gender
Outliers: [] 
Count:  0 

HourlyRate
Outliers: [] 
Count:  0 

JobInvolvement
Outliers: [] 
Count:  0 

JobLevel
Outliers: [] 
Count:  0 

JobSatisfaction
Outliers: [] 
Count:  0 

JR_HealthcareRepresentive
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  131 

JR_HumanResource
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  52 

JR_LaboratoryTechnician
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  259 

JR_Manager
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  102 

JR_ManufacturingDirector
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  145 

JR_ResearchDirector
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  80 

JR_ResearchScientist
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  292 

JR_SalesExecutive
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  326 

JR_SalesRepresentative
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  83 

MonthlyIncome
Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] 
Count:  114 

MonthlyRate
Outliers: [] 
Count:  0 

NumCompaniesWorked
Outliers: [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9] 
Count:  52 

MS_Married
Outliers: [] 
Count:  0 

MS_Single
Outliers: [] 
Count:  0 

MS_Divorced
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  327 

OverTime
Outliers: [] 
Count:  0 

PercentSalaryHike
Outliers: [] 
Count:  0 

PerformanceRating
Outliers: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] 
Count:  226 

RelationshipSatisfaction
Outliers: [] 
Count:  0 

StockOptionLevel
Outliers: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 
Count:  85 

TotalWorkingYears
Outliers: [31, 29, 37, 38, 30, 40, 36, 34, 32, 33, 37, 30, 36, 31, 33, 32, 37, 31, 32, 32, 30, 34, 30, 40, 29, 35, 31, 33, 31, 29, 32, 30, 33, 30, 29, 31, 32, 33, 36, 34, 31, 36, 33, 31, 29, 33, 29, 32, 31, 35, 29, 32, 34, 36, 32, 30, 36, 29, 34, 37, 29, 29, 35] 
Count:  63 

TrainingTimesLastYear
Outliers: [0, 5, 5, 5, 6, 5, 5, 5, 6, 6, 0, 0, 0, 5, 0, 5, 5, 5, 6, 6, 5, 0, 6, 5, 5, 0, 5, 5, 6, 5, 5, 5, 0, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 0, 0, 5, 5, 5, 6, 6, 5, 0, 5, 0, 5, 5, 0, 6, 0, 5, 5, 6, 6, 5, 6, 5, 0, 5, 5, 5, 5, 0, 6, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 0, 5, 0, 5, 5, 6, 5, 6, 5, 0, 5, 5, 0, 6, 6, 5, 6, 0, 5, 0, 6, 6, 6, 6, 5, 5, 0, 5, 0, 0, 6, 0, 6, 5, 6, 5, 5, 0, 5, 6, 6, 5, 5, 0, 0, 6, 0, 0, 5, 0, 5, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 6, 6, 0, 6, 6, 5, 5, 0, 0, 6, 6, 0, 5, 0, 0, 0, 0, 0, 5, 5, 6, 5, 5, 0, 5, 5, 0, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 0, 0, 5, 5, 5, 5, 6, 0, 0, 6, 6, 6, 6, 5, 5, 5, 6, 5, 0, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 0, 5, 5, 5, 5, 5, 0, 0, 0, 6, 5, 6, 6, 5, 6, 0, 6, 6, 5, 6, 6, 5, 5, 5, 0] 
Count:  238 

WorkLifeBalance
Outliers: [] 
Count:  0 

YearsAtCompany
Outliers: [25, 22, 22, 27, 21, 22, 37, 25, 20, 40, 20, 24, 20, 24, 33, 20, 19, 22, 33, 24, 19, 21, 20, 36, 20, 20, 22, 24, 21, 21, 25, 21, 29, 20, 27, 20, 31, 32, 20, 20, 21, 22, 22, 34, 24, 26, 31, 20, 31, 26, 19, 21, 21, 32, 21, 19, 20, 22, 20, 21, 26, 20, 22, 24, 33, 29, 25, 21, 19, 19, 20, 19, 33, 19, 19, 20, 20, 20, 20, 20, 32, 20, 21, 33, 36, 26, 30, 22, 23, 23, 21, 21, 22, 22, 19, 22, 19, 22, 20, 20, 20, 22, 20, 20] 
Count:  104 

YearsInCurrentRole
Outliers: [15, 16, 18, 15, 18, 17, 16, 15, 16, 15, 16, 16, 15, 16, 17, 15, 15, 15, 17, 17, 16] 
Count:  21 

YearsSinceLastPromotion
Outliers: [8, 15, 8, 8, 9, 13, 12, 10, 11, 9, 12, 15, 15, 15, 9, 11, 11, 9, 12, 11, 15, 11, 10, 9, 11, 9, 8, 11, 11, 8, 13, 9, 9, 12, 10, 11, 15, 13, 9, 11, 10, 8, 8, 11, 9, 11, 12, 11, 14, 13, 14, 8, 11, 15, 10, 11, 11, 15, 11, 13, 11, 13, 15, 8, 13, 15, 11, 14, 15, 15, 9, 11, 9, 8, 9, 15, 11, 12, 9, 8, 10, 14, 8, 13, 13, 12, 14, 8, 8, 8, 14, 14, 8, 12, 13, 14, 14, 12, 11, 8, 11, 9, 12, 8, 9, 11, 9] 
Count:  107 

YearsWithCurrManager
Outliers: [17, 15, 15, 15, 15, 17, 16, 17, 15, 17, 17, 17, 17, 16] 
Count:  14 

def remove_outliers(c_name):
    """Repeatedly drop IQR outliers of column ``c_name`` from the global ``ibm`` frame.

    Each pass asks ``iqr_outliers`` (defined elsewhere in this notebook) for the
    outlier values of the column and removes every row holding one of them.
    Removing rows changes the quartiles, so the check is repeated until a pass
    reports no outliers; the total number of rows removed can therefore exceed
    the count reported by the first pass.

    Parameters
    ----------
    c_name : str
        Name of the column in ``ibm`` to clean.

    Returns
    -------
    None
        ``ibm`` is modified in place; the original row labels of the
        surviving rows are kept (the index is not reset).
    """
    outliers = iqr_outliers(ibm[c_name])

    while len(outliers) != 0:
        # One vectorized membership test + one drop per pass, instead of a
        # separate full-frame scan and in-place drop for every outlier value.
        ibm.drop(ibm.index[ibm[c_name].isin(outliers)], inplace=True)
        outliers = iqr_outliers(ibm[c_name])
remove_outliers('MonthlyIncome')
Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] 
Count:  114 

Outliers: [15427, 13458, 14756, 13245, 13664, 13503, 13549, 13872, 13734, 13591, 16064, 13675, 13496, 13603, 13525, 16015, 13964, 15992, 14336, 13212, 16555, 14118, 13610, 13237, 16184, 15402, 14814, 13770, 16307, 13826, 14275, 13582, 14852, 13194, 13973, 13726, 13320, 13120, 13499, 13758, 13191, 16124, 13577, 14026, 13142, 13695, 13402, 13247, 14732, 16422, 13757, 16032, 16328, 14411, 16437, 15202, 16413, 13269, 13966, 15972, 15379, 12936, 12965, 13116, 13464, 16291, 15787, 13225, 13348, 13341, 13206, 13744, 13570] 
Count:  73 

Outliers: [11994, 12490, 12185, 11849, 11996, 12061, 11878, 12504, 11935, 12808, 11836, 12742, 11904, 12169, 11916, 11957, 12031] 
Count:  17 

Outliers: [11713, 11691] 
Count:  2 

Outliers: [11631] 
Count:  1 

Outliers: [] 
Count:  0 

# Display the data set as it stands after the MonthlyIncome outlier removal above.
ibm
年龄 离职 出差 日薪 销售与研发部门 销售部门 人力资源部 家到公司的距离 教育程度 工作环境满意度 生命科学教育领域 医学教育领域 市场营销教育领域 技术学位教育领域 人力资源教育领域 其他教育领域 性别 时薪 工作投入度 职位等级 工作满意度 医疗保健代表职位 人力资源职位 实验室技术员职位 经理职位 制造总监职位 研究总监职位 研究科学家职位 销售主管职位 销售代表职位 月收入 月费率 曾就职公司数量 已婚婚姻状况 单身婚姻状况 离婚婚姻状况 加班 薪资涨幅百分比 绩效评级 人际关系满意度 股票期权等级 总工作年限 去年培训次数 工作与生活平衡 在公司年限 在当前职位年限 自上次升职年限 与当前经理共事年限
0 41 1 1 1102 0 1 0 1 2 2 1 0 0 0 0 0 1 94 3 2 4 0 0 0 0 0 0 0 1 0 5993 19479 8 0 1 0 1 11 3 1 0 8 0 1 6 4 0 5
1 49 0 2 279 1 0 0 8 1 3 1 0 0 0 0 0 0 61 2 2 2 0 0 0 0 0 0 1 0 0 5130 24907 1 1 0 0 0 23 4 4 1 10 3 3 10 7 1 7
2 37 1 1 1373 1 0 0 2 2 4 0 0 0 0 0 1 0 92 2 1 3 0 0 1 0 0 0 0 0 0 2090 2396 6 0 1 0 1 15 3 2 0 7 3 3 0 0 0 0
3 33 0 2 1392 1 0 0 3 4 4 1 0 0 0 0 0 1 56 3 1 3 0 0 0 0 0 0 1 0 0 2909 23159 1 1 0 0 1 11 3 3 0 8 3 3 8 7 3 0
4 27 0 1 591 1 0 0 2 1 1 0 1 0 0 0 0 0 40 3 1 2 0 0 1 0 0 0 0 0 0 3468 16632 9 1 0 0 0 12 3 4 1 6 3 3 2 2 2 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1465 36 0 2 884 1 0 0 23 2 3 0 1 0 0 0 0 0 41 4 2 4 0 0 1 0 0 0 0 0 0 2571 12290 4 1 0 0 0 17 3 3 1 17 3 3 5 2 0 3
1466 39 0 1 613 1 0 0 6 1 4 0 1 0 0 0 0 0 42 2 3 1 1 0 0 0 0 0 0 0 0 9991 21457 4 1 0 0 0 15 3 1 1 9 5 3 7 7 1 7
1467 27 0 1 155 1 0 0 4 3 2 1 0 0 0 0 0 0 87 4 2 2 0 0 0 0 1 0 0 0 0 6142 5174 1 1 0 0 1 20 4 2 1 6 0 3 6 2 0 3
1468 49 0 2 1023 0 1 0 2 3 4 0 1 0 0 0 0 0 63 2 2 2 0 0 0 0 0 0 0 1 0 5390 13243 2 1 0 0 0 14 3 4 0 17 3 2 9 6 0 8
1469 34 0 1 628 1 0 0 8 3 2 0 1 0 0 0 0 0 82 4 2 3 0 0 1 0 0 0 0 0 0 4404 10228 2 1 0 0 0 12 3 1 0 6 3 4 4 3 1 2

1263 行 × 48 列

分类

支持向量机 (由 Teh Liang Sean 编写)

# import important library to do SVM
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Target: the 'Attrition' column. Candidate features: every remaining column of ibm.
y_svm = ibm['Attrition']
x_svm_find = ibm.drop(columns=['Attrition'])
# Rank the candidate features with a univariate chi-squared test
# (the test needs non-negative feature values) and list the 15 best.
from sklearn.feature_selection import SelectKBest, chi2

# Fit the selector; `fit` is the fitted selector itself and exposes one score per column.
best_15_features = SelectKBest(score_func=chi2, k=15)
fit = best_15_features.fit(x_svm_find, y_svm)

# Put the column names and their scores side by side so they are easy to read.
dfcolumns = pd.DataFrame(x_svm_find.columns)
dfscores = pd.DataFrame(fit.scores_)
top_15_feature_scores = pd.concat([dfcolumns, dfscores], axis=1)
top_15_feature_scores.columns = ['Features', 'Score']

# Print the 15 highest-scoring features, best first.
print(top_15_feature_scores.nlargest(15, 'Score'))
                     Features         Score
29              MonthlyIncome  26471.159476
30                MonthlyRate   1308.443569
2                   DailyRate   1111.594737
44         YearsInCurrentRole    109.263859
43             YearsAtCompany    103.805057
46       YearsWithCurrManager    100.636711
40          TotalWorkingYears     95.843571
35                   OverTime     60.367656
6            DistanceFromHome     57.197704
0                         Age     46.705340
28     JR_SalesRepresentative     27.299127
33                  MS_Single     26.251695
39           StockOptionLevel     24.376114
20  JR_HealthcareRepresentive     10.935616
24   JR_ManufacturingDirector      9.987076
# Build the SVM feature frame from the 15 best features identified above,
# ordered by decreasing chi-squared score.
# NOTE: the list is copied by hand from the ranking printed above; update it
# if the ranking changes after a re-run.
svm_feature_columns = [
    'MonthlyIncome',
    'MonthlyRate',
    'DailyRate',
    'YearsInCurrentRole',
    'YearsAtCompany',
    'YearsWithCurrManager',
    'TotalWorkingYears',
    'OverTime',
    'DistanceFromHome',
    'Age',
    'JR_SalesRepresentative',
    'MS_Single',
    'StockOptionLevel',
    # Previously inserted as 'JR_HealthcareRepresentive ' (stray trailing space),
    # which made the column impossible to look up by its real name.
    'JR_HealthcareRepresentive',
    'JR_ManufacturingDirector',
]
# .copy() gives an independent frame, so later changes to it never touch ibm.
ibm_svm_features_df = ibm[svm_feature_columns].copy()
ibm_svm_features_df
月收入 月费率 日薪 在当前职位年限 在公司年限 与当前经理共事年限 总工作年限 加班 家到公司的距离 年龄 销售代表职位 单身婚姻状况 股票期权等级 医疗保健代表职位 制造总监职位
0 5993 19479 1102 4 6 5 8 1 1 41 0 1 0 0 0
1 5130 24907 279 7 10 7 10 0 8 49 0 0 1 0 0
2 2090 2396 1373 0 0 0 7 1 2 37 0 1 0 0 0
3 2909 23159 1392 7 8 0 8 1 3 33 0 0 0 0 0
4 3468 16632 591 2 2 2 6 0 2 27 0 0 1 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1465 2571 12290 884 2 5 3 17 0 23 36 0 0 1 0 0
1466 9991 21457 613 7 7 7 9 0 6 39 0 0 1 1 0
1467 6142 5174 155 2 6 3 6 1 4 27 0 0 1 0 1
1468 5390 13243 1023 6 9 8 17 0 2 49 0 0 0 0 0
1469 4404 10228 628 3 4 2 6 0 8 34 0 0 0 0 0

1263 行 × 15 列

# Feature matrix for the SVM: the 15 selected columns.
x_svm = ibm_svm_features_df
# Standardize every feature so that large-valued columns (e.g. MonthlyIncome)
# do not dominate the SVM kernel computations.
from sklearn.preprocessing import StandardScaler
s_scaler = StandardScaler()
x_scaled_svm = s_scaler.fit_transform(x_svm)
# Attrition is imbalanced (few positive samples). Tomek links is an
# under-sampling technique; with 'not minority' only the non-minority
# class is resampled.
from imblearn.under_sampling import TomekLinks

tl_svm = TomekLinks(sampling_strategy='not minority')
# Fix: resample the *scaled* features. The original passed the unscaled x_svm
# here, so x_scaled_svm was never used and the models were trained on raw values.
# Results recorded from earlier runs were produced with unscaled data.
x_tl_svm, y_tl_svm = tl_svm.fit_resample(x_scaled_svm, y_svm)
# Hold out 20% of the data for testing (80% for training), stratified on the target.
# NOTE(review): the scaler and the resampler are both fitted before the split, so
# test rows influence them; consider splitting first and fitting on the training part only.
x_train_svm, x_test_svm, y_train_svm, y_test_svm = train_test_split(x_tl_svm, y_tl_svm, test_size=0.2, random_state=40, stratify=y_tl_svm)
# Model 1: SVC with hand-picked hyperparameters (sigmoid kernel).
model_1_svm = svm.SVC(
    kernel='sigmoid',
    C=2,
    gamma='scale',
    coef0=0.6,
    probability=True,
    random_state=40,
)
model_1_svm.fit(x_train_svm, y_train_svm)
# Class predictions for the held-out test set.
y_predict_1_svm = model_1_svm.predict(x_test_svm)
# Model 2: use GridSearchCV to find the best SVM hyperparameters by cross-validation.
# Only some hyperparameters are tuned.

# import GridSearchCV library
from sklearn.model_selection import GridSearchCV

# Hyperparameters that are tuned:
#   kernel : linear / rbf / sigmoid
#   C      : regularization parameter, 1.0 up to (but excluding) 3.0 in steps of 0.1
#   gamma  : kernel coefficient, 'auto' or 'scale' (used by rbf and sigmoid only)
#   coef0  : independent kernel term, 0.0 up to (but excluding) 1.5 in steps of 0.1
#            (used by sigmoid only among the kernels searched here)
# 'degree' is not tuned: it only affects the 'poly' kernel, which is not searched,
# so SVC's default is kept.
c_values = list(np.arange(1.0, 3.0, 0.1))
coef0_values = list(np.arange(0.0, 1.5, 0.1))
gamma_values = ['auto', 'scale']

# One grid per kernel, so parameters a kernel ignores are not cross-multiplied
# into redundant candidates: 20 + 40 + 600 = 660 candidates instead of 5400,
# covering exactly the same set of distinct models.
# best_params_ then only contains the parameters relevant to the winning kernel.
param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values},
    {'kernel': ['sigmoid'], 'C': c_values, 'gamma': gamma_values, 'coef0': coef0_values},
]
# set random state to 40
find_best_para_model = svm.SVC(random_state=40)
# 4-fold cross-validation, using all available CPU cores.
Grid_search_svm = GridSearchCV(find_best_para_model, param_grid, n_jobs=-1, verbose=2, cv=4)
# this may take some time to run
Grid_search_svm.fit(x_train_svm, y_train_svm)
Fitting 4 folds for each of 5400 candidates, totalling 21600 fits
# Show the best hyperparameter combination found by the cross-validated grid search
Grid_search_svm.best_params_
{&#x27;C&#x27;: 2.8000000000000016,
 &#x27;coef0&#x27;: 0.0,
 &#x27;degree&#x27;: 3,
 &#x27;gamma&#x27;: &#x27;scale&#x27;,
 &#x27;kernel&#x27;: &#x27;rbf&#x27;}
# Model 2: build the SVC from the hyperparameters found by the grid search.
# They are read from best_params_ rather than copied by hand from the printed
# output, so this cell stays correct whenever the search is re-run.
# probability=True is needed later for predict_proba (ROC curve).
model_2_svm = svm.SVC(**Grid_search_svm.best_params_, probability=True, random_state=40)
model_2_svm.fit(x_train_svm, y_train_svm)
# Class predictions for the held-out test set.
y_predict_2_svm = model_2_svm.predict(x_test_svm)
# Compare the test-set accuracy of both models.
print('Accuracy of prediction classification result for 2 model')
for label, predicted in (
    ('Hyperparameters that try to tune manually (model 1): ', y_predict_1_svm),
    ('Best hyperparameters found using GridSearchCV (model 2): ', y_predict_2_svm),
):
    print(label, metrics.accuracy_score(y_test_svm, predicted))
Accuracy of prediction classification result for 2 model
Hyperparameters that try to tune manually (model 1):  0.7416666666666667
Best hyperparameters found using GridSearchCV (model 2):  0.8166666666666667
# Evaluate the classification result of model 2 with a confusion matrix
# (rows = true class, columns = predicted class, in the order given by labels).
from sklearn.metrics import confusion_matrix
# labels must be passed by keyword: passing it positionally raised a
# FutureWarning and is an error from scikit-learn 1.0 onwards.
print(confusion_matrix(y_test_svm, y_predict_2_svm, labels=[0, 1]))

# Evaluate the classification result with precision, recall and F1 score
from sklearn.metrics import classification_report
print(classification_report(y_test_svm, y_predict_2_svm))
[[183  12]
 [ 32  13]]
              precision    recall  f1-score   support

           0       0.85      0.94      0.89       195
           1       0.52      0.29      0.37        45

    accuracy                           0.82       240
   macro avg       0.69      0.61      0.63       240
weighted avg       0.79      0.82      0.79       240

C:\Users\USER\anaconda3\lib\site-packages\sklearn\utils\validation.py:70: FutureWarning: Pass labels=[0, 1] as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error
  warnings.warn(f&quot;Pass {args_msg} as keyword args. From version &quot;
from matplotlib import pyplot as plt
# Evaluate the classification result of model 2 with a ROC curve and its AUC.
# roc_auc_score is imported here too: this cell called it without importing it,
# which fails on a fresh kernel unless some other cell happened to import it.
from sklearn.metrics import roc_curve, roc_auc_score

# Predicted probability of class 1 (second column of predict_proba).
y_pred_prob_svm = model_2_svm.predict_proba(x_test_svm)[:,1]
fpr, tpr, threshold = roc_curve(y_test_svm, y_pred_prob_svm)
auc = roc_auc_score(y_test_svm, y_pred_prob_svm)

# Dashed diagonal = chance-level classifier, drawn for reference.
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr)
plt.title(f'AUC: {auc}')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()