#### Naive Bayes

H
E
```{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"collapsed": true
},
"outputs": [],
"source": [
]
},
{
"cell_type": "code",
"execution_count": 3,
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame(iris.data)\n",
"df.columns = [\"sl\", \"sw\", 'pl', 'pw']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"collapsed": true
},
"outputs": [],
"source": [
"def abc(k, *val):\n",
"    if k < val[0]:\n",
"        return 0\n",
"    else:\n",
"        return 1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": [
"0      1\n",
"1      0\n",
"2      0\n",
"3      0\n",
"4      1\n",
"5      1\n",
"6      0\n",
"7      1\n",
"8      0\n",
"9      0\n",
"10     1\n",
"11     0\n",
"12     0\n",
"13     0\n",
"14     1\n",
"15     1\n",
"16     1\n",
"17     1\n",
"18     1\n",
"19     1\n",
"20     1\n",
"21     1\n",
"22     0\n",
"23     1\n",
"24     0\n",
"25     1\n",
"26     1\n",
"27     1\n",
"28     1\n",
"29     0\n",
"      ..\n",
"120    1\n",
"121    1\n",
"122    1\n",
"123    1\n",
"124    1\n",
"125    1\n",
"126    1\n",
"127    1\n",
"128    1\n",
"129    1\n",
"130    1\n",
"131    1\n",
"132    1\n",
"133    1\n",
"134    1\n",
"135    1\n",
"136    1\n",
"137    1\n",
"138    1\n",
"139    1\n",
"140    1\n",
"141    1\n",
"142    1\n",
"143    1\n",
"144    1\n",
"145    1\n",
"146    1\n",
"147    1\n",
"148    1\n",
"149    1\n",
"Name: sl, dtype: int64"
]
},
"execution_count": 5,
"output_type": "execute_result"
}
],
"source": [
"df.sl.apply(abc, args=(5,))"
]
},
{
"cell_type": "code",
"execution_count": null,
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"def label(val, *boundaries):\n",
"    if (val < boundaries[0]):\n",
"        return 'a'\n",
"    elif (val < boundaries[1]):\n",
"        return 'b'\n",
"    elif (val < boundaries[2]):\n",
"        return 'c'\n",
"    else:\n",
"        return 'd'\n",
"\n",
"def toLabel(df, old_feature_name):\n",
"    second = df[old_feature_name].mean()\n",
"    minimum = df[old_feature_name].min()\n",
"    first = (minimum + second)/2\n",
"    maximum = df[old_feature_name].max()\n",
"    third = (maximum + second)/2\n",
"    return df[old_feature_name].apply(label, args= (first, second, third))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>sl</th>\n",
"      <th>sw</th>\n",
"      <th>pl</th>\n",
"      <th>pw</th>\n",
"      <th>sl_labeled</th>\n",
"      <th>sw_labeled</th>\n",
"      <th>pl_labeled</th>\n",
"      <th>pw_labeled</th>\n",
"    </tr>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>5.1</td>\n",
"      <td>3.5</td>\n",
"      <td>1.4</td>\n",
"      <td>0.2</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>4.9</td>\n",
"      <td>3.0</td>\n",
"      <td>1.4</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>4.7</td>\n",
"      <td>3.2</td>\n",
"      <td>1.3</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>4.6</td>\n",
"      <td>3.1</td>\n",
"      <td>1.5</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>5.0</td>\n",
"      <td>3.6</td>\n",
"      <td>1.4</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>5</th>\n",
"      <td>5.4</td>\n",
"      <td>3.9</td>\n",
"      <td>1.7</td>\n",
"      <td>0.4</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>6</th>\n",
"      <td>4.6</td>\n",
"      <td>3.4</td>\n",
"      <td>1.4</td>\n",
"      <td>0.3</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>7</th>\n",
"      <td>5.0</td>\n",
"      <td>3.4</td>\n",
"      <td>1.5</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>8</th>\n",
"      <td>4.4</td>\n",
"      <td>2.9</td>\n",
"      <td>1.4</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>9</th>\n",
"      <td>4.9</td>\n",
"      <td>3.1</td>\n",
"      <td>1.5</td>\n",
"      <td>0.1</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>10</th>\n",
"      <td>5.4</td>\n",
"      <td>3.7</td>\n",
"      <td>1.5</td>\n",
"      <td>0.2</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>11</th>\n",
"      <td>4.8</td>\n",
"      <td>3.4</td>\n",
"      <td>1.6</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>12</th>\n",
"      <td>4.8</td>\n",
"      <td>3.0</td>\n",
"      <td>1.4</td>\n",
"      <td>0.1</td>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>13</th>\n",
"      <td>4.3</td>\n",
"      <td>3.0</td>\n",
"      <td>1.1</td>\n",
"      <td>0.1</td>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>14</th>\n",
"      <td>5.8</td>\n",
"      <td>4.0</td>\n",
"      <td>1.2</td>\n",
"      <td>0.2</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>15</th>\n",
"      <td>5.7</td>\n",
"      <td>4.4</td>\n",
"      <td>1.5</td>\n",
"      <td>0.4</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>16</th>\n",
"      <td>5.4</td>\n",
"      <td>3.9</td>\n",
"      <td>1.3</td>\n",
"      <td>0.4</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>17</th>\n",
"      <td>5.1</td>\n",
"      <td>3.5</td>\n",
"      <td>1.4</td>\n",
"      <td>0.3</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>18</th>\n",
"      <td>5.7</td>\n",
"      <td>3.8</td>\n",
"      <td>1.7</td>\n",
"      <td>0.3</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>19</th>\n",
"      <td>5.1</td>\n",
"      <td>3.8</td>\n",
"      <td>1.5</td>\n",
"      <td>0.3</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>20</th>\n",
"      <td>5.4</td>\n",
"      <td>3.4</td>\n",
"      <td>1.7</td>\n",
"      <td>0.2</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>21</th>\n",
"      <td>5.1</td>\n",
"      <td>3.7</td>\n",
"      <td>1.5</td>\n",
"      <td>0.4</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>22</th>\n",
"      <td>4.6</td>\n",
"      <td>3.6</td>\n",
"      <td>1.0</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>23</th>\n",
"      <td>5.1</td>\n",
"      <td>3.3</td>\n",
"      <td>1.7</td>\n",
"      <td>0.5</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>24</th>\n",
"      <td>4.8</td>\n",
"      <td>3.4</td>\n",
"      <td>1.9</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>25</th>\n",
"      <td>5.0</td>\n",
"      <td>3.0</td>\n",
"      <td>1.6</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>26</th>\n",
"      <td>5.0</td>\n",
"      <td>3.4</td>\n",
"      <td>1.6</td>\n",
"      <td>0.4</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>27</th>\n",
"      <td>5.2</td>\n",
"      <td>3.5</td>\n",
"      <td>1.5</td>\n",
"      <td>0.2</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>28</th>\n",
"      <td>5.2</td>\n",
"      <td>3.4</td>\n",
"      <td>1.4</td>\n",
"      <td>0.2</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>29</th>\n",
"      <td>4.7</td>\n",
"      <td>3.2</td>\n",
"      <td>1.6</td>\n",
"      <td>0.2</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>...</th>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>120</th>\n",
"      <td>6.9</td>\n",
"      <td>3.2</td>\n",
"      <td>5.7</td>\n",
"      <td>2.3</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>121</th>\n",
"      <td>5.6</td>\n",
"      <td>2.8</td>\n",
"      <td>4.9</td>\n",
"      <td>2.0</td>\n",
"      <td>b</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>122</th>\n",
"      <td>7.7</td>\n",
"      <td>2.8</td>\n",
"      <td>6.7</td>\n",
"      <td>2.0</td>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>123</th>\n",
"      <td>6.3</td>\n",
"      <td>2.7</td>\n",
"      <td>4.9</td>\n",
"      <td>1.8</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>124</th>\n",
"      <td>6.7</td>\n",
"      <td>3.3</td>\n",
"      <td>5.7</td>\n",
"      <td>2.1</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>125</th>\n",
"      <td>7.2</td>\n",
"      <td>3.2</td>\n",
"      <td>6.0</td>\n",
"      <td>1.8</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>126</th>\n",
"      <td>6.2</td>\n",
"      <td>2.8</td>\n",
"      <td>4.8</td>\n",
"      <td>1.8</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>127</th>\n",
"      <td>6.1</td>\n",
"      <td>3.0</td>\n",
"      <td>4.9</td>\n",
"      <td>1.8</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>128</th>\n",
"      <td>6.4</td>\n",
"      <td>2.8</td>\n",
"      <td>5.6</td>\n",
"      <td>2.1</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>129</th>\n",
"      <td>7.2</td>\n",
"      <td>3.0</td>\n",
"      <td>5.8</td>\n",
"      <td>1.6</td>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>130</th>\n",
"      <td>7.4</td>\n",
"      <td>2.8</td>\n",
"      <td>6.1</td>\n",
"      <td>1.9</td>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>131</th>\n",
"      <td>7.9</td>\n",
"      <td>3.8</td>\n",
"      <td>6.4</td>\n",
"      <td>2.0</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>132</th>\n",
"      <td>6.4</td>\n",
"      <td>2.8</td>\n",
"      <td>5.6</td>\n",
"      <td>2.2</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>133</th>\n",
"      <td>6.3</td>\n",
"      <td>2.8</td>\n",
"      <td>5.1</td>\n",
"      <td>1.5</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>134</th>\n",
"      <td>6.1</td>\n",
"      <td>2.6</td>\n",
"      <td>5.6</td>\n",
"      <td>1.4</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>135</th>\n",
"      <td>7.7</td>\n",
"      <td>3.0</td>\n",
"      <td>6.1</td>\n",
"      <td>2.3</td>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>136</th>\n",
"      <td>6.3</td>\n",
"      <td>3.4</td>\n",
"      <td>5.6</td>\n",
"      <td>2.4</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>137</th>\n",
"      <td>6.4</td>\n",
"      <td>3.1</td>\n",
"      <td>5.5</td>\n",
"      <td>1.8</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>138</th>\n",
"      <td>6.0</td>\n",
"      <td>3.0</td>\n",
"      <td>4.8</td>\n",
"      <td>1.8</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>139</th>\n",
"      <td>6.9</td>\n",
"      <td>3.1</td>\n",
"      <td>5.4</td>\n",
"      <td>2.1</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>140</th>\n",
"      <td>6.7</td>\n",
"      <td>3.1</td>\n",
"      <td>5.6</td>\n",
"      <td>2.4</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>141</th>\n",
"      <td>6.9</td>\n",
"      <td>3.1</td>\n",
"      <td>5.1</td>\n",
"      <td>2.3</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>142</th>\n",
"      <td>5.8</td>\n",
"      <td>2.7</td>\n",
"      <td>5.1</td>\n",
"      <td>1.9</td>\n",
"      <td>b</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>143</th>\n",
"      <td>6.8</td>\n",
"      <td>3.2</td>\n",
"      <td>5.9</td>\n",
"      <td>2.3</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>144</th>\n",
"      <td>6.7</td>\n",
"      <td>3.3</td>\n",
"      <td>5.7</td>\n",
"      <td>2.5</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>145</th>\n",
"      <td>6.7</td>\n",
"      <td>3.0</td>\n",
"      <td>5.2</td>\n",
"      <td>2.3</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>146</th>\n",
"      <td>6.3</td>\n",
"      <td>2.5</td>\n",
"      <td>5.0</td>\n",
"      <td>1.9</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>147</th>\n",
"      <td>6.5</td>\n",
"      <td>3.0</td>\n",
"      <td>5.2</td>\n",
"      <td>2.0</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>148</th>\n",
"      <td>6.2</td>\n",
"      <td>3.4</td>\n",
"      <td>5.4</td>\n",
"      <td>2.3</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>149</th>\n",
"      <td>5.9</td>\n",
"      <td>3.0</td>\n",
"      <td>5.1</td>\n",
"      <td>1.8</td>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"<p>150 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"      sl   sw   pl   pw sl_labeled sw_labeled pl_labeled pw_labeled\n",
"0    5.1  3.5  1.4  0.2          b          c          a          a\n",
"1    4.9  3.0  1.4  0.2          a          b          a          a\n",
"2    4.7  3.2  1.3  0.2          a          c          a          a\n",
"3    4.6  3.1  1.5  0.2          a          c          a          a\n",
"4    5.0  3.6  1.4  0.2          a          c          a          a\n",
"5    5.4  3.9  1.7  0.4          b          d          a          a\n",
"6    4.6  3.4  1.4  0.3          a          c          a          a\n",
"7    5.0  3.4  1.5  0.2          a          c          a          a\n",
"8    4.4  2.9  1.4  0.2          a          b          a          a\n",
"9    4.9  3.1  1.5  0.1          a          c          a          a\n",
"10   5.4  3.7  1.5  0.2          b          c          a          a\n",
"11   4.8  3.4  1.6  0.2          a          c          a          a\n",
"12   4.8  3.0  1.4  0.1          a          b          a          a\n",
"13   4.3  3.0  1.1  0.1          a          b          a          a\n",
"14   5.8  4.0  1.2  0.2          b          d          a          a\n",
"15   5.7  4.4  1.5  0.4          b          d          a          a\n",
"16   5.4  3.9  1.3  0.4          b          d          a          a\n",
"17   5.1  3.5  1.4  0.3          b          c          a          a\n",
"18   5.7  3.8  1.7  0.3          b          d          a          a\n",
"19   5.1  3.8  1.5  0.3          b          d          a          a\n",
"20   5.4  3.4  1.7  0.2          b          c          a          a\n",
"21   5.1  3.7  1.5  0.4          b          c          a          a\n",
"22   4.6  3.6  1.0  0.2          a          c          a          a\n",
"23   5.1  3.3  1.7  0.5          b          c          a          a\n",
"24   4.8  3.4  1.9  0.2          a          c          a          a\n",
"25   5.0  3.0  1.6  0.2          a          b          a          a\n",
"26   5.0  3.4  1.6  0.4          a          c          a          a\n",
"27   5.2  3.5  1.5  0.2          b          c          a          a\n",
"28   5.2  3.4  1.4  0.2          b          c          a          a\n",
"29   4.7  3.2  1.6  0.2          a          c          a          a\n",
"..   ...  ...  ...  ...        ...        ...        ...        ...\n",
"120  6.9  3.2  5.7  2.3          d          c          d          d\n",
"121  5.6  2.8  4.9  2.0          b          b          c          d\n",
"122  7.7  2.8  6.7  2.0          d          b          d          d\n",
"123  6.3  2.7  4.9  1.8          c          b          c          c\n",
"124  6.7  3.3  5.7  2.1          c          c          d          d\n",
"125  7.2  3.2  6.0  1.8          d          c          d          c\n",
"126  6.2  2.8  4.8  1.8          c          b          c          c\n",
"127  6.1  3.0  4.9  1.8          c          b          c          c\n",
"128  6.4  2.8  5.6  2.1          c          b          d          d\n",
"129  7.2  3.0  5.8  1.6          d          b          d          c\n",
"130  7.4  2.8  6.1  1.9          d          b          d          d\n",
"131  7.9  3.8  6.4  2.0          d          d          d          d\n",
"132  6.4  2.8  5.6  2.2          c          b          d          d\n",
"133  6.3  2.8  5.1  1.5          c          b          c          c\n",
"134  6.1  2.6  5.6  1.4          c          b          d          c\n",
"135  7.7  3.0  6.1  2.3          d          b          d          d\n",
"136  6.3  3.4  5.6  2.4          c          c          d          d\n",
"137  6.4  3.1  5.5  1.8          c          c          d          c\n",
"138  6.0  3.0  4.8  1.8          c          b          c          c\n",
"139  6.9  3.1  5.4  2.1          d          c          d          d\n",
"140  6.7  3.1  5.6  2.4          c          c          d          d\n",
"141  6.9  3.1  5.1  2.3          d          c          c          d\n",
"142  5.8  2.7  5.1  1.9          b          b          c          d\n",
"143  6.8  3.2  5.9  2.3          c          c          d          d\n",
"144  6.7  3.3  5.7  2.5          c          c          d          d\n",
"145  6.7  3.0  5.2  2.3          c          b          c          d\n",
"146  6.3  2.5  5.0  1.9          c          a          c          d\n",
"147  6.5  3.0  5.2  2.0          c          b          c          d\n",
"148  6.2  3.4  5.4  2.3          c          c          d          d\n",
"149  5.9  3.0  5.1  1.8          c          b          c          c\n",
"\n",
"[150 rows x 8 columns]"
]
},
"execution_count": 7,
"output_type": "execute_result"
}
],
"source": [
"df['sl_labeled'] = toLabel(df, 'sl')\n",
"df['sw_labeled'] = toLabel(df, 'sw')\n",
"df['pl_labeled'] = toLabel(df, 'pl')\n",
"df['pw_labeled'] = toLabel(df, 'pw')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [],
"source": [
"df.drop(['sl', 'sw', 'pl', 'pw'], axis = 1, inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": [
"{'a', 'b', 'c', 'd'}"
]
},
"execution_count": 9,
"output_type": "execute_result"
}
],
"source": [
"set(df['sl_labeled'])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [],
"source": [
"df[\"output\"] = iris.target"
]
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>sl_labeled</th>\n",
"      <th>sw_labeled</th>\n",
"      <th>pl_labeled</th>\n",
"      <th>pw_labeled</th>\n",
"      <th>output</th>\n",
"    </tr>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>5</th>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>6</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>7</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>8</th>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>9</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>10</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>11</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>12</th>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>13</th>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>14</th>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>15</th>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>16</th>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>17</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>18</th>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>19</th>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>20</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>21</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>22</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>23</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>24</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>25</th>\n",
"      <td>a</td>\n",
"      <td>b</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>26</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>27</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>28</th>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>29</th>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>a</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>...</th>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"      <td>...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>120</th>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>121</th>\n",
"      <td>b</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>122</th>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>123</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>124</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>125</th>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>126</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>127</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>128</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>129</th>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>130</th>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>131</th>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>132</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>133</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>134</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>135</th>\n",
"      <td>d</td>\n",
"      <td>b</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>136</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>137</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>138</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>139</th>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>140</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>141</th>\n",
"      <td>d</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>142</th>\n",
"      <td>b</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>143</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>144</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>145</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>146</th>\n",
"      <td>c</td>\n",
"      <td>a</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>147</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>148</th>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>d</td>\n",
"      <td>d</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>149</th>\n",
"      <td>c</td>\n",
"      <td>b</td>\n",
"      <td>c</td>\n",
"      <td>c</td>\n",
"      <td>2</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"<p>150 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
"    sl_labeled sw_labeled pl_labeled pw_labeled  output\n",
"0            b          c          a          a       0\n",
"1            a          b          a          a       0\n",
"2            a          c          a          a       0\n",
"3            a          c          a          a       0\n",
"4            a          c          a          a       0\n",
"5            b          d          a          a       0\n",
"6            a          c          a          a       0\n",
"7            a          c          a          a       0\n",
"8            a          b          a          a       0\n",
"9            a          c          a          a       0\n",
"10           b          c          a          a       0\n",
"11           a          c          a          a       0\n",
"12           a          b          a          a       0\n",
"13           a          b          a          a       0\n",
"14           b          d          a          a       0\n",
"15           b          d          a          a       0\n",
"16           b          d          a          a       0\n",
"17           b          c          a          a       0\n",
"18           b          d          a          a       0\n",
"19           b          d          a          a       0\n",
"20           b          c          a          a       0\n",
"21           b          c          a          a       0\n",
"22           a          c          a          a       0\n",
"23           b          c          a          a       0\n",
"24           a          c          a          a       0\n",
"25           a          b          a          a       0\n",
"26           a          c          a          a       0\n",
"27           b          c          a          a       0\n",
"28           b          c          a          a       0\n",
"29           a          c          a          a       0\n",
"..         ...        ...        ...        ...     ...\n",
"120          d          c          d          d       2\n",
"121          b          b          c          d       2\n",
"122          d          b          d          d       2\n",
"123          c          b          c          c       2\n",
"124          c          c          d          d       2\n",
"125          d          c          d          c       2\n",
"126          c          b          c          c       2\n",
"127          c          b          c          c       2\n",
"128          c          b          d          d       2\n",
"129          d          b          d          c       2\n",
"130          d          b          d          d       2\n",
"131          d          d          d          d       2\n",
"132          c          b          d          d       2\n",
"133          c          b          c          c       2\n",
"134          c          b          d          c       2\n",
"135          d          b          d          d       2\n",
"136          c          c          d          d       2\n",
"137          c          c          d          c       2\n",
"138          c          b          c          c       2\n",
"139          d          c          d          d       2\n",
"140          c          c          d          d       2\n",
"141          d          c          c          d       2\n",
"142          b          b          c          d       2\n",
"143          c          c          d          d       2\n",
"144          c          c          d          d       2\n",
"145          c          b          c          d       2\n",
"146          c          a          c          d       2\n",
"147          c          b          c          d       2\n",
"148          c          c          d          d       2\n",
"149          c          b          c          c       2\n",
"\n",
"[150 rows x 5 columns]"
]
},
"execution_count": 13,
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 14,
"collapsed": true
},
"outputs": [],
"source": [
"def fit(data):\n",
"    output_name = data.columns[-1]\n",
"    features = data.columns[0:-1]\n",
"    counts = {}\n",
"    possible_outputs = set(data[output_name])\n",
"    for output in possible_outputs:\n",
"        counts[output] = {}\n",
"        smallData = data[data[output_name] == output]\n",
"        counts[output][\"total_count\"] = len(smallData)\n",
"        for f in features:\n",
"            counts[output][f] = {}\n",
"            possible_values = set(smallData[f])\n",
"            for value in possible_values:\n",
"                val_count = len(smallData[smallData[f] == value])\n",
"                counts[output][f][value] = val_count\n",
"    return counts"
]
},
{
"cell_type": "code",
"execution_count": 15,
"outputs": [
{
"data": {
"text/plain": [
"{0: {'pl_labeled': {'a': 50},\n",
"  'pw_labeled': {'a': 50},\n",
"  'sl_labeled': {'a': 28, 'b': 22},\n",
"  'sw_labeled': {'a': 1, 'b': 7, 'c': 32, 'd': 10},\n",
"  'total_count': 50},\n",
" 1: {'pl_labeled': {'b': 7, 'c': 43},\n",
"  'pw_labeled': {'b': 10, 'c': 40},\n",
"  'sl_labeled': {'a': 3, 'b': 21, 'c': 24, 'd': 2},\n",
"  'sw_labeled': {'a': 13, 'b': 29, 'c': 8},\n",
"  'total_count': 50},\n",
" 2: {'pl_labeled': {'c': 20, 'd': 30},\n",
"  'pw_labeled': {'c': 16, 'd': 34},\n",
"  'sl_labeled': {'a': 1, 'b': 5, 'c': 29, 'd': 15},\n",
"  'sw_labeled': {'a': 5, 'b': 28, 'c': 15, 'd': 2},\n",
"  'total_count': 50}}"
]
},
"execution_count": 15,
"output_type": "execute_result"
}
],
"source": [
"fit(df)"
]
}
],
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
```
``````from sklearn import datasets
import pandas as pd``````
``iris = datasets.load_iris()``
``````df = pd.DataFrame(iris.data)
df.columns = ["sl", "sw", 'pl', 'pw']``````
``````def abc(k, *val):
if k &lt; val[0]:
return 0
else:
return 1``````
``df.sl.apply(abc, args=(5,))``
```0      1
1      0
2      0
3      0
4      1
5      1
6      0
7      1
8      0
9      0
10     1
11     0
12     0
13     0
14     1
15     1
16     1
17     1
18     1
19     1
20     1
21     1
22     0
23     1
24     0
25     1
26     1
27     1
28     1
29     0
..
120    1
121    1
122    1
123    1
124    1
125    1
126    1
127    1
128    1
129    1
130    1
131    1
132    1
133    1
134    1
135    1
136    1
137    1
138    1
139    1
140    1
141    1
142    1
143    1
144    1
145    1
146    1
147    1
148    1
149    1
Name: sl, dtype: int64```
``````def label(val, *boundaries):
if (val &lt; boundaries[0]):
return 'a'
elif (val &lt; boundaries[1]):
return 'b'
elif (val &lt; boundaries[2]):
return 'c'
else:
return 'd'

def toLabel(df, old_feature_name):
second = df[old_feature_name].mean()
minimum = df[old_feature_name].min()
first = (minimum + second)/2
maximum = df[old_feature_name].max()
third = (maximum + second)/2
return df[old_feature_name].apply(label, args= (first, second, third))``````
``````df['sl_labeled'] = toLabel(df, 'sl')
df['sw_labeled'] = toLabel(df, 'sw')
df['pl_labeled'] = toLabel(df, 'pl')
df['pw_labeled'] = toLabel(df, 'pw')
df``````
sl sw pl pw sl_labeled sw_labeled pl_labeled pw_labeled
0 5.1 3.5 1.4 0.2 b c a a
1 4.9 3.0 1.4 0.2 a b a a
2 4.7 3.2 1.3 0.2 a c a a
3 4.6 3.1 1.5 0.2 a c a a
4 5.0 3.6 1.4 0.2 a c a a
5 5.4 3.9 1.7 0.4 b d a a
6 4.6 3.4 1.4 0.3 a c a a
7 5.0 3.4 1.5 0.2 a c a a
8 4.4 2.9 1.4 0.2 a b a a
9 4.9 3.1 1.5 0.1 a c a a
10 5.4 3.7 1.5 0.2 b c a a
11 4.8 3.4 1.6 0.2 a c a a
12 4.8 3.0 1.4 0.1 a b a a
13 4.3 3.0 1.1 0.1 a b a a
14 5.8 4.0 1.2 0.2 b d a a
15 5.7 4.4 1.5 0.4 b d a a
16 5.4 3.9 1.3 0.4 b d a a
17 5.1 3.5 1.4 0.3 b c a a
18 5.7 3.8 1.7 0.3 b d a a
19 5.1 3.8 1.5 0.3 b d a a
20 5.4 3.4 1.7 0.2 b c a a
21 5.1 3.7 1.5 0.4 b c a a
22 4.6 3.6 1.0 0.2 a c a a
23 5.1 3.3 1.7 0.5 b c a a
24 4.8 3.4 1.9 0.2 a c a a
25 5.0 3.0 1.6 0.2 a b a a
26 5.0 3.4 1.6 0.4 a c a a
27 5.2 3.5 1.5 0.2 b c a a
28 5.2 3.4 1.4 0.2 b c a a
29 4.7 3.2 1.6 0.2 a c a a
... ... ... ... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 d c d d
121 5.6 2.8 4.9 2.0 b b c d
122 7.7 2.8 6.7 2.0 d b d d
123 6.3 2.7 4.9 1.8 c b c c
124 6.7 3.3 5.7 2.1 c c d d
125 7.2 3.2 6.0 1.8 d c d c
126 6.2 2.8 4.8 1.8 c b c c
127 6.1 3.0 4.9 1.8 c b c c
128 6.4 2.8 5.6 2.1 c b d d
129 7.2 3.0 5.8 1.6 d b d c
130 7.4 2.8 6.1 1.9 d b d d
131 7.9 3.8 6.4 2.0 d d d d
132 6.4 2.8 5.6 2.2 c b d d
133 6.3 2.8 5.1 1.5 c b c c
134 6.1 2.6 5.6 1.4 c b d c
135 7.7 3.0 6.1 2.3 d b d d
136 6.3 3.4 5.6 2.4 c c d d
137 6.4 3.1 5.5 1.8 c c d c
138 6.0 3.0 4.8 1.8 c b c c
139 6.9 3.1 5.4 2.1 d c d d
140 6.7 3.1 5.6 2.4 c c d d
141 6.9 3.1 5.1 2.3 d c c d
142 5.8 2.7 5.1 1.9 b b c d
143 6.8 3.2 5.9 2.3 c c d d
144 6.7 3.3 5.7 2.5 c c d d
145 6.7 3.0 5.2 2.3 c b c d
146 6.3 2.5 5.0 1.9 c a c d
147 6.5 3.0 5.2 2.0 c b c d
148 6.2 3.4 5.4 2.3 c c d d
149 5.9 3.0 5.1 1.8 c b c c

150 rows × 8 columns

``df.drop(['sl', 'sw', 'pl', 'pw'], axis = 1, inplace = True)``
``set(df['sl_labeled'])``
`{'a', 'b', 'c', 'd'}`
``df["output"] = iris.target``
``df``
sl_labeled sw_labeled pl_labeled pw_labeled output
0 b c a a 0
1 a b a a 0
2 a c a a 0
3 a c a a 0
4 a c a a 0
5 b d a a 0
6 a c a a 0
7 a c a a 0
8 a b a a 0
9 a c a a 0
10 b c a a 0
11 a c a a 0
12 a b a a 0
13 a b a a 0
14 b d a a 0
15 b d a a 0
16 b d a a 0
17 b c a a 0
18 b d a a 0
19 b d a a 0
20 b c a a 0
21 b c a a 0
22 a c a a 0
23 b c a a 0
24 a c a a 0
25 a b a a 0
26 a c a a 0
27 b c a a 0
28 b c a a 0
29 a c a a 0
... ... ... ... ... ...
120 d c d d 2
121 b b c d 2
122 d b d d 2
123 c b c c 2
124 c c d d 2
125 d c d c 2
126 c b c c 2
127 c b c c 2
128 c b d d 2
129 d b d c 2
130 d b d d 2
131 d d d d 2
132 c b d d 2
133 c b c c 2
134 c b d c 2
135 d b d d 2
136 c c d d 2
137 c c d c 2
138 c b c c 2
139 d c d d 2
140 c c d d 2
141 d c c d 2
142 b b c d 2
143 c c d d 2
144 c c d d 2
145 c b c d 2
146 c a c d 2
147 c b c d 2
148 c c d d 2
149 c b c c 2

150 rows × 5 columns

``````def fit(data):
output_name = data.columns[-1]
features = data.columns[0:-1]
counts = {}
possible_outputs = set(data[output_name])
for output in possible_outputs:
counts[output] = {}
smallData = data[data[output_name] == output]
counts[output]["total_count"] = len(smallData)
for f in features:
counts[output][f] = {}
possible_values = set(smallData[f])
for value in possible_values:
val_count = len(smallData[smallData[f] == value])
counts[output][f][value] = val_count
return counts``````
``fit(df)``
```{0: {'pl_labeled': {'a': 50},
  'pw_labeled': {'a': 50},
  'sl_labeled': {'a': 28, 'b': 22},
  'sw_labeled': {'a': 1, 'b': 7, 'c': 32, 'd': 10},
  'total_count': 50},
 1: {'pl_labeled': {'b': 7, 'c': 43},
  'pw_labeled': {'b': 10, 'c': 40},
  'sl_labeled': {'a': 3, 'b': 21, 'c': 24, 'd': 2},
  'sw_labeled': {'a': 13, 'b': 29, 'c': 8},
  'total_count': 50},
 2: {'pl_labeled': {'c': 20, 'd': 30},
  'pw_labeled': {'c': 16, 'd': 34},
  'sl_labeled': {'a': 1, 'b': 5, 'c': 29, 'd': 15},
  'sw_labeled': {'a': 5, 'b': 28, 'c': 15, 'd': 2},
  'total_count': 50}}```