The Algorithms logo
The Algorithms
关于捐赠

Price Prediction Model

H
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "e4ce5cb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "f9236408",
   "metadata": {},
   "outputs": [],
   "source": [
    "house = pd.read_csv(\"Housing.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "1cbee77c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "price               0\n",
       "area                0\n",
       "bedrooms            0\n",
       "bathrooms           0\n",
       "stories             0\n",
       "mainroad            0\n",
       "guestroom           0\n",
       "basement            0\n",
       "hotwaterheating     0\n",
       "airconditioning     0\n",
       "parking             0\n",
       "prefarea            0\n",
       "furnishingstatus    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "id": "b5ddd0fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>area</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>stories</th>\n",
       "      <th>mainroad</th>\n",
       "      <th>guestroom</th>\n",
       "      <th>basement</th>\n",
       "      <th>hotwaterheating</th>\n",
       "      <th>airconditioning</th>\n",
       "      <th>parking</th>\n",
       "      <th>prefarea</th>\n",
       "      <th>furnishingstatus</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13300000</td>\n",
       "      <td>7420</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12250000</td>\n",
       "      <td>8960</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>3</td>\n",
       "      <td>no</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12250000</td>\n",
       "      <td>9960</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>semi-furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12215000</td>\n",
       "      <td>7500</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>3</td>\n",
       "      <td>yes</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11410000</td>\n",
       "      <td>7420</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>2</td>\n",
       "      <td>no</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10850000</td>\n",
       "      <td>7500</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>semi-furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>10150000</td>\n",
       "      <td>8580</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>semi-furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10150000</td>\n",
       "      <td>16200</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "      <td>unfurnished</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      price   area  bedrooms  bathrooms  stories mainroad guestroom basement  \\\n",
       "0  13300000   7420         4          2        3      yes        no       no   \n",
       "1  12250000   8960         4          4        4      yes        no       no   \n",
       "2  12250000   9960         3          2        2      yes        no      yes   \n",
       "3  12215000   7500         4          2        2      yes        no      yes   \n",
       "4  11410000   7420         4          1        2      yes       yes      yes   \n",
       "5  10850000   7500         3          3        1      yes        no      yes   \n",
       "6  10150000   8580         4          3        4      yes        no       no   \n",
       "7  10150000  16200         5          3        2      yes        no       no   \n",
       "\n",
       "  hotwaterheating airconditioning  parking prefarea furnishingstatus  \n",
       "0              no             yes        2      yes        furnished  \n",
       "1              no             yes        3       no        furnished  \n",
       "2              no              no        2      yes   semi-furnished  \n",
       "3              no             yes        3      yes        furnished  \n",
       "4              no             yes        2       no        furnished  \n",
       "5              no             yes        2      yes   semi-furnished  \n",
       "6              no             yes        2      yes   semi-furnished  \n",
       "7              no              no        0       no      unfurnished  "
      ]
     },
     "execution_count": 226,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house.head(8)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55ef0588",
   "metadata": {},
   "source": [
    "# Simple Regression \n",
    "- predict 'price' using 'area'\n",
    "- y  =  b0  +  b1 * x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "1cf9d7a0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>area</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13300000</td>\n",
       "      <td>7420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12250000</td>\n",
       "      <td>8960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12250000</td>\n",
       "      <td>9960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12215000</td>\n",
       "      <td>7500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      price  area\n",
       "0  13300000  7420\n",
       "1  12250000  8960\n",
       "2  12250000  9960\n",
       "3  12215000  7500"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = house[['price','area']]\n",
    "data.head(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "id": "6707b8bc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(545,)"
      ]
     },
     "execution_count": 227,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = house['price'].to_numpy()\n",
    "x = house['area'].to_numpy()\n",
    "\n",
    "x.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "id": "fc666f54",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x12ffccee160>"
      ]
     },
     "execution_count": 232,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(x, y, color = \"b\",marker = \"*\", s = 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "022d38a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_mean = np.mean(y)\n",
    "x_mean = np.mean(x)\n",
    "\n",
    "num = 0   # numerator\n",
    "den = 0   # denominator\n",
    "\n",
    "n = np.size(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "9e2db6a9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2387308.482396433 461.9748942727828\n"
     ]
    }
   ],
   "source": [
    "for i in range(n):\n",
    "    num += (x[i] - x_mean) * (y[i] - y_mean)\n",
    "    den += (x[i] - x_mean)**2\n",
    "    \n",
    "b1 = num / den\n",
    "b0 = y_mean - b1*x_mean\n",
    "\n",
    "print(b0,b1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "5b9e67b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "prediction = b0 + b1*x\n",
    "for i in range(n):\n",
    "    prediction[i] = prediction[i]//1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "980808bf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x12ffccc0c40>]"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(x, y, color = \"r\",marker = \"o\", s = 10)\n",
    "plt.plot(x, prediction, color = \"b\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4b8f1d05",
   "metadata": {},
   "source": [
    "# Multiple regression\n",
    "\n",
    "- y = b0 + b1 * x1 + b2 * x2 + ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "f0e76409",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>area</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>stories</th>\n",
       "      <th>mainroad</th>\n",
       "      <th>guestroom</th>\n",
       "      <th>basement</th>\n",
       "      <th>hotwaterheating</th>\n",
       "      <th>airconditioning</th>\n",
       "      <th>parking</th>\n",
       "      <th>prefarea</th>\n",
       "      <th>furnishingstatus</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13300000</td>\n",
       "      <td>7420</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12250000</td>\n",
       "      <td>8960</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>3</td>\n",
       "      <td>no</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12250000</td>\n",
       "      <td>9960</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>semi-furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12215000</td>\n",
       "      <td>7500</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>3</td>\n",
       "      <td>yes</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11410000</td>\n",
       "      <td>7420</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>2</td>\n",
       "      <td>no</td>\n",
       "      <td>furnished</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \\\n",
       "0  13300000  7420         4          2        3      yes        no       no   \n",
       "1  12250000  8960         4          4        4      yes        no       no   \n",
       "2  12250000  9960         3          2        2      yes        no      yes   \n",
       "3  12215000  7500         4          2        2      yes        no      yes   \n",
       "4  11410000  7420         4          1        2      yes       yes      yes   \n",
       "\n",
       "  hotwaterheating airconditioning  parking prefarea furnishingstatus  \n",
       "0              no             yes        2      yes        furnished  \n",
       "1              no             yes        3       no        furnished  \n",
       "2              no              no        2      yes   semi-furnished  \n",
       "3              no             yes        3      yes        furnished  \n",
       "4              no             yes        2       no        furnished  "
      ]
     },
     "execution_count": 168,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "70c757ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[7420,    4,    2,    3],\n",
       "       [8960,    4,    4,    4],\n",
       "       [9960,    3,    2,    2],\n",
       "       ...,\n",
       "       [3620,    2,    1,    1],\n",
       "       [2910,    3,    1,    1],\n",
       "       [3850,    3,    1,    2]], dtype=int64)"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dependent -> price\n",
    "y = house['price'].to_numpy()\n",
    "\n",
    "# -> independent variables\n",
    "x = house[['area','bedrooms','bathrooms','stories']].to_numpy()\n",
    "\n",
    "n = np.size(y)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "9ea64dd9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[5150.54128440367, 2.9651376146788992, 1.2862385321100918, 1.8055045871559634]"
      ]
     },
     "execution_count": 177,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_mean = np.mean(y)\n",
    "x_mean = [0,0,0,0]\n",
    "\n",
    "for i in range(n):\n",
    "    for j in range(4):\n",
    "        x_mean[j] += x[i][j]\n",
    "\n",
    "for i in range(4):\n",
    "    x_mean[i] /= n\n",
    "\n",
    "x_mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "id": "b607bb0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[461.9748942727828, 928788.1189320377, 1926558.8901060484, 907116.9031974602]"
      ]
     },
     "execution_count": 181,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = []\n",
    "for j in range(4):\n",
    "    num = 0    # numerator\n",
    "    den = 0    # denominator\n",
    "    \n",
    "    for i in range(n):\n",
    "        num += (x[i][j] - x_mean[j]) * (y[i] - y_mean)\n",
    "        den += (x[i][j] - x_mean[j])**2\n",
    "    b.append(num / den)\n",
    "\n",
    "b"
   ]
  },
  
  {
   "cell_type": "code",
   "execution_count": 182,
   "id": "6ddbcb2a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-4482494.113759189"
      ]
     },
     "execution_count": 182,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# y = b0 + b1x1 + b2x2 + b3x3 + ...\n",
    "# finding b0\n",
    "\n",
    "b0 = y_mean\n",
    "for i in range(4):\n",
    "    b0 -= b[i]*x_mean[i]\n",
    "b0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "id": "42ce8d7f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[9234980,\n",
       " 14706656,\n",
       " 8572491,\n",
       " 8364821,\n",
       " 6401304,\n",
       " 8455475,\n",
       " 12604547,\n",
       " 15239350,\n",
       " 6715447,\n",
       " 8441811,\n",
       " 8142731,\n",
       " 9598418,\n",
       " 7925945,\n",
       " 6516922,\n",
       " 7574626,\n",
       " 5745300,\n",
       " 7949044,\n",
       " 9712242,\n",
       " 6096306,\n",
       " 6937100,\n",
       " 4040394,\n",
       " 6369535,\n",
       " 4856443,\n",
       " 6077827,\n",
       " 8036600,\n",
       " 7921325,\n",
       " 8557305,\n",
       " 5237573,\n",
       " 9501498,\n",
       " 7440871,\n",
       " 9238717,\n",
       " 7092721,\n",
       " 7154447,\n",
       " 8651151,\n",
       " 7062147,\n",
       " 9019279,\n",
       " 8334834,\n",
       " 10872017,\n",
       " 6630746,\n",
       " 9486093,\n",
       " 5070598,\n",
       " 8723615,\n",
       " 8779052,\n",
       " 9486093,\n",
       " 9486093,\n",
       " 7650188,\n",
       " 8557305,\n",
       " 6907931,\n",
       " 5957713,\n",
       " 6501198,\n",
       " 9222548,\n",
       " 6780887,\n",
       " 9486093,\n",
       " 8164626,\n",
       " 6743071,\n",
       " 4816512,\n",
       " 8258443,\n",
       " 10872017,\n",
       " 10262210,\n",
       " 8557305,\n",
       " 6743071,\n",
       " 4311094,\n",
       " 7782733,\n",
       " 8745287,\n",
       " 6300115,\n",
       " 8073558,\n",
       " 6306826,\n",
       " 6621311,\n",
       " 3909395,\n",
       " 10485286,\n",
       " 5819121,\n",
       " 9486093,\n",
       " 6178010,\n",
       " 7905701,\n",
       " 3911041,\n",
       " 6868022,\n",
       " 7844217,\n",
       " 7881175,\n",
       " 3770802,\n",
       " 7650188,\n",
       " 4816512,\n",
       " 5819121,\n",
       " 7914841,\n",
       " 8557305,\n",
       " 3781688,\n",
       " 8689631,\n",
       " 6033152,\n",
       " 2966966,\n",
       " 4560780,\n",
       " 11719101,\n",
       " 4354537,\n",
       " 3327088,\n",
       " 8002935,\n",
       " 6390324,\n",
       " 9486093,\n",
       " 6772435,\n",
       " 5295320,\n",
       " 4094185,\n",
       " 7927373,\n",
       " 6652417,\n",
       " 6113139,\n",
       " 5492641,\n",
       " 8326317,\n",
       " 7811879,\n",
       " 5604966,\n",
       " 5937783,\n",
       " 6510656,\n",
       " 5917658,\n",
       " 5377366,\n",
       " 7955973,\n",
       " 4186580,\n",
       " 6819433,\n",
       " 8744078,\n",
       " 5581744,\n",
       " 3350187,\n",
       " 4833345,\n",
       " 6251731,\n",
       " 4682758,\n",
       " 4103424,\n",
       " 4380609,\n",
       " 4158861,\n",
       " 5385203,\n",
       " 6882084,\n",
       " 8281666,\n",
       " 8799841,\n",
       " 8344354,\n",
       " 4445286,\n",
       " 7881175,\n",
       " 5492641,\n",
       " 8246012,\n",
       " 3355025,\n",
       " 7480074,\n",
       " 5354049,\n",
       " 5169259,\n",
       " 4371370,\n",
       " 8557305,\n",
       " 7394674,\n",
       " 5117014,\n",
       " 5261654,\n",
       " 4075706,\n",
       " 8464910,\n",
       " 7976762,\n",
       " 9750746,\n",
       " 8953394,\n",
       " 5144733,\n",
       " 6168771,\n",
       " 5059494,\n",
       " 6512083,\n",
       " 5889940,\n",
       " 7041927,\n",
       " 4417365,\n",
       " 5006140,\n",
       " 6396903,\n",
       " 7422297,\n",
       " 5657430,\n",
       " 5882151,\n",
       " 4325172,\n",
       " 6201393,\n",
       " 4824105,\n",
       " 4519325,\n",
       " 7656548,\n",
       " 5769826,\n",
       " 7949044,\n",
       " 4290524,\n",
       " 7071073,\n",
       " 6043842,\n",
       " 4740950,\n",
       " 4260401,\n",
       " 4941464,\n",
       " 7921325,\n",
       " 5604966,\n",
       " 5881566,\n",
       " 5925252,\n",
       " 6441359,\n",
       " 3800167,\n",
       " 9427363,\n",
       " 5073572,\n",
       " 3932494,\n",
       " 4410638,\n",
       " 5440301,\n",
       " 6071780,\n",
       " 5370882,\n",
       " 3619997,\n",
       " 4824105,\n",
       " 5357146,\n",
       " 3430587,\n",
       " 6387008,\n",
       " 3955592,\n",
       " 3758371,\n",
       " 1844149,\n",
       " 6484460,\n",
       " 6987794,\n",
       " 4186580,\n",
       " 2426237,\n",
       " 6829200,\n",
       " 8863878,\n",
       " 4688285,\n",
       " 5264751,\n",
       " 4792489,\n",
       " 3984957,\n",
       " 4132789,\n",
       " 3936450,\n",
       " 2112094,\n",
       " 5468115,\n",
       " 3341166,\n",
       " 4047987,\n",
       " 2888212,\n",
       " 3430587,\n",
       " 4323845,\n",
       " 4242017,\n",
       " 4190998,\n",
       " 7097022,\n",
       " 6479964,\n",
       " 6300457,\n",
       " 2218348,\n",
       " 4873595,\n",
       " 3927874,\n",
       " 5214734,\n",
       " 2433167,\n",
       " 5278487,\n",
       " 8529681,\n",
       " 6479964,\n",
       " 4443219,\n",
       " 4964806,\n",
       " 4939380,\n",
       " 3183876,\n",
       " 6247306,\n",
       " 2980607,\n",
       " 3721631,\n",
       " 9365921,\n",
       " 3610539,\n",
       " 3133277,\n",
       " 3774758,\n",
       " 3059361,\n",
       " 5763684,\n",
       " 4668680,\n",
       " 2441742,\n",
       " 4359157,\n",
       " 6983516,\n",
       " 3892562,\n",
       " 3818646,\n",
       " 2874571,\n",
       " 3726251,\n",
       " 3222698,\n",
       " 4502369,\n",
       " 4520848,\n",
       " 2763697,\n",
       " 8668273,\n",
       " 4029413,\n",
       " 7205264,\n",
       " 4573311,\n",
       " 3638476,\n",
       " 5692618,\n",
       " 2742026,\n",
       " 5056957,\n",
       " 2927480,\n",
       " 3892562,\n",
       " 4948838,\n",
       " 3911041,\n",
       " 3146918,\n",
       " 3505427,\n",
       " 3666194,\n",
       " 1941163,\n",
       " 3877779,\n",
       " 3379551,\n",
       " 3375150,\n",
       " 3391983,\n",
       " 4317579,\n",
       " 5260226,\n",
       " 3846365,\n",
       " 6957225,\n",
       " 4782301,\n",
       " 3020093,\n",
       " 4590363,\n",
       " 5953189,\n",
       " 2071440,\n",
       " 2241447,\n",
       " 4994817,\n",
       " 3615377,\n",
       " 3146918,\n",
       " 4054035,\n",
       " 2287644,\n",
       " 3049458,\n",
       " 4987661,\n",
       " 3798302,\n",
       " 5116795,\n",
       " 2425070,\n",
       " 4585524,\n",
       " 4373016,\n",
       " 2911311,\n",
       " 8032323,\n",
       " 3408874,\n",
       " 6169054,\n",
       " 2246067,\n",
       " 6747909,\n",
       " 3118754,\n",
       " 6096306,\n",
       " 5652810,\n",
       " 3817000,\n",
       " 4371370,\n",
       " 4836175,\n",
       " 3670814,\n",
       " 3942715,\n",
       " 3216433,\n",
       " 4948838,\n",
       " 3638476,\n",
       " 4280621,\n",
       " 3929520,\n",
       " 3913813,\n",
       " 5113318,\n",
       " 3902465,\n",
       " 3008325,\n",
       " 2800655,\n",
       " 5671289,\n",
       " 2982253,\n",
       " 2795817,\n",
       " 7625661,\n",
       " 6277400,\n",
       " 3142517,\n",
       " 5266492,\n",
       " 4040394,\n",
       " 5648190,\n",
       " 4662538,\n",
       " 3632210,\n",
       " 4123549,\n",
       " 4571884,\n",
       " 3960431,\n",
       " 5038260,\n",
       " 6050108,\n",
       " 3874083,\n",
       " 2986873,\n",
       " 6418042,\n",
       " 5514312,\n",
       " 3430587,\n",
       " 1728655,\n",
       " 1971654,\n",
       " 4870303,\n",
       " 4983829,\n",
       " 2862139,\n",
       " 6369090,\n",
       " 8277265,\n",
       " 3604396,\n",
       " 5348707,\n",
       " 2093615,\n",
       " 1987361,\n",
       " 2975542,\n",
       " 2121132,\n",
       " 3592278,\n",
       " 3590537,\n",
       " 4271381,\n",
       " 2695828,\n",
       " 1871867,\n",
       " 2902071,\n",
       " 4271180,\n",
       " 4089347,\n",
       " 4948838,\n",
       " 6085857,\n",
       " 6174937,\n",
       " 1816430,\n",
       " 2800655,\n",
       " 2075136,\n",
       " 3918634,\n",
       " 2079756,\n",
       " 1864475,\n",
       " 3486024,\n",
       " 2726521,\n",
       " 1885726,\n",
       " 1885726,\n",
       " 2814296,\n",
       " 1871867,\n",
       " 2186010,\n",
       " 3693913,\n",
       " 3513743,\n",
       " 2501799,\n",
       " 3761468,\n",
       " 4798033,\n",
       " 5879178,\n",
       " 5287850,\n",
       " 4002772,\n",
       " 2763697,\n",
       " 2287644,\n",
       " 2056657,\n",
       " 3499883,\n",
       " 6978896,\n",
       " 2287644,\n",
       " 1890346,\n",
       " 2916149,\n",
       " 4003436,\n",
       " 3730871,\n",
       " 5098535,\n",
       " 4957538,\n",
       " 3447218,\n",
       " 3887942,\n",
       " 4567247,\n",
       " 2745218,\n",
       " 6494136,\n",
       " 1890346,\n",
       " 2934409,\n",
       " 3486024,\n",
       " 3604273,\n",
       " 1831213,\n",
       " 6433424,\n",
       " 2925170,\n",
       " 7117349,\n",
       " 4308339,\n",
       " 2551189,\n",
       " 2666464,\n",
       " 3942715,\n",
       " 2056657,\n",
       " 1680147,\n",
       " 2916149,\n",
       " 3942715,\n",
       " 3250417,\n",
       " 4872072,\n",
       " 2075136,\n",
       " 4255212,\n",
       " 2731359,\n",
       " 1890346,\n",
       " 4590363,\n",
       " 6171963,\n",
       " 3019211,\n",
       " 2403138,\n",
       " 1927304,\n",
       " 2869951,\n",
       " 3476785,\n",
       " 1680147,\n",
       " 2384878,\n",
       " 3035599,\n",
       " 2075136,\n",
       " 5179381,\n",
       " 1363695,\n",
       " 4442531,\n",
       " 3937113,\n",
       " 4581123,\n",
       " 4725259,\n",
       " 2075136,\n",
       " 3035599,\n",
       " 3853958,\n",
       " 2287644,\n",
       " 2024319,\n",
       " 4655039,\n",
       " 4063493,\n",
       " 1448698,\n",
       " 3133277,\n",
       " 3486024,\n",
       " 873783,\n",
       " 3976748,\n",
       " 1825670,\n",
       " 2100545,\n",
       " 2806921,\n",
       " 3638476,\n",
       " 3327088,\n",
       " 6202436,\n",
       " 1626558,\n",
       " 4123549,\n",
       " 3676098,\n",
       " 2245361,\n",
       " 2523470,\n",
       " 3823266,\n",
       " 1825670,\n",
       " 3950754,\n",
       " 2500153,\n",
       " 3042528,\n",
       " 1636260,\n",
       " 3194761,\n",
       " 1964262,\n",
       " 3472165,\n",
       " 3541461,\n",
       " 1518456,\n",
       " 2333842,\n",
       " 3482530,\n",
       " 3777068,\n",
       " 4650419,\n",
       " 3927655,\n",
       " 4983965,\n",
       " 2501799,\n",
       " 4747216,\n",
       " 2500153,\n",
       " 3707772,\n",
       " 4664279,\n",
       " 3652335,\n",
       " 1456090,\n",
       " 3499883,\n",
       " 5100626,\n",
       " 1613161,\n",
       " 1885726,\n",
       " 2980607,\n",
       " 5468115,\n",
       " 6282837,\n",
       " 3569180,\n",
       " 4054253,\n",
       " 1428371,\n",
       " 3268896,\n",
       " 2966966,\n",
       " 3350187,\n",
       " 3892562,\n",
       " 2056657,\n",
       " 2026167,\n",
       " 2039824,\n",
       " 7574749,\n",
       " 2431075,\n",
       " 2260145,\n",
       " 1816430,\n",
       " 2985445,\n",
       " 1680147,\n",
       " 3892562,\n",
       " 1553104,\n",
       " 1871867,\n",
       " 2241447,\n",
       " 4705543,\n",
       " 2468033,\n",
       " 2606626,\n",
       " 2501799,\n",
       " 4077352,\n",
       " 3430587,\n",
       " 3527602,\n",
       " 1705556,\n",
       " 1594682,\n",
       " 1825670,\n",
       " 3351833,\n",
       " 3765964,\n",
       " 1888036,\n",
       " 3188050,\n",
       " 6187533,\n",
       " 1716643,\n",
       " 1890346,\n",
       " 1677838,\n",
       " 1056943,\n",
       " 1114010,\n",
       " 3878703,\n",
       " 2038397,\n",
       " 3586012,\n",
       " 1594682,\n",
       " 3153402,\n",
       " 4359375,\n",
       " 1760993,\n",
       " 5482193,\n",
       " 2830020,\n",
       " 1894504,\n",
       " 1590062,\n",
       " 1594682,\n",
       " 2246285,\n",
       " 1881107,\n",
       " 2481892,\n",
       " 3823266]"
      ]
     },
     "execution_count": 210,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions = []\n",
    "for i in range(n):\n",
    "    predictions.append(int(b0 + b[0]*x[i][0] + b[1]*x[i][1] + b[2]*x[i][2] + b[3]*x[i][3]))\n",
    "\n",
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "id": "d1c40144",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Predictions</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9234980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>14706656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8572491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8364821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6401304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540</th>\n",
       "      <td>1594682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541</th>\n",
       "      <td>2246285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>1881107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>2481892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>3823266</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>545 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Predictions\n",
       "0        9234980\n",
       "1       14706656\n",
       "2        8572491\n",
       "3        8364821\n",
       "4        6401304\n",
       "..           ...\n",
       "540      1594682\n",
       "541      2246285\n",
       "542      1881107\n",
       "543      2481892\n",
       "544      3823266\n",
       "\n",
       "[545 rows x 1 columns]"
      ]
     },
     "execution_count": 211,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions = pd.DataFrame(predictions)\n",
    "predictions.rename(columns={0:'Predictions'},inplace=True)\n",
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "id": "eb9a0533",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Predictions</th>\n",
       "      <th>Original</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9234980</td>\n",
       "      <td>13300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>14706656</td>\n",
       "      <td>12250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8572491</td>\n",
       "      <td>12250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8364821</td>\n",
       "      <td>12215000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6401304</td>\n",
       "      <td>11410000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540</th>\n",
       "      <td>1594682</td>\n",
       "      <td>1820000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541</th>\n",
       "      <td>2246285</td>\n",
       "      <td>1767150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>1881107</td>\n",
       "      <td>1750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>2481892</td>\n",
       "      <td>1750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>3823266</td>\n",
       "      <td>1750000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>545 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Predictions  Original\n",
       "0        9234980  13300000\n",
       "1       14706656  12250000\n",
       "2        8572491  12250000\n",
       "3        8364821  12215000\n",
       "4        6401304  11410000\n",
       "..           ...       ...\n",
       "540      1594682   1820000\n",
       "541      2246285   1767150\n",
       "542      1881107   1750000\n",
       "543      2481892   1750000\n",
       "544      3823266   1750000\n",
       "\n",
       "[545 rows x 2 columns]"
      ]
     },
     "execution_count": 212,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions['Original'] = y\n",
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "id": "c543c7ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions['Difference'] = predictions['Original'] - predictions['Predictions']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "id": "b4c98048",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Predictions</th>\n",
       "      <th>Original</th>\n",
       "      <th>Difference</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9234980</td>\n",
       "      <td>13300000</td>\n",
       "      <td>4065020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>14706656</td>\n",
       "      <td>12250000</td>\n",
       "      <td>-2456656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8572491</td>\n",
       "      <td>12250000</td>\n",
       "      <td>3677509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8364821</td>\n",
       "      <td>12215000</td>\n",
       "      <td>3850179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6401304</td>\n",
       "      <td>11410000</td>\n",
       "      <td>5008696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540</th>\n",
       "      <td>1594682</td>\n",
       "      <td>1820000</td>\n",
       "      <td>225318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541</th>\n",
       "      <td>2246285</td>\n",
       "      <td>1767150</td>\n",
       "      <td>-479135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>1881107</td>\n",
       "      <td>1750000</td>\n",
       "      <td>-131107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>2481892</td>\n",
       "      <td>1750000</td>\n",
       "      <td>-731892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>3823266</td>\n",
       "      <td>1750000</td>\n",
       "      <td>-2073266</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>545 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Predictions  Original  Difference\n",
       "0        9234980  13300000     4065020\n",
       "1       14706656  12250000    -2456656\n",
       "2        8572491  12250000     3677509\n",
       "3        8364821  12215000     3850179\n",
       "4        6401304  11410000     5008696\n",
       "..           ...       ...         ...\n",
       "540      1594682   1820000      225318\n",
       "541      2246285   1767150     -479135\n",
       "542      1881107   1750000     -131107\n",
       "543      2481892   1750000     -731892\n",
       "544      3823266   1750000    -2073266\n",
       "\n",
       "[545 rows x 3 columns]"
      ]
     },
     "execution_count": 217,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "id": "e6ebd934",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method DataFrame.info of         price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \\\n",
       "0    13300000  7420         4          2        3      yes        no       no   \n",
       "1    12250000  8960         4          4        4      yes        no       no   \n",
       "2    12250000  9960         3          2        2      yes        no      yes   \n",
       "3    12215000  7500         4          2        2      yes        no      yes   \n",
       "4    11410000  7420         4          1        2      yes       yes      yes   \n",
       "..        ...   ...       ...        ...      ...      ...       ...      ...   \n",
       "540   1820000  3000         2          1        1      yes        no      yes   \n",
       "541   1767150  2400         3          1        1       no        no       no   \n",
       "542   1750000  3620         2          1        1      yes        no       no   \n",
       "543   1750000  2910         3          1        1       no        no       no   \n",
       "544   1750000  3850         3          1        2      yes        no       no   \n",
       "\n",
       "    hotwaterheating airconditioning  parking prefarea furnishingstatus  \n",
       "0                no             yes        2      yes        furnished  \n",
       "1                no             yes        3       no        furnished  \n",
       "2                no              no        2      yes   semi-furnished  \n",
       "3                no             yes        3      yes        furnished  \n",
       "4                no             yes        2       no        furnished  \n",
       "..              ...             ...      ...      ...              ...  \n",
       "540              no              no        2       no      unfurnished  \n",
       "541              no              no        0       no   semi-furnished  \n",
       "542              no              no        0       no      unfurnished  \n",
       "543              no              no        0       no        furnished  \n",
       "544              no              no        0       no      unfurnished  \n",
       "\n",
       "[545 rows x 13 columns]>"
      ]
     },
     "execution_count": 219,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house.info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "id": "55e943e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.PairGrid at 0x12ffcbca6d0>"
      ]
     },
     "execution_count": 220,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 1080x1080 with 42 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.pairplot(house)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d8b6836",
   "metadata": {},
   "source": [
    "Only viable pair is  AREA <-> PRICE  which best fits linear regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "id": "1a40cc94",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x12ffe5ecb80>"
      ]
     },
     "execution_count": 225,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(house['area'], house['price'], color = \"black\",marker = \"*\", s = 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d88325c9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
关于这个算法
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
house = pd.read_csv("Housing.csv")
house.isnull().sum()
price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64
house.head(8)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea furnishingstatus
0 13300000 7420 4 2 3 yes no no no yes 2 yes furnished
1 12250000 8960 4 4 4 yes no no no yes 3 no furnished
2 12250000 9960 3 2 2 yes no yes no no 2 yes semi-furnished
3 12215000 7500 4 2 2 yes no yes no yes 3 yes furnished
4 11410000 7420 4 1 2 yes yes yes no yes 2 no furnished
5 10850000 7500 3 3 1 yes no yes no yes 2 yes semi-furnished
6 10150000 8580 4 3 4 yes no no no yes 2 yes semi-furnished
7 10150000 16200 5 3 2 yes no no no no 0 no unfurnished

Simple Regression

  • predict 'price' using 'area'
  • y = b0 + b1 * x
data = house[['price','area']]
data.head(4)
price area
0 13300000 7420
1 12250000 8960
2 12250000 9960
3 12215000 7500
y = house['price'].to_numpy()
x = house['area'].to_numpy()

x.shape
(545,)
plt.scatter(x, y, color = "b",marker = "*", s = 10)
&lt;matplotlib.collections.PathCollection at 0x12ffccee160&gt;
y_mean = np.mean(y)
x_mean = np.mean(x)

num = 0   # numerator
den = 0   # denominator

n = np.size(y)
for i in range(n):
    num += (x[i] - x_mean) * (y[i] - y_mean)
    den += (x[i] - x_mean)**2
    
b1 = num / den
b0 = y_mean - b1*x_mean

print(b0,b1)
2387308.482396433 461.9748942727828
prediction = b0 + b1*x
for i in range(n):
    prediction[i] = prediction[i]//1
plt.scatter(x, y, color = "r",marker = "o", s = 10)
plt.plot(x, prediction, color = "b")
[&lt;matplotlib.lines.Line2D at 0x12ffccc0c40&gt;]

Multiple regression

  • y = b0 + b1 * x1 + b2 * x2 + ...
house.head(5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea furnishingstatus
0 13300000 7420 4 2 3 yes no no no yes 2 yes furnished
1 12250000 8960 4 4 4 yes no no no yes 3 no furnished
2 12250000 9960 3 2 2 yes no yes no no 2 yes semi-furnished
3 12215000 7500 4 2 2 yes no yes no yes 3 yes furnished
4 11410000 7420 4 1 2 yes yes yes no yes 2 no furnished
# dependent -&gt; price
y = house['price'].to_numpy()

# -&gt; independent variables
x = house[['area','bedrooms','bathrooms','stories']].to_numpy()

n = np.size(y)
x
array([[7420,    4,    2,    3],
       [8960,    4,    4,    4],
       [9960,    3,    2,    2],
       ...,
       [3620,    2,    1,    1],
       [2910,    3,    1,    1],
       [3850,    3,    1,    2]], dtype=int64)
y_mean = np.mean(y)
x_mean = [0,0,0,0]

for i in range(n):
    for j in range(4):
        x_mean[j] += x[i][j]

for i in range(4):
    x_mean[i] /= n

x_mean
[5150.54128440367, 2.9651376146788992, 1.2862385321100918, 1.8055045871559634]
b = []
for j in range(4):
    num = 0    # numerator
    den = 0    # denominator
    
    for i in range(n):
        num += (x[i][j] - x_mean[j]) * (y[i] - y_mean)
        den += (x[i][j] - x_mean[j])**2
    b.append(num / den)

b
[461.9748942727828, 928788.1189320377, 1926558.8901060484, 907116.9031974602]
# y = b0 + b1x1 + b2x2 + b3x3 + ...
# finding b0

b0 = y_mean
for i in range(4):
    b0 -= b[i]*x_mean[i]
b0
-4482494.113759189
predictions = []
for i in range(n):
    predictions.append(int(b0 + b[0]*x[i][0] + b[1]*x[i][1] + b[2]*x[i][2] + b[3]*x[i][3]))

predictions
[9234980,
 14706656,
 8572491,
 8364821,
 6401304,
 8455475,
 12604547,
 15239350,
 6715447,
 8441811,
 8142731,
 9598418,
 7925945,
 6516922,
 7574626,
 5745300,
 7949044,
 9712242,
 6096306,
 6937100,
 4040394,
 6369535,
 4856443,
 6077827,
 8036600,
 7921325,
 8557305,
 5237573,
 9501498,
 7440871,
 9238717,
 7092721,
 7154447,
 8651151,
 7062147,
 9019279,
 8334834,
 10872017,
 6630746,
 9486093,
 5070598,
 8723615,
 8779052,
 9486093,
 9486093,
 7650188,
 8557305,
 6907931,
 5957713,
 6501198,
 9222548,
 6780887,
 9486093,
 8164626,
 6743071,
 4816512,
 8258443,
 10872017,
 10262210,
 8557305,
 6743071,
 4311094,
 7782733,
 8745287,
 6300115,
 8073558,
 6306826,
 6621311,
 3909395,
 10485286,
 5819121,
 9486093,
 6178010,
 7905701,
 3911041,
 6868022,
 7844217,
 7881175,
 3770802,
 7650188,
 4816512,
 5819121,
 7914841,
 8557305,
 3781688,
 8689631,
 6033152,
 2966966,
 4560780,
 11719101,
 4354537,
 3327088,
 8002935,
 6390324,
 9486093,
 6772435,
 5295320,
 4094185,
 7927373,
 6652417,
 6113139,
 5492641,
 8326317,
 7811879,
 5604966,
 5937783,
 6510656,
 5917658,
 5377366,
 7955973,
 4186580,
 6819433,
 8744078,
 5581744,
 3350187,
 4833345,
 6251731,
 4682758,
 4103424,
 4380609,
 4158861,
 5385203,
 6882084,
 8281666,
 8799841,
 8344354,
 4445286,
 7881175,
 5492641,
 8246012,
 3355025,
 7480074,
 5354049,
 5169259,
 4371370,
 8557305,
 7394674,
 5117014,
 5261654,
 4075706,
 8464910,
 7976762,
 9750746,
 8953394,
 5144733,
 6168771,
 5059494,
 6512083,
 5889940,
 7041927,
 4417365,
 5006140,
 6396903,
 7422297,
 5657430,
 5882151,
 4325172,
 6201393,
 4824105,
 4519325,
 7656548,
 5769826,
 7949044,
 4290524,
 7071073,
 6043842,
 4740950,
 4260401,
 4941464,
 7921325,
 5604966,
 5881566,
 5925252,
 6441359,
 3800167,
 9427363,
 5073572,
 3932494,
 4410638,
 5440301,
 6071780,
 5370882,
 3619997,
 4824105,
 5357146,
 3430587,
 6387008,
 3955592,
 3758371,
 1844149,
 6484460,
 6987794,
 4186580,
 2426237,
 6829200,
 8863878,
 4688285,
 5264751,
 4792489,
 3984957,
 4132789,
 3936450,
 2112094,
 5468115,
 3341166,
 4047987,
 2888212,
 3430587,
 4323845,
 4242017,
 4190998,
 7097022,
 6479964,
 6300457,
 2218348,
 4873595,
 3927874,
 5214734,
 2433167,
 5278487,
 8529681,
 6479964,
 4443219,
 4964806,
 4939380,
 3183876,
 6247306,
 2980607,
 3721631,
 9365921,
 3610539,
 3133277,
 3774758,
 3059361,
 5763684,
 4668680,
 2441742,
 4359157,
 6983516,
 3892562,
 3818646,
 2874571,
 3726251,
 3222698,
 4502369,
 4520848,
 2763697,
 8668273,
 4029413,
 7205264,
 4573311,
 3638476,
 5692618,
 2742026,
 5056957,
 2927480,
 3892562,
 4948838,
 3911041,
 3146918,
 3505427,
 3666194,
 1941163,
 3877779,
 3379551,
 3375150,
 3391983,
 4317579,
 5260226,
 3846365,
 6957225,
 4782301,
 3020093,
 4590363,
 5953189,
 2071440,
 2241447,
 4994817,
 3615377,
 3146918,
 4054035,
 2287644,
 3049458,
 4987661,
 3798302,
 5116795,
 2425070,
 4585524,
 4373016,
 2911311,
 8032323,
 3408874,
 6169054,
 2246067,
 6747909,
 3118754,
 6096306,
 5652810,
 3817000,
 4371370,
 4836175,
 3670814,
 3942715,
 3216433,
 4948838,
 3638476,
 4280621,
 3929520,
 3913813,
 5113318,
 3902465,
 3008325,
 2800655,
 5671289,
 2982253,
 2795817,
 7625661,
 6277400,
 3142517,
 5266492,
 4040394,
 5648190,
 4662538,
 3632210,
 4123549,
 4571884,
 3960431,
 5038260,
 6050108,
 3874083,
 2986873,
 6418042,
 5514312,
 3430587,
 1728655,
 1971654,
 4870303,
 4983829,
 2862139,
 6369090,
 8277265,
 3604396,
 5348707,
 2093615,
 1987361,
 2975542,
 2121132,
 3592278,
 3590537,
 4271381,
 2695828,
 1871867,
 2902071,
 4271180,
 4089347,
 4948838,
 6085857,
 6174937,
 1816430,
 2800655,
 2075136,
 3918634,
 2079756,
 1864475,
 3486024,
 2726521,
 1885726,
 1885726,
 2814296,
 1871867,
 2186010,
 3693913,
 3513743,
 2501799,
 3761468,
 4798033,
 5879178,
 5287850,
 4002772,
 2763697,
 2287644,
 2056657,
 3499883,
 6978896,
 2287644,
 1890346,
 2916149,
 4003436,
 3730871,
 5098535,
 4957538,
 3447218,
 3887942,
 4567247,
 2745218,
 6494136,
 1890346,
 2934409,
 3486024,
 3604273,
 1831213,
 6433424,
 2925170,
 7117349,
 4308339,
 2551189,
 2666464,
 3942715,
 2056657,
 1680147,
 2916149,
 3942715,
 3250417,
 4872072,
 2075136,
 4255212,
 2731359,
 1890346,
 4590363,
 6171963,
 3019211,
 2403138,
 1927304,
 2869951,
 3476785,
 1680147,
 2384878,
 3035599,
 2075136,
 5179381,
 1363695,
 4442531,
 3937113,
 4581123,
 4725259,
 2075136,
 3035599,
 3853958,
 2287644,
 2024319,
 4655039,
 4063493,
 1448698,
 3133277,
 3486024,
 873783,
 3976748,
 1825670,
 2100545,
 2806921,
 3638476,
 3327088,
 6202436,
 1626558,
 4123549,
 3676098,
 2245361,
 2523470,
 3823266,
 1825670,
 3950754,
 2500153,
 3042528,
 1636260,
 3194761,
 1964262,
 3472165,
 3541461,
 1518456,
 2333842,
 3482530,
 3777068,
 4650419,
 3927655,
 4983965,
 2501799,
 4747216,
 2500153,
 3707772,
 4664279,
 3652335,
 1456090,
 3499883,
 5100626,
 1613161,
 1885726,
 2980607,
 5468115,
 6282837,
 3569180,
 4054253,
 1428371,
 3268896,
 2966966,
 3350187,
 3892562,
 2056657,
 2026167,
 2039824,
 7574749,
 2431075,
 2260145,
 1816430,
 2985445,
 1680147,
 3892562,
 1553104,
 1871867,
 2241447,
 4705543,
 2468033,
 2606626,
 2501799,
 4077352,
 3430587,
 3527602,
 1705556,
 1594682,
 1825670,
 3351833,
 3765964,
 1888036,
 3188050,
 6187533,
 1716643,
 1890346,
 1677838,
 1056943,
 1114010,
 3878703,
 2038397,
 3586012,
 1594682,
 3153402,
 4359375,
 1760993,
 5482193,
 2830020,
 1894504,
 1590062,
 1594682,
 2246285,
 1881107,
 2481892,
 3823266]
predictions = pd.DataFrame(predictions)
predictions.rename(columns={0:'Predictions'},inplace=True)
predictions
Predictions
0 9234980
1 14706656
2 8572491
3 8364821
4 6401304
... ...
540 1594682
541 2246285
542 1881107
543 2481892
544 3823266

545 rows × 1 columns

predictions['Original'] = y
predictions
Predictions Original
0 9234980 13300000
1 14706656 12250000
2 8572491 12250000
3 8364821 12215000
4 6401304 11410000
... ... ...
540 1594682 1820000
541 2246285 1767150
542 1881107 1750000
543 2481892 1750000
544 3823266 1750000

545 rows × 2 columns

predictions['Difference'] = predictions['Original'] - predictions['Predictions']
predictions
Predictions Original Difference
0 9234980 13300000 4065020
1 14706656 12250000 -2456656
2 8572491 12250000 3677509
3 8364821 12215000 3850179
4 6401304 11410000 5008696
... ... ... ...
540 1594682 1820000 225318
541 2246285 1767150 -479135
542 1881107 1750000 -131107
543 2481892 1750000 -731892
544 3823266 1750000 -2073266

545 rows × 3 columns

house.info
&lt;bound method DataFrame.info of         price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0    13300000  7420         4          2        3      yes        no       no   
1    12250000  8960         4          4        4      yes        no       no   
2    12250000  9960         3          2        2      yes        no      yes   
3    12215000  7500         4          2        2      yes        no      yes   
4    11410000  7420         4          1        2      yes       yes      yes   
..        ...   ...       ...        ...      ...      ...       ...      ...   
540   1820000  3000         2          1        1      yes        no      yes   
541   1767150  2400         3          1        1       no        no       no   
542   1750000  3620         2          1        1      yes        no       no   
543   1750000  2910         3          1        1       no        no       no   
544   1750000  3850         3          1        2      yes        no       no   

    hotwaterheating airconditioning  parking prefarea furnishingstatus  
0                no             yes        2      yes        furnished  
1                no             yes        3       no        furnished  
2                no              no        2      yes   semi-furnished  
3                no             yes        3      yes        furnished  
4                no             yes        2       no        furnished  
..              ...             ...      ...      ...              ...  
540              no              no        2       no      unfurnished  
541              no              no        0       no   semi-furnished  
542              no              no        0       no      unfurnished  
543              no              no        0       no        furnished  
544              no              no        0       no      unfurnished  

[545 rows x 13 columns]&gt;
sns.pairplot(house)
&lt;seaborn.axisgrid.PairGrid at 0x12ffcbca6d0&gt;

Only viable pair is AREA <-> PRICE which best fits linear regression

plt.scatter(house['area'], house['price'], color = "black",marker = "*", s = 10)
&lt;matplotlib.collections.PathCollection at 0x12ffe5ecb80&gt;