#### Similarity

H
```{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"outputs": [],
"source": [
"import pandas as pd\n",
"from math import sqrt, pow\n",
"import matplotlib.pyplot as plt\n",
" \n",
"def euclidean_distance(x,y):\n",
"    return sqrt(sum(pow(a-b,2) for a, b in zip(x, y)))\n",
"\n",
"def manhattan_distance(x,y):\n",
"    return sum(abs(a-b) for a,b in zip(x,y))\n",
"\n",
]
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 720x504 with 1 Axes>"
]
},
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Draw Graph\n",
"x = data[data['Gender']=='Male'].plot(kind='scatter', x= 'Height',y = 'Weight',color='blue',figsize=(10,7))\n",
"data[data['Gender']=='Female'].plot(kind='scatter',x= 'Height',y = 'Weight',color='pink',figsize=(10,7) ,ax=x)\n",
"\n",
"plt.xlabel('Height')\n",
"plt.ylabel('weight')\n",
"plt.title('Analyze Height and Weight of men and women')\n",
"plt.legend(labels=['Males','Females'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = 0.00010071759753209523\n",
"------------------------------------------------------\n",
"This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = 1.0518244744130195e-06\n"
]
}
],
"source": [
"# Retrieving all heights and weights\n",
"heights = data['Height']\n",
"weights = data['Weight']\n",
"\n",
"# Calculating Euclidean Distance \n",
"euclidean_result = euclidean_distance(heights, weights)\n",
"\n",
"# Calculating Manhattan Distance \n",
"manhattan_result = manhattan_distance(heights, weights)\n",
"\n",
"# To get value between 0 and 1 \n",
"euclidean_result = 1 / (1 + euclidean_result)\n",
"manhattan_result = 1 / (1 + manhattan_result)\n",
"\n",
"# Checking if the similarity value is nearest to 0 or 1\n",
"# Eucliean Distance\n",
"if round(euclidean_result) == 0:\n",
"    print(f\"This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = {euclidean_result}\")\n",
"else:\n",
"    print(f\"This is similarity using Euclidean Distance between heights and weights for this dataset with similarity value = {euclidean_result}\")\n",
"\n",
"print('------------------------------------------------------')\n",
"\n",
"# Manhattan Distance\n",
"if round(manhattan_result) == 0:\n",
"    print(f\"This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = {manhattan_result}\")\n",
"else:\n",
"    print(f\"This is similarity using Manhattan Distance between heights and weights for this dataset with similarity value = {manhattan_result}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": []
}
],
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
```
``````import pandas as pd
from math import sqrt, pow
import matplotlib.pyplot as plt

def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between two numeric sequences.

    Elements are paired positionally with ``zip``, so when one input is
    longer than the other the surplus elements are silently ignored.

    Parameters
    ----------
    x, y : iterable of numbers
        The two sequences to compare.

    Returns
    -------
    float
        Square root of the sum of squared element-wise differences;
        ``0.0`` when there are no pairs to compare.
    """
    squared_diffs = (pow(a - b, 2) for a, b in zip(x, y))
    return sqrt(sum(squared_diffs))

def manhattan_distance(x, y):
    """Return the Manhattan (L1) distance between two numeric sequences.

    Elements are paired positionally with ``zip``, so when one input is
    longer than the other the surplus elements are silently ignored.

    Parameters
    ----------
    x, y : iterable of numbers
        The two sequences to compare.

    Returns
    -------
    number
        Sum of the absolute element-wise differences; ``0`` when there
        are no pairs to compare.
    """
    absolute_diffs = (abs(a - b) for a, b in zip(x, y))
    return sum(absolute_diffs)

``````# Draw Graph
# NOTE(review): `data` is not created in any cell visible here; it is indexed
# below by 'Gender', 'Height' and 'Weight', so it must already be loaded.

# Split the rows by gender so each group is drawn in its own colour.
male_rows = data[data['Gender'] == 'Male']
female_rows = data[data['Gender'] == 'Female']

# The object returned by the first scatter call is passed as `ax` to the
# second call so both groups are drawn on the same axes.
x = male_rows.plot(
    kind='scatter', x='Height', y='Weight', color='blue', figsize=(10, 7)
)
female_rows.plot(
    kind='scatter', x='Height', y='Weight', color='pink', figsize=(10, 7), ax=x
)

plt.xlabel('Height')
plt.ylabel('weight')
plt.title('Analyze Height and Weight of men and women')
# Labels are given in plotting order: male points first, then female points.
plt.legend(labels=['Males', 'Females'])
plt.show()``````
``````# Retrieving all heights and weights
# NOTE(review): `data` is not created in any cell visible here; confirm it is
# loaded before this cell runs on a fresh kernel.
heights = data['Height']
weights = data['Weight']


def _report_similarity(metric_name, similarity):
    """Print a one-line verdict for a similarity score.

    Parameters
    ----------
    metric_name : str
        Name inserted before the word "Distance" in the message
        (e.g. ``"Euclidean"``).
    similarity : float
        Similarity score to report.

    The verdict is "no similarity" when ``round(similarity)`` equals 0 and
    "similarity" otherwise.
    """
    verdict = "no similarity" if round(similarity) == 0 else "similarity"
    print(f"This is {verdict} using {metric_name} Distance between heights and weights for this dataset with similarity value = {similarity}")


# Convert each distance d into a similarity score with 1 / (1 + d):
# a distance of 0 gives 1, and larger distances give values closer to 0.
euclidean_result = 1 / (1 + euclidean_distance(heights, weights))
manhattan_result = 1 / (1 + manhattan_distance(heights, weights))

# Checking if the similarity value is nearest to 0 or 1
# Euclidean Distance
_report_similarity("Euclidean", euclidean_result)

print('------------------------------------------------------')

# Manhattan Distance
_report_similarity("Manhattan", manhattan_result)

``````
```This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = 0.00010071759753209523
------------------------------------------------------
This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = 1.0518244744130195e-06
```