from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
import pandas as pd
# Load the property listings CSV (inmuebles_bogota.csv) from the mounted Drive folder.
inmuebles = pd.read_csv('/content/drive/MyDrive/inmersion_datos/inmuebles_bogota.csv')
# Preview the first rows to check that the file parsed as expected.
inmuebles.head()
Tipo | Descripcion | Habitaciones | Baños | Área | Barrio | UPZ | Valor | |
---|---|---|---|---|---|---|---|---|
0 | Apartamento | Apartamento en venta en Zona Noroccidental | 3 | 2 | 70 | Zona Noroccidental | CHAPINERO: Pardo Rubio + Chapinero | $ 360.000.000 |
1 | Casa | Casa en venta en Castilla | 4 | 3 | 170 | Castilla | KENNEDY: Castilla + Bavaria | $ 670.000.000 |
2 | Apartamento | Apartamento en venta en Chico Reservado | 3 | 3 | 144 | Chico Reservado | CHAPINERO: Chicó Lago + El Refugio | $ 1.120.000.000 |
3 | Apartamento | Apartamento en venta en Usaquén | 3 | 2 | 154 | Usaquén | Usaquén | $ 890.000.000 |
4 | Apartamento | Apartamento en venta en Bella Suiza | 2 | 3 | 128 | Bella Suiza | USAQUÉN: Country Club + Santa Bárbara | $ 970.000.000 |
# Map the accented column names to ASCII equivalents.
columnas = {'Baños':'Banos','Área':'Area'}
# rename() returns a new DataFrame, so rebind the name to keep the change.
inmuebles = inmuebles.rename(columns=columnas)
# Show one random row to confirm the new column names.
inmuebles.sample()
Tipo | Descripcion | Habitaciones | Banos | Area | Barrio | UPZ | Valor | Moneda | Precio | Precio_Millon | |
---|---|---|---|---|---|---|---|---|---|---|---|
9095 | Apartamento | Apartamento en venta en Tintala | 3 | 2 | 89 | Tintala | Tintala | $ 388.900.000 | $ | 388900000 | 388.9 |
Aula 2
inmuebles.sample(5)
Tipo | Descripcion | Habitaciones | Baños | Área | Barrio | UPZ | Valor | |
---|---|---|---|---|---|---|---|---|
7434 | Apartamento | Apartamento en venta en Tunjuelito | 1 | 1 | 27 | Tunjuelito | Tunjuelito | $ 181.000.000 |
3351 | Apartamento | Apartamento en venta en Villemar | 3 | 2 | 59 | Villemar | Fontibón | $ 205.000.000 |
503 | Apartamento | Apartamento en venta en San Patricio | 2 | 2 | 72 | San Patricio | USAQUÉN: Country Club + Santa Bárbara | $ 450.000.000 |
4281 | Apartamento | Apartamento en venta en Usaquén | 2 | 2 | 58 | Usaquén | Usaquén | $ 330.000.000 |
8446 | Casa | Casa en venta en Suba | 5 | 3 | 220 | Suba | Suba | $ 900.000.000 |
inmuebles.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 9520 entries, 0 to 9519 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Tipo 9520 non-null object 1 Descripcion 9520 non-null object 2 Habitaciones 9520 non-null int64 3 Baños 9520 non-null int64 4 Área 9520 non-null int64 5 Barrio 9520 non-null object 6 UPZ 9478 non-null object 7 Valor 9520 non-null object dtypes: int64(3), object(5) memory usage: 595.1+ KB
inmuebles.Valor[0]
'$ 360.000.000'
type(inmuebles.Valor[0])
str
inmuebles.Valor[0]+inmuebles.Valor[1]
'$ 360.000.000$ 670.000.000'
inmuebles.Valor[0]+inmuebles.Valor[1]+inmuebles.Valor[:5]
0 $ 360.000.000$ 670.000.000$ 360.000.000 1 $ 360.000.000$ 670.000.000$ 670.000.000 2 $ 360.000.000$ 670.000.000$ 1.120.000.000 3 $ 360.000.000$ 670.000.000$ 890.000.000 4 $ 360.000.000$ 670.000.000$ 970.000.000 Name: Valor, dtype: object
inmuebles.Valor[0].split()
['$', '360.000.000']
inmuebles.Valor.str.split()
0 [$, 360.000.000] 1 [$, 670.000.000] 2 [$, 1.120.000.000] 3 [$, 890.000.000] 4 [$, 970.000.000] ... 9515 [$, 290.000.000] 9516 [$, 670.000.000] 9517 [$, 900.000.000] 9518 [$, 335.000.000] 9519 [$, 144.990.000] Name: Valor, Length: 9520, dtype: object
type(inmuebles.Valor.str.split())
pandas.core.series.Series
inmuebles.Valor.str.split(expand=True)
0 | 1 | |
---|---|---|
0 | $ | 360.000.000 |
1 | $ | 670.000.000 |
2 | $ | 1.120.000.000 |
3 | $ | 890.000.000 |
4 | $ | 970.000.000 |
... | ... | ... |
9515 | $ | 290.000.000 |
9516 | $ | 670.000.000 |
9517 | $ | 900.000.000 |
9518 | $ | 335.000.000 |
9519 | $ | 144.990.000 |
9520 rows × 2 columns
type(inmuebles.Valor.str.split(expand=True))
pandas.core.frame.DataFrame
# Split each 'Valor' string on whitespace into separate columns:
# column 0 holds the currency symbol and column 1 the dot-separated amount.
valor = inmuebles.Valor.str.split(expand=True)
inmuebles['Moneda'] = valor[0]
inmuebles['Precio'] = valor[1]
# 'Precio' is still text here; the separators are stripped in a later cell.
inmuebles.sample(3)
Tipo | Descripcion | Habitaciones | Baños | Área | Barrio | UPZ | Valor | Moneda | Precio | |
---|---|---|---|---|---|---|---|---|---|---|
1451 | Apartamento | Apartamento en venta en Cedritos | 2 | 3 | 97 | Cedritos | Usaquén | $ 432.990.000 | $ | 432.990.000 |
8184 | Apartamento | Apartamento en venta en Suba | 3 | 2 | 75 | Suba | Suba | $ 350.000.000 | $ | 350.000.000 |
3091 | Bodega | Bodega en venta en Puente Aranda | 1 | 4 | 1200 | Puente Aranda | PUENTE ARANDA: Zona Industrial + Puente Aranda | $ 2.800.000.000 | $ | 2.800.000.000 |
inmuebles.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 9520 entries, 0 to 9519 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Tipo 9520 non-null object 1 Descripcion 9520 non-null object 2 Habitaciones 9520 non-null int64 3 Baños 9520 non-null int64 4 Área 9520 non-null int64 5 Barrio 9520 non-null object 6 UPZ 9478 non-null object 7 Valor 9520 non-null object 8 Moneda 9520 non-null object 9 Precio 9520 non-null object dtypes: int64(3), object(7) memory usage: 743.9+ KB
# Strip the thousands separators so the amount can later be parsed as a number.
# The dot must be matched literally: with regex=True, pandas >= 2.0 interprets
# '.' as "any character" and would erase the whole string (older versions
# special-cased single-character patterns, which is why this used to work).
inmuebles['Precio'] = inmuebles['Precio'].str.replace('.','',regex=False)
# Display the cleaned amounts next to the neighbourhood for a quick visual check.
inmuebles[['Precio','Barrio']]
Precio | Barrio | |
---|---|---|
0 | 360000000 | Zona Noroccidental |
1 | 670000000 | Castilla |
2 | 1120000000 | Chico Reservado |
3 | 890000000 | Usaquén |
4 | 970000000 | Bella Suiza |
... | ... | ... |
9515 | 290000000 | Kennedy |
9516 | 670000000 | Parque Central Bavaria |
9517 | 900000000 | Restrepo |
9518 | 335000000 | Engativa |
9519 | 144990000 | Tintala |
9520 rows × 2 columns
inmuebles.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 9520 entries, 0 to 9519 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Tipo 9520 non-null object 1 Descripcion 9520 non-null object 2 Habitaciones 9520 non-null int64 3 Baños 9520 non-null int64 4 Área 9520 non-null int64 5 Barrio 9520 non-null object 6 UPZ 9478 non-null object 7 Valor 9520 non-null object 8 Moneda 9520 non-null object 9 Precio 9520 non-null object dtypes: int64(3), object(7) memory usage: 743.9+ KB
# Convert the cleaned price text to float and express it in millions.
# Only the new column is numeric; 'Precio' itself is left as text.
inmuebles['Precio_Millon'] = inmuebles.Precio.astype('float')/1000000
# Re-check the dtypes to confirm the new column is float64.
inmuebles.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 9520 entries, 0 to 9519 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Tipo 9520 non-null object 1 Descripcion 9520 non-null object 2 Habitaciones 9520 non-null int64 3 Baños 9520 non-null int64 4 Área 9520 non-null int64 5 Barrio 9520 non-null object 6 UPZ 9478 non-null object 7 Valor 9520 non-null object 8 Moneda 9520 non-null object 9 Precio 9520 non-null object 10 Precio_Millon 9520 non-null float64 dtypes: float64(1), int64(3), object(7) memory usage: 818.2+ KB
inmuebles.describe()
Habitaciones | Banos | Area | Precio_Millon | |
---|---|---|---|---|
count | 9520.000000 | 9520.000000 | 9520.000000 | 9520.000000 |
mean | 3.072479 | 2.447899 | 146.664706 | 602.062354 |
std | 2.049856 | 1.255314 | 1731.379622 | 704.602062 |
min | 1.000000 | 0.000000 | 2.000000 | 60.000000 |
25% | 2.000000 | 2.000000 | 57.000000 | 250.000000 |
50% | 3.000000 | 2.000000 | 80.000000 | 409.182500 |
75% | 3.000000 | 3.000000 | 135.000000 | 680.000000 |
max | 110.000000 | 9.000000 | 166243.000000 | 16000.000000 |
# Render floats with two decimals in every DataFrame/Series display.
pd.options.display.precision = 2
pd.options.display.float_format = '{:.2f}'.format
inmuebles.describe()
Habitaciones | Banos | Area | Precio_Millon | |
---|---|---|---|---|
count | 9520.00 | 9520.00 | 9520.00 | 9520.00 |
mean | 3.07 | 2.45 | 146.66 | 602.06 |
std | 2.05 | 1.26 | 1731.38 | 704.60 |
min | 1.00 | 0.00 | 2.00 | 60.00 |
25% | 2.00 | 2.00 | 57.00 | 250.00 |
50% | 3.00 | 2.00 | 80.00 | 409.18 |
75% | 3.00 | 3.00 | 135.00 | 680.00 |
max | 110.00 | 9.00 | 166243.00 | 16000.00 |
inmuebles.loc[inmuebles.Habitaciones == 110]
Tipo | Descripcion | Habitaciones | Banos | Area | Barrio | UPZ | Valor | Moneda | Precio | Precio_Millon | |
---|---|---|---|---|---|---|---|---|---|---|---|
897 | Casa | Casa en venta en La Uribe | 110 | 2 | 110 | La Uribe | Usaquén | $ 480.000.000 | $ | 480000000 | 480.00 |
inmuebles.loc[inmuebles.Area == 2]
Tipo | Descripcion | Habitaciones | Banos | Area | Barrio | UPZ | Valor | Moneda | Precio | Precio_Millon | |
---|---|---|---|---|---|---|---|---|---|---|---|
8401 | Apartamento | Apartamento en venta en San Cristobal | 3 | 1 | 2 | San Cristobal | Sosiego | $ 180.000.000 | $ | 180000000 | 180.00 |
inmuebles['Precio_Millon'].plot.hist(bins=100)
<Axes: ylabel='Frequency'>
inmuebles['Precio_Millon'].plot.hist(bins=10)
<Axes: ylabel='Frequency'>
import matplotlib.pyplot as plt
import seaborn as sns
# Open a new 10x8 figure; the seaborn call below draws on this current figure.
plt.figure(figsize=(10,8))
# Histogram of prices in millions, using seaborn's default binning.
grafica = sns.histplot(data=inmuebles, x='Precio_Millon')
plt.show()
# Same histogram as before, now with a title and a restricted x-range.
plt.figure(figsize=(10,8))
grafica = sns.histplot(data=inmuebles, x='Precio_Millon')
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
# Only show prices between 50 and 2000 million; the data itself is not filtered.
plt.xlim((50,2000))
plt.show()
# Shorter figure (10x6) and a tighter x-range than the previous plot.
plt.figure(figsize=(10,6))
grafica = sns.histplot(data=inmuebles, x='Precio_Millon')
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
# Zoom the axis to 50-1000 million; rows outside the range are only hidden.
plt.xlim((50,1000))
plt.show()
plt.figure(figsize=(10,6))
# kde=True overlays a kernel density estimate curve on the histogram bars.
grafica = sns.histplot(data=inmuebles, x='Precio_Millon', kde=True)
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
plt.xlim((50,1000))
plt.show()
plt.figure(figsize=(10,6))
# hue='Tipo' draws one histogram/KDE per property type, distinguished by colour.
grafica = sns.histplot(data=inmuebles, x='Precio_Millon', kde=True, hue='Tipo')
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
plt.xlim((50,1000))
# Save before show(): the figure may be cleared once it has been displayed.
# NOTE(review): the filename is spelled 'inmuealbes' (likely meant 'inmuebles'),
# and the following plotting cells write to this same path, overwriting it.
plt.savefig('/content/drive/MyDrive/inmersion_datos/valor_inmuealbes.png',format='png')
plt.show()
plt.figure(figsize=(10,6))
grafica = sns.histplot(data=inmuebles, x='Precio_Millon', kde=True, hue='Tipo')
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
plt.xlim((50,1000))
# Cap the count axis at 100 — presumably so the less frequent types become visible.
plt.ylim(0,100)
# NOTE(review): same misspelled path ('inmuealbes') as the other plotting
# cells, so this call replaces any image previously saved there.
plt.savefig('/content/drive/MyDrive/inmersion_datos/valor_inmuealbes.png',format='png')
plt.show()
plt.figure(figsize=(10,6))
grafica = sns.histplot(data=inmuebles, x='Precio_Millon', kde=True, hue='Tipo')
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
plt.xlim((50,1000))
# Tighter count cap (20) than the previous cell; taller bars are clipped.
plt.ylim(0,20)
# NOTE(review): writes to the same misspelled path ('inmuealbes') used by the
# earlier cells; when run in order, this is the version that remains on disk.
plt.savefig('/content/drive/MyDrive/inmersion_datos/valor_inmuealbes.png',format='png')
plt.show()
Desafío
# Property types to compare in the challenge plot.
tipos = ['Casa', 'Apartamento', 'Lote']
# Keep only the rows whose type appears in `tipos`. isin() follows the list,
# so adding or removing a type needs no change to the filter expression.
inmuebles_filtrados = inmuebles[inmuebles.Tipo.isin(tipos)]

plt.figure(figsize=(10,6))
# One histogram + KDE curve per selected property type.
grafica = sns.histplot(data=inmuebles_filtrados, x='Precio_Millon',kde=True, hue='Tipo')
grafica.set_title('Distribución de Valores de los inmuebles en Bogotá')
plt.xlabel('Precio en millones de pesos Colombianos')
plt.ylabel('Cantidad de Inmuebles')
# Zoom to the 50-1000 million range and cap the count axis at 500.
plt.xlim((50,1000))
plt.ylim(0,500)
plt.show()
! pwd
/content
pip install nbconvert
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Requirement already satisfied: nbconvert in /usr/local/lib/python3.9/dist-packages (6.5.4) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (2.1.2) Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (0.4) Requirement already satisfied: defusedxml in /usr/local/lib/python3.9/dist-packages (from nbconvert) (0.7.1) Requirement already satisfied: pygments>=2.4.1 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (2.14.0) Requirement already satisfied: tinycss2 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (1.2.1) Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (1.5.0) Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.9/dist-packages (from nbconvert) (0.2.2) Requirement already satisfied: lxml in /usr/local/lib/python3.9/dist-packages (from nbconvert) (4.9.2) Requirement already satisfied: nbformat>=5.1 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (5.8.0) Requirement already satisfied: bleach in /usr/local/lib/python3.9/dist-packages (from nbconvert) (6.0.0) Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (4.11.2) Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from nbconvert) (23.0) Requirement already satisfied: traitlets>=5.0 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (5.7.1) Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (0.7.2) Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (0.8.4) Requirement already satisfied: jupyter-core>=4.7 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (5.3.0) Requirement 
already satisfied: jinja2>=3.0 in /usr/local/lib/python3.9/dist-packages (from nbconvert) (3.1.2) Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.9/dist-packages (from jupyter-core>=4.7->nbconvert) (3.1.1) Requirement already satisfied: jupyter-client>=6.1.12 in /usr/local/lib/python3.9/dist-packages (from nbclient>=0.5.0->nbconvert) (6.1.12) Requirement already satisfied: fastjsonschema in /usr/local/lib/python3.9/dist-packages (from nbformat>=5.1->nbconvert) (2.16.3) Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.9/dist-packages (from nbformat>=5.1->nbconvert) (4.3.3) Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.9/dist-packages (from beautifulsoup4->nbconvert) (2.4) Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.9/dist-packages (from bleach->nbconvert) (1.16.0) Requirement already satisfied: webencodings in /usr/local/lib/python3.9/dist-packages (from bleach->nbconvert) (0.5.1) Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.9/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert) (22.2.0) Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.9/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert) (0.19.3) Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.9/dist-packages (from jupyter-client>=6.1.12->nbclient>=0.5.0->nbconvert) (23.2.1) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.9/dist-packages (from jupyter-client>=6.1.12->nbclient>=0.5.0->nbconvert) (2.8.2) Requirement already satisfied: tornado>=4.1 in /usr/local/lib/python3.9/dist-packages (from jupyter-client>=6.1.12->nbclient>=0.5.0->nbconvert) (6.2)
! pwd
/content
%%shell
jupyter nbconvert --to html INMERSION_DATOS_AUTLA_2.ipynb
[NbConvertApp] Converting notebook INMERSION_DATOS_AUTLA_2.ipynb to html [NbConvertApp] Writing 1021166 bytes to INMERSION_DATOS_AUTLA_2.html