School/february/task_180222/pd_pandas_k_f_cagulis.py
Kristofers-Solo 8f9c22e1d6 task_180222
2022-02-20 13:13:31 +02:00

120 lines
2.8 KiB
Python

# Author - Kristiāns Francis Cagulis
# Date - 17.02.2022.
# Title - Patstāvīgais darbs - pandas
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from ss_scraper import SS
# flats_few = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell/")
# flats_few.get_data()
output_path = "output/graphs"
all_df = []
def read(path):
df = pd.read_excel(path)
all_df.append(df)
def get_data():
files = list(Path(Path(__file__).parent.absolute()).glob("**/*.xlsx"))
for file in files:
read(file)
df_out = pd.concat(all_df).reset_index(drop=True)
# df_out.to_excel("output/excel/combined.xlsx", index=False)
# replaces floor value to intiger
for value in df_out["Stāvs"]:
df_out = df_out.replace(value, int(value[:value.find("/")]))
# replaces price value to intiger
for value in df_out["Cena"]:
df_out = df_out.replace(value, replace_value(value))
return df_out.sort_values(by="Cena")
def replace_value(value):
new_value = value[:value.find(" ")]
new_value = new_value.replace(",", "")
return int(new_value)
def graph_plot():
data = get_data()
graph_corr(data)
graph_price(data)
def graph_corr(data):
data_corr = data.copy()
series = []
for i in data_corr["Sērija"]:
if i not in series:
series.append(i)
j = 0
for s in series:
data_corr = list(map(lambda x: x.replace(s, j), data_corr))
j += 1
print(data_corr["Sērija"])
sns.heatmap(data_corr.corr())
plt.savefig(f"{output_path}/korelacija.png")
def graph_price(data):
# plot settings
plt.figure(figsize=(50, 30))
plt.rc("font", size=15)
# plt.rc("font", titlesize=24)
# placing the plots in the plane
plot1 = plt.subplot2grid((3, 2), (0, 0))
plot2 = plt.subplot2grid((3, 2), (0, 1))
plot3 = plt.subplot2grid((3, 2), (1, 0))
plot4 = plt.subplot2grid((3, 2), (1, 1))
plot5 = plt.subplot2grid((3, 2), (2, 0))
# floor to price
plot1.scatter(data["Cena"], data["Stāvs"])
plot1.set_title("Floor to price")
plot1.set_xlabel("Price")
plot1.set_ylabel("Floor")
# room amount to price
plot2.scatter(data["Cena"], data["Istabu skaits"])
plot2.set_title("Room amount to price")
plot2.set_xlabel("Price")
plot2.set_ylabel("Room amount")
# quadrature to price
plot3.scatter(data["Cena"], data["Kvadratūra"])
plot3.set_title("Quadrature to price")
plot3.set_xlabel("Price")
plot3.set_ylabel("Quadrature")
# series to price
plot4.scatter(data["Cena"], data["Sērija"])
plot4.set_title("Series to price")
plot4.set_xlabel("Price")
plot4.set_ylabel("Series")
# date to price
plot5.scatter(data["Cena"], data["Izvietošanas datums"])
plot5.set_title("Date to price")
plot5.set_xlabel("Price")
plot5.set_ylabel("Date")
plt.savefig(f"{output_path}/cenu_grafiki.png")
def main():
graph_plot()
if __name__ == "__main__":
main()