# School/february/task_180222/pd_pandas_k_f_cagulis.py
# Krisotfers-Solo 43a25eb411 task_180222
# 2022-02-17 20:43:29 +02:00
#
# 92 lines
# 2.0 KiB
# Python

# Author - Kristiāns Francis Cagulis
# Date - 17.02.2022.
# Title - Independent work (patstāvīgais darbs) - pandas
from pathlib import Path
import matplotlib
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from ss_scraper import SS
# flats_few = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell/")
# flats_few.get_data()
# Directory that the graph functions save their PNG files into
# (a relative path, so it is resolved against the current working directory).
output_path: str = "output/graphs"
# Module-level accumulator: read() appends every loaded DataFrame to this list.
all_df: list = []
def read(path):
    """Load one Excel workbook and store it in the module-level ``all_df`` list.

    Args:
        path: Location of the workbook; passed unchanged to ``pd.read_excel``.

    Returns:
        None. The loaded DataFrame is only appended to ``all_df``.
    """
    all_df.append(pd.read_excel(path))
def address():
    """Placeholder without an implementation; does nothing and returns None."""
def get_data():
    """Combine every ``.xlsx`` workbook found under this script's directory.

    The workbooks are searched for recursively, loaded through ``read()``
    (which fills the module-level ``all_df`` list), concatenated into one
    DataFrame with a fresh 0..n-1 index, and written to
    ``output/excel/combined.xlsx`` (relative to the current working directory).

    Returns:
        The combined DataFrame.

    Raises:
        ValueError: Raised by ``pd.concat`` when no workbook was found.
    """
    combined_file = "output/excel/combined.xlsx"
    combined_resolved = Path(combined_file).resolve()
    search_root = Path(__file__).parent.absolute()

    # Start from an empty accumulator so that calling this function more than
    # once does not duplicate the rows loaded by earlier calls.
    all_df.clear()

    # Sorted so the row order of the result does not depend on the order in
    # which the filesystem happens to list the files.
    for file in sorted(search_root.glob("**/*.xlsx")):
        # Never feed our own output from a previous run back in as input.
        if file.resolve() == combined_resolved:
            continue
        # "~$name.xlsx" are lock files Excel creates next to open workbooks.
        if file.name.startswith("~$"):
            continue
        read(file)

    df_out = pd.concat(all_df).reset_index(drop=True)

    # Create the output directory on first use instead of failing in to_excel.
    Path(combined_file).parent.mkdir(parents=True, exist_ok=True)
    df_out.to_excel(combined_file, index=False)
    return df_out
def graph_plot():
    """Load the combined data set and draw the price scatter plots from it."""
    # The correlation heatmap (graph_corr) is currently switched off.
    graph_price(get_data())
def graph_corr(data):
    """Save a correlation heatmap of the numeric columns of *data*.

    Args:
        data: DataFrame to analyse; it is not modified.

    Returns:
        None. The image is written to ``{output_path}/korelacija.png``.
    """
    # Keep only numeric/boolean columns: pandas 2.x raises in ``corr()`` when
    # text columns are present, while older versions silently dropped them.
    # ``select_dtypes`` returns a new frame, so the caller's data is untouched.
    data_corr = data.select_dtypes(include=["number", "bool"]).corr()

    sns.set_style("whitegrid")

    # Create the output directory on first use instead of failing in savefig.
    Path(output_path).mkdir(parents=True, exist_ok=True)

    # Draw on a dedicated figure so the heatmap never lands on axes left over
    # from another plot, and close it afterwards to release its memory.
    fig = plt.figure()
    sns.heatmap(data_corr)
    plt.savefig(f"{output_path}/korelacija.png")
    plt.close(fig)
def graph_price(data):
    """Save a grid of scatter plots of price against the other listing columns.

    Each plot puts the ``"Cena"`` column on the x axis and one other column on
    the y axis. The plots fill a 3x2 grid row by row (the last cell stays
    empty).

    Args:
        data: DataFrame containing the columns ``"Cena"``, ``"Stāvs"``,
            ``"Istabu skaits"``, ``"Kvadratūra"``, ``"Sērija"`` and
            ``"Izvietošanas datums"``.

    Returns:
        None. The image is written to ``{output_path}/cenu_grafiki.png``.
    """
    # (y-axis column, plot title) in grid order.
    panels = (
        ("Stāvs", "Price to floor"),
        ("Istabu skaits", "Price to room amount"),
        ("Kvadratūra", "Price to quadrature"),
        ("Sērija", "Price to series"),
        # Was mislabelled "Price to floor" (copy-paste of the first title).
        ("Izvietošanas datums", "Price to date"),
    )
    grid_shape = (3, 2)

    # plot settings
    fig = plt.figure(figsize=(50, 30))
    plt.rc("font", size=15)

    for index, (column, title) in enumerate(panels):
        # divmod turns the running index into a (row, column) grid position.
        axes = plt.subplot2grid(grid_shape, divmod(index, grid_shape[1]))
        axes.scatter(data["Cena"], data[column])
        axes.set_title(title)

    # Create the output directory on first use instead of failing in savefig.
    Path(output_path).mkdir(parents=True, exist_ok=True)
    plt.savefig(f"{output_path}/cenu_grafiki.png")
    # The figure is very large (50x30 in); release it once it is on disk.
    plt.close(fig)
def main() -> None:
    """Script entry point: delegates all work to ``graph_plot()``."""
    graph_plot()
# Run the script only when the file is executed directly, not when imported.
if __name__ == "__main__":
    main()