task_180222

This commit is contained in:
Kristofers-Solo 2022-02-20 13:13:31 +02:00
parent 891e77616d
commit 8f9c22e1d6
8 changed files with 59 additions and 24 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 390 KiB

After

Width:  |  Height:  |  Size: 341 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -3,7 +3,6 @@
# Title - Patstāvīgais darbs - pandas
from pathlib import Path
import matplotlib
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
@ -20,30 +19,49 @@ def read(path):
all_df.append(df)
def address():
    """Placeholder with no behaviour yet; always returns ``None``."""
    return None
def get_data():
    """Combine every scraped workbook into one cleaned DataFrame.

    Every ``*.xlsx`` file found recursively under this script's directory
    is passed to ``read()``, which appends a frame to the module-level
    ``all_df`` list.  The frames are concatenated, then the "Stāvs" (floor)
    and "Cena" (price) columns are converted to integers.

    Returns:
        The combined DataFrame sorted by ascending "Cena".

    Raises:
        ValueError: if a floor or price cell does not start with a number.
    """
    script_dir = Path(__file__).parent.absolute()
    for file in script_dir.glob("**/*.xlsx"):
        read(file)

    df_out = pd.concat(all_df).reset_index(drop=True)

    # NOTE: the combined frame is deliberately not written to
    # "output/excel/combined.xlsx" here.  If that directory sits under the
    # script directory, the recursive glob above would read the file back
    # in on the next run.

    # Floor is stored as "<floor>/<total floors>"; keep the part before "/".
    # Converting per column (instead of DataFrame.replace per row) cannot
    # touch equal-looking cells in other columns and avoids copying the
    # whole frame once per row.
    df_out["Stāvs"] = df_out["Stāvs"].map(
        lambda floor: int(str(floor).partition("/")[0])
    )

    # Price is stored as text such as "25,000 €"; convert it to an integer.
    df_out["Cena"] = df_out["Cena"].map(replace_value)

    return df_out.sort_values(by="Cena")
def replace_value(value):
    """Convert a price string such as ``"25,000 €"`` to an integer.

    Only the text before the first space is used, and thousands-separator
    commas are removed.  A value without any space is converted whole
    (slicing on ``str.find`` would silently drop its last character).

    Args:
        value: Price text; non-string values are converted with ``str()``.

    Returns:
        The price as an ``int``.

    Raises:
        ValueError: if the leading part is not a valid integer.
    """
    amount = str(value).strip().partition(" ")[0]
    return int(amount.replace(",", ""))
def graph_plot():
    """Load the combined data once and render both charts from it."""
    dataset = get_data()
    # Correlation heatmap first, then the price scatter plots.
    for draw in (graph_corr, graph_price):
        draw(dataset)
def graph_corr(data):
    """Save a correlation heatmap of the numeric columns of *data*.

    The "Sērija" (series) column is encoded as integer codes so that it
    takes part in the correlation.  The caller's frame is not modified.
    The image is written to ``{output_path}/korelacija.png``; ``output_path``
    is expected to be defined at module level (not visible in this excerpt).

    Args:
        data: DataFrame containing at least a "Sērija" column.
    """
    data_corr = data.copy()
    sns.set_style("whitegrid")

    # Number each distinct series 0, 1, 2, ... in order of first appearance.
    # Mapping over the DataFrame itself would iterate column names and turn
    # the frame into a plain list.
    data_corr["Sērija"] = pd.factorize(data_corr["Sērija"])[0]

    # Restrict to numeric columns: text columns cannot be correlated.
    sns.heatmap(data_corr.select_dtypes("number").corr())
    plt.savefig(f"{output_path}/korelacija.png")
@ -61,25 +79,35 @@ def graph_price(data):
plot4 = plt.subplot2grid((3, 2), (1, 1))
plot5 = plt.subplot2grid((3, 2), (2, 0))
# price to floor
# floor to price
plot1.scatter(data["Cena"], data["Stāvs"])
plot1.set_title("Price to floor")
plot1.set_title("Floor to price")
plot1.set_xlabel("Price")
plot1.set_ylabel("Floor")
# price to room amount
# room amount to price
plot2.scatter(data["Cena"], data["Istabu skaits"])
plot2.set_title("Price to room amount")
plot2.set_title("Room amount to price")
plot2.set_xlabel("Price")
plot2.set_ylabel("Room amount")
# price to quadrature
# quadrature to price
plot3.scatter(data["Cena"], data["Kvadratūra"])
plot3.set_title("Price to quadrature")
plot3.set_title("Quadrature to price")
plot3.set_xlabel("Price")
plot3.set_ylabel("Quadrature")
# price to series
# series to price
plot4.scatter(data["Cena"], data["Sērija"])
plot4.set_title("Price to series")
plot4.set_title("Series to price")
plot4.set_xlabel("Price")
plot4.set_ylabel("Series")
# price to date
# date to price
plot5.scatter(data["Cena"], data["Izvietošanas datums"])
plot5.set_title("Price to floor")
plot5.set_title("Date to price")
plot5.set_xlabel("Price")
plot5.set_ylabel("Date")
plt.savefig(f"{output_path}/cenu_grafiki.png")

View File

@ -5,6 +5,7 @@
from bs4 import BeautifulSoup
import requests
import pandas as pd
from datetime import datetime
HEADERS = {
"User-Agent":
@ -76,7 +77,8 @@ class SS:
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array
columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"]
df = pd.DataFrame(chunked_items_list, columns=columns)
df.to_excel(excel_writer=f"output/excel/output_{self.name}.xlsx", index=False)
time = datetime.now().strftime("%d%m%Y%H%M%S")
df.to_excel(excel_writer=f"output/excel/output_{self.name}_{time}.xlsx", index=False)
print("Done")
@ -88,11 +90,11 @@ flats_ogre = SS("https://www.ss.com/lv/real-estate/flats/ogre-and-reg/sell/", "o
def main():
    """Scrape the currently selected listing set and export it to Excel.

    Only ``flats_many`` is scraped.  The other ``SS`` instances referenced
    in this file (``flats_aizkraukle``, ``flats_tukums``, ``flats_ogre``,
    ``flats_few``) can be scraped the same way by calling their
    ``get_data()`` here.
    """
    flats_many.get_data()
if __name__ == '__main__':

View File

@ -0,0 +1,5 @@
from datetime import datetime

# Scratch check of the timestamp format: day, month, year, hour, minute,
# second, with no separators.
time = f"{datetime.now():%d%m%Y%H%M%S}"
print(time)

BIN
output_many.xlsx Normal file

Binary file not shown.