task_180222

This commit is contained in:
Kristofers-Solo 2022-02-20 13:13:31 +02:00
parent 891e77616d
commit 8f9c22e1d6
8 changed files with 59 additions and 24 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 390 KiB

After

Width:  |  Height:  |  Size: 341 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -3,7 +3,6 @@
# Title - Patstāvīgais darbs - pandas # Title - Patstāvīgais darbs - pandas
from pathlib import Path from pathlib import Path
import matplotlib
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@ -20,30 +19,49 @@ def read(path):
all_df.append(df) all_df.append(df)
def address():
    """Placeholder with no behavior yet; always returns ``None``."""
    return None
def get_data(): def get_data():
files = list(Path(Path(__file__).parent.absolute()).glob("**/*.xlsx")) files = list(Path(Path(__file__).parent.absolute()).glob("**/*.xlsx"))
for file in files: for file in files:
read(file) read(file)
df_out = pd.concat(all_df).reset_index(drop=True) df_out = pd.concat(all_df).reset_index(drop=True)
df_out.to_excel("output/excel/combined.xlsx", index=False) # df_out.to_excel("output/excel/combined.xlsx", index=False)
return df_out
# replaces the floor value (the text before "/") with an integer
for value in df_out["Stāvs"]:
df_out = df_out.replace(value, int(value[:value.find("/")]))
# replaces the price value with an integer
for value in df_out["Cena"]:
df_out = df_out.replace(value, replace_value(value))
return df_out.sort_values(by="Cena")
def replace_value(value):
    """Convert a price string such as ``"45,000 €"`` to an integer.

    Only the first whitespace-delimited token is used; comma thousands
    separators are removed from it before conversion.

    Args:
        value: Price text. Anything after the first run of whitespace
            (for example a currency sign) is ignored.

    Returns:
        The leading numeric token of ``value`` as an ``int``.

    Raises:
        ValueError: If ``value`` is empty/blank or its leading token is not
            a whole number once commas are removed.
    """
    # split() keeps the whole string when there is no space. The previous
    # value[:value.find(" ")] sliced with -1 in that case and silently
    # dropped the last digit ("1,200" -> 120).
    tokens = value.split(maxsplit=1)
    number_text = tokens[0] if tokens else ""
    return int(number_text.replace(",", ""))
def graph_plot(): def graph_plot():
data = get_data() data = get_data()
# graph_corr(data) graph_corr(data)
graph_price(data) graph_price(data)
def graph_corr(data): def graph_corr(data):
data_corr = data.copy() data_corr = data.copy()
sns.set_style("whitegrid")
# plt.figure(figsize=(15, 10)) series = []
for i in data_corr["Sērija"]:
if i not in series:
series.append(i)
j = 0
for s in series:
data_corr = list(map(lambda x: x.replace(s, j), data_corr))
j += 1
print(data_corr["Sērija"])
sns.heatmap(data_corr.corr()) sns.heatmap(data_corr.corr())
plt.savefig(f"{output_path}/korelacija.png") plt.savefig(f"{output_path}/korelacija.png")
@ -61,25 +79,35 @@ def graph_price(data):
plot4 = plt.subplot2grid((3, 2), (1, 1)) plot4 = plt.subplot2grid((3, 2), (1, 1))
plot5 = plt.subplot2grid((3, 2), (2, 0)) plot5 = plt.subplot2grid((3, 2), (2, 0))
# price to floor # floor to price
plot1.scatter(data["Cena"], data["Stāvs"]) plot1.scatter(data["Cena"], data["Stāvs"])
plot1.set_title("Price to floor") plot1.set_title("Floor to price")
plot1.set_xlabel("Price")
plot1.set_ylabel("Floor")
# price to room amount # room amount to price
plot2.scatter(data["Cena"], data["Istabu skaits"]) plot2.scatter(data["Cena"], data["Istabu skaits"])
plot2.set_title("Price to room amount") plot2.set_title("Room amount to price")
plot2.set_xlabel("Price")
plot2.set_ylabel("Room amount")
# price to quadrature # quadrature to price
plot3.scatter(data["Cena"], data["Kvadratūra"]) plot3.scatter(data["Cena"], data["Kvadratūra"])
plot3.set_title("Price to quadrature") plot3.set_title("Quadrature to price")
plot3.set_xlabel("Price")
plot3.set_ylabel("Quadrature")
# price to series # series to price
plot4.scatter(data["Cena"], data["Sērija"]) plot4.scatter(data["Cena"], data["Sērija"])
plot4.set_title("Price to series") plot4.set_title("Series to price")
plot4.set_xlabel("Price")
plot4.set_ylabel("Series")
# price to date # date to price
plot5.scatter(data["Cena"], data["Izvietošanas datums"]) plot5.scatter(data["Cena"], data["Izvietošanas datums"])
plot5.set_title("Price to floor") plot5.set_title("Date to price")
plot5.set_xlabel("Price")
plot5.set_ylabel("Date")
plt.savefig(f"{output_path}/cenu_grafiki.png") plt.savefig(f"{output_path}/cenu_grafiki.png")

View File

@ -5,6 +5,7 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import requests import requests
import pandas as pd import pandas as pd
from datetime import datetime
HEADERS = { HEADERS = {
"User-Agent": "User-Agent":
@ -76,7 +77,8 @@ class SS:
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array
columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"] columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"]
df = pd.DataFrame(chunked_items_list, columns=columns) df = pd.DataFrame(chunked_items_list, columns=columns)
df.to_excel(excel_writer=f"output/excel/output_{self.name}.xlsx", index=False) time = datetime.now().strftime("%d%m%Y%H%M%S")
df.to_excel(excel_writer=f"output/excel/output_{self.name}_{time}.xlsx", index=False)
print("Done") print("Done")
@ -88,11 +90,11 @@ flats_ogre = SS("https://www.ss.com/lv/real-estate/flats/ogre-and-reg/sell/", "o
def main(): def main():
flats_aizkraukle.get_data() # flats_aizkraukle.get_data()
flats_tukums.get_data() # flats_tukums.get_data()
# flats_ogre.get_data() # flats_ogre.get_data()
# flats_few.get_data() # flats_few.get_data()
# flats_many.get_data() flats_many.get_data()
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -0,0 +1,5 @@
from datetime import datetime
# Build a compact stamp of the current local time (day, month, year, hour,
# minute, second with no separators) and print it.
time = f"{datetime.now():%d%m%Y%H%M%S}"
print(time)

BIN
output_many.xlsx Normal file

Binary file not shown.