mirror of
https://github.com/kristoferssolo/School.git
synced 2025-10-21 20:10:38 +00:00
task_180222
This commit is contained in:
parent
891e77616d
commit
8f9c22e1d6
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 390 KiB After Width: | Height: | Size: 341 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 17 KiB |
@ -3,7 +3,6 @@
|
|||||||
# Title - Patstāvīgais darbs - pandas
|
# Title - Patstāvīgais darbs - pandas
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import matplotlib
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
@ -20,30 +19,49 @@ def read(path):
|
|||||||
all_df.append(df)
|
all_df.append(df)
|
||||||
|
|
||||||
|
|
||||||
def address():
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def get_data():
    """Combine every scraped workbook into one cleaned, price-sorted table.

    Every ``*.xlsx`` file found beneath this script's directory is handed to
    ``read``; the frames collected in the module-level ``all_df`` list are then
    concatenated and the "Stāvs" (floor) and "Cena" (price) columns are
    converted from text to integers.

    Returns:
        The combined DataFrame, sorted ascending by "Cena".
    """
    files = list(Path(__file__).parent.absolute().glob("**/*.xlsx"))

    for file in files:
        read(file)
    df_out = pd.concat(all_df).reset_index(drop=True)
    # df_out.to_excel("output/excel/combined.xlsx", index=False)

    # Convert each column on its own instead of calling DataFrame.replace once
    # per row: that scanned the whole table for every value and could also
    # rewrite an identical cell in an unrelated column.

    # Floor is stored as "<floor>/<total floors>"; keep the part before "/".
    # partition() also copes with a cell that has no "/" at all.
    df_out["Stāvs"] = df_out["Stāvs"].map(
        lambda floor: int(str(floor).partition("/")[0])
    )

    # Price is stored as text with a currency suffix; convert it to an integer.
    df_out["Cena"] = df_out["Cena"].map(replace_value)
    return df_out.sort_values(by="Cena")
|
||||||
|
|
||||||
|
|
||||||
|
def replace_value(value):
    """Convert a scraped price string to an integer.

    Everything from the first space onwards (currency sign, unit suffix) is
    discarded and thousands-separator commas are removed from what is left.

    Args:
        value: Price text such as "45,000 €".

    Returns:
        The leading amount as an int.

    Raises:
        ValueError: If the text before the first space is not a whole number.
    """
    # partition() yields the whole string when there is no space, whereas
    # slicing with find()'s -1 result silently chopped off the last digit.
    amount, _, _ = value.partition(" ")
    return int(amount.replace(",", ""))
|
||||||
|
|
||||||
|
|
||||||
def graph_plot():
    """Load the combined listings once and render every chart from them."""
    listings = get_data()
    # Correlation heatmap first, then the price scatter plots.
    for render in (graph_corr, graph_price):
        render(listings)
|
||||||
|
|
||||||
|
|
||||||
def graph_corr(data):
    """Draw a correlation heatmap of the listings and save it as a PNG.

    The image is written to ``{output_path}/korelacija.png``.

    Args:
        data: DataFrame of listings; must contain a "Sērija" column.
    """
    data_corr = data.copy()

    # Encode each distinct series label as an integer code (0, 1, 2, ... in
    # order of first appearance) so the column can take part in the
    # correlation. Mapping str.replace over the frame iterated its column
    # names and turned the frame into a plain list.
    data_corr["Sērija"] = pd.factorize(data_corr["Sērija"])[0]

    # Correlation is only defined for numeric columns; leave the text ones out.
    sns.heatmap(data_corr.select_dtypes("number").corr())
    plt.savefig(f"{output_path}/korelacija.png")
|
||||||
|
|
||||||
@ -61,25 +79,35 @@ def graph_price(data):
|
|||||||
plot4 = plt.subplot2grid((3, 2), (1, 1))
|
plot4 = plt.subplot2grid((3, 2), (1, 1))
|
||||||
plot5 = plt.subplot2grid((3, 2), (2, 0))
|
plot5 = plt.subplot2grid((3, 2), (2, 0))
|
||||||
|
|
||||||
# price to floor
|
# floor to price
|
||||||
plot1.scatter(data["Cena"], data["Stāvs"])
|
plot1.scatter(data["Cena"], data["Stāvs"])
|
||||||
plot1.set_title("Price to floor")
|
plot1.set_title("Floor to price")
|
||||||
|
plot1.set_xlabel("Price")
|
||||||
|
plot1.set_ylabel("Floor")
|
||||||
|
|
||||||
# price to room amount
|
# room amount to price
|
||||||
plot2.scatter(data["Cena"], data["Istabu skaits"])
|
plot2.scatter(data["Cena"], data["Istabu skaits"])
|
||||||
plot2.set_title("Price to room amount")
|
plot2.set_title("Room amount to price")
|
||||||
|
plot2.set_xlabel("Price")
|
||||||
|
plot2.set_ylabel("Room amount")
|
||||||
|
|
||||||
# price to quadrature
|
# quadrature to price
|
||||||
plot3.scatter(data["Cena"], data["Kvadratūra"])
|
plot3.scatter(data["Cena"], data["Kvadratūra"])
|
||||||
plot3.set_title("Price to quadrature")
|
plot3.set_title("Quadrature to price")
|
||||||
|
plot3.set_xlabel("Price")
|
||||||
|
plot3.set_ylabel("Quadrature")
|
||||||
|
|
||||||
# price to series
|
# series to price
|
||||||
plot4.scatter(data["Cena"], data["Sērija"])
|
plot4.scatter(data["Cena"], data["Sērija"])
|
||||||
plot4.set_title("Price to series")
|
plot4.set_title("Series to price")
|
||||||
|
plot4.set_xlabel("Price")
|
||||||
|
plot4.set_ylabel("Series")
|
||||||
|
|
||||||
# price to date
|
# date to price
|
||||||
plot5.scatter(data["Cena"], data["Izvietošanas datums"])
|
plot5.scatter(data["Cena"], data["Izvietošanas datums"])
|
||||||
plot5.set_title("Price to floor")
|
plot5.set_title("Date to price")
|
||||||
|
plot5.set_xlabel("Price")
|
||||||
|
plot5.set_ylabel("Date")
|
||||||
|
|
||||||
plt.savefig(f"{output_path}/cenu_grafiki.png")
|
plt.savefig(f"{output_path}/cenu_grafiki.png")
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests
|
import requests
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
"User-Agent":
|
"User-Agent":
|
||||||
@ -76,7 +77,8 @@ class SS:
|
|||||||
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array
|
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array
|
||||||
columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"]
|
columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"]
|
||||||
df = pd.DataFrame(chunked_items_list, columns=columns)
|
df = pd.DataFrame(chunked_items_list, columns=columns)
|
||||||
df.to_excel(excel_writer=f"output/excel/output_{self.name}.xlsx", index=False)
|
time = datetime.now().strftime("%d%m%Y%H%M%S")
|
||||||
|
df.to_excel(excel_writer=f"output/excel/output_{self.name}_{time}.xlsx", index=False)
|
||||||
print("Done")
|
print("Done")
|
||||||
|
|
||||||
|
|
||||||
@ -88,11 +90,11 @@ flats_ogre = SS("https://www.ss.com/lv/real-estate/flats/ogre-and-reg/sell/", "o
|
|||||||
|
|
||||||
|
|
||||||
def main():
    """Call ``get_data()`` on the one listing source that is currently enabled."""
    # Disabled sources, kept here so they can be switched back on quickly:
    # flats_aizkraukle.get_data()
    # flats_tukums.get_data()
    # flats_ogre.get_data()
    # flats_few.get_data()
    flats_many.get_data()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
5
february/task_180222/test.py
Normal file
5
february/task_180222/test.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
time = datetime.now().strftime("%d%m%Y%H%M%S")
|
||||||
|
|
||||||
|
print(time)
|
||||||
BIN
output_many.xlsx
Normal file
BIN
output_many.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user