mirror of https://github.com/kristoferssolo/School.git
synced 2025-10-21 20:10:38 +00:00

Update task_011221_homework
parent: 2eb3f800d6
commit: e10100c1ba
@@ -1,2 +1,44 @@
# Author - Kristiāns Francis Cagulis
# Date - 01.12.2021
# Title - Stundas kopdarbs (in-class group assignment)

from bs4 import BeautifulSoup
import requests

url = "https://www.ikea.lv/"
all_page = requests.get(url)

if all_page.status_code == 200:
    page = BeautifulSoup(all_page.content, 'html.parser')
    found = page.find_all(class_="itemBlock")

    info = []
    for item in found:
        item_array = []  # fresh list per product; a single shared list would merge every row
        item = item.findChild("div").findChild(class_="card-body")

        item_name = item.findChild(class_="itemName")
        item_name = item_name.findChild("div").findChild("h6")
        item_array.append(item_name.string)

        price = item.findChild(class_="itemPrice-wrapper")
        price = price.findChild("p").findChild("span")
        try:
            item_array.append(price.attrs["data-price"])
        except KeyError:  # some items only expose the family-card price
            item_array.append(price.attrs["data-pricefamily"])

        all_facts = []
        # As committed, this loop walks the just-created empty list, so it
        # never runs (and appending to a list while iterating it would not
        # terminate); the element list it was meant to walk is not in the commit.
        for facts in all_facts:
            if len(facts) == 1:
                all_facts.append(facts.string)
            else:
                atrasts = facts.findChildren("span")
                for i in atrasts:
                    all_facts.append(i.string)

        item_array.append(all_facts)
        info.append(item_array)

    for ieraksts in info:
        print(ieraksts)
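The try/except around data-price is the most brittle step in the script above, since any markup drift on ikea.lv raises before the fallback attribute is even checked. A small defensive alternative, assuming the same itemPrice-wrapper markup as the committed code; extract_price is an illustrative helper, not something in the repo:

def extract_price(card):
    # returns the price attribute as a string, or None when the markup
    # is missing; Tag.get never raises on an absent attribute
    wrapper = card.findChild(class_="itemPrice-wrapper")
    if wrapper is None:
        return None
    span = wrapper.select_one("p span")
    if span is None:
        return None
    return span.get("data-price") or span.get("data-pricefamily")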
98 december/task_011221/task_011221_homework.py Normal file
@@ -0,0 +1,98 @@
# Author - Kristiāns Francis Cagulis
# Date - 01.12.2021
# Title - Patstāvīgais darbs (independent work)

from bs4 import BeautifulSoup
import requests
import pandas as pd
import openpyxl  # engine pandas uses for .xlsx output

HEADERS = {
    "User-Agent":
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Vivaldi/4.1.2369.21'
}


class SS:
    def __init__(self, url):
        self.url = url

    def _get_page_amount(self):
        """Follow the pager until the highest page number stops growing."""
        current_page = None
        page_amount = 1
        url = self.url
        while current_page != page_amount:
            current_page = page_amount
            page = requests.get(url, headers=HEADERS)
            soup = BeautifulSoup(page.content, 'html.parser')

            # take the largest numeric label in the pager as the page count
            for el in soup.find_all(class_='navi'):
                cropped_number = el.get_text().strip()
                if cropped_number.isnumeric():
                    cropped_number = int(cropped_number)
                    if cropped_number > page_amount:
                        page_amount = cropped_number
            url = self.url + f"/page{page_amount}.html"

        return page_amount

    def get_data(self):
        combined_list = []
        for page_number in range(1, self._get_page_amount() + 1):
            url = self.url + f"/page{page_number}.html"

            page = requests.get(url, headers=HEADERS)
            soup = BeautifulSoup(page.content, 'html.parser')
            ids = [tag['id'] for tag in soup.select('tr[id]')]  # collect the listing row ids
            ids = [x for x in ids if "tr_bnr" not in x]  # drop banner rows
            ids.pop(0)  # drop the first "head_line" header-row id
            # TODO
            # location
            # floor
            # number of rooms
            # floor area
            # price
            # series
            # full listing text
            # listing publication date

            # collect the cell texts of every listing row
            for el in soup.find_all(id=ids):
                items = []  # new list per row, so rows do not all alias one object
                for elem in el.find_all(class_='msga2-o pp6'):
                    items.append(elem.get_text())
                print(items)
                combined_list.append(items)

        columns = [
            "Atrašanās vieta",  # location
            "Istabu skaits",    # number of rooms
            "Kvadratūra",       # floor area
            "Stāvs",            # floor
            "Sērija",           # series
            "Cena",             # price
            # "Pilns sludinājuma teksts",  # full listing text
            # "Izvietošanas datums"        # publication date
        ]

        # df = pd.DataFrame(combined_list, columns=columns)
        # df.to_excel(excel_writer='test.xlsx', index=False)
        # print(df)


flats = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell")


def main():
    flats.get_data()


if __name__ == '__main__':
    main()
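The export at the end of get_data is left commented out and never consumes the columns list. A minimal sketch of how the two could be joined, assuming each scraped row carries exactly the six labelled values; export_rows is an illustrative helper and test.xlsx is just the name carried over from the commit:

import pandas as pd

def export_rows(rows, columns, path="test.xlsx"):
    # rows wider than the header would make the DataFrame
    # constructor raise, so keep only exact-width rows
    clean = [row for row in rows if len(row) == len(columns)]
    df = pd.DataFrame(clean, columns=columns)
    df.to_excel(path, index=False)  # needs openpyxl installed as the engine
    return df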
21 december/task_011221/test.py Normal file
@@ -0,0 +1,21 @@
from bs4 import BeautifulSoup
import requests

HEADERS = {
    "User-Agent":
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Vivaldi/4.1.2369.21'
}

url = "https://www.ss.com/lv/real-estate/flats/riga/all/sell/page61.html"

page = requests.get(url, headers=HEADERS)
soup = BeautifulSoup(page.content, 'html.parser')

# print(soup.find_all(class_="navi"))

ids = [tag['id'] for tag in soup.select('tr[id]')]  # collect every <tr> id

ids.pop(0)  # drop the "head_line" header-row id
ids = [x for x in ids if "tr_bnr" not in x]  # drop banner rows

print(ids)
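The commented-out print of the navi elements above is the exploration that _get_page_amount grew out of. The same max-page step as a standalone sketch, assuming ss.com keeps its numeric pager labels under the navi class; max_page_number is an illustrative helper:

def max_page_number(soup):
    # the largest numeric pager label is taken to be the page count;
    # default=1 covers sections that fit on a single page
    labels = (el.get_text().strip() for el in soup.find_all(class_="navi"))
    return max((int(s) for s in labels if s.isnumeric()), default=1)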