Finished task_071221 SS.com scraper

2026-03-22 00:26:35 +00:00 · 2021-12-07 23:35:44 +02:00
parent 344f36ecd8
commit 7aeec062ed
9 changed files with 14792 additions and 111 deletions
--- a/november/task_061021/book.txt
+++ b/november/task_061021/book.txt
--- a/november/task_061021/kcagulis_061021.py
+++ b/november/task_061021/kcagulis_061021.py
@@ -0,0 +1,44 @@
+# Author - Kristiāns Francis Cagulis
+# Date - 06.10.2021
+import re
+
+# Highest chapter number searched for: both readers look for the headings
+# "Chapter 1" through f"Chapter {CHAPTERS}" in the book file.
+CHAPTERS = 61
+
+
+# creates file with chapters and row numbers
+def read_array(document):
+	with open(document, "r", encoding='utf-8') as book:
+		lines = [line.strip('\n') for line in book]  # removes 'enter' characters
+	with open('array_output.txt', 'w') as output:
+		for i in range(1, CHAPTERS + 1):
+			line = lines.index(f"Chapter {i}") + 1  # finds all chapter indexes/lines
+			output.write(f"Line {line} - Chapter {i}\n")  # writes line in file
+
+
+# creates file with chapter positions
+def read_string(document):
+	with open(document, "r", encoding='utf-8') as book:
+		lines = book.read()
+	with open('str_output.txt', 'w') as output:
+		for i in range(1, CHAPTERS + 1):
+			_, position = re.finditer(rf"\bChapter {i}\b", lines)  # finds all chapter positions
+			output.write(f"Position {position.start()} - Chapter {i}\n")  # writes position in file
+
+
+def read_book(document):
+	read_array(document)
+	read_string(document)
+
+
+def main():
+	try:
+		read_book("book.txt")
+	except:
+		try:
+			read_book("1342-0.txt")
+		except:
+			read_book(input("Ievadiet faila nosaukumu: "))
+
+
+# Run the report generation only when this file is executed as a script.
+if __name__ == '__main__':
+	main()
--- a/november/task_241121/demo_ikea.py
+++ b/november/task_241121/demo_ikea.py
@@ -0,0 +1,44 @@
+# Author - Kristiāns Francis Cagulis
+# Date - 01.12.2021
+# Title - Stundas kopdarbs
+from bs4 import BeautifulSoup
+import requests
+
+url = "https://www.ikea.lv/"
+all_page = requests.get(url)
+
+if all_page.status_code == 200:
+	page = BeautifulSoup(all_page.content, 'html.parser')
+	found = page.find_all(class_="itemBlock")
+
+	info = []
+	item_array = []
+	for item in found:
+		item = item.findChild("div").findChild(class_="card-body")
+
+		item_name = item.findChild(class_="itemName")
+		item_name = item_name.findChild("div").findChild("h6")
+
+		item_array.append(item_name.string)
+
+		price = item.findChild(class_="itemPrice-wrapper")
+		price = price.findChild("p").findChild("span")
+
+		try:
+			item_array.append(price.attrs["data-price"])
+		except:
+			item_array.append(price.attrs["data-pricefamily"])
+
+		all_facts = []
+		for facts in all_facts:
+			if len(facts) == 1:
+				all_facts.append(facts.string)
+			else:
+				atrasts = facts.findChildren("span")
+				for i in atrasts:
+					all_facts.append(i.string)
+
+		item_array.append(all_facts)
+		info.append(item_array)
+	for ieraksts in info:
+		print(ieraksts)