From 611a70ad68f6ded8b7322ea2e5fc4f108a027463 Mon Sep 17 00:00:00 2001
From: Kristofers Solo
Date: Sat, 9 Sep 2023 23:37:16 +0300
Subject: [PATCH] Minor release update

Added `pyproject.toml` with project metadata, dependencies, and mypy/ruff
configuration. Moved `scraper.py` into the `src/SScom/` package layout,
trimmed `requirements.txt` to the direct dependencies, added
`requirements_dev.txt`, and updated the README installation steps.
---
 .gitignore                         |  2 --
 README.md                          | 14 ++++-----
 config.json                        |  3 ++
 main.py                            | 14 ++++-----
 pyproject.toml                     | 47 ++++++++++++++++++++++++++++++
 requirements.txt                   | 20 ++-----------
 requirements_dev.txt               |  2 ++
 scraper.py => src/SScom/scraper.py | 26 +++++++----------
 8 files changed, 77 insertions(+), 51 deletions(-)
 create mode 100644 config.json
 mode change 100644 => 100755 main.py
 create mode 100644 pyproject.toml
 create mode 100644 requirements_dev.txt
 rename scraper.py => src/SScom/scraper.py (82%)

diff --git a/.gitignore b/.gitignore
index 626ab9a..510c73d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -112,5 +112,3 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
-
-config.json
diff --git a/README.md b/README.md
index 3241ce1..37fa29d 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,23 @@
 # Webscraper
+
 Webscraper of SS marketplace for GPUs
 
 ## Installation
+
 ```sh
 git clone https://github.com/kristoferssolo/SScom-scraper
 cd SScom-scraper
+pip install .
 ```
-Or download [zip](https://github.com/kristoferssolo/SScom-scraper/archive/refs/heads/master.zip).
-Create `config.json` file with following content:
+
+Add your [Telegram bot API token](https://core.telegram.org/bots/tutorial#obtain-your-bot-token) to the `config.json` file:
 ```json
 {
     "API_TOKEN": ""
 }
 ```
 
-Install required libraries:
+Run the bot:
 ```sh
-pip install -r requirements.txt
 python main.py
 ```
-
-## Libraries used
-- [Aiogram](https://github.com/aiogram/aiogram)
-- [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/)
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..59db65f
--- /dev/null
+++ b/config.json
@@ -0,0 +1,3 @@
+{
+    "API_TOKEN": ""
+}
diff --git a/main.py b/main.py
old mode 100644
new mode 100755
index 7fa3b03..0771020
--- a/main.py
+++ b/main.py
@@ -1,13 +1,10 @@
-"""
-Telegram bot for scraper
-Author - Kristofers Solo
-Licence - MIT
-"""
+#!/usr/bin/env python3
 import json
 import logging
 from pathlib import Path
+
 from aiogram import Bot, Dispatcher, executor, types
-from scraper import gpus
+from SScom.scraper import gpus
 
 
 BASE_DIR = Path(__file__).resolve().parent
@@ -30,8 +27,9 @@ async def gpu_price_message(message: types.Message):
     """Returns all scraped GPUs and their prices to telegram"""
     data = gpus.get_data()
     message_size = 100
-    chunked_data = [data[i:i + message_size]
-                    for i in range(0, len(data), message_size)]
+    chunked_data = [
+        data[i : i + message_size] for i in range(0, len(data), message_size)
+    ]
     for i in chunked_data:
         await message.answer("\n".join(i))
 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d1332a4
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,47 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "SScom-scraper"
+version = "0.1.0"
+description = "Webscraper of SS marketplace for GPUs"
+authors = [{ name = "Kristofers Solo", email = "dev@kristofers.xyz" }]
+keywords = ["scraper", "telegram"]
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "GPLv3" }
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+    "Operating System :: POSIX :: Linux",
+]
+dependencies = ["aiogram~=2.0", "beautifulsoup4~=4.12", "requests~=2.31"]
+
+[project.urls]
+"Source" = "https://github.com/kristoferssolo/SScom-scraper"
+"Bug Tracker" = "https://github.com/kristoferssolo/SScom-scraper/issues"
+
+
+[tool.mypy]
+check_untyped_defs = true
+disallow_any_generics = true
+ignore_missing_imports = true
+mypy_path = "src"
+no_implicit_optional = true
+no_implicit_reexport = true
+show_error_codes = true
+strict_equality = true
+warn_redundant_casts = true
+warn_return_any = true
+warn_unreachable = true
+warn_unused_configs = true
+
+
+[tool.ruff]
+line-length = 160
+
+[tool.ruff.flake8-quotes]
+docstring-quotes = "double"
diff --git a/requirements.txt b/requirements.txt
index 79372d5..bd2dc64 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,3 @@
-aiogram==2.22.1
-aiohttp==3.8.3
-aiosignal==1.2.0
-async-timeout==4.0.2
-attrs==22.1.0
-Babel==2.9.1
-beautifulsoup4==4.11.1
-certifi==2022.9.24
-charset-normalizer==2.1.1
-frozenlist==1.3.1
-idna==3.4
-multidict==6.0.2
-pytz==2022.2.1
-requests==2.28.1
-soupsieve==2.3.2.post1
-urllib3==1.26.12
-yarl==1.8.1
+aiogram==2.0.*
+beautifulsoup4==4.12.*
+requests==2.31.*
diff --git a/requirements_dev.txt b/requirements_dev.txt
new file mode 100644
index 0000000..d874ab9
--- /dev/null
+++ b/requirements_dev.txt
@@ -0,0 +1,2 @@
+mypy==1.5.*
+ruff==0.0.*
diff --git a/scraper.py b/src/SScom/scraper.py
similarity index 82%
rename from scraper.py
rename to src/SScom/scraper.py
index f3cd1ae..cc69827 100644
--- a/scraper.py
+++ b/src/SScom/scraper.py
@@ -1,14 +1,9 @@
-"""
-Webscaper of SS marketplace for GPUs
-Author - Kristofers Solo
-Licence - MIT
-"""
-
-from bs4 import BeautifulSoup
 import requests
+from bs4 import BeautifulSoup
 
 HEADERS = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"}
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"
+}
 
 
 class SS:
@@ -20,8 +15,7 @@ class SS:
 
         soup = BeautifulSoup(page.content, "html.parser")
         last_url = soup.find(class_="td2").findChild("a")["href"]
-        page_amount = last_url[last_url.find(
-            "page") + 4:last_url.find(".html")]
+        page_amount = last_url[last_url.find("page") + 4 : last_url.find(".html")]
 
         print(f"Page amount = {page_amount}")
         return int(page_amount)
@@ -35,8 +29,7 @@ class SS:
         soup = BeautifulSoup(page.content, "html.parser")
 
         # item ids
-        ids = [tag["id"]
-               for tag in soup.select("tr[id]")]  # creates list with ids
+        ids = [tag["id"] for tag in soup.select("tr[id]")]  # creates list with ids
         # removes "tr_bnr" elements from list
        ids = [x for x in ids if "tr_bnr" not in x]
         ids.remove("head_line")  # removes first "head_line" id
@@ -70,7 +63,7 @@ class SS:
             # convert price back to string and add `€`
             gpu[1] = str(gpu[1]) + " €"
             # transform 2D array to 1D
-            gpus_list[index] = (" - ".join(gpu))
+            gpus_list[index] = " - ".join(gpu)
 
         return gpus_list
 
@@ -79,12 +72,13 @@ class SS:
 gpus = SS("https://www.ss.com/lv/electronics/computers/completing-pc/video/sell")
 
 
 def main():
-    """Main funcion to test scraper"""
+    """Function to test scraper"""
     data = gpus.get_data()
     message_size = 100
-    chunked_data = [data[i:i + message_size]
-                    for i in range(0, len(data), message_size)]
+    chunked_data = [
+        data[i : i + message_size] for i in range(0, len(data), message_size)
+    ]
     for i in chunked_data:
         print("\n".join(i))
 