Minor release update

Added `pyproject.toml` file.
This commit is contained in:
Kristofers Solo 2023-09-09 23:37:16 +03:00
parent cef627e196
commit 611a70ad68
8 changed files with 77 additions and 51 deletions

2
.gitignore vendored
View File

@ -112,5 +112,3 @@ dmypy.json
# Pyre type checker # Pyre type checker
.pyre/ .pyre/
config.json

View File

@ -1,25 +1,23 @@
# Webscraper # Webscraper
Webscraper of SS marketplace for GPUs Webscraper of SS marketplace for GPUs
## Installation ## Installation
```sh ```sh
git clone https://github.com/kristoferssolo/SScom-scraper git clone https://github.com/kristoferssolo/SScom-scraper
cd SScom-scraper cd SScom-scraper
pip install .
``` ```
Or download [zip](https://github.com/kristoferssolo/SScom-scraper/archive/refs/heads/master.zip).
Create `config.json` file with following content: Add [Telegram bot API token](https://core.telegram.org/bots/tutorial#obtain-your-bot-token) to the `config.json` file.
```json ```json
{ {
"API_TOKEN": "<TOKEN_FROM_BOT_FATHER>" "API_TOKEN": "<TOKEN_FROM_BOT_FATHER>"
} }
``` ```
Install required libraries: Run the bot.
```sh ```sh
pip install -r requirements.txt
python main.py python main.py
``` ```
## Libraries used
- [Aiogram](https://github.com/aiogram/aiogram)
- [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/)

3
config.json Normal file
View File

@ -0,0 +1,3 @@
{
"API_TOKEN": ""
}

14
main.py Normal file → Executable file
View File

@ -1,13 +1,10 @@
""" #!/usr/bin/env python3
Telegram bot for scraper
Author - Kristofers Solo
Licence - MIT
"""
import json import json
import logging import logging
from pathlib import Path from pathlib import Path
from aiogram import Bot, Dispatcher, executor, types from aiogram import Bot, Dispatcher, executor, types
from scraper import gpus from SScom.scraper import gpus
BASE_DIR = Path(__file__).resolve().parent BASE_DIR = Path(__file__).resolve().parent
@ -30,8 +27,9 @@ async def gpu_price_message(message: types.Message):
"""Returns all scraped GPUs and their prices to telegram""" """Returns all scraped GPUs and their prices to telegram"""
data = gpus.get_data() data = gpus.get_data()
message_size = 100 message_size = 100
chunked_data = [data[i:i + message_size] chunked_data = [
for i in range(0, len(data), message_size)] data[i : i + message_size] for i in range(0, len(data), message_size)
]
for i in chunked_data: for i in chunked_data:
await message.answer("\n".join(i)) await message.answer("\n".join(i))

47
pyproject.toml Normal file
View File

@ -0,0 +1,47 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "SScom-scraper"
version = "0.1.0"
description = "Webscraper of SS marketplace for GPUs "
authors = [{ name = "Kristofers Solo", email = "dev@kristofers.xyz" }]
keywords = ["scraper", "telegram"]
readme = "README.md"
requires-python = ">=3.10"
license = { text = "GPLv3" }
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: Linux",
]
dependencies = ["aiogram~=2.0", "beautifulsoup4~=4.12", "requests~=2.31"]
[project.urls]
"Source" = "https://github.com/kristoferssolo/SScom-scraper"
"Bug Tracker" = "https://github.com/kristoferssolo/SScom-scraper/issues"
[tool.mypy]
check_untyped_defs = true
disallow_any_generics = true
ignore_missing_imports = true
mypy_path = "src"
no_implicit_optional = true
no_implicit_reexport = true
show_error_codes = true
strict_equality = true
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_configs = true
[tool.ruff]
line-length = 160
[tool.ruff.flake8-quotes]
docstring-quotes = "double"

View File

@ -1,17 +1,3 @@
aiogram==2.22.1 aiogram==2.0.*
aiohttp==3.8.3 beautifulsoup4==4.12.*
aiosignal==1.2.0 requests==2.31.*
async-timeout==4.0.2
attrs==22.1.0
Babel==2.9.1
beautifulsoup4==4.11.1
certifi==2022.9.24
charset-normalizer==2.1.1
frozenlist==1.3.1
idna==3.4
multidict==6.0.2
pytz==2022.2.1
requests==2.28.1
soupsieve==2.3.2.post1
urllib3==1.26.12
yarl==1.8.1

2
requirements_dev.txt Normal file
View File

@ -0,0 +1,2 @@
mypy==1.5.*
ruff==0.0.*

View File

@ -1,14 +1,9 @@
"""
Webscraper of SS marketplace for GPUs
Author - Kristofers Solo
Licence - MIT
"""
from bs4 import BeautifulSoup
import requests import requests
from bs4 import BeautifulSoup
HEADERS = { HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"} "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"
}
class SS: class SS:
@ -20,8 +15,7 @@ class SS:
soup = BeautifulSoup(page.content, "html.parser") soup = BeautifulSoup(page.content, "html.parser")
last_url = soup.find(class_="td2").findChild("a")["href"] last_url = soup.find(class_="td2").findChild("a")["href"]
page_amount = last_url[last_url.find( page_amount = last_url[last_url.find("page") + 4 : last_url.find(".html")]
"page") + 4:last_url.find(".html")]
print(f"Page amount = {page_amount}") print(f"Page amount = {page_amount}")
return int(page_amount) return int(page_amount)
@ -35,8 +29,7 @@ class SS:
soup = BeautifulSoup(page.content, "html.parser") soup = BeautifulSoup(page.content, "html.parser")
# item ids # item ids
ids = [tag["id"] ids = [tag["id"] for tag in soup.select("tr[id]")] # creates list with ids
for tag in soup.select("tr[id]")] # creates list with ids
# removes "tr_bnr" elements from list # removes "tr_bnr" elements from list
ids = [x for x in ids if "tr_bnr" not in x] ids = [x for x in ids if "tr_bnr" not in x]
ids.remove("head_line") # removes first "head_line" id ids.remove("head_line") # removes first "head_line" id
@ -70,7 +63,7 @@ class SS:
# convert price back to string and add `€` # convert price back to string and add `€`
gpu[1] = str(gpu[1]) + "€" gpu[1] = str(gpu[1]) + "€"
# transform 2D array to 1D # transform 2D array to 1D
gpus_list[index] = (" - ".join(gpu)) gpus_list[index] = " - ".join(gpu)
return gpus_list return gpus_list
@ -79,12 +72,13 @@ gpus = SS("https://www.ss.com/lv/electronics/computers/completing-pc/video/sell"
def main(): def main():
"""Main function to test scraper""" """Function to test scraper"""
data = gpus.get_data() data = gpus.get_data()
message_size = 100 message_size = 100
chunked_data = [data[i:i + message_size] chunked_data = [
for i in range(0, len(data), message_size)] data[i : i + message_size] for i in range(0, len(data), message_size)
]
for i in chunked_data: for i in chunked_data:
print("\n".join(i)) print("\n".join(i))