mirror of
https://github.com/kristoferssolo/SScom-scraper.git
synced 2025-10-21 19:50:33 +00:00
Minor release update
Added `pyproject.toml` file.
This commit is contained in:
parent
cef627e196
commit
611a70ad68
2
.gitignore
vendored
2
.gitignore
vendored
@ -112,5 +112,3 @@ dmypy.json
|
|||||||
|
|
||||||
# Pyre type checker
|
# Pyre type checker
|
||||||
.pyre/
|
.pyre/
|
||||||
|
|
||||||
config.json
|
|
||||||
|
|||||||
14
README.md
14
README.md
@ -1,25 +1,23 @@
|
|||||||
# Webscraper
|
# Webscraper
|
||||||
|
|
||||||
Webscraper of SS marketplace for GPUs
|
Webscraper of SS marketplace for GPUs
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
git clone https://github.com/kristoferssolo/SScom-scraper
|
git clone https://github.com/kristoferssolo/SScom-scraper
|
||||||
cd SScom-scraper
|
cd SScom-scraper
|
||||||
|
pip install .
|
||||||
```
|
```
|
||||||
Or download [zip](https://github.com/kristoferssolo/SScom-scraper/archive/refs/heads/master.zip).
|
|
||||||
Create a `config.json` file with the following content:
|
Add [Telegram bot API token](https://core.telegram.org/bots/tutorial#obtain-your-bot-token) to the `config.json` file.
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"API_TOKEN": "<TOKEN_FROM_BOT_FATHER>"
|
"API_TOKEN": "<TOKEN_FROM_BOT_FATHER>"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Install required libraries:
|
Run the bot.
|
||||||
```sh
|
```sh
|
||||||
pip install -r requirements.txt
|
|
||||||
python main.py
|
python main.py
|
||||||
```
|
```
|
||||||
|
|
||||||
## Libraries used
|
|
||||||
- [Aiogram](https://github.com/aiogram/aiogram)
|
|
||||||
- [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/)
|
|
||||||
|
|||||||
3
config.json
Normal file
3
config.json
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"API_TOKEN": ""
|
||||||
|
}
|
||||||
14
main.py
Normal file → Executable file
14
main.py
Normal file → Executable file
@ -1,13 +1,10 @@
|
|||||||
"""
|
#!/usr/bin/env python3
|
||||||
Telegram bot for scraper
|
|
||||||
Author - Kristofers Solo
|
|
||||||
Licence - MIT
|
|
||||||
"""
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from aiogram import Bot, Dispatcher, executor, types
|
from aiogram import Bot, Dispatcher, executor, types
|
||||||
from scraper import gpus
|
from SScom.scraper import gpus
|
||||||
|
|
||||||
BASE_DIR = Path(__file__).resolve().parent
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
@ -30,8 +27,9 @@ async def gpu_price_message(message: types.Message):
|
|||||||
"""Returns all scraped GPUs and their prices to telegram"""
|
"""Returns all scraped GPUs and their prices to telegram"""
|
||||||
data = gpus.get_data()
|
data = gpus.get_data()
|
||||||
message_size = 100
|
message_size = 100
|
||||||
chunked_data = [data[i:i + message_size]
|
chunked_data = [
|
||||||
for i in range(0, len(data), message_size)]
|
data[i : i + message_size] for i in range(0, len(data), message_size)
|
||||||
|
]
|
||||||
|
|
||||||
for i in chunked_data:
|
for i in chunked_data:
|
||||||
await message.answer("\n".join(i))
|
await message.answer("\n".join(i))
|
||||||
|
|||||||
47
pyproject.toml
Normal file
47
pyproject.toml
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=61.0"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "SScom-scraper"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Webscraper of SS marketplace for GPUs "
|
||||||
|
authors = [{ name = "Kristofers Solo", email = "dev@kristofers.xyz" }]
|
||||||
|
keywords = ["scraper", "telegram"]
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
license = { text = "GPLv3" }
|
||||||
|
classifiers = [
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3:10",
|
||||||
|
"Programming Language :: Python :: 3:11",
|
||||||
|
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||||
|
"Operating System :: Linux",
|
||||||
|
]
|
||||||
|
dependencies = ["aiogram~=2.0", "beautifulsoup4~=4.12", "requests~=2.31"]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
"Source" = "https://github.com/kristoferssolo/SScom-scraper"
|
||||||
|
"Bug Tracker" = "https://github.com/kristoferssolo/SScom-scraper/issues"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
check_untyped_defs = true
|
||||||
|
disallow_any_generics = true
|
||||||
|
ignore_missing_imports = true
|
||||||
|
mypy_path = "src"
|
||||||
|
no_implicit_optional = true
|
||||||
|
no_implicit_reexport = true
|
||||||
|
show_error_codes = true
|
||||||
|
strict_equality = true
|
||||||
|
warn_redundant_casts = true
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unreachable = true
|
||||||
|
warn_unused_configs = true
|
||||||
|
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 160
|
||||||
|
|
||||||
|
[tool.ruff.flake8-quotes]
|
||||||
|
docstring-quotes = "double"
|
||||||
@ -1,17 +1,3 @@
|
|||||||
aiogram==2.22.1
|
aiogram==2.0.*
|
||||||
aiohttp==3.8.3
|
beautifulsoup4==4.12.*
|
||||||
aiosignal==1.2.0
|
requests==2.31.*
|
||||||
async-timeout==4.0.2
|
|
||||||
attrs==22.1.0
|
|
||||||
Babel==2.9.1
|
|
||||||
beautifulsoup4==4.11.1
|
|
||||||
certifi==2022.9.24
|
|
||||||
charset-normalizer==2.1.1
|
|
||||||
frozenlist==1.3.1
|
|
||||||
idna==3.4
|
|
||||||
multidict==6.0.2
|
|
||||||
pytz==2022.2.1
|
|
||||||
requests==2.28.1
|
|
||||||
soupsieve==2.3.2.post1
|
|
||||||
urllib3==1.26.12
|
|
||||||
yarl==1.8.1
|
|
||||||
|
|||||||
2
requirements_dev.txt
Normal file
2
requirements_dev.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
mypy==1.5.*
|
||||||
|
ruff==0.0.*
|
||||||
@ -1,14 +1,9 @@
|
|||||||
"""
|
|
||||||
Webscraper of SS marketplace for GPUs
|
|
||||||
Author - Kristofers Solo
|
|
||||||
Licence - MIT
|
|
||||||
"""
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import requests
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"}
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class SS:
|
class SS:
|
||||||
@ -20,8 +15,7 @@ class SS:
|
|||||||
soup = BeautifulSoup(page.content, "html.parser")
|
soup = BeautifulSoup(page.content, "html.parser")
|
||||||
|
|
||||||
last_url = soup.find(class_="td2").findChild("a")["href"]
|
last_url = soup.find(class_="td2").findChild("a")["href"]
|
||||||
page_amount = last_url[last_url.find(
|
page_amount = last_url[last_url.find("page") + 4 : last_url.find(".html")]
|
||||||
"page") + 4:last_url.find(".html")]
|
|
||||||
print(f"Page amount = {page_amount}")
|
print(f"Page amount = {page_amount}")
|
||||||
|
|
||||||
return int(page_amount)
|
return int(page_amount)
|
||||||
@ -35,8 +29,7 @@ class SS:
|
|||||||
soup = BeautifulSoup(page.content, "html.parser")
|
soup = BeautifulSoup(page.content, "html.parser")
|
||||||
|
|
||||||
# item ids
|
# item ids
|
||||||
ids = [tag["id"]
|
ids = [tag["id"] for tag in soup.select("tr[id]")] # creates list with ids
|
||||||
for tag in soup.select("tr[id]")] # creates list with ids
|
|
||||||
# removes "tr_bnr" elements from list
|
# removes "tr_bnr" elements from list
|
||||||
ids = [x for x in ids if "tr_bnr" not in x]
|
ids = [x for x in ids if "tr_bnr" not in x]
|
||||||
ids.remove("head_line") # removes first "head_line" id
|
ids.remove("head_line") # removes first "head_line" id
|
||||||
@ -70,7 +63,7 @@ class SS:
|
|||||||
# convert price back to string and add `€`
|
# convert price back to string and add `€`
|
||||||
gpu[1] = str(gpu[1]) + " €"
|
gpu[1] = str(gpu[1]) + " €"
|
||||||
# transform 2D array to 1D
|
# transform 2D array to 1D
|
||||||
gpus_list[index] = (" - ".join(gpu))
|
gpus_list[index] = " - ".join(gpu)
|
||||||
|
|
||||||
return gpus_list
|
return gpus_list
|
||||||
|
|
||||||
@ -79,12 +72,13 @@ gpus = SS("https://www.ss.com/lv/electronics/computers/completing-pc/video/sell"
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main function to test scraper"""
|
"""Function to test scraper"""
|
||||||
data = gpus.get_data()
|
data = gpus.get_data()
|
||||||
|
|
||||||
message_size = 100
|
message_size = 100
|
||||||
chunked_data = [data[i:i + message_size]
|
chunked_data = [
|
||||||
for i in range(0, len(data), message_size)]
|
data[i : i + message_size] for i in range(0, len(data), message_size)
|
||||||
|
]
|
||||||
for i in chunked_data:
|
for i in chunked_data:
|
||||||
print("\n".join(i))
|
print("\n".join(i))
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user