refactor(instagram): organize files

This commit is contained in:
Kristofers Solo 2025-09-19 18:26:36 +03:00
parent bae194b2bf
commit a7e5d43423
Signed by: kristoferssolo
GPG Key ID: 8687F2D3EEE6F0ED
8 changed files with 354 additions and 162 deletions

13
Cargo.lock generated
View File

@ -62,6 +62,17 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "async-trait"
version = "0.1.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "atomic-waker" name = "atomic-waker"
version = "1.1.2" version = "1.1.2"
@ -1689,8 +1700,10 @@ dependencies = [
name = "tg-relay-rs" name = "tg-relay-rs"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"async-trait",
"color-eyre", "color-eyre",
"dotenv", "dotenv",
"futures",
"infer", "infer",
"once_cell", "once_cell",
"regex", "regex",

View File

@ -6,15 +6,22 @@ license = "MIT"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
async-trait = "0.1"
color-eyre = "0.6" color-eyre = "0.6"
dotenv = "0.15" dotenv = "0.15"
futures = "0.3.31"
infer = "0.19" infer = "0.19"
once_cell = "1.21.3" once_cell = "1.21.3"
regex = "1.11" regex = "1.11"
teloxide = { version = "0.17", features = ["macros"] } teloxide = { version = "0.17", features = ["macros"] }
tempfile = "3" tempfile = "3"
thiserror = "2.0" thiserror = "2.0"
tokio = { version = "1", features = ["macros", "rt-multi-thread", "process"] } tokio = { version = "1", features = [
"macros",
"rt-multi-thread",
"process",
"fs",
] }
tracing = "0.1" tracing = "0.1"
tracing-appender = "0.2" tracing-appender = "0.2"
tracing-bunyan-formatter = { version = "0.3", default-features = false } tracing-bunyan-formatter = { version = "0.3", default-features = false }

27
src/error.rs Normal file
View File

@ -0,0 +1,27 @@
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("io error: {0}")]
Io(#[from] std::io::Error),
#[error("instaloader failed: {0}")]
InstaloaderFaileled(String),
#[error("no media found")]
NoMediaFound,
#[error("unknown media kind")]
UnknownMediaKind,
#[error("teloxide error: {0}")]
Teloxide(#[from] teloxide::RequestError),
#[error("join error: {0}")]
Join(#[from] tokio::task::JoinError),
#[error("other: {0}")]
Other(String),
}
pub type Result<T> = std::result::Result<T, Error>;

127
src/handlers/instagram.rs Normal file
View File

@ -0,0 +1,127 @@
use crate::error::{Error, Result};
use crate::handlers::SocialHandler;
use crate::utils::{MediaKind, detect_media_kind_async, send_media_from_path};
use futures::{StreamExt, stream};
use regex::Regex;
use std::path::PathBuf;
use std::{process::Stdio, sync::OnceLock};
use teloxide::{Bot, types::ChatId};
use tempfile::tempdir;
use tokio::fs::read_dir;
use tokio::process::Command;
use tracing::{error, info};
static SHORTCODE_RE: OnceLock<Regex> = OnceLock::new();
fn shortcode_regex() -> &'static Regex {
SHORTCODE_RE.get_or_init(|| {
Regex::new(
r"https?://(?:www\.)?(?:instagram\.com|instagr\.am)/(?:p|reel|tv)/([A-Za-z0-9_-]+)",
)
.expect("filed to compile regex")
})
}
/// Handler for Instagram posts / reels / tv
#[derive(Clone, Default)]
pub struct InstagramHandler;
impl InstagramHandler {
#[inline]
#[must_use]
pub const fn new() -> Self {
Self
}
}
#[async_trait::async_trait]
impl SocialHandler for InstagramHandler {
fn name(&self) -> &'static str {
"instagram"
}
fn try_extract(&self, text: &str) -> Option<String> {
shortcode_regex()
.captures(text)
.and_then(|c| c.get(1).map(|m| m.as_str().to_owned()))
}
async fn handle(&self, bot: &Bot, chat_id: ChatId, shortcode: String) -> Result<()> {
info!(handler = %self.name(), shortcode = %shortcode, "handling instagram code");
let tmp = tempdir().map_err(Error::from)?;
let cwd = tmp.path().to_path_buf();
let target = format!("-{shortcode}");
let status = Command::new("instaloader")
.current_dir(&cwd)
.args([
"--dirname-pattern=.",
"--no-metadata-json",
"--no-compress-json",
"--quiet",
"--",
&target,
])
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null())
.status()
.await
.map_err(Error::from)?;
if !status.success() {
return Err(Error::InstaloaderFaileled(status.to_string()));
}
let mut dir = read_dir(&cwd).await?;
let mut paths = Vec::new();
while let Some(entry) = dir.next_entry().await? {
if entry.file_type().await?.is_file() {
paths.push(entry.path());
}
}
let concurrency = 8;
let results = stream::iter(paths)
.map(|path| async move {
let kind = detect_media_kind_async(&path).await;
match kind {
MediaKind::Unknown => None,
k => Some((path, k)),
}
})
.buffer_unordered(concurrency)
.collect::<Vec<Option<(PathBuf, MediaKind)>>>()
.await;
let mut media = results
.into_iter()
.flatten()
.collect::<Vec<(PathBuf, MediaKind)>>();
if media.is_empty() {
error!("no media found in tmp dir after instaloader");
return Err(Error::NoMediaFound);
}
// deterministic ordering
media.sort_by_key(|(p, _)| p.clone());
// prefer video over image
if let Some((path, MediaKind::Video)) = media.iter().find(|(_, k)| *k == MediaKind::Video) {
return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Video)).await;
}
if let Some((path, MediaKind::Image)) = media.iter().find(|(_, k)| *k == MediaKind::Image) {
return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Image)).await;
}
error!("no supported media kind found after scanning");
Err(Error::NoMediaFound)
}
fn box_clone(&self) -> Box<dyn SocialHandler> {
Box::new(self.clone())
}
}

28
src/handlers/mod.rs Normal file
View File

@ -0,0 +1,28 @@
mod instagram;
use crate::error::Result;
use teloxide::{Bot, types::ChatId};
#[async_trait::async_trait]
pub trait SocialHandler: Send + Sync {
/// Short name used for logging etc.
fn name(&self) -> &'static str;
/// Try to extract a platform-specific identifier (shortcode, id, url)
/// from arbitrary text. Return `Some` if the handler should handle this message.
fn try_extract(&self, text: &str) -> Option<String>;
/// Do the heavy-lifting: fetch media and send to `chat_id`.
async fn handle(&self, bot: &Bot, chat_id: ChatId, id: String) -> Result<()>;
/// Clone a boxed handler.
fn box_clone(&self) -> Box<dyn SocialHandler>;
}
impl Clone for Box<dyn SocialHandler> {
fn clone(&self) -> Self {
self.box_clone()
}
}
pub use instagram::InstagramHandler;

View File

@ -1 +1,4 @@
pub mod error;
pub mod handlers;
pub mod telemetry; pub mod telemetry;
pub mod utils;

View File

@ -1,179 +1,52 @@
mod telemetry;
use crate::telemetry::setup_logger;
use color_eyre::{
Result,
eyre::{Context, eyre},
};
use dotenv::dotenv; use dotenv::dotenv;
use regex::Regex; use std::sync::Arc;
use std::{ use teloxide::{Bot, prelude::Requester, respond, types::Message};
fs::{File, read_dir}, use tg_relay_rs::{
io::Read, handlers::{InstagramHandler, SocialHandler},
path::Path, telemetry::setup_logger,
}; };
use teloxide::{ use tracing::{error, info};
Bot,
prelude::Requester,
respond,
types::{ChatId, InputFile, Message},
};
use tempfile::tempdir;
use tokio::process::Command;
use tracing::error;
static VIDEO_EXTS: &[&str] = &["mp4", "webm"];
static IMAGE_EXTS: &[&str] = &["jpg", "jpeg", "png"];
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MediaKind {
Video,
Image,
Unknown,
}
#[tokio::main] #[tokio::main]
async fn main() -> color_eyre::Result<()> { async fn main() -> color_eyre::Result<()> {
dotenv().ok(); dotenv().ok();
color_eyre::install()?; color_eyre::install().expect("color-eyre install");
setup_logger(); setup_logger();
let bot = Bot::from_env(); let bot = Bot::from_env();
info!("bot starting");
teloxide::repl(bot, |bot: Bot, msg: Message| async move { let handlers = vec![Arc::new(InstagramHandler)];
if let Some(text) = msg.text()
&& let Some(shortcode) = extract_instagram_shortcode(text) teloxide::repl(bot.clone(), move |bot: Bot, msg: Message| {
{ // clone the handlers vector into the closure
let bot_cloned = bot.clone(); let handlers = handlers.clone();
async move {
if let Some(text) = msg.text() {
for handler in handlers {
if let Some(id) = handler.try_extract(text) {
let handler = handler.clone();
let bot_for_task = bot.clone();
let chat = msg.chat.id; let chat = msg.chat.id;
tokio::spawn(async move { tokio::spawn(async move {
if let Err(e) = fetch_and_send(&bot_cloned, chat, &shortcode).await { if let Err(err) = handler.handle(&bot_for_task, chat, id).await {
error!("error fetching/sending: {:?}", e); error!(%err, "handler failed");
let _ = bot_cloned
.send_message(chat, "Failed to fetch Instagram media.") let _ = bot_for_task
.send_message(chat, "Failed to fetch media.")
.await; .await;
} }
}); });
// if one handler matcher, stop checking
break;
}
}
} }
respond(()) respond(())
}
}) })
.await; .await;
Ok(()) Ok(())
} }
fn extract_instagram_shortcode(text: &str) -> Option<String> {
let re = Regex::new(
r"https?://(?:www\.)?(?:instagram\.com|instagr\.am)/(?:p|reel|tv)/([A-Za-z0-9_-]+)",
)
.unwrap();
re.captures(text)
.and_then(|cap| cap.get(1).map(|m| m.as_str().to_string()))
}
async fn fetch_and_send(bot: &Bot, chat_id: ChatId, shortcode: &str) -> Result<()> {
let dir = tempdir().context("create tempdir")?;
let dir_path = dir.path().to_path_buf();
dbg!(&dir_path);
let target = format!("-{shortcode}");
dbg!(&target);
let status = Command::new("instaloader")
.arg("--dirname-pattern")
.arg(dir_path.to_string_lossy().as_ref())
.arg("--no-metadata-json")
.arg("--no-compress-json")
.arg("--quiet")
.arg("--")
.arg(&target)
.status()
.await
.context("runnning instaloader")?;
if !status.success() {
error!("instaloader exit: {:?}", status);
return Err(eyre!("instaloader failed"));
}
let mut media_files = Vec::new();
for entry in read_dir(&dir_path)? {
let p = entry?.path();
if p.is_file() {
let ext = p.extension().and_then(|s| s.to_str()).unwrap_or("");
if matches!(ext, "jpg" | "jpeg" | "mp4" | "webm") {
media_files.push(p);
}
}
}
if media_files.is_empty() {
return Err(eyre!("no media found"));
}
dbg!(&media_files);
if let Some(video_path) = media_files.iter().find(|p| is_video(p)) {
let input = InputFile::file(video_path.clone());
bot.send_video(chat_id, input).await?;
return Ok(());
}
if let Some(image_path) = media_files.iter().find(|p| is_image(p)) {
let input = InputFile::file(image_path.clone());
bot.send_photo(chat_id, input).await?;
return Ok(());
}
bot.send_message(chat_id, "No supported media found")
.await?;
Ok(())
}
fn ext_lower(path: &Path) -> Option<String> {
path.extension()
.and_then(|s| s.to_str())
.map(str::to_ascii_lowercase)
}
fn kind_by_magic(path: &Path) -> Option<MediaKind> {
let mut f = File::open(path).ok()?;
let mut buf = [0u8; 8192];
let n = f.read(&mut buf).ok()?;
if n == 0 {
return None;
}
if let Some(kind) = infer::get(&buf[..n]) {
let mt = kind.mime_type();
if mt.starts_with("video/") {
return Some(MediaKind::Video);
}
if mt.starts_with("image/") {
return Some(MediaKind::Image);
}
}
None
}
fn detect_media_kind(path: &Path) -> MediaKind {
if let Some(ext) = ext_lower(path) {
if VIDEO_EXTS.iter().any(|e| e.eq_ignore_ascii_case(&ext)) {
return MediaKind::Video;
}
if IMAGE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(&ext)) {
return MediaKind::Image;
}
}
kind_by_magic(path).unwrap_or(MediaKind::Unknown)
}
fn is_video(path: &Path) -> bool {
detect_media_kind(path) == MediaKind::Video
}
fn is_image(path: &Path) -> bool {
detect_media_kind(path) == MediaKind::Image
}

114
src/utils.rs Normal file
View File

@ -0,0 +1,114 @@
use crate::error::{Error, Result};
use std::{
ffi::OsStr,
path::{Path, PathBuf},
};
use teloxide::{
Bot,
prelude::Requester,
types::{ChatId, InputFile},
};
use tokio::{fs::File, io::AsyncReadExt};
/// Simple media kind enum shared by handlers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediaKind {
Video,
Image,
Unknown,
}
static VIDEO_EXTS: &[&str] = &["mp4", "webm", "mov", "mkv", "avi"];
static IMAGE_EXTS: &[&str] = &["jpg", "jpeg", "png", "webp"];
/// Detect media kind first by extension, then by content/magic (sync).
/// NOTE: `infer::get_from_path` is blocking — use `detect_media_kind_async` in
/// async contexts to avoid blocking the Tokio runtime.
pub fn detect_media_kind(path: &Path) -> MediaKind {
if let Some(ext) = path.extension().and_then(OsStr::to_str) {
if VIDEO_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
return MediaKind::Video;
}
if IMAGE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
return MediaKind::Image;
}
}
if let Ok(Some(kind)) = infer::get_from_path(path) {
let mt = kind.mime_type();
if mt.starts_with("video/") {
return MediaKind::Video;
}
if mt.starts_with("image/") {
return MediaKind::Image;
}
}
MediaKind::Unknown
}
/// Async/non-blocking detection: check extension first, otherwise read a small
/// sample asynchronously and run `infer::get` on the buffer.
pub async fn detect_media_kind_async(path: &Path) -> MediaKind {
if let Some(ext) = path.extension().and_then(OsStr::to_str) {
if VIDEO_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
return MediaKind::Video;
}
if IMAGE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
return MediaKind::Image;
}
}
// Read a small prefix (8 KiB) asynchronously and probe
if let Ok(mut f) = File::open(path).await {
let mut buf = vec![0u8; 8192];
match f.read(&mut buf).await {
Ok(n) if n > 0 => {
buf.truncate(n);
if let Some(k) = infer::get(&buf) {
let mt = k.mime_type();
if mt.starts_with("video/") {
return MediaKind::Video;
}
if mt.starts_with("image/") {
return MediaKind::Image;
}
}
}
_ => {}
}
}
MediaKind::Unknown
}
/// Given a path, send it to chat as photo or video depending on detected kind.
///
/// # Errors
///
/// Returns an error if sending fails or the media kind is unknown.
pub async fn send_media_from_path(
bot: &Bot,
chat_id: ChatId,
path: PathBuf,
kind: Option<MediaKind>,
) -> Result<()> {
let kind = kind.unwrap_or_else(|| detect_media_kind(&path));
match kind {
MediaKind::Video => {
let video = InputFile::file(path);
bot.send_video(chat_id, video).await.map_err(Error::from)?;
}
MediaKind::Image => {
let photo = InputFile::file(path);
bot.send_photo(chat_id, photo).await.map_err(Error::from)?;
}
MediaKind::Unknown => {
bot.send_message(chat_id, "No supported media found")
.await
.map_err(Error::from)?;
return Err(Error::UnknownMediaKind);
}
}
Ok(())
}