Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Run every recipe with bash instead of make's default /bin/sh.
SHELL := /bin/bash

# These targets are commands, not files. Declaring them phony keeps a stray
# file named `install`, `auth` or `run` from making the target look up to date.
.PHONY: install auth run

## install: install Poetry with pip, then install the project dependencies
install:
	pip install poetry
	poetry install

## auth: log in to Prefect Cloud with PREFECT_API_KEY / PREFECT_WORKSPACE_ID
# Both values come from the environment or the make command line. They are
# quoted so an empty or space-containing value stays a single argument and
# `--key` can never swallow `--workspace` as its value.
auth:
	poetry run prefect cloud login --key "$(PREFECT_API_KEY)" --workspace "$(PREFECT_WORKSPACE_ID)"

## run: start the Prefect flow through src/dags/prefect.sh
run:
	poetry run sh src/dags/prefect.sh
6 changes: 6 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
FROM python:3.11.6

# Prefect Cloud credentials accepted as build arguments
# (docker-compose supplies them through `build.args`).
ARG PREFECT_API_KEY
ARG PREFECT_WORKSPACE_ID

# Copy the build arguments into environment variables of the image.
# NOTE(review): ENV values are persisted in the image, so the API key is
# readable by anyone who can pull or inspect it. Consider injecting these at
# container run time (env_file / secrets) instead — confirm with the team.
ENV PREFECT_API_KEY=${PREFECT_API_KEY}
ENV PREFECT_WORKSPACE_ID=${PREFECT_WORKSPACE_ID}

WORKDIR /app

COPY . /app
Expand Down
21 changes: 21 additions & 0 deletions docker/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ services:
dockerfile: docker/Dockerfile
depends_on:
- postgres_db
- prefect_flow
environment:
- ENV_STATE=docker
env_file:
Expand All @@ -32,3 +33,23 @@ services:
env_file:
- ../.env-docker
command: ["/app/docker/bot.sh"]

# Service that authenticates against Prefect Cloud and then runs the flow.
prefect_flow:
container_name: prefect_flow
build:
context: ../
dockerfile: docker/Dockerfile
# Forward the Prefect credentials from the environment compose runs in
# to the Dockerfile's ARG declarations.
args:
- PREFECT_API_KEY=${PREFECT_API_KEY}
- PREFECT_WORKSPACE_ID=${PREFECT_WORKSPACE_ID}
# Short-form depends_on: only controls start order.
# NOTE(review): this does not wait for postgres to accept connections — confirm that is acceptable.
depends_on:
- postgres_db
env_file:
- ../.env-docker
# Replace the image entrypoint with bash so the multi-step command below runs
# in a single shell: source the env file, log in (`make auth`), start the flow (`make run`).
# NOTE(review): presumably /app/.env-docker exists because the Dockerfile copies the
# build context into /app — verify it is not excluded by .dockerignore.
entrypoint: /bin/bash
command:
- -c
- |
source /app/.env-docker &&
make auth &&
make run
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pytest = "^7.4.4"
pytest-asyncio = "^0.23.3"
langchain-openai = "^0.0.2.post1"
ruamel-yaml = "^0.18.6"
prefect = "^2.18.3"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
Expand Down
Empty file added src/dags/__init__.py
Empty file.
38 changes: 38 additions & 0 deletions src/dags/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from loguru import logger
from prefect import flow, task
from telethon import TelegramClient

from src.utils.scrapper import scrape_telegram_messages
from src.config import API_ID, API_HASH

# Telegram channels to scrape; each entry is passed as-is to scrape_telegram_messages.
CHANNELS_TO_SCRAPE = ["cryptovalerii", "KarpovCourses"]


@task
async def scrape_channels():
    """Scrape messages from every channel in CHANNELS_TO_SCRAPE.

    A single Telegram client session is opened and reused for all channels,
    which are processed one after another in list order.

    Returns:
        list: one entry per channel, in the order of CHANNELS_TO_SCRAPE,
        holding whatever ``scrape_telegram_messages`` returned for it.
    """
    scraped = []
    session = TelegramClient(
        "src/artifacts/sessions/post_finder.session", API_ID, API_HASH
    )
    async with session as client:
        for channel in CHANNELS_TO_SCRAPE:
            logger.info(f"Starting scrape for {channel}")
            scraped.append(await scrape_telegram_messages(client, channel))
            logger.info(f"Completed scrape for {channel}")
    return scraped


@flow
async def daily_scraper():
    """Flow entry point: run the channel-scraping task and print each result."""
    logger.info("Starting the scraping process...")
    # Await the task once, then print every per-channel result to stdout.
    for channel_result in await scrape_channels():
        print(channel_result)


if __name__ == "__main__":
    # Serve the flow as a deployment triggered by the cron expression
    # "0 0 * * *" (minute 0, hour 0, every day).
    daily_scraper.serve(
        name="Daily Channel Scraper",
        tags=["scraping", "daily"],
        cron="0 0 * * *",
    )
1 change: 1 addition & 0 deletions src/dags/prefect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/bin/sh
# Start the Prefect flow defined in src/dags/parser.py inside the Poetry environment.
# `exec` replaces this shell with the Python process, so no idle wrapper shell
# stays behind and the process exit status is reported directly.
exec poetry run python -m src.dags.parser