diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfe8256 --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
class TelegramAPI:
    """Read Telegram channel messages and reduce them to one engagement number.

    Credentials are read from ``api_keys.json`` in the current working
    directory; the file must provide ``api_id``, ``api_hash`` and
    ``bot_token``.
    """

    def __init__(self):
        """Load the API credentials from ``api_keys.json``.

        Raises:
            OSError: if the key file cannot be opened.
            KeyError: if one of the three expected keys is missing.
        """
        with open('api_keys.json') as f:
            keys = json.load(f)

        self.api_id = keys['api_id']
        self.api_hash = keys['api_hash']
        self.bot_token = keys['bot_token']

    def get_all_messages(self, username):
        """Return every message of ``username`` as a list.

        The messages are collected through a Telethon client opened on the
        ``'test'`` session, in the order ``iter_messages(reverse=False)``
        yields them.
        """
        with TelegramClient('test', self.api_id, self.api_hash) as client:
            return list(client.iter_messages(username, reverse=False))

    def filter_timeframe(self, einelist, timeframe):
        """Keep only the messages that fall inside ``timeframe``.

        Args:
            einelist: list of objects exposing a timezone-aware ``date``.
            timeframe: one of ``'hour'``, ``'day'``, ``'week'``, ``'month'``,
                ``'year'`` or ``'recent'``. ``'recent'`` selects the single
                newest message instead of a time window.

        Returns:
            A list of the matching messages; empty when nothing matches
            (including ``'recent'`` on an empty input).

        Raises:
            ValueError: if ``timeframe`` is not one of the names above.
        """
        # Local import: the module header only brings in datetime/timedelta.
        from datetime import timezone

        timeframes = {
            'hour': timedelta(hours=1),
            'day': timedelta(days=1),
            'week': timedelta(weeks=1),
            'month': timedelta(days=30),  # Approximate one month
            'year': timedelta(days=365),
            'recent': None,  # no window; handled separately below
        }

        if timeframe not in timeframes:
            raise ValueError(f"Invalid timeframe '{timeframe}', should be one of {list(timeframes.keys())}")

        if timeframe == 'recent':
            # max() raises on an empty sequence, so answer that case directly.
            if not einelist:
                return []
            return [max(einelist, key=lambda item: item.date)]

        # Aware datetimes compare by absolute instant, so the zone used to
        # build "now" does not affect the result; UTC keeps it explicit.
        start_time = datetime.now(timezone.utc) - timeframes[timeframe]
        return [item for item in einelist if item.date >= start_time]

    @staticmethod
    def _as_count(value):
        """Convert an optional counter to ``int``; ``None`` or junk counts as 0."""
        if value is None:
            return 0
        try:
            return int(value)
        except (TypeError, ValueError):
            # Best effort, as before: an unusable counter counts as zero.
            return 0

    @staticmethod
    def _count_reactions(message):
        """Sum the ``count`` of every reaction type attached to ``message``."""
        reactions = getattr(message, 'reactions', None)
        if reactions is None:
            return 0
        return sum(int(item.count) for item in reactions.results)

    @staticmethod
    def _count_comments(message):
        """Return ``message.replies.replies``, or 0 when there is no reply info."""
        replies = getattr(message, 'replies', None)
        if replies is None:
            return 0
        return int(replies.replies)

    def filter_metric(self, einelist, metric, username=""):
        """Extract one engagement counter from every message.

        Args:
            einelist: iterable of message objects.
            metric: ``'reactions'``, ``'comments'``, ``'forwards'`` or
                ``'views'``.
            username: unused; kept so existing callers keep working.

        Returns:
            A list of ints, one per message, in input order. A counter that
            is absent or ``None`` on a message contributes 0.

        Raises:
            KeyError: if ``metric`` is not one of the four names above.
        """
        counters = {
            'reactions': self._count_reactions,
            'comments': self._count_comments,
            'forwards': lambda m: self._as_count(getattr(m, 'forwards', None)),
            'views': lambda m: self._as_count(getattr(m, 'views', None)),
        }
        count = counters[metric]
        return [count(message) for message in einelist]

    def filter_stat(self, metriclist, stat):
        """Reduce a list of counters to a single statistic.

        Args:
            metriclist: list of numbers.
            stat: ``'top'`` (max), ``'bottom'`` (min), ``'average'`` (mean)
                or ``'median'``.

        Returns:
            The requested statistic; ``-1`` for an unknown ``stat``; ``0``
            when ``metriclist`` is empty (no messages means no engagement).
        """
        reducers = {
            'top': max,
            'bottom': min,
            'average': statistics.mean,
            'median': statistics.median,
        }
        reducer = reducers.get(stat)
        if reducer is None:
            return -1
        if not metriclist:
            # All four reducers raise on empty input.
            return 0
        return reducer(metriclist)

    def response_from_labels(self, timeframe, stat, metric, user):
        """Compute the statistic described by the three labels for ``user``.

        ``metric == 'followers'`` is answered from the current member count
        (``timeframe`` and ``stat`` do not apply to it); every other metric
        is computed from the channel's messages.
        """
        if metric == 'followers':
            return self.get_follower_count(user)

        fullacountlist = self.get_all_messages(user)
        timefilteredlist = self.filter_timeframe(fullacountlist, timeframe)
        metricfilteredlist = self.filter_metric(timefilteredlist, metric)
        return self.filter_stat(metricfilteredlist, stat)

    def get_follower_count(self, username):
        """Return the member count the Bot API reports for ``@username``.

        A leading ``@`` in ``username`` is tolerated, and the name is
        percent-encoded before it is placed in the request URL.
        """
        # Local import: the module header only imports urlopen.
        from urllib.parse import quote

        handle = quote(str(username).lstrip('@'), safe='')
        url = f"https://api.telegram.org/bot{self.bot_token}/getChatMembersCount?chat_id=@{handle}"
        with urlopen(url) as f:
            resp = json.load(f)

        return int(resp['result'])
+ # print(message) + + self.messages.append( {"role": "user", "content": message},) + + chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=self.messages) + + reply = chat.choices[0].message.content + self.messages.append({"role": "assistant", "content": reply}) + return reply + +# c = ChatBot() + +# print(c.querychatgpt( "what was the average amount of likes i got in the last month?",12)) \ No newline at end of file diff --git a/gui.py b/gui.py new file mode 100644 index 0000000..163a331 --- /dev/null +++ b/gui.py @@ -0,0 +1,108 @@ +import sys +from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QTextEdit, QLineEdit, QPushButton +from PyQt5.QtCore import pyqtSignal, Qt +from vocial import Vocial + +class ChatWidget(QWidget): + + message_sent = pyqtSignal(str) + + def __init__(self, parent=None): + super().__init__(parent) + + self.init_ui() + + def init_ui(self): + + self.setWindowTitle("Vocial") + self.setGeometry(100, 100, 500, 400) + self.setStyleSheet(""" + QWidget { + background-color: #2e2e2e; + } + QTextEdit { + background-color: #383838; + color: #ffffff; + border: 1px solid #5f5f5f; + border-radius: 5px; + } + QLineEdit { + background-color: #383838; + color: #ffffff; + border: 1px solid #5f5f5f; + border-radius: 5px; + padding: 5px; + } + QPushButton { + background-color: #3e3e3e; + color: #ffffff; + border: 1px solid #5f5f5f; + border-radius: 5px; + padding: 5px; + } + QPushButton:hover { + background-color: #4e4e4e; + } + """) + + self.layout = QVBoxLayout() + + self.chat_history = QTextEdit() + self.chat_history.setReadOnly(True) + + self.message_input = QLineEdit() + self.message_input.setPlaceholderText("Type your message here...") + + self.send_button = QPushButton("Send") + self.send_button.clicked.connect(self.send_message) + + self.layout.addWidget(self.chat_history) + self.layout.addWidget(self.message_input) + self.layout.addWidget(self.send_button) + + self.setLayout(self.layout) + + def send_message(self): + 
class LabelModel:
    """Map a free-text question to ``time`` / ``stat`` / ``metric`` labels.

    Each category with an entry in ``modelpaths`` is classified by a
    ``bert-base-uncased`` sequence classifier whose weights are loaded from
    that path. A category whose path is ``None`` falls back to label 0 of
    its label map.
    """

    def __init__(self):
        """Set up the tokenizer, checkpoint paths and label maps."""
        self.modelpaths = {"time": "models/time.pt", "stat": None, "metric": "models/metric.pt"}
        self.max_length = 128
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

        # Loaded classifiers keyed by checkpoint path, so each one is read
        # from disk only once per LabelModel instance.
        self._model_cache = {}

        # NOTE(review): this map has 6 labels but load_model defaults to
        # num_labels=5, so index 5 ('year') cannot be predicted unless the
        # checkpoint was trained with 6 outputs - confirm against training.
        self.label_map_time = {
            0: 'recent',
            1: 'hour',
            2: 'day',
            3: 'week',
            4: 'month',
            5: 'year',
        }

        self.label_map_metric = {
            0: 'reactions',
            1: 'comments',
            2: 'forwards',
            3: 'views',
            4: 'followers',
        }

        self.label_map_stat = {
            0: 'top',
            1: 'average',
            2: 'median',
            3: 'bottom',
        }

        self.label_maps = {
            "time": self.label_map_time,
            "metric": self.label_map_metric,
            "stat": self.label_map_stat,
        }

    def load_model(self, model_path, num_labels=5):
        """Build a classifier and load the weights stored at ``model_path``.

        Args:
            model_path: path of a saved ``state_dict``.
            num_labels: size of the classification head; must match the
                checkpoint. Defaults to 5, the value used so far.

        Returns:
            The model in eval mode, with its weights on the CPU.
        """
        model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
        # Inference below runs on CPU tensors, so map the checkpoint to CPU;
        # otherwise a checkpoint saved from a GPU fails to load on a
        # CPU-only machine.
        state_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state_dict)
        model.eval()
        return model

    def _get_model(self, model_path):
        """Return the classifier for ``model_path``, loading it on first use."""
        if model_path not in self._model_cache:
            self._model_cache[model_path] = self.load_model(model_path)
        return self._model_cache[model_path]

    def preprocess_input(self, input_string, tokenizer, max_length):
        """Tokenise ``input_string`` into a ``(1, max_length)`` id tensor.

        The token list is truncated to ``max_length`` and right-padded with
        empty-string tokens before conversion to ids.
        """
        # NOTE(review): padding uses '' tokens (not the tokenizer's pad token)
        # and no special tokens or attention mask are produced - presumably
        # this mirrors how the checkpoints were trained; confirm before
        # changing.
        tokens = tokenizer.tokenize(input_string)
        tokens = tokens[:max_length] + [''] * (max_length - len(tokens))
        token_indices = tokenizer.convert_tokens_to_ids(tokens)
        return torch.tensor(token_indices).unsqueeze(0)  # add batch dimension

    def perform_inference(self, model, input_tensor):
        """Run ``model`` on ``input_tensor`` without gradients; return its logits."""
        with torch.no_grad():
            output = model(input_tensor)
        return output.logits

    def postprocess_output(self, output_tensor, label_map):
        """Return the label whose index has the highest score along dim 1."""
        predicted_index = torch.argmax(output_tensor, dim=1).item()
        return label_map[predicted_index]

    def main(self, message):
        """Predict one label per category for ``message``.

        Returns:
            A dict with the keys ``"time"``, ``"stat"`` and ``"metric"``.
            The dict is also printed to stdout.
        """
        finallabels = {"time": "NONE", "stat": "NONE", "metric": "NONE"}
        input_tensor = None  # tokenised lazily, shared by all categories

        for category, model_path in self.modelpaths.items():
            label_map = self.label_maps[category]

            if not model_path:
                # No model available for this category: use its first label.
                finallabels[category] = label_map[0]
                continue

            if input_tensor is None:
                input_tensor = self.preprocess_input(message, self.tokenizer, self.max_length)
            model = self._get_model(model_path)
            output_tensor = self.perform_inference(model, input_tensor)
            finallabels[category] = self.postprocess_output(output_tensor, label_map)

        print(finallabels)
        return finallabels
class Vocial:
    """Answer a social-media question about a Telegram account.

    Pipeline: ``LabelModel`` turns the question into time/stat/metric labels,
    ``TelegramAPI`` computes the matching number for ``username``, and
    ``ChatBot`` phrases that number as a reply.
    """

    def __init__(self, username=""):
        """Create the three collaborators and remember the account name.

        Args:
            username: Telegram account to query; may be assigned later via
                the ``username`` attribute.
        """
        self.model = LabelModel()
        self.telapi = TelegramAPI()
        self.chatgpt = ChatBot()
        self.username = username

    def main(self, message, testing=False):
        """Return the chatbot's reply to ``message``.

        Args:
            message: the user's question.
            testing: when truthy, skip the label model and answer a fixed
                question ("average reactions in the last month") instead of
                ``message``.

        Returns:
            The reply text produced by ``ChatBot.querychatgpt``.
        """
        if testing:
            message = "what was the average amount of likes i got in the last month?"
            timeframe, stat, metric = "month", "average", "reactions"
        else:
            modelvals = self.model.main(message)
            timeframe, stat, metric = modelvals["time"], modelvals["stat"], modelvals["metric"]

        # Both paths share the same lookup and phrasing steps.
        valwewant = self.telapi.response_from_labels(timeframe, stat, metric, self.username)
        return self.chatgpt.querychatgpt(message, valwewant)