diff --git a/.gitignore b/.gitignore deleted file mode 100644 index fdd9e5b..0000000 --- a/.gitignore +++ /dev/null @@ -1,161 +0,0 @@ -# Created by https://www.toptal.com/developers/gitignore/api/python -# Edit at https://www.toptal.com/developers/gitignore?templates=python - -### Python ### -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# Data -*.json -*.txt -*.lock - -# Folder -cache/ -klue_dir/ -prediction/ -wandb/ -best_model/ - -# Checkpoint -*.pt -*.bin -*.pth - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
import pickle
from typing import Dict, List, Optional, Text, Tuple

import numpy as np  # kept: imported by the original module (may be used elsewhere)
import pandas as pd


# Maps fine-grained NER tags to the coarse relation-extraction entity types
# used downstream; 'O' marks tags that are not usable as entities.
# Module-level so that augmentation() can see it (originally it was a local
# of main(), which made augmentation() raise NameError at runtime).
TAG_MAP: Dict[str, str] = {
    'PERSON': 'PER',
    'LOCATION': 'LOC',
    'ORGANIZATION': 'ORG',
    'CITY': 'LOC',
    'COUNTRY': 'ORG',  # ORG
    'ARTIFACT': 'O',
    'DATE': 'DAT',
    'TIME': 'DAT',
    'CIVILIZATION': 'O',
    'ANIMAL': 'O',
    'PLANT': 'O',
    'QUANTITY': 'NOH',
    'STUDY_FIELD': 'O',
    'THEORY': 'O',
    'EVENT': 'O',  # ORG
    'MATERIAL': 'O',
    'TERM': 'O',
    'OCCUPATION': 'O',  # occupation
    'DISEASE': 'O',
    'O': 'O',
}


def find_nth(string: Text, substring: Text, n: int) -> int:
    """Return the index of the n-th (1-based) occurrence of *substring*
    in *string*, or -1 if there are fewer than n occurrences.

    Iterative rather than recursive: the original recursed once per
    occurrence, so a large n could hit the recursion limit.
    """
    idx = string.find(substring)
    while idx != -1 and n > 1:
        idx = string.find(substring, idx + 1)
        n -= 1
    return idx


def entity_prepro(sentence: Text, entity: Tuple[str, str, int]) -> Dict:
    """Convert a (word, tag, occurrence-number) triple into the span dict
    expected downstream: {word, start_idx, end_idx, type}.

    NOTE(review): if the word is somehow absent, start_idx is -1; callers
    build the triples from the sentence itself, so this should not occur.
    """
    start_idx = find_nth(sentence, entity[0], entity[2])
    end_idx = start_idx + len(entity[0])

    return {
        "word": entity[0],
        "start_idx": start_idx,
        "end_idx": end_idx,
        "type": entity[1],
    }


def data_organizing(
    sentence: Text,
    subjects: Tuple[str, str, int],
    objects: Tuple[str, str, int],
) -> List:
    """Return [sentence, subject-span-dict, object-span-dict] for one pair."""
    p_subjects = entity_prepro(sentence, subjects)
    p_objects = entity_prepro(sentence, objects)

    return [sentence, p_subjects, p_objects]


def augmentation(
    tagged_sentences: List[List[Tuple[str, str]]],
    tag_map: Optional[Dict[str, str]] = None,
) -> List[List]:
    """Build relation-extraction rows from NER-tagged sentences.

    Each input sentence is a list of (token, tag) pairs. For every sentence,
    every (subject, object) pair of entity tokens — subjects restricted to
    PERSON/ORGANIZATION — yields one [sentence, subject, object] row.

    Args:
        tagged_sentences: NER-tagged sentences as lists of (token, tag).
        tag_map: tag -> entity-type mapping; defaults to module TAG_MAP.
            (New optional parameter: fixes the original NameError where
            tag_map only existed inside main().)

    Returns:
        List of [sentence, subject_entity_dict, object_entity_dict] rows.
    """
    if tag_map is None:
        tag_map = TAG_MAP

    # Annotate each token with how many times its text has appeared so far
    # in the sentence, so repeated words can be located unambiguously later.
    counted_sentences = []
    for sent in tagged_sentences:
        prefix = ''
        counted = []
        for tok, tag in sent:
            # substring count over the concatenated prefix, as in the original
            counted.append((tok, tag, prefix.count(tok) + 1))
            prefix += tok
        counted_sentences.append(counted)

    print("Number of data to augment :", len(counted_sentences))

    augmented_data = []
    for tag_sent in counted_sentences:
        org_sent = "".join(tok for tok, _, _ in tag_sent)
        # Object candidates: any token whose mapped tag is a real entity type.
        obj_list = [(tok, tag, cnt) for tok, tag, cnt in tag_sent if tag_map[tag] != 'O']
        # Subjects must additionally be a person or organization.
        sbj_list = [t for t in obj_list if t[1] in ('PERSON', 'ORGANIZATION')]
        cand_list = [[org_sent, sbj, obj] for sbj in sbj_list for obj in obj_list if sbj != obj]
        augmented_data.extend(data_organizing(s, sbj, obj) for s, sbj, obj in cand_list)

    print("Number of Augmented data :", len(augmented_data))

    return augmented_data


def main():
    """Load NER-tagged sentences, augment them, and write CSV/pickle outputs."""
    with open('tagged_sentence.pickle', 'rb') as f:
        tagged_sentence = pickle.load(f)

    aug_data = augmentation(tagged_sentence)

    augmented_data = pd.DataFrame(aug_data)
    augmented_data.columns = ['sentence', 'subject_entity', 'object_entity']
    augmented_data['label'] = None
    augmented_data['source'] = 'augmented'

    augmented_data.to_csv("augmented_data.csv", index=False)

    with open('augmented_data.pickle', 'wb') as f:
        pickle.dump(augmented_data, f, pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":
    # Guard added: the original defined main() but never called it.
    main()