Skip to content

Commit fbb75fc

Browse files
committed
Fifth Project file
1 parent 4cd5666 commit fbb75fc

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

Extract_Text_from_pdf.ipynb

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 2,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"Requirement already satisfied: pyPDF2 in c:\\programdata\\anaconda3\\lib\\site-packages (1.26.0)\n",
13+
"Note: you may need to restart the kernel to use updated packages.\n"
14+
]
15+
}
16+
],
17+
"source": [
18+
"pip install pyPDF2"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 9,
24+
"metadata": {},
25+
"outputs": [
26+
{
27+
"name": "stdout",
28+
"output_type": "stream",
29+
"text": [
30+
"The Document details : \n",
31+
"\n",
32+
"{'/Title': 'Machine Learning', '/Author': 'nazli', '/CreationDate': \"D:20210125212118+05'30'\", '/ModDate': \"D:20210125212118+05'30'\", '/Producer': 'Microsoft® PowerPoint® 2016', '/Creator': 'Microsoft® PowerPoint® 2016'}\n",
33+
"The pages are : \n",
34+
"69\n"
35+
]
36+
}
37+
],
38+
"source": [
39+
"import PyPDF2\n",
40+
"a = PyPDF2.PdfFileReader(\"ML2401.ppt.pdf\")\n",
41+
"print(\"The Document details : \\n\")\n",
42+
"print(a.documentInfo)\n",
43+
"print(\"The pages are : \")\n",
44+
"print(a.getNumPages())\n",
45+
"str=\"\"\n",
46+
"for i in range(1,11):\n",
47+
" str += a.getPage(i).extractText()\n",
48+
" \n",
49+
"with open(\"text.txt\",\"w\",encoding ='utf-8') as f:\n",
50+
" f.write(str)"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 10,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"# I created a program that extracts the text from a PDF and creates a text file"
60+
]
61+
},
62+
{
63+
"cell_type": "code",
64+
"execution_count": null,
65+
"metadata": {},
66+
"outputs": [],
67+
"source": []
68+
}
69+
],
70+
"metadata": {
71+
"kernelspec": {
72+
"display_name": "Python 3",
73+
"language": "python",
74+
"name": "python3"
75+
},
76+
"language_info": {
77+
"codemirror_mode": {
78+
"name": "ipython",
79+
"version": 3
80+
},
81+
"file_extension": ".py",
82+
"mimetype": "text/x-python",
83+
"name": "python",
84+
"nbconvert_exporter": "python",
85+
"pygments_lexer": "ipython3",
86+
"version": "3.7.6"
87+
}
88+
},
89+
"nbformat": 4,
90+
"nbformat_minor": 4
91+
}

0 commit comments

Comments
 (0)