|
87 | 87 | }, |
88 | 88 | { |
89 | 89 | "cell_type": "code", |
90 | | - "execution_count": null, |
| 90 | + "execution_count": 37, |
91 | 91 | "metadata": {}, |
92 | 92 | "outputs": [ |
93 | 93 | { |
94 | 94 | "name": "stdout", |
95 | 95 | "output_type": "stream", |
96 | 96 | "text": [ |
97 | 97 | "\n", |
98 | | - "Downloading dataset...\n" |
99 | | - ] |
100 | | - }, |
101 | | - { |
102 | | - "name": "stderr", |
103 | | - "output_type": "stream", |
104 | | - "text": [ |
105 | | - "2436.28s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" |
106 | | - ] |
107 | | - }, |
108 | | - { |
109 | | - "name": "stdout", |
110 | | - "output_type": "stream", |
111 | | - "text": [ |
112 | | - "Dataset URL: https://www.kaggle.com/datasets/tobiasbueck/multilingual-customer-support-tickets\n", |
113 | | - "License(s): Attribution 4.0 International (CC BY 4.0)\n", |
114 | | - "multilingual-customer-support-tickets.zip: Skipping, found more recently modified local copy (use --force to force download)\n", |
115 | | - "multilingual-customer-support-tickets.zip: Skipping, found more recently modified local copy (use --force to force download)\n" |
116 | | - ] |
117 | | - }, |
118 | | - { |
119 | | - "name": "stderr", |
120 | | - "output_type": "stream", |
121 | | - "text": [ |
122 | | - "2444.87s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" |
123 | | - ] |
124 | | - }, |
125 | | - { |
126 | | - "name": "stdout", |
127 | | - "output_type": "stream", |
128 | | - "text": [ |
129 | | - "replace support_tickets/aa_dataset-tickets-multi-lang-5-2-50-version.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: " |
130 | | - ] |
131 | | - }, |
132 | | - { |
133 | | - "ename": "OSError", |
134 | | - "evalue": "[Errno 5] Input/output error", |
135 | | - "output_type": "error", |
136 | | - "traceback": [ |
137 | | - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", |
138 | | - "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)", |
139 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/IPython/utils/_process_posix.py:130\u001b[39m, in \u001b[36mProcessHandler.system\u001b[39m\u001b[34m(self, cmd)\u001b[39m\n\u001b[32m 127\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 128\u001b[39m \u001b[38;5;66;03m# res is the index of the pattern that caused the match, so we\u001b[39;00m\n\u001b[32m 129\u001b[39m \u001b[38;5;66;03m# know whether we've finished (if we matched EOF) or not\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m130\u001b[39m res_idx = \u001b[43mchild\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexpect_list\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpatterns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mread_timeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 131\u001b[39m \u001b[38;5;28mprint\u001b[39m(child.before[out_size:].decode(enc, \u001b[33m'\u001b[39m\u001b[33mreplace\u001b[39m\u001b[33m'\u001b[39m), end=\u001b[33m'\u001b[39m\u001b[33m'\u001b[39m)\n", |
140 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/spawnbase.py:383\u001b[39m, in \u001b[36mSpawnBase.expect_list\u001b[39m\u001b[34m(self, pattern_list, timeout, searchwindowsize, async_, **kw)\u001b[39m\n\u001b[32m 382\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m383\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mexp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexpect_loop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n", |
141 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/expect.py:169\u001b[39m, in \u001b[36mExpecter.expect_loop\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 168\u001b[39m \u001b[38;5;66;03m# Still have time left, so read more data\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m169\u001b[39m incoming = \u001b[43mspawn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_nonblocking\u001b[49m\u001b[43m(\u001b[49m\u001b[43mspawn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmaxread\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.spawn.delayafterread \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", |
142 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/pty_spawn.py:500\u001b[39m, in \u001b[36mspawn.read_nonblocking\u001b[39m\u001b[34m(self, size, timeout)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;66;03m# Because of the select(0) check above, we know that no data\u001b[39;00m\n\u001b[32m 498\u001b[39m \u001b[38;5;66;03m# is available right now. But if a non-zero timeout is given\u001b[39;00m\n\u001b[32m 499\u001b[39m \u001b[38;5;66;03m# (possibly timeout=None), we call select() with a timeout.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m500\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (timeout != \u001b[32m0\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 501\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m(spawn, \u001b[38;5;28mself\u001b[39m).read_nonblocking(size)\n", |
143 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/pty_spawn.py:450\u001b[39m, in \u001b[36mspawn.read_nonblocking.<locals>.select\u001b[39m\u001b[34m(timeout)\u001b[39m\n\u001b[32m 449\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mselect\u001b[39m(timeout):\n\u001b[32m--> \u001b[39m\u001b[32m450\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mselect_ignore_interrupts\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mchild_fd\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m[\u001b[32m0\u001b[39m]\n", |
144 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/utils.py:143\u001b[39m, in \u001b[36mselect_ignore_interrupts\u001b[39m\u001b[34m(iwtd, owtd, ewtd, timeout)\u001b[39m\n\u001b[32m 142\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m143\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mselect\u001b[49m\u001b[43m.\u001b[49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\u001b[43miwtd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mowtd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mewtd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 144\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mInterruptedError\u001b[39;00m:\n", |
145 | | - "\u001b[31mKeyboardInterrupt\u001b[39m: ", |
146 | | - "\nDuring handling of the above exception, another exception occurred:\n", |
147 | | - "\u001b[31mOSError\u001b[39m Traceback (most recent call last)", |
148 | | - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[36]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m 4\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33mDownloading dataset...\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 5\u001b[39m get_ipython().system(\u001b[33m'\u001b[39m\u001b[33mkaggle datasets download -d tobiasbueck/multilingual-customer-support-tickets\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[43mget_ipython\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43msystem\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43munzip -q multilingual-customer-support-tickets.zip -d support_tickets\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mDataset downloaded and extracted successfully!\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 8\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n", |
149 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/ipykernel/zmqshell.py:788\u001b[39m, in \u001b[36mZMQInteractiveShell.system_piped\u001b[39m\u001b[34m(self, cmd)\u001b[39m\n\u001b[32m 786\u001b[39m \u001b[38;5;28mself\u001b[39m.user_ns[\u001b[33m\"\u001b[39m\u001b[33m_exit_code\u001b[39m\u001b[33m\"\u001b[39m] = system(cmd)\n\u001b[32m 787\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m788\u001b[39m \u001b[38;5;28mself\u001b[39m.user_ns[\u001b[33m\"\u001b[39m\u001b[33m_exit_code\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[43msystem\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mvar_expand\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcmd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdepth\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", |
150 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/IPython/utils/_process_posix.py:141\u001b[39m, in \u001b[36mProcessHandler.system\u001b[39m\u001b[34m(self, cmd)\u001b[39m\n\u001b[32m 136\u001b[39m out_size = \u001b[38;5;28mlen\u001b[39m(child.before)\n\u001b[32m 137\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m:\n\u001b[32m 138\u001b[39m \u001b[38;5;66;03m# We need to send ^C to the process. The ascii code for '^C' is 3\u001b[39;00m\n\u001b[32m 139\u001b[39m \u001b[38;5;66;03m# (the character is known as ETX for 'End of Text', see\u001b[39;00m\n\u001b[32m 140\u001b[39m \u001b[38;5;66;03m# curses.ascii.ETX).\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m141\u001b[39m \u001b[43mchild\u001b[49m\u001b[43m.\u001b[49m\u001b[43msendline\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mchr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[32;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 142\u001b[39m \u001b[38;5;66;03m# Read and print any more output the program might produce on its\u001b[39;00m\n\u001b[32m 143\u001b[39m \u001b[38;5;66;03m# way out.\u001b[39;00m\n\u001b[32m 144\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n", |
151 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/pty_spawn.py:578\u001b[39m, in \u001b[36mspawn.sendline\u001b[39m\u001b[34m(self, s)\u001b[39m\n\u001b[32m 572\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m'''Wraps send(), sending string ``s`` to child process, with\u001b[39;00m\n\u001b[32m 573\u001b[39m \u001b[33;03m``os.linesep`` automatically appended. Returns number of bytes\u001b[39;00m\n\u001b[32m 574\u001b[39m \u001b[33;03mwritten. Only a limited number of bytes may be sent for each\u001b[39;00m\n\u001b[32m 575\u001b[39m \u001b[33;03mline in the default terminal mode, see docstring of :meth:`send`.\u001b[39;00m\n\u001b[32m 576\u001b[39m \u001b[33;03m'''\u001b[39;00m\n\u001b[32m 577\u001b[39m s = \u001b[38;5;28mself\u001b[39m._coerce_send_string(s)\n\u001b[32m--> \u001b[39m\u001b[32m578\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlinesep\u001b[49m\u001b[43m)\u001b[49m\n", |
152 | | - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/gh/fswl/ai-algorithms/venv/lib/python3.13/site-packages/pexpect/pty_spawn.py:569\u001b[39m, in \u001b[36mspawn.send\u001b[39m\u001b[34m(self, s)\u001b[39m\n\u001b[32m 566\u001b[39m \u001b[38;5;28mself\u001b[39m._log(s, \u001b[33m'\u001b[39m\u001b[33msend\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 568\u001b[39m b = \u001b[38;5;28mself\u001b[39m._encoder.encode(s, final=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m--> \u001b[39m\u001b[32m569\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mos\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mchild_fd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", |
153 | | - "\u001b[31mOSError\u001b[39m: [Errno 5] Input/output error" |
| 98 | + "Dataset already exists in 'support_tickets' directory. Skipping download.\n" |
154 | 99 | ] |
155 | 100 | } |
156 | 101 | ], |
|
0 commit comments