Commit 6142a98

cho-groq and ozenhati authored
batch processing demo (groq#72)
Co-authored-by: Hatice Ozen <[email protected]>
1 parent 8dcd054 commit 6142a98

4 files changed: +345 -0 lines

Lines changed: 191 additions & 0 deletions
@@ -0,0 +1,191 @@
# Groq Batch Processing

This tutorial will guide you through the process of uploading and processing batch jobs using the Groq API.

https://github.com/user-attachments/assets/8ef2e06b-6153-43d3-9122-6bc11b786efd

## Step 1: Set Up Dependencies

Before interacting with the Groq API, you need to install and import the required dependencies.

```
import os
from dotenv import load_dotenv
import requests  # Install with: pip install requests
import time

# Load environment variables from .env file
load_dotenv()

# Access environment variables
api_key = os.getenv("GROQ_API_KEY")
```
Make sure you have a `.env` file containing your `GROQ_API_KEY`. If you don't have one already, you can create a free account and generate one [here](https://console.groq.com/keys).
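If you're creating the `.env` file from scratch, it only needs one line. The value below is a placeholder, not a real key:

```
GROQ_API_KEY=your_api_key_here
```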

### Create a virtual environment
`python3 -m venv venv`

### Activate it
`source venv/bin/activate`

### Install the packages
`pip3 install groq requests python-dotenv`

## Step 2: Upload the JSONL File to Groq

```
def upload_file_to_groq(api_key, file_path):
    url = "https://api.groq.com/openai/v1/files"

    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    files = {
        "file": ("batch_file.jsonl", open(file_path, "rb"))
    }

    data = {"purpose": "batch"}

    response = requests.post(url, headers=headers, files=files, data=data)
    return response.json()

file_path = "batch_input.jsonl"  # Path to your JSONL file
file_id = ""

try:
    result = upload_file_to_groq(api_key, file_path)
    file_id = result["id"]
    print("This is the file_id from Step 2: " + file_id)
except Exception as e:
    print(f"Error: {e}")
```
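Note that the snippet above opens the JSONL file without explicitly closing it. If you prefer, a small variation of the same upload call (same endpoint, headers, and form data) uses a context manager so the file handle is always released:

```
def upload_file_to_groq(api_key, file_path):
    url = "https://api.groq.com/openai/v1/files"
    headers = {"Authorization": f"Bearer {api_key}"}

    # The context manager closes the file even if the request raises
    with open(file_path, "rb") as f:
        files = {"file": ("batch_file.jsonl", f)}
        data = {"purpose": "batch"}
        response = requests.post(url, headers=headers, files=files, data=data)

    return response.json()
```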

## Step 3: Create a Batch Object

Create a batch object using the uploaded file ID.

```
def create_batch(api_key, input_file_id):
    url = "https://api.groq.com/openai/v1/batches"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    data = {
        "input_file_id": input_file_id,
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h"
    }

    response = requests.post(url, headers=headers, json=data)
    return response.json()

batch_id = ""
try:
    result = create_batch(api_key, file_id)
    batch_id = result["id"]
    print("This is the Batch object id from Step 3: " + batch_id)
except Exception as e:
    print(f"Error: {e}")
```
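If the batch can't be created (for example, because the uploaded file isn't valid JSONL), the response typically won't contain an `id`, so `result["id"]` raises a `KeyError` and the script only prints a bare exception. A small sketch that surfaces the failure more clearly; the `{"error": ...}` shape is an assumption based on the OpenAI-compatible response format, not something this tutorial documents:

```
result = create_batch(api_key, file_id)

if "id" in result:
    batch_id = result["id"]
    print("This is the Batch object id from Step 3: " + batch_id)
else:
    # Assumed OpenAI-compatible error payload: {"error": {"message": ...}}
    print("Batch creation failed: " + str(result.get("error", result)))
```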

## Step 4: Get the Batch Status

Monitor the batch job's status until it completes.

```
def get_batch_status(api_key, batch_id):
    url = f"https://api.groq.com/openai/v1/batches/{batch_id}"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    response = requests.get(url, headers=headers)
    return response.json()

output_file_id = ""
try:
    result = get_batch_status(api_key, batch_id)
    print("\nStep 4 results: ")

    count = 0
    while result["status"] != "completed" and count < 100:
        result = get_batch_status(api_key, batch_id)
        time.sleep(3)
        print("Your batch status is: " + result["status"])
        count += 1

    output_file_id = result.get("output_file_id")
    print("This is your output_file_id from Step 4: " + output_file_id)
except Exception as e:
    print(f"Error: {e}")
```
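The loop above only watches for `completed`. A batch can also end in a terminal failure state, in which case this loop keeps polling until the 100-iteration cap runs out. Here is a sketch of a polling loop that also stops on failure; the exact status strings (`failed`, `expired`, `cancelled`) come from the Groq batch documentation linked at the end of this tutorial, so treat them as an assumption of this sketch:

```
terminal_states = {"completed", "failed", "expired", "cancelled"}  # assumed terminal statuses

result = get_batch_status(api_key, batch_id)
count = 0
while result["status"] not in terminal_states and count < 100:
    time.sleep(3)
    result = get_batch_status(api_key, batch_id)
    print("Your batch status is: " + result["status"])
    count += 1

if result["status"] == "completed":
    output_file_id = result.get("output_file_id")
else:
    print("Batch did not complete; final status: " + result["status"])
```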

## Step 5: Retrieve Batch Results

Download and save the batch job results.

```
def download_file_content(api_key, output_file_id, output_file):
    url = f"https://api.groq.com/openai/v1/files/{output_file_id}/content"

    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    response = requests.get(url, headers=headers)

    with open(output_file, 'wb') as f:
        f.write(response.content)

    return f"\nFile downloaded successfully to {output_file}"

output_file = "batch_output.jsonl"
try:
    result = download_file_content(api_key, output_file_id, output_file)
    print(result)
except Exception as e:
    print(f"Error: {e}")
```
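Each line of the downloaded `batch_output.jsonl` is a standalone JSON object, and results are not guaranteed to arrive in the same order as your requests. A short sketch, using only the standard library, that reads the file back into a `custom_id` → reply mapping (the field path follows the example output file shown further below):

```
import json

results = {}
with open("batch_output.jsonl", "r") as f:
    for line in f:
        record = json.loads(line)
        body = record["response"]["body"]
        # Map each custom_id to the assistant's reply text
        results[record["custom_id"]] = body["choices"][0]["message"]["content"]

for custom_id, content in sorted(results.items()):
    print(custom_id + ": " + content)
```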

Now you have successfully uploaded, processed, and retrieved batch job results using the Groq API!

## How to run it in your terminal:
`python3 main.py`

### Example output in terminal:
```
This is the file_id from Step 2: file_01jpthgx92e3ts09bma0bfchmt
This is the Batch object id from Step 3: batch_01jpthgxffe8ms4zqdkf1aejjp

Step 4 results:
Your batch status is: validating
Your batch status is: in_progress
Your batch status is: completed
This is your output_file_id from Step 4: file_01jpthh1e3e739wba761nfgvj2

File downloaded successfully to batch_output.jsonl
```

### Example input .jsonl file
```
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful translation assistant. Translate the following into spanish."}, {"role": "user", "content": "Hello, how are you today?"}]}}
```
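Writing the input file by hand is fine for a few requests, but for larger batches it's easier to generate it. A minimal sketch that builds `batch_input.jsonl` with the standard library; the prompts are illustrative placeholders:

```
import json

prompts = [
    ("request-1", "What is 2+2?"),
    ("request-2", "Hello, how are you today?"),
]

with open("batch_input.jsonl", "w") as f:
    for custom_id, prompt in prompts:
        request = {
            "custom_id": custom_id,
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "llama-3.1-8b-instant",
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
            },
        }
        f.write(json.dumps(request) + "\n")
```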

### Example output .jsonl file
```
{"id":"batch_req_out_01jpra5p4ve4v8k14zkqn6agjm","custom_id":"request-2","response":{"status_code":200,"request_id":"req_01jpra5n2qef6s66k65v8sy51h","body":{"id":"chatcmpl-f822c700-75aa-4fa3-8f2c-3f65cca8a1d3","object":"chat.completion","created":1742425217,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"\"Hola, ¿cómo estás hoy\" or more commonly in informal settings: \"Hola, ¿qué onda hoy\". \n\nIf you want it to be a more formal conversation, you could use: \"Buenos días, ¿cómo está hoy?\""},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":1.0087154420000002,"prompt_tokens":55,"prompt_time":0.009901563,"completion_tokens":55,"completion_time":0.073333333,"total_tokens":110,"total_time":0.083234896},"system_fingerprint":"fp_076aab041c","x_groq":{"id":"req_01jpra5n2qef6s66k65v8sy51h"}}},"error":null}
```
View more detailed documentation on batch processing [here](https://console.groq.com/docs/batch).
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is 2+2?"}]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful translation assistant. Translate the following into spanish."}, {"role": "user", "content": "Hello, how are you today?"}]}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "count up to 1000. starting with 1, 2, 3. print all the numbers, do not stop until you get to 1000."}]}}
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
{"id":"batch_req_out_01jpthgypde739skmk9d2p7frc","custom_id":"request-2","response":{"status_code":200,"request_id":"req_01jpthgyahed0r94pxxy0bjbvr","body":{"id":"chatcmpl-8538d583-f91c-4883-a351-ff03bd6f17f9","object":"chat.completion","created":1742500035,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"\"Hola, ¿cómo estás hoy?\" \n\nHowever, a more casual and common way to say it in Spanish would be:\n\"Hola, ¿qué onda?\" (informal)\n\"Hola, ¿cómo va?\" (a bit more formal)\n\nIf you want to be more polite, you could say:\n\"Hola, ¿cómo se encuentra hoy?\""},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.27463279799999996,"prompt_tokens":55,"prompt_time":0.003482458,"completion_tokens":76,"completion_time":0.101333333,"total_tokens":131,"total_time":0.104815791},"system_fingerprint":"fp_a4265e44d5","x_groq":{"id":"req_01jpthgyahed0r94pxxy0bjbvr"}}},"error":null}
{"id":"batch_req_out_01jpthh170e7387j2e0r03p825","custom_id":"request-3","response":{"status_code":200,"request_id":"req_01jpthgyahe3vrg488eaczvbdm","body":{"id":"chatcmpl-3bedd913-0c82-482f-8c67-e9275055f153","object":"chat.completion","created":1742500035,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n36\n37\n38\n39\n40\n41\n42\n43\n44\n45\n46\n47\n48\n49\n50\n51\n52\n53\n54\n55\n56\n57\n58\n59\n60\n61\n62\n63\n64\n65\n66\n67\n68\n69\n70\n71\n72\n73\n74\n75\n76\n77\n78\n79\n80\n81\n82\n83\n84\n85\n86\n87\n88\n89\n90\n91\n92\n93\n94\n95\n96\n97\n98\n99\n100\n101\n102\n103\n104\n105\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n116\n117\n118\n119\n120\n121\n122\n123\n124\n125\n126\n127\n128\n129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n151\n152\n153\n154\n155\n156\n157\n158\n159\n160\n161\n162\n163\n164\n165\n166\n167\n168\n169\n170\n171\n172\n173\n174\n175\n176\n177\n178\n179\n180\n181\n182\n183\n184\n185\n186\n187\n188\n189\n190\n191\n192\n193\n194\n195\n196\n197\n198\n199\n200\n201\n202\n203\n204\n205\n206\n207\n208\n209\n210\n211\n212\n213\n214\n215\n216\n217\n218\n219\n220\n221\n222\n223\n224\n225\n226\n227\n228\n229\n230\n231\n232\n233\n234\n235\n236\n237\n238\n239\n240\n241\n242\n243\n244\n245\n246\n247\n248\n249\n250\n251\n252\n253\n254\n255\n256\n257\n258\n259\n260\n261\n262\n263\n264\n265\n266\n267\n268\n269\n270\n271\n272\n273\n274\n275\n276\n277\n278\n279\n280\n281\n282\n283\n284\n285\n286\n287\n288\n289\n290\n291\n292\n293\n294\n295\n296\n297\n298\n299\n300\n301\n302\n303\n304\n305\n306\n307\n308\n309\n310\n311\n312\n313\n314\n315\n316\n317\n318\n319\n320\n321\n322\n323\n324\n325\n326\n327\n328\n329\n330\n331\n332\n333\n334\n335\n336\n337\n338\n339\n340\n341\n342\n343\n344\n345\n346\n347\n348\n349\n350\n351\n352\n353\n354\n355\n356\n357\n358\n359\n360\n361\n362\n363\n364\n365\n366\n367\n368\n369\n370\n371\n372\n373\n374\n375\n376\n377\n378\n379\n380\n381\n382\n383\n384\n385\n386\n387\n388\n389\n390\n391\n392\n393\n393\n394\n395\n396\n397\n398\n399\n400\n401\n402\n403\n404\n405\n406\n407\n408\n409\n410\n411\n412\n413\n414\n415\n416\n417\n418\n419\n420\n421\n422\n423\n424\n425\n426\n427\n428\n429\n430\n431\n432\n433\n434\n435\n436\n437\n438\n439\n440\n441\n442\n443\n444\n445\n446\n447\n448\n449\n450\n451\n452\n453\n454\n455\n456\n457\n458\n459\n460\n461\n462\n463\n464\n465\n466\n467\n468\n469\n470\n471\n472\n473\n474\n475\n476\n477\n478\n479\n480\n481\n482\n483\n484\n485\n486\n487\n488\n489\n490\n491\n492\n493\n494\n495\n496\n497\n498\n499\n500\n501\n502\n503\n504\n505\n506\n507\n508\n509\n510\n511\n512\n513\n514\n515\n516\n517\n518\n519\n520\n521\n522\n523\n524\n525\n526\n527\n528\n529\n530\n531\n532\n533\n534\n535\n536\n537\n538\n539\n540\n541\n542\n543\n544\n545\n546\n547\n548\n549\n550\n551\n552\n553\n554\n555\n556\n557\n558\n559\n560\n561\n562\n563\n564\n565\n566\n567\n568\n569\n570\n571\n572\n573\n574\n575\n576\n577\n578\n579\n580\n581\n582\n583\n584\n585\n586\n587\n588\n589\n590\n591\n592\n593\n594\n595\n596\n597\n598\n599\n600\n601\n602\n603\n604\n605\n606\n607\n608\n609\n610\n611\n612\n613\n614\n615\n616\n617\n618\n619\n620\n621\n622\n623\n624\n625\n626\n627\n628\n629\n630\n631\n632\n633\n634\n635\n636\n637\n638\n639\n640\n641\n642\n643\n644\n645\n646\n647\n648\n649\n650\n651\n652\n653\n654\n655\n656\n657\n658\n659\n660\n661\n6
62\n663\n664\n665\n666\n667\n668\n669\n670\n671\n672\n673\n674\n675\n676\n677\n678\n679\n680\n681\n682\n683\n684\n685\n686\n687\n688\n689\n690\n691\n692\n693\n694\n695\n696\n697\n698\n699\n700\n701\n702\n703\n704\n705\n706\n707\n708\n709\n710\n711\n712\n713\n714\n715\n716\n717\n718\n719\n720\n721\n722\n723\n724\n725\n726\n727\n728\n729\n730\n731\n732\n733\n734\n735\n736\n737\n738\n739\n740\n741\n742\n743\n744\n745\n746\n747\n748\n749\n750\n751\n752\n753\n754\n755\n756\n757\n758\n759\n760\n761\n762\n763\n764\n765\n766\n767\n768\n769\n770\n771\n772\n773\n774\n775\n776\n777\n778\n779\n780\n781\n782\n783\n784\n785\n786\n787\n788\n789\n790\n791\n792\n793\n794\n795\n796\n797\n798\n799\n800\n801\n802\n803\n804\n805\n806\n807\n808\n809\n810\n811\n812\n813\n814\n815\n816\n817\n818\n819\n820\n821\n822\n823\n824\n825\n826\n827\n828\n829\n830\n831\n832\n833\n834\n835\n836\n837\n838\n839\n840\n841\n842\n843\n844\n845\n846\n847\n848\n849\n850\n851\n852\n853\n854\n855\n856\n857\n858\n859\n860\n861\n862\n863\n864\n865\n866\n867\n868\n869\n870\n871\n872\n873\n874\n875\n876\n877\n878\n879\n880\n881\n882\n883\n884\n885\n886\n887\n888\n889\n890\n891\n892\n893\n894\n895\n896\n897\n898\n899\n900\n901\n902\n903\n904\n905\n906\n907\n908\n909\n910\n911\n912\n913\n914\n915\n916\n917\n918\n919\n920\n921\n922\n923\n924\n925\n926\n927\n928\n929\n930\n931\n932\n933\n934\n935\n936\n937\n938\n939\n940\n941\n942\n943\n944\n945\n946\n947\n948\n949\n950\n951\n952\n953\n954\n955\n956\n957\n958\n959\n960\n961\n962\n963\n964\n965\n966\n967\n968\n969\n970\n971\n972\n973\n974\n975\n976\n977\n978\n979\n980\n981\n982\n983\n984\n985\n986\n987\n988\n989\n990\n991\n992\n993\n994\n995\n996\n997\n998\n999\n1000"},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.279345227,"prompt_tokens":75,"prompt_time":0.008248425,"completion_tokens":2003,"completion_time":2.670666667,"total_tokens":2078,"total_time":2.678915092},"system_fingerprint":"fp_a4265e44d5","x_groq":{"id":"req_01jpthgyahe3vrg488eaczvbdm"}}},"error":null}
{"id":"batch_req_out_01jpthgykte7396v5ape5vcty3","custom_id":"request-1","response":{"status_code":200,"request_id":"req_01jpthgyahe3vbw7k2bbrvgyee","body":{"id":"chatcmpl-5cbc35f5-82f3-4aaa-876e-9c6de82907e4","object":"chat.completion","created":1742500035,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"The answer to 2+2 is 4."},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.27736843099999997,"prompt_tokens":48,"prompt_time":0.003871881,"completion_tokens":12,"completion_time":0.016,"total_tokens":60,"total_time":0.019871881},"system_fingerprint":"fp_a4265e44d5","x_groq":{"id":"req_01jpthgyahe3vbw7k2bbrvgyee"}}},"error":null}

tutorials/batch-processing/main.py

Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
1+
"""
2+
Step 1. Set up dependencies
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
import requests # pip install requests first!
8+
import time
9+
10+
# Load environment variables from .env file
11+
load_dotenv()
12+
13+
# Access environment variables
14+
api_key = os.getenv("GROQ_API_KEY")
15+
16+
17+
"""
18+
Step 2. Upload the JSONL file to Groq
19+
"""
20+
21+
def upload_file_to_groq(api_key, file_path):
22+
url = "https://api.groq.com/openai/v1/files"
23+
24+
headers = {
25+
"Authorization": f"Bearer {api_key}"
26+
}
27+
28+
# Prepare the file and form data
29+
files = {
30+
"file": ("batch_file.jsonl", open(file_path, "rb"))
31+
}
32+
33+
data = {
34+
"purpose": "batch"
35+
}
36+
37+
# Make the POST request
38+
response = requests.post(url, headers=headers, files=files, data=data)
39+
40+
return response.json()
41+
42+
# Usage example
43+
file_path = "batch_input.jsonl" # Path to your JSONL file
44+
file_id = "" # will be used in the next step
45+
46+
try:
47+
result = upload_file_to_groq(api_key, file_path)
48+
file_id = result["id"]
49+
print("This is the file_id from Step 2: " + file_id)
50+
51+
except Exception as e:
52+
print(f"Error: {e}")
53+
54+
55+
56+
"""
57+
Step 3. Make a batch object
58+
"""
59+
60+
def create_batch(api_key, input_file_id):
61+
url = "https://api.groq.com/openai/v1/batches"
62+
63+
headers = {
64+
"Authorization": f"Bearer {api_key}",
65+
"Content-Type": "application/json"
66+
}
67+
68+
data = {
69+
"input_file_id": input_file_id,
70+
"endpoint": "/v1/chat/completions",
71+
"completion_window": "24h"
72+
}
73+
74+
response = requests.post(url, headers=headers, json=data)
75+
return response.json()
76+
77+
batch_id = "" # will be used in the next step
78+
try:
79+
result = create_batch(api_key, file_id)
80+
batch_id = result["id"] # batch result id
81+
print("This is the Batch object id from Step 3: " + batch_id)
82+
83+
except Exception as e:
84+
print(f"Error: {e}")
85+
86+
87+
88+
"""
89+
Step 4. Get the batch status
90+
"""
91+
92+
def get_batch_status(api_key, batch_id):
93+
url = f"https://api.groq.com/openai/v1/batches/{batch_id}"
94+
95+
headers = {
96+
"Authorization": f"Bearer {api_key}",
97+
"Content-Type": "application/json"
98+
}
99+
100+
response = requests.get(url, headers=headers)
101+
return response.json()
102+
103+
output_file_id = "" # will be used in the next step
104+
try:
105+
result = get_batch_status(api_key, batch_id)
106+
print("\nStep 4 results: ")
107+
108+
count = 0
109+
110+
# Corrected condition: use `result["status"]` instead of `result.status`
111+
while result["status"] != "completed" and count < 100:
112+
result = get_batch_status(api_key, batch_id) # Update `result` inside the loop
113+
time.sleep(3)
114+
print("Your batch status is: " + result["status"])
115+
count += 1
116+
117+
output_file_id = result.get("output_file_id") # Use .get() to safely access keys
118+
print("This is your output_file_id from Step 4: " + output_file_id)
119+
except Exception as e:
120+
print(f"Error: {e}")
121+
122+
123+
124+
"""
125+
Step 5. Retrieve batch results
126+
"""
127+
128+
def download_file_content(api_key, output_file_id, output_file):
129+
url = f"https://api.groq.com/openai/v1/files/{output_file_id}/content"
130+
131+
headers = {
132+
"Authorization": f"Bearer {api_key}"
133+
}
134+
135+
response = requests.get(url, headers=headers)
136+
137+
# Write the content to a file
138+
with open(output_file, 'wb') as f:
139+
f.write(response.content)
140+
141+
return f"\nFile downloaded successfully to {output_file}"
142+
143+
output_file = "batch_output.jsonl" # replace with your own file of choice to download batch job contents to
144+
try:
145+
result = download_file_content(api_key, output_file_id, output_file)
146+
print(result)
147+
except Exception as e:
148+
print(f"Error: {e}")
