Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 192 additions & 59 deletions docker_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,81 @@
import requests
import tarfile
import urllib3
import re
urllib3.disable_warnings()

# NOTE(review): this span comes from a rendered diff: removed and added lines
# are interleaved and the original indentation is lost, so it is not runnable
# as shown.

# Require exactly one command-line argument; otherwise print the usage text
# and exit with status 1.
if len(sys.argv) != 2 :
print('Usage:\n\tdocker_pull.py [registry/][repository/]image[:tag|@digest]\n')
exit(1)

# Look for the Docker image to download
# `repo` and `tag` start with defaults that the parsing below may overwrite.
repo = 'library'
tag = 'latest'
imgparts = sys.argv[1].split('/')
# Graphical needs for drawing full line
# NOTE(review): this `try:` presumably pairs with the `stty size` read and the
# bare `except:` at the end of this span rather than with the image parsing
# directly below - confirm against the real file.
try:
# Split the last path component on '@' first, then on ':'; when neither split
# yields exactly two parts the whole component becomes `img` and `tag` keeps
# its default.
img,tag = imgparts[-1].split('@')
except ValueError:
try:
img,tag = imgparts[-1].split(':')
except ValueError:
img = imgparts[-1]
# Docker client doesn't seem to consider the first element as a potential registry unless there is a '.' or ':'
if len(imgparts) > 1 and ('.' in imgparts[0] or ':' in imgparts[0]):
registry = imgparts[0]
repo = '/'.join(imgparts[1:-1])
else:
registry = 'registry-1.docker.io'
if len(imgparts[:-1]) != 0:
repo = '/'.join(imgparts[:-1])
# Read the terminal size from the output of `stty size`. On success both values
# are strings; on any failure the fallback assigns the integers 20 and 20.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit.
console_rows, console_columns = os.popen('stty size', 'r').read().split()
except:
console_rows, console_columns = 20 , 20

############# Default values

# Default token endpoint and registry host, plus the repository and tag names
# used as fallbacks.
DOCKER_DEFAULT_auth_url = 'auth.docker.io/token'
DOCKER_DEFAULT_server_url = 'registry-1.docker.io'
DOCKER_DEFAULT_repo = 'library'
DOCKER_DEFAULT_tag = 'latest'

# Credentials start empty and the output directory is the current one.
username, password = "", ""
output_path = "."

# Media types sent in the Accept header: a single image manifest first, the
# manifest list as the alternative.
json_manifest_type = 'application/vnd.docker.distribution.manifest.v2+json'
json_manifest_type_bis = 'application/vnd.docker.distribution.manifest.list.v2+json'

############################################ FUNCTION ######################################################

# Get endpoint registry from url
def get_endpoint_registry(url, repository):
    """Return the URL to query for credentials before pulling from a registry.

    Sends ``GET https://<url>/v2/`` (certificate verification disabled) and,
    when the registry answers 401, inspects the ``WWW-Authenticate`` challenge.

    Args:
        url: Registry host (optionally with port), without scheme.
        repository: Repository path inserted into the pull scope.

    Returns:
        - ``""`` when the registry does not answer 401, or when the realm is
          neither the Nexus realm nor an http(s) URL.
        - ``https://<url>/v2/`` when the realm is the Sonatype Nexus realm, or
          when the challenge is missing or has no ``realm``.
        - ``<realm>?service=<service>&scope=repository:<repository>:pull``
          when the realm is a URL different from ``url``.
    """
    resp = requests.get('https://{}/v2/'.format(url), verify=False)
    server_auth_url = ""

    # Only a 401 challenge tells us where to authenticate.
    if resp.status_code != 401:
        return server_auth_url

    # A missing header or a challenge without realm used to raise
    # KeyError/AttributeError, which the old `except IndexError` never caught.
    # Route both cases to the fallback that handler was meant to provide.
    challenge = resp.headers.get('WWW-Authenticate', '')
    realm_match = re.search(r'realm="([^"]*)"', challenge)
    if realm_match is None:
        print ("failed !")
        return "https://" + url + "/v2/"
    realm = realm_match.group(1)

    # If Repository is on NEXUS OSS
    if realm == "Sonatype Nexus Repository Manager":
        server_auth_url = "https://" + url + "/v2/"
        print ("Nexus OSS repository type")

    # If Repository is on DockerHub like
    if realm != url and "http" in realm:
        service_match = re.search(r'service="([^"]*)"', challenge)
        # The service attribute may be absent; an empty value keeps the
        # query string well-formed.
        service = service_match.group(1) if service_match else ""
        server_auth_url = realm + "?service=" + service + "&scope=repository:" + repository + ":pull"
        print ("Docker Hub repository type")

    return server_auth_url

# Get authentication headers
def get_auth_head(registry_endpoint, type):
    """Build the HTTP headers used for manifest and blob requests.

    Args:
        registry_endpoint: URL queried to obtain credentials. An empty value
            means there is nothing to query, so only ``Accept`` is returned.
        type: Media type placed in the ``Accept`` header. The name shadows
            the builtin but is kept so existing callers keep working.

    Returns:
        A dict with ``Accept`` and, when credentials could be derived, an
        ``Authorization`` entry: ``Bearer <token>`` when the endpoint returns
        JSON containing ``token``, otherwise the ``Basic`` credentials that
        were sent with the request.

    The process exits with status 1 when the endpoint answers with any
    status other than 200.
    """
    # An empty endpoint is not a URL; skip the round-trip entirely.
    if not registry_endpoint:
        return {'Accept': type}

    # Get authentication header from endpoint
    if len(username) != 0 and len(password) != 0:
        resp = requests.get(registry_endpoint, auth=(username, password), verify=False)
    else:
        resp = requests.get(registry_endpoint, verify=False)

    if resp.status_code == 401:
        print ("Authentication error !")
        sys.exit(1)
    if resp.status_code != 200:
        # status_code is an int: convert before concatenating. Stop here,
        # because there is no header to return on this path.
        print ("Erreur inside get_auth_head function : " + str(resp.status_code))
        sys.exit(1)

    # Generate authentication header from response
    try:
        access_token = resp.json()['token']
    except (ValueError, KeyError, TypeError):
        # Not a token service (body is not JSON, or JSON without a token):
        # reuse the Basic credentials that were sent, if any.
        sent_credentials = resp.request.headers.get('Authorization', '')
        if sent_credentials.startswith('Basic '):
            return {'Authorization': sent_credentials, 'Accept': type}
        return {'Accept': type}

    return {'Authorization': 'Bearer ' + access_token, 'Accept': type}

# Docker style progress bar
Expand All @@ -68,53 +98,154 @@ def progress_bar(ublob, nb_traits):
sys.stdout.write(']')
sys.stdout.flush()

# Fetch manifest v2 and get image layer digests
# NOTE(review): these statements pass only the Accept media type to
# get_auth_head and read `registry`, unlike the two-argument calls and
# `registry_url` used further down - presumably the removed side of the
# diff; confirm before relying on them.
auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json')
resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, tag), headers=auth_head, verify=False)
#/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\#

############################################## MAIN ########################################################

############## Check if args < 2

arg_count = len(sys.argv)
if arg_count < 2:
    # No image reference given: show the supported invocations and stop.
    print ('Usage:')
    print ('\t docker_pull.py [registry/][repository/]image[:tag|@digest] ')
    print ('\t docker_pull.py [registry/][repository/]image[:tag|@digest] output_path')
    print ('\t docker_pull.py [registry/][repository/]image[:tag|@digest] username password output_path\n')
    exit(1)

############## Get info from arg

imgparts = sys.argv[1].split('/')

############## Setup username & password

# Non-empty environment variables provide the credentials first...
env_login = os.getenv("REPOSITORY_LOGIN")
if env_login:
    username = env_login

env_password = os.getenv("REPOSITORY_PASSWORD")
if env_password:
    password = env_password

# ...and positional arguments then override them:
#   3 args -> output_path
#   4 args -> username, password
#   5 args -> username, password, output_path
if arg_count == 3:
    output_path = sys.argv[2]
elif arg_count in (4, 5):
    username, password = sys.argv[2], sys.argv[3]
    if arg_count == 5:
        output_path = sys.argv[4]

############## Get repository url + registry url for auth

# Separate the last path component into a name and a reference.
# '@' (digest) is checked before ':' because a digest such as
# 'sha256:<hex>' itself contains a colon. Without either separator the
# default tag applies instead of raising IndexError.
last_part = imgparts[-1]
if '@' in last_part:
    last_name, tag = last_part.split('@', 1)
elif ':' in last_part:
    last_name, tag = last_part.rsplit(':', 1)
else:
    last_name, tag = last_part, DOCKER_DEFAULT_tag

# Path components with the tag/digest stripped from the last one.
path_parts = imgparts[:-1] + [last_name]

# As the Docker client does, treat the first component as a registry host
# only when it contains a '.' or a ':'.
if len(imgparts) > 1 and ('.' in imgparts[0] or ':' in imgparts[0]):
    registry_url = imgparts[0]
    # First path component is the repository, the rest is the image name.
    # 'registry/name' leaves img empty (the old code called .split on a list
    # here and left the tag glued to the repository).
    repository = path_parts[1]
    img = '/'.join(path_parts[2:])
else:
    registry_url = DOCKER_DEFAULT_server_url
    img = "" # FIXME: Image name on docker hub is actually the repository url
    if len(path_parts) > 1:
        repository = '/'.join(path_parts)
    else:
        # Bare image name: official images live under the default repository.
        repository = DOCKER_DEFAULT_repo + "/" + path_parts[0]

############## Get Registry Authentication endpoint when it is required
registry_endpoint = get_endpoint_registry(registry_url, repository)

# Summary of the resolved pull parameters, framed by two rules as wide as
# the console.
rule_line = '_' * int(console_columns)

print(rule_line)
print("\nDocker image :\t\t\t" + img)
print("Docker tag :\t\t\t" + tag)
print("Repository :\t\t\t" + repository)
print("Serveur_URL :\t\t\t" + "https://" + registry_url)
print("Registry_endpoint :\t\t" + registry_endpoint)
print(rule_line)

############## Fetch manifest v2 and get image layer digests
# NOTE(review): this section comes from a rendered diff; indentation is lost
# and some statements appear in two variants (one-argument get_auth_head with
# `registry`, two-argument get_auth_head with `registry_url`). Presumably the
# one-argument/`registry` lines are the removed side - confirm.

# Get manifest v2
auth_head=get_auth_head(registry_endpoint,json_manifest_type)

# NOTE(review): the path is built from repository AND img; when img is an
# empty string the URL contains two consecutive slashes - confirm the
# registry accepts that.
resp = requests.get('https://{}/v2/{}/{}/manifests/{}'.format(registry_url, repository, img, tag), headers=auth_head, verify=False)

# Check if error (not getting manifest)
if (resp.status_code != 200):
print('[-] Cannot fetch manifest for {} [HTTP {}]'.format(repository, resp.status_code))
print(resp.content)
auth_head = get_auth_head('application/vnd.docker.distribution.manifest.list.v2+json')
resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, tag), headers=auth_head, verify=False)
print('[-] Cannot fetch manifest for {} [HTTP {}]'.format(sys.argv[1], resp.status_code))

# Retry with other json_manifest_type
auth_head = get_auth_head(registry_endpoint,json_manifest_type_bis)
resp = requests.get('https://{}/v2/{}/{}/manifests/{}'.format(registry_url, repository, img, tag), headers=auth_head, verify=False)

# A successful retry yields a manifest list: print each entry's platform
# fields and digest so the user can re-run with the @digest form.
# Every branch below ends with exit(1).
if (resp.status_code == 200):
print('[+] Manifests found for this tag (use the @digest format to pull the corresponding image):')
manifests = resp.json()['manifests']
for manifest in manifests:
for key, value in manifest["platform"].items():
sys.stdout.write('{}: {}, '.format(key, value))
print('digest: {}'.format(manifest["digest"]))
exit(1)
elif (resp.status_code == 401):
print ("Authentication needed !")
exit(1)
else:
print("Error when getting manifest response status code : " + str(resp.status_code))
exit(1)

# Get all layers from manifest
layers = resp.json()['layers']

# Create tmp folder that will hold the image
# NOTE(review): `imgdir` and `confresp` are each assigned twice in a row below,
# and RepoTags is filled by two different statements; the source is a rendered
# diff, so presumably one variant of each pair is the removed side - confirm.
imgdir = 'tmp_{}_{}'.format(img, tag.replace(':', '@'))
imgdir = output_path + '/tmp_{}'.format(sys.argv[1].replace('/', '.').replace(':','@'))

# Start from an empty directory: remove a previous one with the same name.
if os.path.exists(imgdir):
shutil.rmtree(imgdir)

os.mkdir(imgdir)
print('Creating image structure in: ' + imgdir)

# Get SHA256 ID image
config = resp.json()['config']['digest']
confresp = requests.get('https://{}/v2/{}/blobs/{}'.format(registry, repository, config), headers=auth_head, verify=False)

# Get manifest for SHA256 ID image
confresp = requests.get('https://{}/v2/{}/blobs/{}'.format(registry_url, repository, config), headers=auth_head, verify=False)

# Write manifest inside file
# config[7:] drops the first seven characters of the digest (presumably the
# 'sha256:' prefix - confirm) to form the file name.
# NOTE(review): the file is opened without a `with` block, so it stays open
# if write() raises.
file = open('{}/{}.json'.format(imgdir, config[7:]), 'wb')
file.write(confresp.content)
file.close()

# Prepare content args for json
content = [{
'Config': config[7:] + '.json',
'RepoTags': [ ],
'Layers': [ ]
}]
if len(imgparts[:-1]) != 0:
content[0]['RepoTags'].append('/'.join(imgparts[:-1]) + '/' + img + ':' + tag)
else:
content[0]['RepoTags'].append(img + ':' + tag)

# Set content tag
content[0]['RepoTags'].append(sys.argv[1])

# Prepare template json
# A single string literal continued over three lines with backslashes.
empty_json = '{"created":"1970-01-01T00:00:00Z","container_config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false, \
"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false, "StdinOnce":false,"Env":null,"Cmd":null,"Image":"", \
"Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null}}'

# Build layer folders
# NOTE(review): indentation is lost here and the rest of the loop body is
# hidden behind a collapsed diff hunk, so only the first statements of each
# iteration are visible.
parentid=''
for layer in layers:

#Get digest of layer
ublob = layer['digest']

# FIXME: Creating fake layer ID. Don't know how Docker generates it
# The ID is the SHA-256 hex digest of "<parentid>\n<layer digest>\n".
fake_layerid = hashlib.sha256((parentid+'\n'+ublob+'\n').encode('utf-8')).hexdigest()
layerdir = imgdir + '/' + fake_layerid
Expand All @@ -128,14 +259,16 @@ def progress_bar(ublob, nb_traits):
# Creating layer.tar file
# NOTE(review): fragment of the per-layer loop body taken from a rendered diff.
# The header refresh and the blob request each appear twice (one-argument
# get_auth_head with `registry`, two-argument get_auth_head with
# `registry_url`); presumably the first of each pair is the removed side -
# confirm.
# ublob[7:19] is a 12-character slice of the layer digest used as a short
# label in the progress output.
sys.stdout.write(ublob[7:19] + ': Downloading...')
sys.stdout.flush()
auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json') # refreshing token to avoid its expiration
bresp = requests.get('https://{}/v2/{}/blobs/{}'.format(registry, repository, ublob), headers=auth_head, stream=True, verify=False)
auth_head = get_auth_head(registry_endpoint,json_manifest_type) # refreshing token to avoid its expiration

bresp = requests.get('https://{}/v2/{}/blobs/{}'.format(registry_url, repository, ublob), headers=auth_head, stream=True, verify=False)
# On failure, fall back to the first entry of the layer's 'urls' list.
# NOTE(review): layer['urls'] raises KeyError when the layer has no such key.
if (bresp.status_code != 200): # When the layer is located at a custom URL
bresp = requests.get(layer['urls'][0], headers=auth_head, stream=True, verify=False)
if (bresp.status_code != 200):
# NOTE(review): the format string has two placeholders but three arguments;
# the unused third one still evaluates bresp.headers['Content-Length'] and
# raises KeyError when that header is absent.
print('\rERROR: Cannot download layer {} [HTTP {}]'.format(ublob[7:19], bresp.status_code, bresp.headers['Content-Length']))
print(bresp.content)
exit(1)

# Stream download and follow the progress
bresp.raise_for_status()
# One fiftieth of the announced content length.
unit = int(bresp.headers['Content-Length']) / 50
Expand Down Expand Up @@ -194,11 +327,11 @@ def progress_bar(ublob, nb_traits):
file.close()

# Create image tar and clean tmp folder
# NOTE(review): `docker_tar` is assigned twice and the final message is printed
# twice; the source is a rendered diff, so presumably the first assignment and
# one of the prints are the removed/context side - confirm.
docker_tar = repo.replace('/', '_') + '_' + img + '.tar'
docker_tar = output_path + "/" + sys.argv[1].replace('/', '_').replace(':','@') + '.tar'
sys.stdout.write("Creating archive...")
sys.stdout.flush()
# Archive the whole temporary directory; arcname=os.path.sep replaces the
# directory's own name inside the archive. Then delete the directory.
tar = tarfile.open(docker_tar, "w")
tar.add(imgdir, arcname=os.path.sep)
tar.close()
shutil.rmtree(imgdir)
print('\rDocker image pulled: ' + docker_tar)
print('\rDocker image pulled: ' + docker_tar)