|
| 1 | +''' |
| 2 | +
|
| 3 | +converted.py: Parse a Dockerfile into a Singularity spec file |
| 4 | +
|
| 5 | +Copyright (c) 2016, Vanessa Sochat. All rights reserved. |
| 6 | +
|
| 7 | +''' |
| 8 | + |
| 9 | +import json |
| 10 | +import os |
| 11 | +import re |
| 12 | +import sys |
| 13 | + |
| 14 | +from singularity.utils import ( |
| 15 | + write_file, |
| 16 | + read_file |
| 17 | +) |
| 18 | + |
| 19 | +from singularity.logman import bot |
| 20 | +import json |
| 21 | + |
| 22 | + |
| 23 | +# Parsing functions --------------------------------------------------------------- |
| 24 | + |
def parse_env(env):
    '''parse_env will parse a Dockerfile ENV command to a singularity appropriate one
    eg: ENV PYTHONBUFFER 1 --> export PYTHONBUFFER=1
    ::note This has to handle multiple exports per line. In the case of having an =,
    it could be that we have more than one pair of variables. If there is no equals
    sign, the line holds exactly one name followed by its value.
    see: https://docs.docker.com/engine/reference/builder/#/env
    :param env: the text that follows the ENV keyword
    :returns: a newline terminated string holding, for every export, the export
    line itself followed by an echo that appends the same line to /environment
    '''
    exports = []
    name = None
    value = None

    # If the user has "=" then we can have more than one export per line
    if re.search("=",env):

        # Split on spaces and quoted strings, keeping the quoted strings as pieces
        pieces = [p for p in re.split("( |\\\".*?\\\"|'.*?')", env) if p.strip()]
        for contender in pieces:
            # If there is an equal, we've found a name
            if re.search("=",contender):
                if name is not None:
                    exports.append(join_env(name,value))
                name = contender
                value = None
            elif value is None:
                value = contender
            else:
                value = "%s %s" %(value,contender)
        exports.append(join_env(name,value))

    # otherwise, the rule is one per line
    else:
        # Split on any run of whitespace, so tabs or repeated spaces between the
        # name and the value do not end up inside the exported value
        parts = env.strip().split(None,1)
        if parts:
            name = parts[0]
            # A name without a value is exported as empty instead of raising
            value = parts[1] if len(parts) > 1 else ""
            exports = ["export %s=%s" %(name,value)]

    environment = []

    # Clean exports: quotes are removed from each line, presumably so it can sit
    # inside the double quoted echo below.
    # NOTE(review): a value that relied on quotes to protect spaces loses them here.
    for export in exports:
        export = export.strip('\n').replace('"',"").replace("'","")
        environment.append(export)
        # The newline inside the quotes is deliberate text of the echoed string
        environment.append('echo "\n%s" >> /environment' %(export))

    return "%s\n" %"\n".join(environment)
| 70 | + |
| 71 | + |
def join_env(name,value):
    '''join_env builds a single export statement from a name and an optional value.
    :param name: the variable part, which may already carry "=" and a value
    :param value: the text to attach after the name, or None for nothing
    :returns: "export <name>" when there is no value; otherwise the value is
    attached directly when the name ends with "=", and after a space when not
    '''
    # Nothing to attach: both shapes of name give the same statement
    if value is None:
        return "export %s" %(name)

    # If the name ends at the equals sign, we don't want a space before the value
    separator = "" if re.search("=$",name) else " "
    return "export %s%s%s" %(name,separator,value)
| 84 | + |
| 85 | + |
def parse_cmd(cmd):
    '''parse_cmd turns the argument of a Dockerfile CMD into the line written to
    the Singularity spec. The text is passed through unchanged.
    eg: CMD /code/run_uwsgi.sh --> /code/run_uwsgi.sh
    :param cmd: the text that follows the CMD keyword
    '''
    runscript_line = "%s" % cmd
    return runscript_line
| 91 | + |
| 92 | + |
def parse_entry(cmd):
    '''parse_entry will parse a Dockerfile ENTRYPOINT command to a singularity appropriate one
    eg: ENTRYPOINT /code/run_uwsgi.sh --> exec /code/run_uwsgi.sh "$@"
    :param cmd: the text that follows the ENTRYPOINT keyword; it may end with a newline
    :returns: the exec statement, ending with a newline only if cmd did
    '''
    # Trailing whitespace (including the line's newline) must come off before
    # formatting, otherwise "$@" lands on its own line, detached from the exec
    ending = "\n" if cmd.endswith("\n") else ""
    return 'exec %s "$@"%s' %(cmd.rstrip(),ending)
| 98 | + |
| 99 | + |
def parse_copy(copy_str):
    '''parse_copy prefixes the given "source destination" text with cp. This likely
    will need tweaking, as the files might need to be mounted from some location
    before they can be added to the image.
    :param copy_str: the text to place after cp
    '''
    template = "cp %s"
    return template % copy_str
| 106 | + |
| 107 | + |
def parse_http(url,destination):
    '''parse_http returns a curl statement that downloads a url into a destination
    folder, keeping the file name found at the end of the url.
    :param url: the address to download
    :param destination: the folder the file should be written to
    '''
    target = "%s/%s" %(destination,os.path.basename(url))
    return "curl %s -o %s" %(url,target)
| 115 | + |
| 116 | + |
def parse_targz(targz,destination):
    '''parse_targz will return a command to extract a targz file to a destination.
    :param targz: the path to the .tar.gz archive
    :param destination: the directory to extract into
    '''
    # -C switches to the destination before extracting; a bare trailing argument
    # would be read by tar as the name of a member to extract
    return "tar -xzvf %s -C %s" %(targz,destination)
| 121 | + |
| 122 | + |
def parse_zip(zipfile,destination):
    '''parse_zip will return a command to unzip a file to a destination.
    :param zipfile: the path to the .zip archive
    :param destination: the directory to extract into
    '''
    # -d names the extraction directory; a bare trailing argument would be read
    # by unzip as the name of a member to extract
    return "unzip %s -d %s" %(zipfile,destination)
| 127 | + |
def parse_comment(cmd):
    '''parse_comment turns the given text into a comment line by prefixing it
    with "# ".
    :param cmd: the text to comment out
    '''
    commented = "# %s" % cmd
    return commented
| 133 | + |
| 134 | + |
def parse_maintainer(cmd):
    '''parse_maintainer is meant to eventually store the maintainer as metadata.
    Until then the line is handed to parse_comment and comes back as a comment.
    :param cmd: the maintainer line
    '''
    as_comment = parse_comment(cmd)
    return as_comment
| 141 | + |
| 142 | + |
def parse_add(add):
    '''parse_add will copy files from one location to another. This likely will need
    tweaking, as the files might need to be mounted from some location before adding to
    the image. The add command is done for an entire directory.
    :param add: the command to parse: "<source> <destination>" on the first line,
    optionally followed by further lines that are passed through unchanged
    :returns: the generated command, a newline, then any lines after the first
    '''
    # Only the first line is the instruction. partition (unlike split with
    # unpacking) also copes with input that carries no newline at all.
    command,_,rest = add.partition('\n')

    # Split on any run of whitespace; anything other than exactly two fields
    # still raises ValueError, as before
    from_thing,to_thing = command.split()

    # People like to use dots for PWD.
    if from_thing == ".":
        from_thing = os.getcwd()
    if to_thing == ".":
        to_thing = os.getcwd()

    # The cases are mutually exclusive and checked in order, so an earlier
    # match is not overwritten by the plain copy at the end.

    # If it's a url or http address, then we need to use wget/curl to get it
    if re.search("^http",from_thing):
        result = parse_http(url=from_thing,
                            destination=to_thing)

    # If it's a tar.gz, then we are supposed to uncompress
    elif re.search("\\.tar\\.gz$",from_thing):
        result = parse_targz(targz=from_thing,
                             destination=to_thing)

    # If it's .zip, then we are supposed to unzip it
    elif re.search("\\.zip$",from_thing):
        result = parse_zip(zipfile=from_thing,
                           destination=to_thing)

    # Is from thing a directory or something else?
    elif os.path.isdir(from_thing):
        result = "cp -R %s %s" %(from_thing,to_thing)
    else:
        result = "cp %s %s" %(from_thing,to_thing)

    return "%s\n%s" %(result,rest)
| 180 | + |
| 181 | + |
def parse_workdir(workdir):
    '''parse_workdir turns a Dockerfile WORKDIR into a cd to that directory.
    :param workdir: the directory to change into
    '''
    template = "cd %s"
    return template % workdir
| 186 | + |
| 187 | + |
def get_mapping():
    '''get_mapping returns a dictionary mapping from a Dockerfile command to a Singularity
    build spec section. Note - Dockerfile commands that are not keys of the returned
    dictionary (eg, EXPOSE, LABEL, USER, STOPSIGNAL) are not converted. MAINTAINER and
    VOLUME are kept, but only as comments.

    :: note

    each KEY of the mapping should be a command start in the Dockerfile (eg, RUN)
    for each corresponding value, there should be a dictionary with the following:

    - section: the Singularity build file section to write the new command to
    - fun: any function to pass the output through before writing to the section (optional)
    - json: Boolean, if the section can optionally have json (eg a list)

    I'm not sure the subtle differences between add and copy, other than copy doesn't support
    external files. It should suffice for our purposes (for now) to use the same function
    (parse_add) until evidence for a major difference is determined.
    '''

    #  Docker : Singularity
    add_command = {"section": "%post","fun": parse_add, "json": True }
    copy_command = {"section": "%post", "fun": parse_add, "json": True }
    cmd_command = {"section": "%runscript", "fun": parse_cmd, "json": True }
    env_command = {"section": "%post", "fun": parse_env, "json": False }
    comment_command = {"section": "%post", "fun": parse_comment, "json": False }
    from_command = {"section": "From", "json": False }
    run_command = {"section": "%post", "json": True}
    workdir_command = {"section": "%post","fun": parse_workdir, "json": False }

    # parse_entry produces 'exec <cmd> "$@"', which is a run time statement:
    # inside %post the exec would replace the shell that runs the build steps,
    # so the entrypoint is written to %runscript
    entry_command = {"section": "%runscript", "fun": parse_entry, "json": True }

    return {"ADD": add_command,
            "COPY":copy_command,
            "CMD":cmd_command,
            "ENTRYPOINT":entry_command,
            "ENV": env_command,
            "FROM": from_command,
            "RUN":run_command,
            "WORKDIR":workdir_command,
            "MAINTAINER":comment_command,
            "VOLUME":comment_command}
| 229 | + |
| 230 | + |
| 231 | + |
def dockerfile_to_singularity(dockerfile_path, output_dir=None):
    '''dockerfile_to_singularity will return a Singularity build file based on
    a provided Dockerfile. If an output directory is specified, a file called
    Singularity is additionally written to output_dir.
    :param dockerfile_path: the path to the Dockerfile; the file name itself
    must be exactly "Dockerfile"
    :param output_dir: the output directory to write the Singularity file to
    :returns: the build file as a string, or None if the path is not a file
    named Dockerfile or the conversion failed (an error is logged in both cases)
    '''
    # Only a file literally named Dockerfile is accepted
    if os.path.basename(dockerfile_path) != "Dockerfile":
        bot.logger.error("Could not find %s.", dockerfile_path)
        return None

    try:
        spec = read_file(dockerfile_path)
        # Use a common mapping
        mapping = get_mapping()
        # Put into dict of keys (section titles) and list of commands (values)
        sections = organize_sections(lines=spec,
                                     mapping=mapping)
        # We have to, by default, add the Docker bootstrap
        sections["bootstrap"] = ["docker"]
        # Put into one string, in the section order print_sections defines
        build_file = print_sections(sections=sections,
                                    mapping=mapping)
        if output_dir is not None:
            write_file("%s/Singularity" %(output_dir),build_file)
            print("Singularity spec written to %s" %(output_dir))
        return build_file

    # Exception (not a bare except) so KeyboardInterrupt and SystemExit still
    # propagate. Only this failure is logged, not the "could not find" message
    # as well.
    except Exception:
        bot.logger.error("Error generating Singularity spec from %s.", dockerfile_path)

    return None
| 267 | + |
| 268 | + |
def organize_sections(lines,mapping=None):
    '''organize_sections will break apart lines from a Dockerfile, and put into
    appropriate Singularity sections.
    :param lines: the raw lines from the Dockerfile
    :param mapping: a dictionary mapping Docker commands to Singularity sections;
    defaults to get_mapping()
    :returns: a dictionary with section names as keys and lists of commands as values

    ::note
    A line that starts with one of the mapping keys opens a new instruction, and
    the lines after it are collected as its continuation. Blank lines and comments
    are written to %post as they are, unless they interrupt an instruction whose
    last line ends with a backslash, in which case they are dropped.
    '''
    if mapping is None:
        mapping = get_mapping()

    sections = dict()
    startre = "|".join(["^%s" %x for x in mapping.keys()])
    command = None
    name = None

    for line in lines:

        # If it's a newline or comment, just add it to post
        if line == "\n" or re.search("^#",line):

            # In the middle of a backslash continued instruction the line would
            # break the command apart, so it is skipped
            if command is not None and command.rstrip().endswith("\\"):
                continue

            # Write the finished instruction first, so the comment keeps its
            # position relative to the commands around it
            if command is not None and name is not None:
                sections = parse_section(name=name,
                                         command=command,
                                         mapping=mapping,
                                         sections=sections)
            name = None
            command = None

            # "%post" is a section name, not a Dockerfile command, so the line
            # is appended directly instead of being looked up in the mapping
            sections.setdefault("%post",[]).append(line)

        elif re.search(startre,line):

            # Parse the last section, and start over
            if command is not None and name is not None:
                sections = parse_section(name=name,
                                         command=command,
                                         mapping=mapping,
                                         sections=sections)

            # Any whitespace may separate the keyword from its arguments, and
            # a keyword without arguments gives an empty command
            pieces = line.split(None,1)
            if not pieces:
                continue
            name = pieces[0]
            command = pieces[1] if len(pieces) > 1 else ""

        elif command is not None:

            # We have a continuation of the last command. Lines normally bring
            # their own newline; one is only added when it is missing
            if not command.endswith("\n"):
                command = "%s\n" %(command)
            command = "%s%s" %(command,line)

        # Anything else is text before the first instruction, and is ignored

    # The final instruction has no following keyword to trigger its parsing
    if command is not None and name is not None:
        sections = parse_section(name=name,
                                 command=command,
                                 mapping=mapping,
                                 sections=sections)

    return sections
| 307 | + |
def parse_section(sections,name,command,mapping=None):
    '''parse_section will take a command that has lookup key "name" as a key in "mapping"
    and add a line to the list of each in sections that will be rendered into a Singularity
    build file.
    :param sections: the current sections, a dictionary of keys (singularity section titles)
    and a list of lines.
    :param name: the Dockerfile command (eg, RUN) to look up in the mapping
    :param command: the command to parse
    :param mapping: the mapping object to use; defaults to get_mapping()
    :returns: the sections dictionary (updated in place); it is returned
    unchanged when name is not a key of the mapping
    '''
    if mapping is None:
        mapping = get_mapping()

    if name not in mapping:
        return sections

    rule = mapping[name]
    build_section = rule['section']

    # Can the command potentially be json (a list?)
    if rule['json']:
        try:
            parsed = json.loads(command)
            # Only a list is the exec form. A bare json string or object is
            # left as it was written (joining a string would space its letters)
            if isinstance(parsed,list):
                joined = " ".join(parsed)
                # Sections are later concatenated without a separator, so the
                # newline that ended the original line has to be kept
                if command.endswith("\n"):
                    joined = "%s\n" %(joined)
                command = joined
        # ValueError: not json. TypeError: not a string, or a list holding
        # something other than strings. Either way the command is kept as is
        except (ValueError,TypeError):
            pass

    # Do we need to pass it through a function first?
    if 'fun' in rule:
        command = rule['fun'](command)

    # Add to our dictionary of sections!
    sections.setdefault(build_section,[]).append(command)
    return sections
| 341 | + |
| 342 | + |
def print_sections(sections,mapping=None):
    '''print_sections renders a sections object (dict with section names as keys
    and lists of commands as values) into one string, to write to file or hand
    back to the user.
    :param sections: output from organize_sections
    :param mapping: a dictionary mapping Docker commands to Singularity sections
    :returns: the rendered spec, or None when none of the known sections is present
    '''
    if mapping == None:
        mapping = get_mapping()

    rendered = []

    # The sections are always written in this order
    for title in ('bootstrap',"From","%runscript","%post"):

        # Was the section found in the file?
        if title not in sections:
            continue

        body = "".join(sections[title])

        if re.search("^%",title):
            # A section header, with its commands on the lines below it
            rendered.append("%s\n%s" %(title,body))
        else:
            # A single value, intended to go after a colon (yaml)
            rendered.append("%s:%s" %(title,body))

    if not rendered:
        return None
    return "\n".join(rendered)
0 commit comments