[OpenWrt-Devel] [PATCH] build: download code from github using archive API

Yousong Zhou yszhou4tech at gmail.com
Tue Jun 26 00:47:56 EDT 2018


A new python script, scripts/download.py, is added to fetch tarballs using
the GitHub archive API [1] and then repack them in a reproducible way, the
same as the current DownloadMethod/git does.

The missing piece in the GitHub API is that the tarball it provides does
not include the source code of dependent submodules.  In that case, the
implementation will fall back to using DownloadMethod/git.

 [1] Get archive link, https://developer.github.com/v3/repos/contents/#get-archive-link

Signed-off-by: Yousong Zhou <yszhou4tech at gmail.com>
---
 include/download.mk |  75 +++++++-----
 scripts/download.py | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 368 insertions(+), 31 deletions(-)
 create mode 100755 scripts/download.py

diff --git a/include/download.mk b/include/download.mk
index 2ba8a7bdf4..13952b3dfc 100644
--- a/include/download.mk
+++ b/include/download.mk
@@ -21,23 +21,7 @@ DOWNLOAD_RDEP=$(STAMP_PREPARED) $(HOST_STAMP_PREPARED)
 
 # Try to guess the download method from the URL
 define dl_method
-$(strip \
-  $(if $(2),$(2), \
-    $(if $(filter @APACHE/% @GITHUB/% @GNOME/% @GNU/% @KERNEL/% @SF/% @SAVANNAH/% ftp://% http://% https://% file://%,$(1)),default, \
-      $(if $(filter git://%,$(1)),git, \
-        $(if $(filter svn://%,$(1)),svn, \
-          $(if $(filter cvs://%,$(1)),cvs, \
-            $(if $(filter hg://%,$(1)),hg, \
-              $(if $(filter sftp://%,$(1)),bzr, \
-                unknown \
-              ) \
-            ) \
-          ) \
-        ) \
-      ) \
-    ) \
-  ) \
-)
+$(shell $(SCRIPT_DIR)/download.py --action=dl_method --url="$(1)" --proto="$(2)")
 endef
 
 # code for creating tarballs from cvs/svn/git/bzr/hg/darcs checkouts - useful for mirror support
@@ -56,6 +40,10 @@ ifdef CHECK
 check_escape=$(subst ','\'',$(1))
 #')
 
+# $(1): suffix of the F_, C_ variables, e.g. hash_deprecated, hash_mismatch, etc.
+# $(2): filename
+# $(3): expected hash value
+# $(4): hash var name: MD5SUM, HASH
 check_warn_nofix = $(info $(shell printf "$(_R)WARNING: %s$(_N)" '$(call check_escape,$(call C_$(1),$(2),$(3),$(4)))'))
 ifndef FIXUP
   check_warn = $(check_warn_nofix)
@@ -71,6 +59,9 @@ F_hash_mismatch = $(F_hash_deprecated)
 F_hash_missing = $(SCRIPT_DIR)/fixup-makefile.pl $(CURDIR)/Makefile add-hash $(3) $(call gen_sha256sum,$(1))
 endif
 
+# $(1): filename
+# $(2): expected hash value
+# $(3): hash var name: MD5SUM, HASH
 C_download_missing = $(1) is missing, please run make download before re-running this check
 C_hash_mismatch = $(3) does not match $(1) hash $(call gen_sha256sum,$(1))
 C_hash_deprecated = $(3) uses deprecated hash, set to $(call gen_sha256sum,$(1))
@@ -116,6 +107,9 @@ define DownloadMethod/default
 	)
 endef
 
+# $(1): "check"
+# $(2): "PKG_" if <name> as in Download/<name> is "default", otherwise "Download/<name>:"
+# $(3): shell command sequence to do the download
 define wrap_mirror
 $(if $(if $(MIRROR),$(filter-out x,$(MIRROR_HASH))),$(SCRIPT_DIR)/download.pl "$(DL_DIR)" "$(FILE)" "$(MIRROR_HASH)" "" || ( $(3) ),$(3)) \
 $(if $(filter check,$(1)), \
@@ -159,23 +153,42 @@ endef
 
 define DownloadMethod/git
 	$(call wrap_mirror,$(1),$(2), \
-		echo "Checking out files from the git repository..."; \
-		mkdir -p $(TMP_DIR)/dl && \
-		cd $(TMP_DIR)/dl && \
-		rm -rf $(SUBDIR) && \
-		[ \! -d $(SUBDIR) ] && \
-		git clone $(OPTS) $(URL) $(SUBDIR) && \
-		(cd $(SUBDIR) && git checkout $(VERSION) && \
-		git submodule update --init --recursive) && \
-		echo "Packing checkout..." && \
-		export TAR_TIMESTAMP=`cd $(SUBDIR) && git log -1 --format='@%ct'` && \
-		rm -rf $(SUBDIR)/.git && \
-		$(call dl_tar_pack,$(TMP_DIR)/dl/$(FILE),$(SUBDIR)) && \
-		mv $(TMP_DIR)/dl/$(FILE) $(DL_DIR)/ && \
-		rm -rf $(SUBDIR); \
+		$(call DownloadMethod/git-raw) \
 	)
 endef
 
+define DownloadMethod/github-tarball
+	$(call wrap_mirror,$(1),$(2), \
+		$(SCRIPT_DIR)/download.py \
+			--action=dl \
+			--dl-dir="$(DL_DIR)" \
+			--url="$(URL)" \
+			--proto="$(PROTO)" \
+			--version="$(VERSION)" \
+			--subdir="$(SUBDIR)" \
+			--source="$(FILE)" \
+		|| ( $(call DownloadMethod/git-raw) ); \
+	)
+endef
+
+# Only intended to be called as a submethod from other DownloadMethods
+define DownloadMethod/git-raw
+	echo "Checking out files from the git repository..."; \
+	mkdir -p $(TMP_DIR)/dl && \
+	cd $(TMP_DIR)/dl && \
+	rm -rf $(SUBDIR) && \
+	[ \! -d $(SUBDIR) ] && \
+	git clone $(OPTS) $(URL) $(SUBDIR) && \
+	(cd $(SUBDIR) && git checkout $(VERSION) && \
+	git submodule update --init --recursive) && \
+	echo "Packing checkout..." && \
+	export TAR_TIMESTAMP=`cd $(SUBDIR) && git log -1 --format='@%ct'` && \
+	rm -rf $(SUBDIR)/.git && \
+	$(call dl_tar_pack,$(TMP_DIR)/dl/$(FILE),$(SUBDIR)) && \
+	mv $(TMP_DIR)/dl/$(FILE) $(DL_DIR)/ && \
+	rm -rf $(SUBDIR);
+endef
+
 define DownloadMethod/bzr
 	$(call wrap_mirror,$(1),$(2), \
 		echo "Checking out files from the bzr repository..."; \
diff --git a/scripts/download.py b/scripts/download.py
new file mode 100755
index 0000000000..5e2db1bbbb
--- /dev/null
+++ b/scripts/download.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2018 Yousong Zhou <yszhou4tech at gmail.com>
+#
+# This is free software, licensed under the GNU General Public License v2.
+# See /LICENSE for more information.
+
+import argparse
+import calendar
+import datetime
+import errno
+import json
+import os
+import os.path
+import re
+import shutil
+import ssl
+import subprocess
+import sys
+import urllib2
+
+TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
+TMPDIR_DL = os.path.join(TMPDIR, 'dl')
+DOWNLOAD_METHODS = []
+
+class DirException(Exception): pass
+class DownloadException(Exception): pass
+
+
+class Path(object):
+    """Context class for preparing and cleaning up directories.
+
+    If ``path`` ``isdir``, then it will be created on context enter.
+
+    If ``keep`` is True, then ``path`` will NOT be removed on context exit
+    """
+
+    def __init__(self, path, isdir=True, keep=False):
+        self.path = path
+        self.isdir = isdir
+        self.keep = keep
+
+    def __enter__(self):
+        if self.isdir:
+            self.mkdir_all(self.path)
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if not self.keep:
+            self.rm_all(self.path)
+
+    @staticmethod
+    def mkdir_all(path):
+        """Same as mkdir -p: create @path together with any missing parent dirs."""
+        try:
+            os.makedirs(path)
+        except OSError as e:
+            # an already existing directory is fine (e.g. created by a parallel run)
+            if e.errno != errno.EEXIST or not os.path.isdir(path):
+                raise
+
+    @staticmethod
+    def _rmdir_all(dir_):
+        names = os.listdir(dir_)
+        for name in names:
+            p = os.path.join(dir_, name)
+            if os.path.isdir(p):
+                Path._rmdir_all(p)
+            else:
+                os.remove(p)
+        os.rmdir(dir_)
+
+    @staticmethod
+    def rm_all(path):
+        """Same as rm -r."""
+        if os.path.isdir(path):
+            Path._rmdir_all(path)
+        else:
+            os.remove(path)
+
+    @staticmethod
+    def untar(path, into=None):
+        """Extract tarball @path into dir @into; return the name of its single top-level entry."""
+        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
+        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))  # raises if tar fails
+        dirs = os.listdir(into)
+        if len(dirs) == 1:
+            return dirs[0]
+        else:
+            raise DirException('untar %s: expecting a single subdir, got %s' % (path, dirs))
+
+    @staticmethod
+    def tar(path, subdir, into=None, ts=None):
+        """Pack @subdir of dir @path into tarball @into, using mtime @ts when given."""
+        # --sort=name requires a recent build of GNU tar
+        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
+        args += ['-C', path, '-cf', into, subdir]
+        envs = os.environ.copy()
+        if ts is not None:
+            args.append('--mtime=@%d' % ts)
+        if into.endswith('.xz'):
+            envs['XZ_OPT'] = '-7e'
+            args.append('-J')
+        elif into.endswith('.bz2'):
+            args.append('-j')
+        elif into.endswith('.gz'):
+            args.append('-z')
+            envs['GZIP'] = '-n'
+        else:
+            raise DirException('unknown compression type %s' % into)
+        subprocess.check_call(args, env=envs)  # raise so a broken tarball is never moved on
+
+
+class DownloadMethod(object):
+    """Base class of all download methods."""
+
+    def __init__(self, args):
+        self.args = args
+        self.dl_dir = args.dl_dir
+
+    @classmethod
+    def resolve(cls, args):
+        """Resolve download method to use.
+
+        return instance of subclass of DownloadMethod
+        """
+        for c in DOWNLOAD_METHODS:
+            if c.match(args):
+                return c(args)
+
+    def download(self):
+        """Do the download and put it into the download dir."""
+        return NotImplemented
+
+    @staticmethod
+    def match(args):
+        """return True if it can do the download."""
+        return NotImplemented
+
+
+class DownloadMethodGitHubTarball(DownloadMethod):
+    """Download and repack archive tarball from GitHub."""
+
+    __repo_url_regex = re.compile(r'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')
+
+    def __init__(self, args):
+        super(DownloadMethodGitHubTarball, self).__init__(args)
+        self.url = args.url
+        self._init_owner_repo()
+        self.version = args.version
+        self.subdir = args.subdir
+        self.source = args.source
+        self.commit_ts = None           # lazy load commit timestamp
+        self.name = 'github-tarball'
+
+    @staticmethod
+    def match(args):
+        """Match if it's a GitHub clone url."""
+        url = args.url
+        proto = args.proto
+        if proto == 'git' and isinstance(url, basestring) \
+                and (url.startswith('https://github.com/') or url.startswith('git://github.com/')):
+            return True
+        return False
+
+    def download(self):
+        """Download and repack GitHub archive tarball."""
+        self._init_commit_ts()
+        with Path(TMPDIR_DL, keep=True) as dir_dl:
+            # fetch tarball from GitHub
+            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
+            with Path(tarball_path, isdir=False):
+                self._fetch(tarball_path)
+                # unpack
+                d = os.path.join(dir_dl.path, self.subdir + '.untar')
+                with Path(d) as dir_untar:
+                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
+                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
+                    dir1 = os.path.join(dir_untar.path, self.subdir)
+                    # submodules check
+                    if self._has_submodule(dir0):
+                        raise DownloadException('unable to fetch source code of submodules')
+                    # rename subdir
+                    os.rename(dir0, dir1)
+                    into=os.path.join(TMPDIR_DL, self.source)
+                    # repack
+                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
+                    # move to target location
+                    file1 = os.path.join(self.dl_dir, self.source)
+                    shutil.move(into, file1)
+
+    def _has_submodule(self, dir_):
+        m = os.path.join(dir_, '.gitmodules')
+        try:
+            st = os.stat(m)
+            return st.st_size > 0
+        except OSError as e:
+            return e.errno != errno.ENOENT
+
+    def _init_owner_repo(self):
+        url = self.url
+        m = self.__repo_url_regex.search(url)
+        if m is None:
+            raise DownloadException('invalid github url: %s' % url)
+        owner = m.group('owner')
+        repo = m.group('repo')
+        if repo.endswith('.git'):
+            repo = repo[:-4]
+        self.owner = owner
+        self.repo = repo
+
+    def _init_commit_ts(self):
+        if self.commit_ts is not None:
+            return
+        url = self._make_repo_url_path('commits', self.version)
+        resp = self._make_request(url)
+        data = resp.read()
+        data = json.loads(data)
+        date = data['commit']['committer']['date']
+        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
+        date = date.timetuple()
+        ct = calendar.timegm(date)
+        self.commit_ts = ct
+
+    def _fetch(self, path):
+        """Fetch tarball of the specified version ref."""
+        ref = self.version
+        url = self._make_repo_url_path('tarball', ref)
+        resp = self._make_request(url)
+        with open(path, 'wb') as fout:
+            while True:
+                d = resp.read(4096)
+                if not d:
+                    break
+                fout.write(d)
+
+    def _make_repo_url_path(self, *args):
+        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
+        if args:
+            url += '/' + '/'.join(args)
+        return url
+
+    def _make_request(self, path):
+        """Request GitHub API endpoint on @path and return the opened response object."""
+        url = 'https://api.github.com' + path
+        headers = {
+            'Accept': 'application/vnd.github.v3+json',
+            'User-Agent': 'python',
+        }
+        req = urllib2.Request(url, headers=headers)
+        sslcontext = ssl._create_unverified_context()  # NOTE(review): TLS certificate verification is disabled here -- confirm this is intended
+        fileobj = urllib2.urlopen(req, context=sslcontext)
+        return fileobj
+
+
+class DownloadMethodCatchall(DownloadMethod):
+    """Dummy method that knows names but not ways of download."""
+
+    def __init__(self, args):
+        super(DownloadMethodCatchall, self).__init__(args)
+        self.args = args
+        self.url = args.url
+        self.proto = args.proto
+        self.name = self._resolve_name()
+
+    @staticmethod
+    def match(args):
+        """return True."""
+        return True
+
+    def _resolve_name(self):
+        """Return the explicit proto if given, else guess the method name from the url prefix."""
+        if self.proto:
+            return self.proto
+        methods_map = (
+            ('default', ('@APACHE/', '@GITHUB/', '@GNOME/', '@GNU/',
+                         '@KERNEL/', '@SF/', '@SAVANNAH/', 'ftp://', 'http://',
+                         'https://', 'file://')),
+            ('git', ('git://', )),
+            ('svn', ('svn://', )),
+            ('cvs', ('cvs://', )),
+            ('hg', ('hg://', )),
+            ('bzr', ('sftp://', )),
+            ('unknown', ('', )),
+        )
+        # first match wins; 'unknown' has an empty prefix and therefore matches any url
+        return next(name for name, prefixes in methods_map if self.url.startswith(prefixes))
+
+    def download(self):
+        """Not implemented.
+
+        raise DownloadException
+        """
+        raise DownloadException('download method for %s is not yet implemented' % self.name)
+
+# order matters
+DOWNLOAD_METHODS = [
+    DownloadMethodGitHubTarball,
+    DownloadMethodCatchall,
+]
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--url', required=True, help='Download URL')
+    parser.add_argument('--proto', help='Download proto')
+    parser.add_argument('--subdir', help='Source code subdir name')
+    parser.add_argument('--version', help='Source code version')
+    parser.add_argument('--source', help='Source tarball filename')
+    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
+    parser.add_argument('--action', choices=('dl_method', 'dl'), help='Action to take')
+    args = parser.parse_args()
+    if args.action == 'dl_method':
+        method = DownloadMethod.resolve(args)
+        sys.stdout.write(method.name + '\n')
+    elif args.action == 'dl':
+        method = DownloadMethod.resolve(args)
+        try:
+            method.download()
+        except Exception:
+            raise
+
+if __name__ == '__main__':
+    main()

_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/listinfo/openwrt-devel



More information about the openwrt-devel mailing list