Python module to get (some) files from the repository.
With git it's not possible to simply export a set of paths from the server directly. This module offers a similar functionality, but requires a local clone of the repository. It calls git directly for maximum portability. This is a prerequisite to making the deployment script used for Rockbox Utility work again. Change-Id: I1aca8ddd40d16b6268c1d6a2fc23ac59e964c3c6
This commit is contained in:
parent
db3afb03a2
commit
909b96fa70
1 changed files with 183 additions and 0 deletions
183
utils/common/gitscraper.py
Executable file
183
utils/common/gitscraper.py
Executable file
|
@ -0,0 +1,183 @@
|
|||
#!/usr/bin/python
|
||||
# __________ __ ___.
|
||||
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
# \/ \/ \/ \/ \/
|
||||
#
|
||||
# Copyright (c) 2012 Dominik Riebeling
|
||||
#
|
||||
# All files in this archive are subject to the GNU General Public License.
|
||||
# See the file COPYING in the source tree root for full license agreement.
|
||||
#
|
||||
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
# KIND, either express or implied.
|
||||
#
|
||||
|
||||
'''Scrape files from a git repository.
|
||||
|
||||
This module provides functions to get a subset of files from a git repository.
|
||||
The files to retrieve can be specified, and the git tree to work on can be
|
||||
specified. That was arbitrary trees can be retrieved (like a subset of files
|
||||
for a given tag).
|
||||
|
||||
Retrieved files can be packaged into a bzip2 compressed tarball or stored in a
|
||||
given folder for processing afterwards.
|
||||
|
||||
Calls git commands directly for maximum compatibility.
|
||||
'''
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import os
|
||||
import tarfile
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
|
||||
def get_refs(repo):
|
||||
'''Get dict matching refs to hashes from repository pointed to by repo.
|
||||
@param repo Path to repository root.
|
||||
@return Dict matching hashes to each ref.
|
||||
'''
|
||||
print "Getting list of refs"
|
||||
output = subprocess.Popen(["git", "show-ref"], stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE, cwd=repo)
|
||||
cmdout = output.communicate()
|
||||
refs = {}
|
||||
|
||||
if len(cmdout[1]) > 0:
|
||||
print "An error occured!\n"
|
||||
print cmdout[1]
|
||||
return refs
|
||||
|
||||
for line in cmdout:
|
||||
regex = re.findall(r'([a-f0-9]+)\s+(\S+)', line)
|
||||
for r in regex:
|
||||
# ref is the key, hash its value.
|
||||
refs[r[1]] = r[0]
|
||||
|
||||
return refs
|
||||
|
||||
|
||||
def get_lstree(repo, start, filterlist=[]):
|
||||
'''Get recursive list of tree objects for a given tree.
|
||||
@param repo Path to repository root.
|
||||
@param start Hash identifying the tree.
|
||||
@param filterlist List of paths to retrieve objecs hashes for.
|
||||
An empty list will retrieve all paths.
|
||||
@return Dict mapping filename to blob hash
|
||||
'''
|
||||
output = subprocess.Popen(["git", "ls-tree", "-r", start],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
|
||||
cmdout = output.communicate()
|
||||
objects = {}
|
||||
|
||||
if len(cmdout[1]) > 0:
|
||||
print "An error occured!\n"
|
||||
print cmdout[1]
|
||||
return objects
|
||||
|
||||
for line in cmdout[0].split('\n'):
|
||||
regex = re.findall(r'([0-9]+)\s+([a-z]+)\s+([0-9a-f]+)\s+(\S+)', line)
|
||||
for rf in regex:
|
||||
# filter
|
||||
add = False
|
||||
for f in filterlist:
|
||||
if rf[3].find(f) == 0:
|
||||
add = True
|
||||
|
||||
# If two files have the same content they have the same hash, so
|
||||
# the filename has to be used as key.
|
||||
if len(filterlist) == 0 or add == True:
|
||||
if rf[3] in objects:
|
||||
print "FATAL: key already exists in dict!"
|
||||
return {}
|
||||
objects[rf[3]] = rf[2]
|
||||
return objects
|
||||
|
||||
|
||||
def get_object(repo, blob, destfile):
|
||||
'''Get an identified object from the repository.
|
||||
@param repo Path to repository root.
|
||||
@param blob hash for blob to retrieve.
|
||||
@param destfile filename for blob output.
|
||||
@return True if file was successfully written, False on error.
|
||||
'''
|
||||
output = subprocess.Popen(["git", "cat-file", "-p", blob],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
|
||||
cmdout = output.communicate()
|
||||
# make sure output path exists
|
||||
if len(cmdout[1]) > 0:
|
||||
print "An error occured!\n"
|
||||
print cmdout[1]
|
||||
return False
|
||||
if not os.path.exists(os.path.dirname(destfile)):
|
||||
os.makedirs(os.path.dirname(destfile))
|
||||
f = open(destfile, 'w')
|
||||
for line in cmdout[0]:
|
||||
f.write(line)
|
||||
f.close()
|
||||
return True
|
||||
|
||||
|
||||
def describe_treehash(repo, treehash):
|
||||
'''Retrieve output of git-describe for a given hash.
|
||||
@param repo Path to repository root.
|
||||
@param treehash Hash identifying the tree / commit to describe.
|
||||
@return Description string.
|
||||
'''
|
||||
output = subprocess.Popen(["git", "describe", treehash],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
|
||||
cmdout = output.communicate()
|
||||
if len(cmdout[1]) > 0:
|
||||
print "An error occured!\n"
|
||||
print cmdout[1]
|
||||
return ""
|
||||
return cmdout[0].rstrip()
|
||||
|
||||
|
||||
def scrape_files(repo, treehash, filelist, dest=""):
|
||||
'''Scrape list of files from repository.
|
||||
@param repo Path to repository root.
|
||||
@param treehash Hash identifying the tree.
|
||||
@param filelist List of files to get from repository.
|
||||
@param dest Destination path for files. Files will get retrieved with full
|
||||
path from the repository, and the folder structure will get
|
||||
created below dest as necessary.
|
||||
@return Destination path.
|
||||
'''
|
||||
print "Scraping files from repository"
|
||||
|
||||
if dest == "":
|
||||
dest = tempfile.mkdtemp()
|
||||
treeobjects = get_lstree(repo, treehash, filelist)
|
||||
for obj in treeobjects:
|
||||
get_object(repo, treeobjects[obj], os.path.join(dest, obj))
|
||||
|
||||
return dest
|
||||
|
||||
|
||||
def archive_files(repo, treehash, filelist, basename, tmpfolder=""):
|
||||
'''Archive list of files into tarball.
|
||||
@param repo Path to repository root.
|
||||
@param treehash Hash identifying the tree.
|
||||
@param filelist List of files to archive. All files in the archive if left
|
||||
empty.
|
||||
@param basename Basename (including path) of output file. Will get used as
|
||||
basename inside of the archive as well (i.e. no tarbomb).
|
||||
@param tmpfolder Folder to put intermediate files in. If no folder is given
|
||||
a temporary one will get used.
|
||||
@return Output filename.
|
||||
'''
|
||||
print "Archiving files from repository"
|
||||
|
||||
workfolder = scrape_files(repo, treehash, filelist, tmpfolder)
|
||||
outfile = basename + ".tar.bz2"
|
||||
tf = tarfile.open(outfile, "w:bz2")
|
||||
tf.add(workfolder, basename)
|
||||
tf.close()
|
||||
if tmpfolder != workfolder:
|
||||
shutil.rmtree(workfolder)
|
||||
return outfile
|
Loading…
Reference in a new issue