Adding new files to tar gzipped file.

Postby catchvjay » Mon Oct 14, 2013 1:03 pm

Hi Friends,

I am writing code to archive data that is more than one year old. I am using Python 2.6 on a Linux server. I want to add files to an existing .tgz file. I tried the tarfile.open(tar_file, "w:gz") method, but it overwrites the old archive every time a new file is added to the .tgz file. Is there a way to add a new file to an existing gzipped tar file?

My current code is below and I am ignoring files whose creation date matches existing archive files.

Code: Select all
import os, time, tarfile
from datetime import datetime, date, timedelta
import datetime

# Set the base path to archive the content
path = "/files/ob/test_archive"
# Directory that receives the per-date archives and their log files
archive_dir = "/files/yearly_archive"
now = time.time()
yearago = now - 60*60*24*365


def collect_old_files(base_path, cutoff):
    """Map every file under base_path last modified before cutoff to its date.

    base_path is walked recursively.  cutoff is a POSIX timestamp; only
    files whose modification time is strictly older are returned.  The
    result maps the full file path to the modification date formatted as
    'MM-DD-YY'.
    """
    found = {}
    for root, _sub_folders, names in os.walk(base_path):
        for name in names:
            # root already starts with base_path, so one join is enough.
            full_path = os.path.join(root, name)
            try:
                mtime = os.path.getmtime(full_path)
            except OSError:
                # Dangling symlink, or the file vanished after being listed.
                continue
            if mtime < cutoff:
                found[full_path] = date.fromtimestamp(mtime).strftime('%m-%d-%y')
    return found


def archive_by_date(files_by_path, out_dir):
    """Create one gzipped tar archive per modification date.

    files_by_path maps file path -> date string, as returned by
    collect_old_files().  For each date, the matching files are written to
    out_dir/nas_archive_<date>.tgz and their names are appended to
    out_dir/archive_log_<date>.

    A date whose archive already exists is skipped: tarfile cannot append
    to a compressed archive, and reopening it with "w:gz" would truncate
    it and lose the files archived earlier.

    Returns a dict mapping each newly archived date to its file count.
    """
    # Group the files once instead of rescanning the whole dict per date.
    by_date = {}
    for f_name, d_date in files_by_path.items():
        by_date.setdefault(d_date, []).append(f_name)

    counts = {}
    for search_date in sorted(by_date):
        tar_file = os.path.join(out_dir, "nas_archive_" + search_date + ".tgz")
        if os.path.isfile(tar_file):
            print("Archive file for this date already exist. Unable to archvie files for  %s" % search_date)
            continue
        log_file = os.path.join(out_dir, "archive_log_" + search_date)
        f_names = sorted(by_date[search_date])
        mytar = tarfile.open(tar_file, "w:gz")
        # try/finally rather than "with": TarFile is not a context manager
        # on Python 2.6.
        try:
            with open(log_file, 'a') as log:
                for f_name in f_names:
                    mytar.add(f_name)
                    log.write(f_name + '\n')
        finally:
            mytar.close()
        counts[search_date] = len(f_names)
    return counts


# Dictionary to store file name and modification date
date_file_dict = collect_old_files(path, yearago)

# Dictionary to store the number of files archived for each new date
date_occur_dict = archive_by_date(date_file_dict, archive_dir)
