You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

229 lines
7.5 KiB

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# archivefiles.py January 2018
#
# allows you to read files from the file service and save them to a directory on the file system.
# Optionally, the tool will also delete files from the file service in order to free up space.
# For example,
#
# ./archivefiles.py -n log -d 6 -p /job -fp /tmp
#
# Blog: https://blogs.sas.com/content/sgf/2019/04/04/where-are-my-viya-files/
#
# Change History
#
# 27JAN2019 Comments added
# 20SEP2019 Do not write out binary files
# 20SEP2019 Accept parent folder as a parameter
# 12FEB2020 Bug fix when no query is provided
# 20FEB2020 Fix for Python 3: unicode is now str
#
#
# Copyright © 2018, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the License); you may not use this file except in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing permissions and limitations under the License.
#
import argparse , datetime, os, time, json, sys
from sharedfunctions import callrestapi,printresult,getfolderid,getidsanduris,createdatefilter
from datetime import datetime as dt, timedelta as td
# Major version of the running interpreter (2 or 3); used further down to
# pick between Python 2 and Python 3 code paths.
version = sys.version_info[0]

# Python 3 has no separate unicode type, so alias the name to str; type
# checks written against ``unicode`` then work on both major versions.
if version >= 3:
    unicode = str
# Command-line interface of the tool. Every option is optional; the defaults
# are given in the add_argument calls below.
parser = argparse.ArgumentParser(
    description="Archive and optionally delete files stored in the infrastructure data server."
)
parser.add_argument("-n", "--name", default=None, help="Name contains")
# NOTE: -c/--type is accepted, but args.type is not read anywhere in the
# visible part of this script.
parser.add_argument("-c", "--type", default=None, help="Content Type in.")
parser.add_argument("-p", "--parent", default=None, help="ParentURI starts with.")
parser.add_argument("-pf", "--parentfolder", default=None, help="Parent Folder Name.")
parser.add_argument("-d", "--days", default='-1', help="List files older than this number of days")
parser.add_argument("-m", "--modifiedby", default=None, help="Last modified id equals")
parser.add_argument("-fp", "--path", default='/tmp', help="Path of directory to store files")
parser.add_argument("-x", "--delete", action='store_true', help="Delete Files from Viya")
parser.add_argument("--debug", action='store_true', help="Debug")

args = parser.parse_args()

# Copy the parsed options into the short names used by the rest of the script.
nameval = args.name
modby = args.modifiedby
daysolder = args.days
puri = args.parent
pfolder = args.parentfolder
path = args.path
dodelete = args.delete
debug = args.debug
# -p (parent URI) and -pf (parent folder) are two different ways of choosing
# the files to work on, so a command line that supplies both is rejected.
if puri is not None and pfolder is not None:
    print("ERROR: cannot use both -p parent and -pf parentfolder at the same time.")
    print("ERROR: Use -pf for folder parents and -p for service parents.")
    # Exit with a non-zero status so a calling script can detect the failure.
    sys.exit(1)
# Deleting is destructive, so ask for an explicit confirmation before doing it.
if dodelete:
    # One prompt text for both interpreter versions.
    prompt = "The files will be archived. Do you also want to delete the files? (Y)"
    # raw_input exists only on Python 2; on Python 3 the same function is input.
    if version > 2:
        areyousure = input(prompt)
    else:
        areyousure = raw_input(prompt)
    # Anything other than an exact 'Y' switches the delete off; the archive
    # step is unaffected by the answer.
    if areyousure != 'Y':
        dodelete = False
# Build the date condition from the --days value; it is passed to
# createdatefilter with olderoryounger='older' on creationTimeStamp.
datefilter = createdatefilter(olderoryounger='older', datevar='creationTimeStamp', days=daysolder)

# Conditions that are joined inside and(...) to form the request filter.
# The date condition is always present (--days defaults to '-1').
filtercond = [datefilter]
if nameval is not None:
    filtercond.append('contains($primary,name,"' + nameval + '")')
if modby is not None:
    filtercond.append("eq(modifiedBy," + modby + ")")

# set the request type
reqtype = 'get'
delimiter = ','

# Stays True unless the folder lookup below finds no member to query for.
havecandidates = True

if puri is not None:
    # process items not in folders: select on the parent URI
    filtercond.append("contains(parentUri,'" + puri + "')")
elif pfolder is not None:
    # process items in folders: first list the members of the folder
    folderid = getfolderid(pfolder)[0]
    completefilter = 'and(' + delimiter.join(filtercond) + ')'
    reqval = "/folders/folders/" + folderid + "/members?filter=" + completefilter + "&limit=10000"
    files_in_folder = callrestapi(reqval, reqtype)

    # now get the file objects using the ids returned
    iddict = getidsanduris(files_in_folder)
    # get the uris of the files
    uris = iddict['uris']
    # the id of each member is the last path segment of its uri
    idlist = [item.rsplit('/', 1)[-1] for item in uris]

    if idlist:
        inclause = ', '.join("'" + item + "'" for item in idlist)
        filtercond.append("in(id," + inclause + ")")
    else:
        # No members matched. An empty id list would produce the filter
        # "in(id,)", so skip the files request and report nothing found.
        havecandidates = False

if havecandidates:
    # add the start and end and comma delimit the filter
    completefilter = 'and(' + delimiter.join(filtercond) + ')'
    reqval = "/files/files?filter=" + completefilter + "&limit=10000"
    files_result_json = callrestapi(reqval, reqtype)
else:
    # Same shape as the part of the response the script reads later ('items').
    files_result_json = {'items': []}
# Create a time-stamped archive directory below the requested path, named
# "D" + date + "T" + time, for example D20190404T101530.
# The seconds field needs its own "%" ("%S"); without it strftime emits a
# literal "S" and two runs within the same minute share one directory.
# The directory is created here whether or not any files are found later.
newdirname = "D" + dt.today().strftime("%Y%m%dT%H%M%S")
archivepath = os.path.join(path, newdirname)
if not os.path.isdir(archivepath):
    os.makedirs(archivepath)
# The 'items' entry of the response holds the file records to work through.
files = files_result_json['items']

# Debug mode: echo the most recently built REST request.
if debug:
    print(reqval)
# Archive every file in ``files`` to ``archivepath`` and, when a delete was
# requested and confirmed, remove the archived files from the file service.
#
# Content-type prefixes that identify binary data. Binary content cannot be
# processed by this tool yet, so matching files are reported and skipped.
binarytypes = ('application/v', 'image', 'video', 'audio', 'application/pdf')

# names of the files that were written to the archive directory
passlist = []

if files:

    if not os.path.isdir(archivepath):
        os.makedirs(archivepath)

    # process each file
    for fileitem in files:

        fileid = fileitem['id']
        contenttype = fileitem['contentType']
        filename = fileitem['name']

        # Skip binary files before requesting their content: it would only
        # be thrown away.
        if contenttype.startswith(binarytypes):
            print('NOTE: '+filename+' of content type ' +contenttype+' not supported')
            continue

        # Keep only the last path component of the name so that a name
        # containing path separators cannot place the copy outside
        # archivepath; fall back to the file id if nothing is left.
        archivefile = os.path.join(archivepath, os.path.basename(filename) or fileid)

        reqtype = 'get'
        reqval = "/files/files/" + fileid + "/content"
        content = callrestapi(reqval, reqtype)

        if isinstance(content, dict):
            # dict content is written out as indented JSON
            with open(archivefile, 'w') as fp:
                json.dump(content, fp, indent=4)
        elif isinstance(content, (unicode, str)):
            with open(archivefile, 'w') as fp:
                # On Python 2 only unicode text needs encoding; a byte str is
                # written as it is.
                if version < 3 and isinstance(content, unicode):
                    fp.write(content.encode('utf8'))
                else:
                    fp.write(content)
        else:
            print('NOTE: '+filename+' content type not supported')
            continue

        passlist.append(filename)

        # delete requested: only files whose content was just written to the
        # archive are removed, so a skipped file is never deleted unarchived
        if dodelete:
            reqtype = 'delete'
            reqval = "/files/files/" + fileid
            callrestapi(reqval, reqtype)

if passlist:
    print('NOTE: files archived to the directory '+archivepath)
    if dodelete:
        print('NOTE: files deleted from Viya.')
else:
    # Printed both when the query returned no files and when none of the
    # returned files could be archived.
    print('NOTE: No files that can be processed were found.')