diff --git a/__pycache__/sharedfunctions.cpython-36.pyc b/__pycache__/sharedfunctions.cpython-36.pyc index ded7975..8f6f0eb 100644 Binary files a/__pycache__/sharedfunctions.cpython-36.pyc and b/__pycache__/sharedfunctions.cpython-36.pyc differ diff --git a/archivefiles.py b/archivefiles.py index bac3231..4b3a6eb 100755 --- a/archivefiles.py +++ b/archivefiles.py @@ -3,12 +3,21 @@ # # archivefiles.py January 2018 # +# allows you to read files from the file service and save them to a directory on the file system. +# Optionally, the tool will also delete files from the file service in order to free up space. +# For example, +# +# ./archivefiles.py -n log -d 6 -p /job -fp /tmp +# +# Blog: https://blogs.sas.com/content/sgf/2019/04/04/where-are-my-viya-files/ # # Change History # # 27JAN2019 Comments added # 20SEP2019 Do not write out binary files # 20SEP2019 Accept parent folder as a parameter +# 12FEB2020 Bug fix when not query is provided +# 20FEB2020 Fix for python 3 unicode is now str # # # Copyright © 2018, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. @@ -28,11 +37,15 @@ from datetime import datetime as dt, timedelta as td # get python version version=int(str(sys.version_info[0])) +# in python3 unicode is now string +if version >= 3: unicode = str + # setup command-line arguements. In this block which is common to all the tools you setup what parameters # are passed to the tool -# the --output parameter is a common one which supports the three styles of output json, simple or csv +# the --output parameter is a common one which supports the styles of output json, simplejson, simple or csv parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(description="Archive and optionally delete files stored in the infrastructure data server.") parser.add_argument("-n","--name", help="Name contains",default=None) parser.add_argument("-c","--type", help="Content Type in.",default=None) @@ -42,6 +55,7 @@ parser.add_argument("-d","--days", help="List files older than this number of da parser.add_argument("-m","--modifiedby", help="Last modified id equals",default=None) parser.add_argument("-fp","--path", help="Path of directory to store files",default='/tmp') parser.add_argument("-x","--delete", help="Delete Files from Viya",action='store_true') +parser.add_argument("--debug", action='store_true', help="Debug") args = parser.parse_args() daysolder=args.days @@ -51,6 +65,7 @@ puri=args.parent path=args.path dodelete=args.delete pfolder=args.parentfolder +debug=args.debug # you can subset by parenturi or parentfolder but not both if puri !=None and pfolder !=None: @@ -92,10 +107,9 @@ if puri!=None: filtercond.append("contains(parentUri,'"+puri+"')") completefilter = 'and('+delimiter.join(filtercond)+')' reqval="/files/files?filter="+completefilter+"&limit=10000" - files_result_json=callrestapi(reqval,reqtype) - + # process items in folders -if pfolder!=None: +elif pfolder!=None: folderid=getfolderid(pfolder)[0] # add the start and end and comma delimit the filter @@ -123,13 +137,18 @@ if pfolder!=None: filtercond.append("in(id,"+inclause+")") completefilter = 'and('+delimiter.join(filtercond)+')' - #print(completefilter) reqval="/files/files?filter="+completefilter+"&limit=10000" - - #make the rest call using the callrestapi function - files_result_json=callrestapi(reqval,reqtype) -#create a directory with a name of the timestamp only if running in execmodeute mode +else: + + completefilter = 'and('+delimiter.join(filtercond)+')' + reqval="/files/files?filter="+completefilter+"&limit=10000" + + +files_result_json=callrestapi(reqval,reqtype) + + +#create a directory with a name of the timestamp only if running in execute mode newdirname="D"+dt.today().strftime("%Y%m%dT%H%MS") archivepath=os.path.join(path,newdirname ) @@ -137,6 +156,10 @@ if os.path.isdir(archivepath)==False: os.makedirs(archivepath) files = files_result_json['items'] +if debug: + print(reqval) + #print(json.dumps(files,indent=2)) + if len(files): if os.path.isdir(archivepath)==False: os.makedirs(archivepath) @@ -148,7 +171,8 @@ for file in files: fileid=file['id'] contenttype=file['contentType'] - + + filename=file['name'] archivefile=os.path.join(archivepath,filename ) @@ -156,7 +180,8 @@ for file in files: reqval="/files/files/"+fileid+"/content" content=callrestapi(reqval,reqtype) - + + out_type='w' # decide on write style w+b is binary w is text @@ -164,6 +189,7 @@ for file in files: if contenttype.startswith('application/v') or contenttype.startswith('image') or contenttype.startswith('video') or contenttype.startswith('audio') or contenttype.startswith('application/pdf'): out_type="wb" + print('NOTE: '+filename+' of content type ' +contenttype+' not supported') else: @@ -177,11 +203,14 @@ for file in files: fp.close() passlist.append(filename) - elif type(content) is unicode: + elif type(content) is unicode or type(content) is str: with open(archivefile, out_type) as fp: - fp.write(content.encode('utf8')) - + + if version < 3: + fp.write(content.encode('utf8')) + else: fp.write(content) + fp.close() passlist.append(filename) diff --git a/listfiles.py b/listfiles.py index 127942f..d54c698 100755 --- a/listfiles.py +++ b/listfiles.py @@ -3,6 +3,18 @@ # # lisfiles.py January 2018 # +# provides an easy interface to query what files are currently stored in the infrastructure data server. +# You can list all files sorted by modified date or size of file, and query based on date modified, +# user who last modified the file, parentUri or filename. The output provides the size of each file, +# so that you can check the space being used to store files. +# Use this tool to view files managed by the files service and stored in the infrastructure data server. +# +# For example, if I want to see all potential log files, +# created by the /jobexecution service that are older than 6 days old. +# +# ./listfiles.py -n log -p /jobExecution -d 6 -o csv +# +# Blog: https://blogs.sas.com/content/sgf/2019/04/04/where-are-my-viya-files/ # # Change History # @@ -30,6 +42,7 @@ from datetime import datetime as dt, timedelta as td parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(description="Query and list files stored in the infrastructure data server.") parser.add_argument("-n","--name", help="Name contains",default=None) parser.add_argument("-c","--type", help="Content Type in.",default=None) parser.add_argument("-p","--parent", help="ParentURI starts with.",default=None) @@ -38,6 +51,7 @@ parser.add_argument("-d","--days", help="List files older than this number of da parser.add_argument("-m","--modifiedby", help="Last modified id equals",default=None) parser.add_argument("-s","--sortby", help="Sort the output descending by this field",default='modifiedTimeStamp') parser.add_argument("-o","--output", help="Output Style", choices=['csv','json','simple','simplejson'],default='json') +parser.add_argument("--debug", action='store_true', help="Debug") args = parser.parse_args() output_style=args.output @@ -47,6 +61,10 @@ sortby=args.sortby nameval=args.name puri=args.parent pfolder=args.parentfolder +debug=args.debug + + +files_result_json=None # you can subset by parenturi or parentfolder but not both if puri !=None and pfolder !=None: @@ -77,10 +95,9 @@ if puri!=None: filtercond.append("contains(parentUri,'"+puri+"')") completefilter = 'and('+delimiter.join(filtercond)+')' reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000" - files_result_json=callrestapi(reqval,reqtype) - + # process items in folders -if pfolder!=None: +elif pfolder!=None: folderid=getfolderid(pfolder)[0] # add the start and end and comma delimit the filter @@ -109,19 +126,28 @@ if pfolder!=None: filtercond.append("in(id,"+inclause+")") completefilter = 'and('+delimiter.join(filtercond)+')' - #print(completefilter) reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000" - #make the rest call using the callrestapi function - files_result_json=callrestapi(reqval,reqtype) + +else: + completefilter = 'and('+delimiter.join(filtercond)+')' + reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000" + +if debug: print(reqval) + +files_result_json=callrestapi(reqval,reqtype) cols=['id','name','contentType','documentType','createdBy','modifiedTimeStamp','size','parentUri'] # print result -printresult(files_result_json,output_style,cols) + +if files_result_json == None: + print("No files returned by query") +else: + printresult(files_result_json,output_style,cols) - \ No newline at end of file +