Browse Source

Bugfixlistfiles (#45)

* Update listfiles.py

* fixes for python3
master
Gerry Nelson 6 years ago
committed by GitHub
parent
commit
bb6e772999
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. BIN
      __pycache__/sharedfunctions.cpython-36.pyc
  2. 45
      archivefiles.py
  3. 38
      listfiles.py

BIN
__pycache__/sharedfunctions.cpython-36.pyc

Binary file not shown.

45
archivefiles.py

@ -3,12 +3,21 @@
# #
# archivefiles.py January 2018 # archivefiles.py January 2018
# #
# allows you to read files from the file service and save them to a directory on the file system.
# Optionally, the tool will also delete files from the file service in order to free up space.
# For example,
#
# ./archivefiles.py -n log -d 6 -p /job -fp /tmp
#
# Blog: https://blogs.sas.com/content/sgf/2019/04/04/where-are-my-viya-files/
# #
# Change History # Change History
# #
# 27JAN2019 Comments added # 27JAN2019 Comments added
# 20SEP2019 Do not write out binary files # 20SEP2019 Do not write out binary files
# 20SEP2019 Accept parent folder as a parameter # 20SEP2019 Accept parent folder as a parameter
# 12FEB2020 Bug fix when no query is provided
# 20FEB2020 Fix for python 3 unicode is now str
# #
# #
# Copyright © 2018, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. # Copyright © 2018, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
@ -28,11 +37,15 @@ from datetime import datetime as dt, timedelta as td
# get python version # get python version
version=int(str(sys.version_info[0])) version=int(str(sys.version_info[0]))
# in python3 unicode is now string
if version >= 3: unicode = str
# setup command-line arguments. In this block which is common to all the tools you setup what parameters # setup command-line arguments. In this block which is common to all the tools you setup what parameters
# are passed to the tool # are passed to the tool
# the --output parameter is a common one which supports the three styles of output json, simple or csv # the --output parameter is a common one which supports the styles of output json, simplejson, simple or csv
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(description="Archive and optionally delete files stored in the infrastructure data server.")
parser.add_argument("-n","--name", help="Name contains",default=None) parser.add_argument("-n","--name", help="Name contains",default=None)
parser.add_argument("-c","--type", help="Content Type in.",default=None) parser.add_argument("-c","--type", help="Content Type in.",default=None)
@ -42,6 +55,7 @@ parser.add_argument("-d","--days", help="List files older than this number of da
parser.add_argument("-m","--modifiedby", help="Last modified id equals",default=None) parser.add_argument("-m","--modifiedby", help="Last modified id equals",default=None)
parser.add_argument("-fp","--path", help="Path of directory to store files",default='/tmp') parser.add_argument("-fp","--path", help="Path of directory to store files",default='/tmp')
parser.add_argument("-x","--delete", help="Delete Files from Viya",action='store_true') parser.add_argument("-x","--delete", help="Delete Files from Viya",action='store_true')
parser.add_argument("--debug", action='store_true', help="Debug")
args = parser.parse_args() args = parser.parse_args()
daysolder=args.days daysolder=args.days
@ -51,6 +65,7 @@ puri=args.parent
path=args.path path=args.path
dodelete=args.delete dodelete=args.delete
pfolder=args.parentfolder pfolder=args.parentfolder
debug=args.debug
# you can subset by parenturi or parentfolder but not both # you can subset by parenturi or parentfolder but not both
if puri !=None and pfolder !=None: if puri !=None and pfolder !=None:
@ -92,10 +107,9 @@ if puri!=None:
filtercond.append("contains(parentUri,'"+puri+"')") filtercond.append("contains(parentUri,'"+puri+"')")
completefilter = 'and('+delimiter.join(filtercond)+')' completefilter = 'and('+delimiter.join(filtercond)+')'
reqval="/files/files?filter="+completefilter+"&limit=10000" reqval="/files/files?filter="+completefilter+"&limit=10000"
files_result_json=callrestapi(reqval,reqtype)
# process items in folders # process items in folders
if pfolder!=None: elif pfolder!=None:
folderid=getfolderid(pfolder)[0] folderid=getfolderid(pfolder)[0]
# add the start and end and comma delimit the filter # add the start and end and comma delimit the filter
@ -123,13 +137,18 @@ if pfolder!=None:
filtercond.append("in(id,"+inclause+")") filtercond.append("in(id,"+inclause+")")
completefilter = 'and('+delimiter.join(filtercond)+')' completefilter = 'and('+delimiter.join(filtercond)+')'
#print(completefilter)
reqval="/files/files?filter="+completefilter+"&limit=10000" reqval="/files/files?filter="+completefilter+"&limit=10000"
#make the rest call using the callrestapi function else:
files_result_json=callrestapi(reqval,reqtype)
completefilter = 'and('+delimiter.join(filtercond)+')'
reqval="/files/files?filter="+completefilter+"&limit=10000"
#create a directory with a name of the timestamp only if running in execmodeute mode files_result_json=callrestapi(reqval,reqtype)
#create a directory with a name of the timestamp only if running in execute mode
newdirname="D"+dt.today().strftime("%Y%m%dT%H%MS") newdirname="D"+dt.today().strftime("%Y%m%dT%H%MS")
archivepath=os.path.join(path,newdirname ) archivepath=os.path.join(path,newdirname )
@ -137,6 +156,10 @@ if os.path.isdir(archivepath)==False: os.makedirs(archivepath)
files = files_result_json['items'] files = files_result_json['items']
if debug:
print(reqval)
#print(json.dumps(files,indent=2))
if len(files): if len(files):
if os.path.isdir(archivepath)==False: os.makedirs(archivepath) if os.path.isdir(archivepath)==False: os.makedirs(archivepath)
@ -149,6 +172,7 @@ for file in files:
fileid=file['id'] fileid=file['id']
contenttype=file['contentType'] contenttype=file['contentType']
filename=file['name'] filename=file['name']
archivefile=os.path.join(archivepath,filename ) archivefile=os.path.join(archivepath,filename )
@ -157,6 +181,7 @@ for file in files:
content=callrestapi(reqval,reqtype) content=callrestapi(reqval,reqtype)
out_type='w' out_type='w'
# decide on write style w+b is binary w is text # decide on write style w+b is binary w is text
@ -164,6 +189,7 @@ for file in files:
if contenttype.startswith('application/v') or contenttype.startswith('image') or contenttype.startswith('video') or contenttype.startswith('audio') or contenttype.startswith('application/pdf'): if contenttype.startswith('application/v') or contenttype.startswith('image') or contenttype.startswith('video') or contenttype.startswith('audio') or contenttype.startswith('application/pdf'):
out_type="wb" out_type="wb"
print('NOTE: '+filename+' of content type ' +contenttype+' not supported') print('NOTE: '+filename+' of content type ' +contenttype+' not supported')
else: else:
@ -177,10 +203,13 @@ for file in files:
fp.close() fp.close()
passlist.append(filename) passlist.append(filename)
elif type(content) is unicode: elif type(content) is unicode or type(content) is str:
with open(archivefile, out_type) as fp: with open(archivefile, out_type) as fp:
if version < 3:
fp.write(content.encode('utf8')) fp.write(content.encode('utf8'))
else: fp.write(content)
fp.close() fp.close()
passlist.append(filename) passlist.append(filename)

38
listfiles.py

@ -3,6 +3,18 @@
# #
# listfiles.py January 2018 # listfiles.py January 2018
# #
# provides an easy interface to query what files are currently stored in the infrastructure data server.
# You can list all files sorted by modified date or size of file, and query based on date modified,
# user who last modified the file, parentUri or filename. The output provides the size of each file,
# so that you can check the space being used to store files.
# Use this tool to view files managed by the files service and stored in the infrastructure data server.
#
# For example, to see all potential log files created by the
# /jobExecution service that are more than 6 days old:
#
# ./listfiles.py -n log -p /jobExecution -d 6 -o csv
#
# Blog: https://blogs.sas.com/content/sgf/2019/04/04/where-are-my-viya-files/
# #
# Change History # Change History
# #
@ -30,6 +42,7 @@ from datetime import datetime as dt, timedelta as td
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(description="Query and list files stored in the infrastructure data server.")
parser.add_argument("-n","--name", help="Name contains",default=None) parser.add_argument("-n","--name", help="Name contains",default=None)
parser.add_argument("-c","--type", help="Content Type in.",default=None) parser.add_argument("-c","--type", help="Content Type in.",default=None)
parser.add_argument("-p","--parent", help="ParentURI starts with.",default=None) parser.add_argument("-p","--parent", help="ParentURI starts with.",default=None)
@ -38,6 +51,7 @@ parser.add_argument("-d","--days", help="List files older than this number of da
parser.add_argument("-m","--modifiedby", help="Last modified id equals",default=None) parser.add_argument("-m","--modifiedby", help="Last modified id equals",default=None)
parser.add_argument("-s","--sortby", help="Sort the output descending by this field",default='modifiedTimeStamp') parser.add_argument("-s","--sortby", help="Sort the output descending by this field",default='modifiedTimeStamp')
parser.add_argument("-o","--output", help="Output Style", choices=['csv','json','simple','simplejson'],default='json') parser.add_argument("-o","--output", help="Output Style", choices=['csv','json','simple','simplejson'],default='json')
parser.add_argument("--debug", action='store_true', help="Debug")
args = parser.parse_args() args = parser.parse_args()
output_style=args.output output_style=args.output
@ -47,6 +61,10 @@ sortby=args.sortby
nameval=args.name nameval=args.name
puri=args.parent puri=args.parent
pfolder=args.parentfolder pfolder=args.parentfolder
debug=args.debug
files_result_json=None
# you can subset by parenturi or parentfolder but not both # you can subset by parenturi or parentfolder but not both
if puri !=None and pfolder !=None: if puri !=None and pfolder !=None:
@ -77,10 +95,9 @@ if puri!=None:
filtercond.append("contains(parentUri,'"+puri+"')") filtercond.append("contains(parentUri,'"+puri+"')")
completefilter = 'and('+delimiter.join(filtercond)+')' completefilter = 'and('+delimiter.join(filtercond)+')'
reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000" reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000"
files_result_json=callrestapi(reqval,reqtype)
# process items in folders # process items in folders
if pfolder!=None: elif pfolder!=None:
folderid=getfolderid(pfolder)[0] folderid=getfolderid(pfolder)[0]
# add the start and end and comma delimit the filter # add the start and end and comma delimit the filter
@ -109,15 +126,24 @@ if pfolder!=None:
filtercond.append("in(id,"+inclause+")") filtercond.append("in(id,"+inclause+")")
completefilter = 'and('+delimiter.join(filtercond)+')' completefilter = 'and('+delimiter.join(filtercond)+')'
#print(completefilter)
reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000" reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000"
#make the rest call using the callrestapi function
files_result_json=callrestapi(reqval,reqtype) else:
completefilter = 'and('+delimiter.join(filtercond)+')'
reqval="/files/files?filter="+completefilter+"&sortBy="+sortby+":descending&limit=10000"
if debug: print(reqval)
files_result_json=callrestapi(reqval,reqtype)
cols=['id','name','contentType','documentType','createdBy','modifiedTimeStamp','size','parentUri'] cols=['id','name','contentType','documentType','createdBy','modifiedTimeStamp','size','parentUri']
# print result # print result
printresult(files_result_json,output_style,cols)
if files_result_json == None:
print("No files returned by query")
else:
printresult(files_result_json,output_style,cols)

Loading…
Cancel
Save