forked from ehanson8/dspace-data-collection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheckInventory.py
53 lines (39 loc) · 1.74 KB
/
checkInventory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import argparse
import pandas as pd
import os
def _parse_args(argv=None):
    """Parse the command line and fill in defaults for omitted options.

    Args:
        argv: list of argument strings to parse; when None, argparse reads
            them from sys.argv[1:].

    Returns:
        The argparse namespace with ``inventory``, ``dataDir``, ``field`` and
        ``verbose`` set. ``dataDir`` falls back to the directory part of
        ``inventory`` and ``field`` falls back to ``'name'``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--inventory', required=True,
                        help='csv file containing the inventory. the path, if given, can be absolute or relative to this script')
    parser.add_argument('-d', '--dataDir',
                        help='directory containing the data. if omitted, data will be read from the directory containing the inventory file')
    parser.add_argument('-f', '--field',
                        help='field in the csv containing the fileNames. default: name')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity')
    args = parser.parse_args(argv)

    if not args.dataDir:
        # dirname() is '' for a bare file name; os.path.join() in main()
        # treats '' as "relative to the current directory".
        args.dataDir = os.path.dirname(args.inventory)
    if not args.field:
        args.field = 'name'
    return args


def main(argv=None):
    """Report which files listed in an inventory csv exist in a directory.

    Reads one column of the inventory csv, checks every listed name with
    os.path.isfile() inside the data directory, prints each missing name
    (and, with --verbose, each found name) and finally prints the totals.

    Args:
        argv: optional list of command-line arguments; when None the
            arguments are taken from sys.argv[1:].
    """
    args = _parse_args(argv)

    if args.verbose:
        print('verbosity turned on')
        print('reading inventory from {}'.format(args.inventory))
        print('fileNames read from field named {}'.format(args.field))
        print('searching for files in {}'.format(args.dataDir))

    # dtype=str keeps numeric-looking names (e.g. "123") as text so they can
    # be joined to a path; dropna() skips blank cells, which pandas reads as NaN.
    inventory = pd.read_csv(args.inventory, usecols=[args.field], dtype=str)
    fileNames = inventory[args.field].dropna()

    foundfiles = 0
    missingfiles = 0
    for fileName in fileNames:
        # os.path.join (rather than dataDir + '/' + name) avoids turning an
        # empty dataDir into a lookup at the filesystem root.
        if os.path.isfile(os.path.join(args.dataDir, fileName)):
            if args.verbose:
                print('{} is not missing'.format(fileName))
            foundfiles += 1
        else:
            print('{} is missing'.format(fileName))
            missingfiles += 1

    print('{} files found and {} files missing'.format(foundfiles, missingfiles))
# Run the inventory check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()