ulsdevteam · ojas-uls-dev · Nov 5, 2025 · Jan 26, 2026 · Jan 27, 2026 · Jan 29, 2026
diff --git a/.gitignore b/.gitignore
@@ -2,5 +2,5 @@
 *.csv
 *.log
 *.json
-*.new
 ignore/
+__pycache__/
diff --git a/README.md b/README.md
@@ -20,46 +20,50 @@ sub-folders in preparation for DRL to perform their scanning processes.
 
 Spreadsheet Columns:
 
-  -----------------------------------------------------------------------
-  **Required          
-  Columns**           
-  ------------------- ---------------------------------------------------
-  'id'                This is the PID of the object. Must currently be
-                      the first column of the sheet.
+  |Required Columns    ||           
+  |------------------- |---------------------------------------------------|
+  |'id'                |This is the PID of the object. Must currently be the first column of the sheet.|
 
-  **Optional          
-  Columns**           
+  |Optional Columns    ||           
+  |------------------- |---------------------------------------------------|
+  |Any                 |Any additional columns as needed.|
 
-  Any                 Any additional columns as needed.
-  -----------------------------------------------------------------------
+## Script Parameters:
 
-Script Parameters:
+  |Parameter           |Description|
+  |:--- |:--- |
+  |\--config-file      |Path to the script config file containing the path to the scanning directory (scanning_path).|
+  |\--xls-file         |Path to the spreadsheet to be processed.|
+  |\--batch-name       |The Name of the batch that will be created.|
+  |\--log-file         |Path to the log file.|
+  |\--use-google       |Set this to true if using Google Sheets.|
+  |\--google-sheet-id  |The Google Sheet Identifier.|
+  |\--google-sheet-name|The Google Sheet Tab Name. This defaults to 'Sheet1' if not included.|
+  |\--google-creds-file|Path to the file containing the Google credentials.|
 
-  -----------------------------------------------------------------------
-  Parameter           Description
-  ------------------- ---------------------------------------------------
-  \--config_file      Path to the script config file containing paths to
-                      the workbench directory (workbench_path), the
-                      scanning directory (scanning_path), and the path to
-                      the python executable.
+## Config File requirements:
 
-  \--xls-file         Path to the spreadsheet to be processed.
+The config file contains a single option, "scanning_path", which tells the script where to build the directory structure for the batch name passed to the script. This is usually the top-level folder where you store all your batches. In our example, we set "scanning_path" to "/scanning" as follows.
+
+```
+scanning_path: /scanning
+```
+
+An example config file can be found in the make-batch-dirs.conf_sample file.
 
-  \--batch-name       The Name of the batch that will be created.
-  -----------------------------------------------------------------------
 
 ## Usage:
 
-Script Usage Example:
+Script Usage Examples:
+  |Type|Example|
+  |:--- |:--- |
+  |Spreadsheet|make-batch-dirs --config-file config.conf --xls-file input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch|
+  |Google Sheet|make-batch-dirs --config-file config.conf --log-file log.txt --batch-name MyNewBatch --use-google {true\\|false} --google-sheet-id {sheet id} --google-sheet-name {E.g. 'Sheet1'} --google-creds-file {path to credentials file.}|
 
-make-batch-dirs ---config_file config.conf ---xls-file
-input_spreadsheet.xls ---batch-name MyNewBatch
 
 ## Function:
 
-For each row in the spreadsheet, obtain the first column contents which
-should be the 'id' column and construct a new directory structure in the
-format of {scanning_path}/{batch-name}/{id}.
+For each row in the spreadsheet, obtain the 'id' column contents and construct a new directory structure in the format of {scanning_path}/{batch-name}/{id}.
 
 E.g. Result: /scanning/MyNewBatch/317350000001,
 /scanning/MyNewBatch/317350000002, /scanning/MyNewBatch/317350000003
diff --git a/make-batch-dirs b/make-batch-dirs
@@ -7,85 +7,204 @@ import json
 import sys
 import subprocess
 import shutil
+import sheetutils
 import logging
 import openpyxl
 import csv
+import pandas as pd  # DataFrame type hints and read_excel
 
-def get_username():
+# Setup the log file format.
+log_formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',datefmt="%Y%m%d %H:%M:%S")
+
+def get_username() -> str:
+    """fetch username of user running script"""
     return pwd.getpwuid(os.getuid())[0]
 
-def setup_logger(name, log_file, level=logging.DEBUG):
+def setup_logger(name:str, log_file:str, level=logging.DEBUG):
     """To setup as many loggers as needed"""
-    handler = logging.FileHandler(log_file)
-    handler.setFormatter(log_formatter)
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setFormatter(log_formatter)
+    console_handler = logging.StreamHandler(sys.stdout)
     logger = logging.getLogger(name)
     logger.setLevel(level)
-    logger.addHandler(handler)
+    logger.addHandler(file_handler)
+    logger.addHandler(console_handler)
     return logger
 
-def read_yaml_file(path):
+def read_yaml_file(path: str) -> dict:
+    """ 
+    read yaml file.
+    Note: get_data coerces yaml to most appropriate type.
+        Most of the time this is dict, but might be list or str possibly
+    """
     with open(path, "r") as stream:
+        # FullLoader allows yaml to execute arbitrary python
+        # so script users are assumed to be trusted
         return yaml.FullLoader(stream).get_data()
 
-parser = argparse.ArgumentParser(description='Run..')
-parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.')
-parser.add_argument('--xls-file', dest="xls_file", required=True, help='XLS file.')
-parser.add_argument('--batch-name', dest="batch_name", required=False, help='Name of the batch.')
-args = parser.parse_args()
-
-# Set configuration variables from config-file parameter
-username            = get_username()
-cfg                 = read_yaml_file(args.config_file)
-workbench_path      = cfg['workbench_path']
-scanning_path       = cfg['scanning_path']
-python_exe          = cfg['python_exe']
-
-if ( args.batch_name ):
-    batch_name = args.batch_name
-else:
-    batch_name = os.path.splitext(os.path.basename(args.xls_file))[0]
-
-batch_path = scanning_path+"/"+batch_name
-
-print(f"Creating Batch Path: {batch_path}")
-if ( os.path.isdir(batch_path) ):
-    print(f"Error: {batch_path} exists!")
-    exit()
-else:
-    os.mkdir(batch_path)
-
-print(f"Copying spreadsheet to {batch_path}/manifest.xlsx")
-shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx")
-
-print(f"Creating spreadsheet as csv")
-wb = openpyxl.load_workbook(args.xls_file)
-sheetnames = wb.sheetnames
-sheet_value_arr = []
-for a in sheetnames:
-    sheet = wb[a]
-    with open(batch_path+"/manifest.csv", "w") as f:
-        c = csv.writer(f)
-        for row in sheet.rows:
-            sheet_value_arr.append([cell.value for cell in row])
-            #for r in sheet.rows:
-            c.writerow([cell.value for cell in row])
-f.close()
-
-print(f"Reading spreadsheet: {args.xls_file}")
-workbook = openpyxl.load_workbook(args.xls_file)
-dataframe = workbook.active
-
-rows = dataframe.iter_rows()
-next(rows)
-for row in rows:
-    if ((str(row[0].value)) and (str(row[0].value) != "None" )): 
-        id = str(row[0].value)
-        print(f"Creating {batch_path}/{id}")
-        object_path = batch_path + "/" + id
-        try: 
-            os.mkdir(object_path)
-        except OSError as error:
-            print(f"Warning: {batch_path}/{id} - {error}.")
-
-print(f"Batch Path Creation Complete.")
+def str_to_bool(value: str) -> bool:
+    """
+    Convert common yes/no words ('true', 'yes', '1', 'false', 'no', '0', ...) to a boolean.
+
+    error: raises ArgumentTypeError when the value is not one of the expected words
+    """
+    value = value.strip()
+    if value.lower() in {'true','t','yes','y','1'}:
+        return True
+    elif value.lower() in {'false','f','no','n','0'}:
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected')
+
+def create_batch_folder(scanning_path:str, batch_name:str):
+    """
+    create folder {scanning_path}/{batch_name} and return its path
+
+    error: if path exists, then logs error and exits
+    """
+    batch_path = os.path.sep.join([scanning_path,batch_name])
+    logger.info(f"Creating Batch Path: {batch_path}")
+    if ( os.path.isdir(batch_path) ):
+        logger.error(f"Error: {batch_path} exists")
+        exit(1)
+    else:
+        os.mkdir(batch_path)
+    return batch_path
+
+def copy_xslx_to_batch(batch_path:str):
+    """
+    copy args.xls_file to {batch_path}/manifest.xlsx
+
+    error: if manifest.xlsx already exists, then log and exit
+    """
+    manifest_path = os.path.sep.join([batch_path,'manifest.xlsx'])
+    if not os.path.isfile(manifest_path):
+        logger.info(f"Copying spreadsheet to {manifest_path}")
+        shutil.copyfile(args.xls_file, manifest_path)
+    else:
+        logger.error(f"Error: {manifest_path} already exists.")
+        exit(1)
+
+def save_xslx_as_csv(batch_path:str):
+    """
+    save args.xls_file into batch_path/manifest.csv
+    """
+    logger.info(f"Creating spreadsheet as csv")
+    wb = openpyxl.load_workbook(args.xls_file)
+    sheetnames = wb.sheetnames
+    sheet_value_arr = []
+    manifest_csv = os.path.sep.join([batch_path,'manifest.csv'])
+    for a in sheetnames:
+        sheet = wb[a]
+        with open(manifest_csv, "w") as f:
+            c = csv.writer(f)
+            for row in sheet.rows:
+                sheet_value_arr.append([cell.value for cell in row])
+                #for r in sheet.rows:
+                c.writerow([cell.value for cell in row])
+
+def make_dirs_from_df(df:pd.DataFrame, batch_path):
+    """
+    read id field of dataframe, and create batch_path/id
+    for non empty and non None ids.
+
+    error: log if mkdir fails. log and exit if id not in 
+        dataframe's columns or dataframe empty 
+    """
+    # Make sure the df has an 'id' column and data rows
+    if 'id' not in df.columns:
+        logger.error(f"Column 'id' does not exist")
+        exit(1)
+    # Make sure the df has rows besides the header row.
+    if len(df) == 0:
+        logger.error("Sheet contains no data")
+        exit(1)
+    rows = df.iterrows()
+    for _, row in rows:
+        if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): 
+            id = str(row.loc['id'])
+            logger.info(f"Creating {batch_path}/{id}")
+            object_path = os.path.sep.join([batch_path,id])
+            try: 
+                os.mkdir(object_path)
+            except OSError as error:
+                logger.warning(f"Warning: {object_path} - {error}.")
+
+#
+# Main function.
+#
+def main():
+    """
+    This is the main function.
+    """
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(description='Run..')
+    parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.')
+    parser.add_argument('--xls-file', dest="xls_file", required=False, help='XLS file.')
+    parser.add_argument('--batch-name', dest="batch_name", required=True, help='Name of the batch.')
+    parser.add_argument('--log-file', dest="log_file", required=False, help='Log file.')
+    parser.add_argument('--use-google', dest='use_google', type=str_to_bool, required=False, default=False, help='Use Google Sheet.')
+    parser.add_argument('--google-sheet-id', dest='google_sheet_id', required=False)
+    parser.add_argument('--google-sheet-name', dest='google_sheet_name', default="Sheet1", required=False)
+    parser.add_argument('--google-creds-file', dest='google_sheet_creds', required=False)
+
+    # Make args a global variable
+    global args
+    args = parser.parse_args()
+
+    # Set configuration variables from config-file parameter
+    username            = get_username()
+    cfg                 = read_yaml_file(args.config_file)
+    scanning_path       = cfg['scanning_path']
+
+    # Setup Log file.
+    global logger
+    if ( args.log_file ):
+        logger = setup_logger("log",args.log_file)
+    else:
+        logger = setup_logger("log","log.txt")
+    logger.info(f"Log file created.")
+
+    # Setup the batch_name
+    if ( args.batch_name ):
+        batch_name = args.batch_name
+    else:
+        batch_name = os.path.splitext(os.path.basename(args.xls_file))[0]
+    logger.info(f"Batch name: {batch_name}")
+
+    # If we are using google sheets...
+    if args.use_google:
+        if not args.google_sheet_creds:
+            logger.error(f"Error: --google-creds-file is required.")
+        if not args.google_sheet_id:
+            logger.error(f"Error: --google-sheet-id is required.")
+        if not args.google_sheet_name:
+            logger.error(f"Error: --google-sheet-name is required.")
+        if args.google_sheet_creds and args.google_sheet_id and args.google_sheet_name and os.path.isfile(args.google_sheet_creds):
+            logger.info(f"Using Google Sheet: {args.google_sheet_id},{args.google_sheet_name}")
+            manager = sheetutils.GoogleSheetManager()
+            manager.connect(args.google_sheet_creds)
+            sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name)
+            df = sheet.read()
+            batch_path = create_batch_folder(scanning_path, batch_name)
+            make_dirs_from_df(df,batch_path)
+        else:
+            logger.error(f"Error: Google arguments are required when using Google Sheets.")
+            exit()
+    # Else we are using a Spreadsheet...
+    else:
+        logger.info(f"Using Spreadsheet: {args.xls_file}")
+        logger.info(f"Creating Batch folder: {scanning_path}/{batch_name}")
+        df = pd.read_excel(args.xls_file)
+        batch_path = create_batch_folder(scanning_path, batch_name)
+        make_dirs_from_df(df,batch_path)
+        logger.info(f"Storing local copy of xlsx file and creating csv file from spreadsheet.")
+        copy_xslx_to_batch(batch_path)
+        save_xslx_as_csv(batch_path)
+
+    logger.info(f"Batch Path Creation Complete.")
+
+# Main call.
+if __name__ == "__main__":
+    main()
 
diff --git a/make-batch-dirs.conf_sample b/make-batch-dirs.conf_sample
@@ -2,11 +2,6 @@
 # make-batch-dirs config.
 #
 
-# workbench_path - where is your workbench directory.
-workbench_path: 
-
 # scanning_path - where is your scanning directory.
 scanning_path: 
 
-# python_exe - where is your python executable.
-python_exe: /usr/bin/python3
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,5 +2,5 @@ @@
     *.csv
     *.log
     *.json
-    *.new
     ignore/
+    __pycache__/