From 52e700d03bf2005e9410bd4e0fe00c7f738f58fa Mon Sep 17 00:00:00 2001 From: Ojas Mishra Date: Wed, 5 Nov 2025 15:18:38 -0500 Subject: [PATCH 01/39] adding google sheets option --- make-batch-dirs | 116 ++++++++++++++++++++++++++++++------------------ sheetutils.py | 106 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 43 deletions(-) create mode 100644 sheetutils.py diff --git a/make-batch-dirs b/make-batch-dirs index 32bfc6a..ac69386 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -7,6 +7,7 @@ import json import sys import subprocess import shutil +import sheetutils import logging import openpyxl import csv @@ -29,8 +30,12 @@ def read_yaml_file(path): parser = argparse.ArgumentParser(description='Run..') parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.') -parser.add_argument('--xls-file', dest="xls_file", required=True, help='XLS file.') +parser.add_argument('--xls-file', dest="xls_file", required=False, help='XLS file.') parser.add_argument('--batch-name', dest="batch_name", required=False, help='Name of the batch.') +parser.add_argument('--use-google', dest='use_google', required=False) +parser.add_argument('--google-sheet-id', dest='google_sheet_id', required=False) +parser.add_argument('--google-sheet-name', dest='google_sheet_name', required=False) +parser.add_argument('--google-creds-file', dest='google_sheet_creds', required=False) args = parser.parse_args() # Set configuration variables from config-file parameter @@ -45,47 +50,72 @@ if ( args.batch_name ): else: batch_name = os.path.splitext(os.path.basename(args.xls_file))[0] -batch_path = scanning_path+"/"+batch_name -print(f"Creating Batch Path: {batch_path}") -if ( os.path.isdir(batch_path) ): - print(f"Error: {batch_path} exists!") - exit() -else: - os.mkdir(batch_path) - -print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") -shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx") - -print(f"Creating spreadsheet 
as csv") -wb = openpyxl.load_workbook(args.xls_file) -sheetnames = wb.sheetnames -sheet_value_arr = [] -for a in sheetnames: - sheet = wb[a] - with open(batch_path+"/manifest.csv", "w") as f: - c = csv.writer(f) - for row in sheet.rows: - sheet_value_arr.append([cell.value for cell in row]) - #for r in sheet.rows: - c.writerow([cell.value for cell in row]) -f.close() - -print(f"Reading spreadsheet: {args.xls_file}") -workbook = openpyxl.load_workbook(args.xls_file) -dataframe = workbook.active - -rows = dataframe.iter_rows() -next(rows) -for row in rows: - if ((str(row[0].value)) and (str(row[0].value) != "None" )): - id = str(row[0].value) - print(f"Creating {batch_path}/{id}") - object_path = batch_path + "/" + id - try: - os.mkdir(object_path) - except OSError as error: - print(f"Warning: {batch_path}/{id} - {error}.") - -print(f"Batch Path Creation Complete.") +def create_batch_folder(scanning_path, batch_name): + batch_path = scanning_path+"/"+batch_name + + print(f"Creating Batch Path: {batch_path}") + if ( os.path.isdir(batch_path) ): + print(f"Error: {batch_path} exists!") + exit() + else: + os.mkdir(batch_path) + return batch_path + +def copy_xslx_to_batch(batch_path): + print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") + shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx") + +def save_xslx_as_csv(): + print(f"Creating spreadsheet as csv") + wb = openpyxl.load_workbook(args.xls_file) + sheetnames = wb.sheetnames + sheet_value_arr = [] + for a in sheetnames: + sheet = wb[a] + with open(batch_path+"/manifest.csv", "w") as f: + c = csv.writer(f) + for row in sheet.rows: + sheet_value_arr.append([cell.value for cell in row]) + #for r in sheet.rows: + c.writerow([cell.value for cell in row]) + +def xls_file_as_df(xls_file): + print(f"Reading spreadsheet: {xls_file}") + workbook = openpyxl.load_workbook(xls_file) + dataframe = workbook.active + +def make_dirs_from_df(dataframe): + rows = dataframe.iterrows() + next(rows) + for _, row in rows: + 
print(row) + if ((str(row.iloc[0])) and (str(row.iloc[0]) != "None" )): + id = str(row.iloc[0]) + print(f"Creating {batch_path}/{id}") + object_path = batch_path + "/" + id + try: + os.mkdir(object_path) + except OSError as error: + print(f"Warning: {batch_path}/{id} - {error}.") + + +if __name__ == '__main__': + batch_path = create_batch_folder(scanning_path, batch_name) + if args.use_google: + manager = sheetutils.GoogleSheetManager() + manager.connect(args.google_sheet_creds) + sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name) + df = sheet.read() + df.to_csv(f"{batch_path}/manifest.csv") + make_dirs_from_df(df) + pass + else: + copy_xsl_to_batch(batch_path) + save_xslx_as_csv() + + df = xls_file_as_df(args.xls_file) + make_dirs_from_df(df) + + print(f"Batch Path Creation Complete.") diff --git a/sheetutils.py b/sheetutils.py new file mode 100644 index 0000000..600d0cf --- /dev/null +++ b/sheetutils.py @@ -0,0 +1,106 @@ +from googleapiclient.discovery import build +from google.oauth2 import service_account +import os +import pandas as pd +from typing import TypeAlias +import logging + +logger = logging # use default logger + +class GoogleSheet: + def __init__(self, sheet_obj): + self.sheet = sheet_obj + def read(self) -> pd.DataFrame: + sheet = self.sheet + data = sheet.get('values', []) + + if not data: + logger.warn(f"read_google_sheet - No data found in the specified worksheet.") + print(f"No data found in the specified worksheet.") + + # Return empty DataFrame + return pd.DataFrame() + + else: + logger.info(f"read_google_sheet - Read of Google Sheet Successful.") + print(f"Read of Google Sheet Successful.") + + # Convert to DataFrame + # First row as headers, rest as data + headers = data[0] + rows = data[1:] if len(data) > 1 else [] + + # Pad rows with fewer columns with fill_value + fill_value = None + max_columns = len(headers) + padded_rows = [row + [fill_value] * (max_columns - len(row)) for row in rows] + + # Create DataFrame + df = 
pd.DataFrame(padded_rows, columns=headers) + + return df + + def update(self, df: pd.DataFrame) -> tuple[bool, str]: + + # Convert DataFrame to list of lists (including headers) + values = [df.columns.tolist()] + df.values.tolist() + + # Prepare the body for the API request + body = { + 'values': values + } + + # Update the sheet with DataFrame contents + result = sheet.values().update( + spreadsheetId=spreadsheet_id, + range=f'{sheet_name}!A1', + valueInputOption='RAW', + body=body + ).execute() + + updated_cells = result.get('updatedCells', 0) + return True, f"Successfully updated {updated_cells} cells" + +class GoogleSheetManager: + def __init__(self): + self._service = None + def connect(self, credentials_file): + """ + Connects to the Google Sheets API using service account credentials. + + Args: + credentials_file (str): Path to the Google service account credentials file. + + Returns: + build: The Google Sheets API service object. + """ + SCOPES = ['https://www.googleapis.com/auth/spreadsheets'] + CONFIG_FILE = credentials_file + + if not os.path.exists(CONFIG_FILE): + raise Exception(f"Configuration file not found: {CONFIG_FILE}") + + try: + creds = service_account.Credentials.from_service_account_file( + CONFIG_FILE, + scopes=SCOPES + ) + + self._service = build('sheets', 'v4', credentials=creds) + return self.service + + except Exception as e: + raise Exception(f"Failed to create Google Sheets service: {str(e)}") + + @property + def service(self): + if self._service is None: + raise Exception("Connect not executed") + return self._service + + def sheet(self, spreadsheet_id, sheet_name): + sheet = self.service.spreadsheets().values().get( + spreadsheetId=spreadsheet_id, # spreadsheet id is base64 in edit url + range=sheet_name + ).execute() + return GoogleSheet(sheet) From dc508c417b2ab354772d86ec11c04bf6b8d6dfc3 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Mon, 26 Jan 2026 13:15:23 -0500 Subject: [PATCH 02/39] Updating .gitignore to exclude *.patch 
files. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 2b0d552..e93a2e7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ *.log *.json *.new +*.patch ignore/ +__pycache__/ From 08d639d8f8f27fe08ead56f440a7b8e824f5e532 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Tue, 27 Jan 2026 11:33:22 -0500 Subject: [PATCH 03/39] Enhanced parameter and output. --- make-batch-dirs | 70 +++++++++++++++++++++++++++++++++++-------------- sheetutils.py | 2 ++ 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index ac69386..c1a3e2a 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -28,13 +28,21 @@ def read_yaml_file(path): with open(path, "r") as stream: return yaml.FullLoader(stream).get_data() +def str_to_bool(value): + if value.lower() in {'true','t','yes','y','1'}: + return True + elif value.lower() in {'false','f','no','n','0'}: + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected') + parser = argparse.ArgumentParser(description='Run..') parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.') parser.add_argument('--xls-file', dest="xls_file", required=False, help='XLS file.') -parser.add_argument('--batch-name', dest="batch_name", required=False, help='Name of the batch.') -parser.add_argument('--use-google', dest='use_google', required=False) +parser.add_argument('--batch-name', dest="batch_name", required=True, help='Name of the batch.') +parser.add_argument('--use-google', dest='use_google', type=str_to_bool, required=False, default=False, help='Use Google Sheet.') parser.add_argument('--google-sheet-id', dest='google_sheet_id', required=False) -parser.add_argument('--google-sheet-name', dest='google_sheet_name', required=False) +parser.add_argument('--google-sheet-name', dest='google_sheet_name', default="Sheet1", required=False) parser.add_argument('--google-creds-file', dest='google_sheet_creds', 
required=False) args = parser.parse_args() @@ -63,8 +71,12 @@ def create_batch_folder(scanning_path, batch_name): return batch_path def copy_xslx_to_batch(batch_path): - print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") - shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx") + if os.path.isfile(batch_path+"/manifest.xlsx"): + print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") + shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx") + else: + print(f"Error: {batch_path}/manifest.xlsx does not exist.") + exit() def save_xslx_as_csv(): print(f"Creating spreadsheet as csv") @@ -87,11 +99,11 @@ def xls_file_as_df(xls_file): def make_dirs_from_df(dataframe): rows = dataframe.iterrows() - next(rows) for _, row in rows: - print(row) - if ((str(row.iloc[0])) and (str(row.iloc[0]) != "None" )): - id = str(row.iloc[0]) + #print(row) + #if ((str(row.iloc[0])) and (str(row.iloc[0]) != "None" )): + if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): + id = str(row.loc['id']) print(f"Creating {batch_path}/{id}") object_path = batch_path + "/" + id try: @@ -101,21 +113,39 @@ def make_dirs_from_df(dataframe): if __name__ == '__main__': - batch_path = create_batch_folder(scanning_path, batch_name) if args.use_google: - manager = sheetutils.GoogleSheetManager() - manager.connect(args.google_sheet_creds) - sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name) - df = sheet.read() - df.to_csv(f"{batch_path}/manifest.csv") - make_dirs_from_df(df) - pass + if not args.google_sheet_creds: + print(f"Error: --google-sheet-creds is required.") + if not args.google_sheet_id: + print(f"Error: --google-sheet-id is required.") + if not args.google_sheet_name: + print(f"Error: --google-sheet-name is required.") + if args.google_sheet_creds and args.google_sheet_id and args.google_sheet_name and os.path.isfile(args.google_sheet_creds): + manager = sheetutils.GoogleSheetManager() + manager.connect(args.google_sheet_creds) + sheet = 
manager.sheet(args.google_sheet_id, args.google_sheet_name) + df = sheet.read() + batch_path = create_batch_folder(scanning_path, batch_name) + # Make sure the df has an 'id' column and data rows + if ('id' in df.columns): + # Make sure the df has rows besides the header row. + if (len(df) > 0): + make_dirs_from_df(df) + else: + print(f"Error: Sheet contains no data.") + else: + print(f"Error: Column 'id' does not exist.") + exit() + else: + print(f"Error: Google arguments are required when using Google Sheets.") + exit() else: - copy_xsl_to_batch(batch_path) + print(f"Creating Batch folder: {scanning_path}/{batch_name}") + batch_path = create_batch_folder(scanning_path, batch_name) + copy_xslx_to_batch(batch_path) save_xslx_as_csv() - df = xls_file_as_df(args.xls_file) - make_dirs_from_df(df) + make_dirs_from_df(df) print(f"Batch Path Creation Complete.") diff --git a/sheetutils.py b/sheetutils.py index 600d0cf..39857ea 100644 --- a/sheetutils.py +++ b/sheetutils.py @@ -10,6 +10,7 @@ class GoogleSheet: def __init__(self, sheet_obj): self.sheet = sheet_obj + def read(self) -> pd.DataFrame: sheet = self.sheet data = sheet.get('values', []) @@ -64,6 +65,7 @@ def update(self, df: pd.DataFrame) -> tuple[bool, str]: class GoogleSheetManager: def __init__(self): self._service = None + def connect(self, credentials_file): """ Connects to the Google Sheets API using service account credentials. 
From 462e34f3bac25dd921b87410bb11bd9cb236809c Mon Sep 17 00:00:00 2001 From: Ojas Mishra Date: Thu, 29 Jan 2026 12:57:54 -0500 Subject: [PATCH 04/39] add logger statements wherever print used #1 --- make-batch-dirs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/make-batch-dirs b/make-batch-dirs index c1a3e2a..0f46e16 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -24,6 +24,7 @@ def setup_logger(name, log_file, level=logging.DEBUG): logger.addHandler(handler) return logger +logger = logging # change as needed def read_yaml_file(path): with open(path, "r") as stream: return yaml.FullLoader(stream).get_data() @@ -61,9 +62,10 @@ else: def create_batch_folder(scanning_path, batch_name): batch_path = scanning_path+"/"+batch_name - + logger.info(f"Creating Batch Path: {batch_path}") print(f"Creating Batch Path: {batch_path}") if ( os.path.isdir(batch_path) ): + logger.err(f"Error: {batch_path} exists") print(f"Error: {batch_path} exists!") exit() else: @@ -72,13 +74,16 @@ def create_batch_folder(scanning_path, batch_name): def copy_xslx_to_batch(batch_path): if os.path.isfile(batch_path+"/manifest.xlsx"): + logger.info(f"Copying spreadsheet to {batch_path}/manifest.xlsx") print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx") else: + logger.err(f"Error: {batch_path}/manifest.xlsx does not exist.") print(f"Error: {batch_path}/manifest.xlsx does not exist.") exit() def save_xslx_as_csv(): + logger.info(f"Creating spreadsheet as csv") print(f"Creating spreadsheet as csv") wb = openpyxl.load_workbook(args.xls_file) sheetnames = wb.sheetnames @@ -93,6 +98,7 @@ def save_xslx_as_csv(): c.writerow([cell.value for cell in row]) def xls_file_as_df(xls_file): + logger.info(f"Reading spreadsheet: {xls_file}") print(f"Reading spreadsheet: {xls_file}") workbook = openpyxl.load_workbook(xls_file) dataframe = workbook.active @@ -104,21 +110,26 @@ def make_dirs_from_df(dataframe): 
#if ((str(row.iloc[0])) and (str(row.iloc[0]) != "None" )): if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): id = str(row.loc['id']) + logger.info(f"Creating {batch_path}/{id}") print(f"Creating {batch_path}/{id}") object_path = batch_path + "/" + id try: os.mkdir(object_path) except OSError as error: + logger.warn(f"Warning: {batch_path}/{id} - {error}.") print(f"Warning: {batch_path}/{id} - {error}.") if __name__ == '__main__': if args.use_google: if not args.google_sheet_creds: + logger.err(f"Error: --google-sheet-creds is required.") print(f"Error: --google-sheet-creds is required.") if not args.google_sheet_id: + logger.err(f"Error: --google-sheet-id is required.") print(f"Error: --google-sheet-id is required.") if not args.google_sheet_name: + logger.err(f"Error: --google-sheet-name is required.") print(f"Error: --google-sheet-name is required.") if args.google_sheet_creds and args.google_sheet_id and args.google_sheet_name and os.path.isfile(args.google_sheet_creds): manager = sheetutils.GoogleSheetManager() @@ -132,14 +143,18 @@ if __name__ == '__main__': if (len(df) > 0): make_dirs_from_df(df) else: + logger.err(f"Error: Sheet contains no data.") print(f"Error: Sheet contains no data.") else: + logger.err(f"Error: Column 'id' does not exist.") print(f"Error: Column 'id' does not exist.") exit() else: + logger.err(f"Error: Google arguments are required when using Google Sheets.") print(f"Error: Google arguments are required when using Google Sheets.") exit() else: + logger.info(f"Creating Batch folder: {scanning_path}/{batch_name}") print(f"Creating Batch folder: {scanning_path}/{batch_name}") batch_path = create_batch_folder(scanning_path, batch_name) copy_xslx_to_batch(batch_path) @@ -147,5 +162,6 @@ if __name__ == '__main__': df = xls_file_as_df(args.xls_file) make_dirs_from_df(df) + logger.info(f"Batch Path Creation Complete.") print(f"Batch Path Creation Complete.") From 24157584afe8002ef62eec36b84e5099cc0f056c Mon Sep 17 00:00:00 2001 
From: Ojas Mishra Date: Tue, 10 Feb 2026 12:18:38 -0500 Subject: [PATCH 05/39] add doc comments and type hints. also put batch_path as explicit argument, when before it was inherited from parent namespace --- make-batch-dirs | 61 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 0f46e16..3ff8637 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -11,11 +11,12 @@ import sheetutils import logging import openpyxl import csv - -def get_username(): +import pandas as pd# for type hints +def get_username() -> str: + """fetch username of user running script""" return pwd.getpwuid(os.getuid())[0] -def setup_logger(name, log_file, level=logging.DEBUG): +def setup_logger(name:str, log_file:str, level=logging.DEBUG): """To setup as many loggers as needed""" handler = logging.FileHandler(log_file) handler.setFormatter(log_formatter) @@ -25,11 +26,24 @@ def setup_logger(name, log_file, level=logging.DEBUG): return logger logger = logging # change as needed -def read_yaml_file(path): +def read_yaml_file(path: str) -> dict: + """ + read yaml file. + Note: get_data coerces yaml to most appropriate type. 
+ Most of the time this is dict, but might be list or str possibly + """ with open(path, "r") as stream: + # FullLoader allows yaml to execute arbitrary python + # so script users are assumed to be trusted return yaml.FullLoader(stream).get_data() -def str_to_bool(value): +def str_to_bool(value: str) -> bool: + """ + take common 'yes' and 'no' nouns and converts them to boolean + + error: raises ArgumentTypeError when noun not found within expected nouns + """ + value = value.strip() if value.lower() in {'true','t','yes','y','1'}: return True elif value.lower() in {'false','f','no','n','0'}: @@ -60,7 +74,12 @@ else: batch_name = os.path.splitext(os.path.basename(args.xls_file))[0] -def create_batch_folder(scanning_path, batch_name): +def create_batch_folder(scanning_path:str, batch_name:str): + """ + create folder {scanning_path}/{batch_name} in cwd + + error: if path exists, then logs error and exits + """ batch_path = scanning_path+"/"+batch_name logger.info(f"Creating Batch Path: {batch_path}") print(f"Creating Batch Path: {batch_path}") @@ -72,7 +91,12 @@ def create_batch_folder(scanning_path, batch_name): os.mkdir(batch_path) return batch_path -def copy_xslx_to_batch(batch_path): +def copy_xslx_to_batch(batch_path:str): + """ + copy manifest.xslx into batch_path created folder + + error: if manifest.xslx does not exist, then log and fail + """ if os.path.isfile(batch_path+"/manifest.xlsx"): logger.info(f"Copying spreadsheet to {batch_path}/manifest.xlsx") print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") @@ -82,7 +106,10 @@ def copy_xslx_to_batch(batch_path): print(f"Error: {batch_path}/manifest.xlsx does not exist.") exit() -def save_xslx_as_csv(): +def save_xslx_as_csv(batch_path:str): + """ + save args.xls_file into batch_path/manifest.csv + """ logger.info(f"Creating spreadsheet as csv") print(f"Creating spreadsheet as csv") wb = openpyxl.load_workbook(args.xls_file) @@ -97,13 +124,23 @@ def save_xslx_as_csv(): #for r in sheet.rows: 
c.writerow([cell.value for cell in row]) -def xls_file_as_df(xls_file): +def xls_file_as_df(xls_file:str) -> pd.DataFrame: + """ + read xls file as dataframe + """ logger.info(f"Reading spreadsheet: {xls_file}") print(f"Reading spreadsheet: {xls_file}") workbook = openpyxl.load_workbook(xls_file) dataframe = workbook.active + return dataframe + +def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): + """ + read id field of dataframe, and create batch_path/id + for non empty and non None ids. -def make_dirs_from_df(dataframe): + error: log if mkdir fails + """ rows = dataframe.iterrows() for _, row in rows: #print(row) @@ -158,9 +195,9 @@ if __name__ == '__main__': print(f"Creating Batch folder: {scanning_path}/{batch_name}") batch_path = create_batch_folder(scanning_path, batch_name) copy_xslx_to_batch(batch_path) - save_xslx_as_csv() + save_xslx_as_csv(batch_path) df = xls_file_as_df(args.xls_file) - make_dirs_from_df(df) + make_dirs_from_df(df, batch_path) logger.info(f"Batch Path Creation Complete.") print(f"Batch Path Creation Complete.") From 2a3d1b59bd882d689d60aebe6d2973793925c4d5 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Tue, 10 Feb 2026 13:28:35 -0500 Subject: [PATCH 06/39] Update .gitignore to remove certain patterns Remove specific file patterns from .gitignore. --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index e93a2e7..36fd670 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,5 @@ *.csv *.log *.json -*.new -*.patch ignore/ __pycache__/ From 05a482cea50134d8d6e2f65f85b23df358534d81 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Tue, 10 Feb 2026 14:32:41 -0500 Subject: [PATCH 07/39] Refactor path concatenation for batch directory creation Updated path handling to use os.path.sep for cross-platform compatibility. 
--- make-batch-dirs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 3ff8637..77cb599 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -12,6 +12,7 @@ import logging import openpyxl import csv import pandas as pd# for type hints + def get_username() -> str: """fetch username of user running script""" return pwd.getpwuid(os.getuid())[0] @@ -80,7 +81,7 @@ def create_batch_folder(scanning_path:str, batch_name:str): error: if path exists, then logs error and exits """ - batch_path = scanning_path+"/"+batch_name + batch_path = os.path.sep.join(scanning_path,batch_name) logger.info(f"Creating Batch Path: {batch_path}") print(f"Creating Batch Path: {batch_path}") if ( os.path.isdir(batch_path) ): @@ -97,10 +98,10 @@ def copy_xslx_to_batch(batch_path:str): error: if manifest.xslx does not exist, then log and fail """ - if os.path.isfile(batch_path+"/manifest.xlsx"): + if os.path.isfile(os.path.sep.join(batch_path,'manifest.xlsx')): logger.info(f"Copying spreadsheet to {batch_path}/manifest.xlsx") print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") - shutil.copyfile(args.xls_file, batch_path+"/manifest.xlsx") + shutil.copyfile(args.xls_file, os.path.sep.join(batch_path,'manifest.xlsx')) else: logger.err(f"Error: {batch_path}/manifest.xlsx does not exist.") print(f"Error: {batch_path}/manifest.xlsx does not exist.") @@ -117,7 +118,7 @@ def save_xslx_as_csv(batch_path:str): sheet_value_arr = [] for a in sheetnames: sheet = wb[a] - with open(batch_path+"/manifest.csv", "w") as f: + with open(os.path.sep.join(batch_path,'manifest.csv'), "w") as f: c = csv.writer(f) for row in sheet.rows: sheet_value_arr.append([cell.value for cell in row]) @@ -149,12 +150,12 @@ def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): id = str(row.loc['id']) logger.info(f"Creating {batch_path}/{id}") print(f"Creating {batch_path}/{id}") - object_path = batch_path + "/" + id + object_path = 
os.path.sep.join(batch_path,id) try: os.mkdir(object_path) except OSError as error: - logger.warn(f"Warning: {batch_path}/{id} - {error}.") - print(f"Warning: {batch_path}/{id} - {error}.") + logger.warn(f"Warning: {object_path} - {error}.") + print(f"Warning: {object_path} - {error}.") if __name__ == '__main__': From 24142dc6fc15604a34c28ea6a6c0e96347b1ce1b Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 09:51:14 -0500 Subject: [PATCH 08/39] Clean up code by removing commented debugging lines. Removed commented-out print statement and condition check related to debugging. --- make-batch-dirs | 2 -- 1 file changed, 2 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 77cb599..bcac485 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -144,8 +144,6 @@ def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): """ rows = dataframe.iterrows() for _, row in rows: - #print(row) - #if ((str(row.iloc[0])) and (str(row.iloc[0]) != "None" )): if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): id = str(row.loc['id']) logger.info(f"Creating {batch_path}/{id}") From 31f39fc3a490991cdbaac6912e6eaa154537241d Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 10:38:36 -0500 Subject: [PATCH 09/39] Test - Updated README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 64e81a7..afd0049 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # Description: + This script is used to read a list of PIDs from a spreadsheet and a passed in batch name, create the batch folder and corresponding PID sub-folders in preparation for DRL to perform their scanning processes. 
From 2df42912a4c5a8e424172fe66a406a420a69f03b Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 10:39:44 -0500 Subject: [PATCH 10/39] Test - Updated README.md - undo changes --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index afd0049..64e81a7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ # Description: - This script is used to read a list of PIDs from a spreadsheet and a passed in batch name, create the batch folder and corresponding PID sub-folders in preparation for DRL to perform their scanning processes. From d68be8f20bda99dd9bf55870946f91d3810facbb Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 11:53:23 -0500 Subject: [PATCH 11/39] Logging Cleanup & fixes. Created a main function. Moved parameter parsing into main function. Added a --log-file option. --- make-batch-dirs | 129 ++++++++++++++++++++++++++---------------------- 1 file changed, 70 insertions(+), 59 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index bcac485..0a42c06 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -13,20 +13,24 @@ import openpyxl import csv import pandas as pd# for type hints +# Setup the log file format. 
+log_formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',datefmt="%Y%m%d %H:%M:%S") + def get_username() -> str: """fetch username of user running script""" return pwd.getpwuid(os.getuid())[0] def setup_logger(name:str, log_file:str, level=logging.DEBUG): """To setup as many loggers as needed""" - handler = logging.FileHandler(log_file) - handler.setFormatter(log_formatter) + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(log_formatter) + console_handler = logging.StreamHandler(sys.stdout) logger = logging.getLogger(name) logger.setLevel(level) - logger.addHandler(handler) + logger.addHandler(file_handler) + logger.addHandler(console_handler) return logger -logger = logging # change as needed def read_yaml_file(path: str) -> dict: """ read yaml file. @@ -52,42 +56,18 @@ def str_to_bool(value: str) -> bool: else: raise argparse.ArgumentTypeError('Boolean value expected') -parser = argparse.ArgumentParser(description='Run..') -parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.') -parser.add_argument('--xls-file', dest="xls_file", required=False, help='XLS file.') -parser.add_argument('--batch-name', dest="batch_name", required=True, help='Name of the batch.') -parser.add_argument('--use-google', dest='use_google', type=str_to_bool, required=False, default=False, help='Use Google Sheet.') -parser.add_argument('--google-sheet-id', dest='google_sheet_id', required=False) -parser.add_argument('--google-sheet-name', dest='google_sheet_name', default="Sheet1", required=False) -parser.add_argument('--google-creds-file', dest='google_sheet_creds', required=False) -args = parser.parse_args() - -# Set configuration variables from config-file parameter -username = get_username() -cfg = read_yaml_file(args.config_file) -workbench_path = cfg['workbench_path'] -scanning_path = cfg['scanning_path'] -python_exe = cfg['python_exe'] - -if ( args.batch_name ): - batch_name = 
args.batch_name -else: - batch_name = os.path.splitext(os.path.basename(args.xls_file))[0] - - def create_batch_folder(scanning_path:str, batch_name:str): """ create folder {scanning_path}/{batch_name} in cwd error: if path exists, then logs error and exits """ - batch_path = os.path.sep.join(scanning_path,batch_name) + batch_path = os.path.sep.join([scanning_path,batch_name]) logger.info(f"Creating Batch Path: {batch_path}") print(f"Creating Batch Path: {batch_path}") if ( os.path.isdir(batch_path) ): - logger.err(f"Error: {batch_path} exists") - print(f"Error: {batch_path} exists!") - exit() + logger.error(f"Error: {batch_path} exists") + exit(1) else: os.mkdir(batch_path) return batch_path @@ -98,21 +78,18 @@ def copy_xslx_to_batch(batch_path:str): error: if manifest.xslx does not exist, then log and fail """ - if os.path.isfile(os.path.sep.join(batch_path,'manifest.xlsx')): + if os.path.isfile(os.path.sep.join([batch_path,'manifest.xlsx'])): logger.info(f"Copying spreadsheet to {batch_path}/manifest.xlsx") - print(f"Copying spreadsheet to {batch_path}/manifest.xlsx") shutil.copyfile(args.xls_file, os.path.sep.join(batch_path,'manifest.xlsx')) else: - logger.err(f"Error: {batch_path}/manifest.xlsx does not exist.") - print(f"Error: {batch_path}/manifest.xlsx does not exist.") - exit() + logger.error(f"Error: {batch_path}/manifest.xlsx does not exist.") + exit(1) def save_xslx_as_csv(batch_path:str): """ save args.xls_file into batch_path/manifest.csv """ logger.info(f"Creating spreadsheet as csv") - print(f"Creating spreadsheet as csv") wb = openpyxl.load_workbook(args.xls_file) sheetnames = wb.sheetnames sheet_value_arr = [] @@ -130,7 +107,6 @@ def xls_file_as_df(xls_file:str) -> pd.DataFrame: read xls file as dataframe """ logger.info(f"Reading spreadsheet: {xls_file}") - print(f"Reading spreadsheet: {xls_file}") workbook = openpyxl.load_workbook(xls_file) dataframe = workbook.active return dataframe @@ -147,27 +123,61 @@ def 
make_dirs_from_df(dataframe:pd.DataFrame, batch_path): if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): id = str(row.loc['id']) logger.info(f"Creating {batch_path}/{id}") - print(f"Creating {batch_path}/{id}") object_path = os.path.sep.join(batch_path,id) try: os.mkdir(object_path) except OSError as error: - logger.warn(f"Warning: {object_path} - {error}.") - print(f"Warning: {object_path} - {error}.") + logger.warning(f"Warning: {object_path} - {error}.") + +# +# Main function. +# +def main(): + # Parse command line arguements + parser = argparse.ArgumentParser(description='Run..') + parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.') + parser.add_argument('--xls-file', dest="xls_file", required=False, help='XLS file.') + parser.add_argument('--batch-name', dest="batch_name", required=True, help='Name of the batch.') + parser.add_argument('--log-file', dest="log_file", required=False, help='Log file.') + parser.add_argument('--use-google', dest='use_google', type=str_to_bool, required=False, default=False, help='Use Google Sheet.') + parser.add_argument('--google-sheet-id', dest='google_sheet_id', required=False) + parser.add_argument('--google-sheet-name', dest='google_sheet_name', default="Sheet1", required=False) + parser.add_argument('--google-creds-file', dest='google_sheet_creds', required=False) + + # Make args a global variable + global args + args = parser.parse_args() + + # Set configuration variables from config-file parameter + username = get_username() + cfg = read_yaml_file(args.config_file) + scanning_path = cfg['scanning_path'] + + # Setup Log file. 
+ global logger + if ( args.log_file ): + logger = setup_logger("log",args.log_file) + else: + logger = setup_logger("log","log.txt") + logger.info(f"Log file created.") + # Setup the batch_name + if ( args.batch_name ): + batch_name = args.batch_name + else: + batch_name = os.path.splitext(os.path.basename(args.xls_file))[0] + logger.info(f"Batch name: {batch_name}") -if __name__ == '__main__': + # If we are using google sheets... if args.use_google: if not args.google_sheet_creds: - logger.err(f"Error: --google-sheet-creds is required.") - print(f"Error: --google-sheet-creds is required.") + logger.error(f"Error: --google-sheet-creds is required.") if not args.google_sheet_id: - logger.err(f"Error: --google-sheet-id is required.") - print(f"Error: --google-sheet-id is required.") + logger.error(f"Error: --google-sheet-id is required.") if not args.google_sheet_name: - logger.err(f"Error: --google-sheet-name is required.") - print(f"Error: --google-sheet-name is required.") - if args.google_sheet_creds and args.google_sheet_id and args.google_sheet_name and os.path.isfile(args.google_sheet_creds): + logger.error(f"Error: --google-sheet-name is required.") + if args.google_sheet_creds and args.google_sheet_id and args.google_sheet_name and os.path.isfile(args.google_sheet_creds): + logger.info(f"Using Google Sheet: {args.google_sheet_id},{args.google_sheet_name}") manager = sheetutils.GoogleSheetManager() manager.connect(args.google_sheet_creds) sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name) @@ -179,25 +189,26 @@ if __name__ == '__main__': if (len(df) > 0): make_dirs_from_df(df) else: - logger.err(f"Error: Sheet contains no data.") - print(f"Error: Sheet contains no data.") + logger.error(f"Error: Sheet contains no data.") else: - logger.err(f"Error: Column 'id' does not exist.") - print(f"Error: Column 'id' does not exist.") - exit() + logger.error(f"Error: Column 'id' does not exist.") + exit(1) else: - logger.err(f"Error: Google arguments 
are required when using Google Sheets.") - print(f"Error: Google arguments are required when using Google Sheets.") + logger.error(f"Error: Google arguments are required when using Google Sheets.") exit() + # Else we are using a Spreadsheet... else: + logger.info(f"Using Spreadsheet: {args.xls_file}") logger.info(f"Creating Batch folder: {scanning_path}/{batch_name}") - print(f"Creating Batch folder: {scanning_path}/{batch_name}") batch_path = create_batch_folder(scanning_path, batch_name) copy_xslx_to_batch(batch_path) save_xslx_as_csv(batch_path) df = xls_file_as_df(args.xls_file) make_dirs_from_df(df, batch_path) - + logger.info(f"Batch Path Creation Complete.") - print(f"Batch Path Creation Complete.") + +# Main call. +if __name__ == "__main__": + main() From 9bd4a20af5bfc0f24a5bf1cf4f2a06643431578e Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 11:56:01 -0500 Subject: [PATCH 12/39] Updated README.md to includ info about --log-file. --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 64e81a7..2c4396b 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,8 @@ Script Parameters: \--xls-file Path to the spreadsheet to be processed. \--batch-name The Name of the batch that will be created. + + \--log-file Path to the log file. ----------------------------------------------------------------------- ## Usage: @@ -53,7 +55,7 @@ Script Parameters: Script Usage Example: make-batch-dirs ---config_file config.conf ---xls-file -input_spreadsheet.xls ---batch-name MyNewBatch +input_spreadsheet.xls ---log-file log.txt ---batch-name MyNewBatch ## Function: From d80c81e2e2175a1813759af395798540b60f0f58 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 12:02:36 -0500 Subject: [PATCH 13/39] Updated README.md to see if we can add columns in it. 
--- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2c4396b..f8d78dd 100644 --- a/README.md +++ b/README.md @@ -20,20 +20,20 @@ sub-folders in preparation for DRL to perform their scanning processes. Spreadsheet Columns: - ----------------------------------------------------------------------- - **Required - Columns** - ------------------- --------------------------------------------------- - 'id' This is the PID of the object. Must currently be - the first column of the sheet. + |-----------------------------------------------------------------------| + |**Required + Columns**| + |------------------- |---------------------------------------------------| + |'id' |This is the PID of the object. Must currently be + the first column of the sheet.| - **Optional - Columns** + |**Optional + Columns**| - Any Any additional columns as needed. - ----------------------------------------------------------------------- + |Any |Any additional columns as needed.| + |-----------------------------------------------------------------------| -Script Parameters: +## Script Parameters: ----------------------------------------------------------------------- Parameter Description From 7420ebc987a16b0b1916348e3d9b5bbd4d70fc1a Mon Sep 17 00:00:00 2001 From: Ojas Mishra Date: Wed, 11 Feb 2026 12:56:38 -0500 Subject: [PATCH 14/39] sheetutils documentation --- make-batch-dirs | 1 - sheetutils.py | 55 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 0a42c06..5d00130 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -64,7 +64,6 @@ def create_batch_folder(scanning_path:str, batch_name:str): """ batch_path = os.path.sep.join([scanning_path,batch_name]) logger.info(f"Creating Batch Path: {batch_path}") - print(f"Creating Batch Path: {batch_path}") if ( os.path.isdir(batch_path) ): logger.error(f"Error: 
{batch_path} exists") exit(1) diff --git a/sheetutils.py b/sheetutils.py index 39857ea..af24c42 100644 --- a/sheetutils.py +++ b/sheetutils.py @@ -1,30 +1,35 @@ -from googleapiclient.discovery import build +from googleapiclient.discovery import build, Resource from google.oauth2 import service_account import os import pandas as pd -from typing import TypeAlias +from typing import TypeAlias, Any import logging logger = logging # use default logger class GoogleSheet: - def __init__(self, sheet_obj): + """ + represents a single spreadsheet (i.e. a single tab in sheets) + and allows read and update operations + """ + def __init__(self, sheet_obj: Resource): self.sheet = sheet_obj def read(self) -> pd.DataFrame: + """ + read spreadsheet from service resource object into dataframe + """ sheet = self.sheet data = sheet.get('values', []) if not data: logger.warn(f"read_google_sheet - No data found in the specified worksheet.") - print(f"No data found in the specified worksheet.") # Return empty DataFrame return pd.DataFrame() else: logger.info(f"read_google_sheet - Read of Google Sheet Successful.") - print(f"Read of Google Sheet Successful.") # Convert to DataFrame # First row as headers, rest as data @@ -42,7 +47,10 @@ def read(self) -> pd.DataFrame: return df def update(self, df: pd.DataFrame) -> tuple[bool, str]: - + """ + write contents of df into spreadsheet. Note that this + overwrites the spreadsheet contents + """ # Convert DataFrame to list of lists (including headers) values = [df.columns.tolist()] + df.values.tolist() @@ -60,13 +68,18 @@ def update(self, df: pd.DataFrame) -> tuple[bool, str]: ).execute() updated_cells = result.get('updatedCells', 0) + logger.info(f"Successfully updated {updated_cells} cells") return True, f"Successfully updated {updated_cells} cells" class GoogleSheetManager: + """ + handles the boilerplate of creating an authenticated + service and returning a spreadsheet object. 
+ """ def __init__(self): self._service = None - def connect(self, credentials_file): + def connect(self, credentials_file:str) -> Resource: """ Connects to the Google Sheets API using service account credentials. @@ -95,14 +108,28 @@ def connect(self, credentials_file): raise Exception(f"Failed to create Google Sheets service: {str(e)}") @property - def service(self): + def service(self) -> Resource: + """ + getter for service. + Using service as property allows instantiation of object and + authentication to be separated, while also ensuring that all calls + to service are authenticated + """ if self._service is None: - raise Exception("Connect not executed") + raise ValueError("Connect not executed") return self._service - def sheet(self, spreadsheet_id, sheet_name): - sheet = self.service.spreadsheets().values().get( - spreadsheetId=spreadsheet_id, # spreadsheet id is base64 in edit url - range=sheet_name - ).execute() + def sheet(self, spreadsheet_id:str, sheet_name:str) -> GoogleSheet: + """ + Uses instantiated service to fetch Google Sheet + `spreadsheet_id` and fetches the `sheet_name` spreadsheet + """ + try: + sheet = self.service.spreadsheets().values().get( + spreadsheetId=spreadsheet_id, # spreadsheet id is base64 in edit url + range=sheet_name + ).execute() + except Exception as e: + print(f"Failed to read {spreadsheet_id}:{sheet_name} due to {e}") + exit() return GoogleSheet(sheet) From a49ae398f50899a911e7e80211075a4849be2b9c Mon Sep 17 00:00:00 2001 From: Ojas Mishra Date: Wed, 11 Feb 2026 13:02:09 -0500 Subject: [PATCH 15/39] wrap sheet update with try --- sheetutils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sheetutils.py b/sheetutils.py index af24c42..e3a5df0 100644 --- a/sheetutils.py +++ b/sheetutils.py @@ -60,12 +60,16 @@ def update(self, df: pd.DataFrame) -> tuple[bool, str]: } # Update the sheet with DataFrame contents - result = sheet.values().update( - spreadsheetId=spreadsheet_id, - 
range=f'{sheet_name}!A1', - valueInputOption='RAW', - body=body - ).execute() + try: + result = sheet.values().update( + spreadsheetId=spreadsheet_id, + range=f'{sheet_name}!A1', + valueInputOption='RAW', + body=body + ).execute() + except Exception as e: + logger.err(f"Failed to update {spreadsheet_id}:{sheet_name} due to {e}") + exit() updated_cells = result.get('updatedCells', 0) logger.info(f"Successfully updated {updated_cells} cells") From a476f23167748177eb8e21d1bb4c26844567f9f5 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 13:20:57 -0500 Subject: [PATCH 16/39] Fix remaining os.path.sep.join statements. --- make-batch-dirs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 5d00130..39afdb7 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -79,7 +79,7 @@ def copy_xslx_to_batch(batch_path:str): """ if os.path.isfile(os.path.sep.join([batch_path,'manifest.xlsx'])): logger.info(f"Copying spreadsheet to {batch_path}/manifest.xlsx") - shutil.copyfile(args.xls_file, os.path.sep.join(batch_path,'manifest.xlsx')) + shutil.copyfile(args.xls_file, os.path.sep.join([batch_path,'manifest.xlsx'])) else: logger.error(f"Error: {batch_path}/manifest.xlsx does not exist.") exit(1) @@ -94,7 +94,7 @@ def save_xslx_as_csv(batch_path:str): sheet_value_arr = [] for a in sheetnames: sheet = wb[a] - with open(os.path.sep.join(batch_path,'manifest.csv'), "w") as f: + with open(os.path.sep.join([batch_path,'manifest.csv']), "w") as f: c = csv.writer(f) for row in sheet.rows: sheet_value_arr.append([cell.value for cell in row]) @@ -122,7 +122,7 @@ def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): id = str(row.loc['id']) logger.info(f"Creating {batch_path}/{id}") - object_path = os.path.sep.join(batch_path,id) + object_path = os.path.sep.join([batch_path,id]) try: os.mkdir(object_path) except OSError as error: From 
f5b351aba33d0005fdc533aa8fb6dc30fbb16006 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 13:44:23 -0500 Subject: [PATCH 17/39] Cleanup and simplification regarding manifest.xlsx and manifest.csv paths. --- make-batch-dirs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 39afdb7..bf70eee 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -77,11 +77,12 @@ def copy_xslx_to_batch(batch_path:str): error: if manifest.xslx does not exist, then log and fail """ - if os.path.isfile(os.path.sep.join([batch_path,'manifest.xlsx'])): - logger.info(f"Copying spreadsheet to {batch_path}/manifest.xlsx") - shutil.copyfile(args.xls_file, os.path.sep.join([batch_path,'manifest.xlsx'])) + manifest_path = os.path.sep.join([batch_path,'manifest.xlsx']) + if os.path.isfile(manifest_path): + logger.info(f"Copying spreadsheet to {manifest_path}") + shutil.copyfile(args.xls_file, manifest_path) else: - logger.error(f"Error: {batch_path}/manifest.xlsx does not exist.") + logger.error(f"Error: {manifest_path} does not exist.") exit(1) def save_xslx_as_csv(batch_path:str): @@ -92,9 +93,10 @@ def save_xslx_as_csv(batch_path:str): wb = openpyxl.load_workbook(args.xls_file) sheetnames = wb.sheetnames sheet_value_arr = [] + manifest_csv = os.path.sep.join([batch_path,'manifest.csv']) for a in sheetnames: sheet = wb[a] - with open(os.path.sep.join([batch_path,'manifest.csv']), "w") as f: + with open(manifest_csv, "w") as f: c = csv.writer(f) for row in sheet.rows: sheet_value_arr.append([cell.value for cell in row]) From 9880f0c0cb14edf22283dc0738f1807d605ec6f5 Mon Sep 17 00:00:00 2001 From: Ojas Mishra Date: Wed, 11 Feb 2026 13:53:32 -0500 Subject: [PATCH 18/39] pull dataframe checks into make_dir_from_df and update docs --- make-batch-dirs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index bf70eee..29401cd 100755 --- 
a/make-batch-dirs +++ b/make-batch-dirs @@ -117,8 +117,17 @@ def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): read id field of dataframe, and create batch_path/id for non empty and non None ids. - error: log if mkdir fails + error: log if mkdir fails. log and exit if id not in + dataframe's columns or dataframe empty """ + # Make sure the df has an 'id' column and data rows + if 'id' not in df.columns: + logger.error(f"Column 'id' does not exist") + exit(1) + # Make sure the df has rows besides the header row. + if len(df) == 0: + logger.error("Sheet contains no data") + exit(1) rows = dataframe.iterrows() for _, row in rows: if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): @@ -184,16 +193,7 @@ def main(): sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name) df = sheet.read() batch_path = create_batch_folder(scanning_path, batch_name) - # Make sure the df has an 'id' column and data rows - if ('id' in df.columns): - # Make sure the df has rows besides the header row. - if (len(df) > 0): - make_dirs_from_df(df) - else: - logger.error(f"Error: Sheet contains no data.") - else: - logger.error(f"Error: Column 'id' does not exist.") - exit(1) + make_dirs_from_df(df) else: logger.error(f"Error: Google arguments are required when using Google Sheets.") exit() From 5ab9e42ba435150f65bd40ec2af9f1539d489dba Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 11 Feb 2026 15:00:03 -0500 Subject: [PATCH 19/39] Added a comment for the main function. --- make-batch-dirs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/make-batch-dirs b/make-batch-dirs index 29401cd..40a1a7e 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -143,6 +143,9 @@ def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): # Main function. # def main(): + """ + This the main function. 
+ """ # Parse command line arguements parser = argparse.ArgumentParser(description='Run..') parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.') From 7b3f9bb00e9679df9e9f8c34b2bdcfe6f1d0eb1d Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:17:44 -0500 Subject: [PATCH 20/39] More README.md adjustments around the tables. --- README.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f8d78dd..e76e50d 100644 --- a/README.md +++ b/README.md @@ -35,20 +35,18 @@ Spreadsheet Columns: ## Script Parameters: - ----------------------------------------------------------------------- - Parameter Description - ------------------- --------------------------------------------------- - \--config_file Path to the script config file containing paths to + |Parameter |Description| + |------------------- |---------------------------------------------------| + |\--config_file |Path to the script config file containing paths to the workbench directory (workbench_path), the scanning directory (scanning_path), and the path to - the python executable. + the python executable.| - \--xls-file Path to the spreadsheet to be processed. + |\--xls-file |Path to the spreadsheet to be processed.| - \--batch-name The Name of the batch that will be created. + |\--batch-name |The Name of the batch that will be created.| - \--log-file Path to the log file. - ----------------------------------------------------------------------- + |\--log-file |Path to the log file.| ## Usage: From 3ae4ee2ffeff5bdfa7fa6efce1f1fc455bf1a528 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:19:44 -0500 Subject: [PATCH 21/39] More README.md adjustments around the tables. 
--- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e76e50d..1f7b0b7 100644 --- a/README.md +++ b/README.md @@ -37,16 +37,14 @@ Spreadsheet Columns: |Parameter |Description| |------------------- |---------------------------------------------------| - |\--config_file |Path to the script config file containing paths to - the workbench directory (workbench_path), the - scanning directory (scanning_path), and the path to - the python executable.| + |\--config_file |Path to the script config file containing paths to the workbench directory (workbench_path), the scanning directory (scanning_path), and the path to the python executable.| |\--xls-file |Path to the spreadsheet to be processed.| |\--batch-name |The Name of the batch that will be created.| |\--log-file |Path to the log file.| + |------------------- |---------------------------------------------------| ## Usage: From d1f252689c0fa3d7f4cc6f99b37b472dd26446d0 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:20:26 -0500 Subject: [PATCH 22/39] More README.md adjustments around the tables. 
--- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 1f7b0b7..d0e208a 100644 --- a/README.md +++ b/README.md @@ -38,11 +38,8 @@ Spreadsheet Columns: |Parameter |Description| |------------------- |---------------------------------------------------| |\--config_file |Path to the script config file containing paths to the workbench directory (workbench_path), the scanning directory (scanning_path), and the path to the python executable.| - |\--xls-file |Path to the spreadsheet to be processed.| - |\--batch-name |The Name of the batch that will be created.| - |\--log-file |Path to the log file.| |------------------- |---------------------------------------------------| From 21f803840c4ded1c4a5bb6ae975ea8c4004efb41 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:21:07 -0500 Subject: [PATCH 23/39] More README.md adjustments around the tables. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index d0e208a..00388c2 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,6 @@ Spreadsheet Columns: |\--xls-file |Path to the spreadsheet to be processed.| |\--batch-name |The Name of the batch that will be created.| |\--log-file |Path to the log file.| - |------------------- |---------------------------------------------------| ## Usage: From 29fb3e78f669c12198bbe7c63b117b2839687603 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:24:15 -0500 Subject: [PATCH 24/39] More README.md adjustments around the tables. --- README.md | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 00388c2..60d1044 100644 --- a/README.md +++ b/README.md @@ -20,18 +20,13 @@ sub-folders in preparation for DRL to perform their scanning processes. 
Spreadsheet Columns: - |-----------------------------------------------------------------------| - |**Required - Columns**| + |**Required Columns**| |------------------- |---------------------------------------------------| - |'id' |This is the PID of the object. Must currently be - the first column of the sheet.| - - |**Optional - Columns**| + |'id' |This is the PID of the object. Must currently be the first column of the sheet.| + |**Optional Columns**| + |------------------- |---------------------------------------------------| |Any |Any additional columns as needed.| - |-----------------------------------------------------------------------| ## Script Parameters: @@ -46,8 +41,8 @@ Spreadsheet Columns: Script Usage Example: -make-batch-dirs ---config_file config.conf ---xls-file -input_spreadsheet.xls ---log-file log.txt ---batch-name MyNewBatch +make-batch-dirs --config_file config.conf --xls-file +input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch ## Function: From 6e8eba416503034d9b9681f32325d00702e8eba6 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:25:44 -0500 Subject: [PATCH 25/39] More README.md adjustments around the tables. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 60d1044..44887ba 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,11 @@ sub-folders in preparation for DRL to perform their scanning processes. Spreadsheet Columns: - |**Required Columns**| + |Required Columns || |------------------- |---------------------------------------------------| |'id' |This is the PID of the object. 
Must currently be the first column of the sheet.| - |**Optional Columns**| + |Optional Columns || |------------------- |---------------------------------------------------| |Any |Any additional columns as needed.| From fd20fca77fe45ca70928b403b02c871bc81063e0 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:28:54 -0500 Subject: [PATCH 26/39] More README.md adjustments around the tables. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 44887ba..337633f 100644 --- a/README.md +++ b/README.md @@ -39,16 +39,16 @@ Spreadsheet Columns: ## Usage: -Script Usage Example: +Script Usage Examples: + + |Spread Sheet E.g. |make-batch-dirs --config_file config.conf --xls-file +input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| + |Google Sheet E.g. |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| -make-batch-dirs --config_file config.conf --xls-file -input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch ## Function: -For each row in the spreadsheet, obtain the first column contents which -should be the 'id' column and construct a new directory structure in the -format of {scanning_path}/{batch-name}/{id}. +For each row in the spreadsheet, obtain the 'id' column contents and construct a new directory structure in the format of {scanning_path}/{batch-name}/{id}. E.g. Result: /scanning/MyNewBatch/317350000001, /scanning/MyNewBatch/317350000002, /scanning/MyNewBatch/317350000003 From 524add95ed64f8d2069a9890a0fdd6a6a4eb5e29 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:29:59 -0500 Subject: [PATCH 27/39] More README.md adjustments around the tables. 
--- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 337633f..a697ead 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,10 @@ Spreadsheet Columns: ## Usage: Script Usage Examples: - - |Spread Sheet E.g. |make-batch-dirs --config_file config.conf --xls-file -input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| - |Google Sheet E.g. |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| + |Examples:|| + |make-batch-dirs --config_file config.conf --xls-file +input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch|| + |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --|| ## Function: From 9a92b59add5862e00f4a4ca345881d3b4063dc21 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:31:20 -0500 Subject: [PATCH 28/39] More README.md adjustments around the tables. --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a697ead..faf0686 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,11 @@ Spreadsheet Columns: ## Usage: Script Usage Examples: - |Examples:|| + |Examples:| + |---| |make-batch-dirs --config_file config.conf --xls-file -input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch|| - |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --|| +input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| + |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| ## Function: From 613aff2c5f54a2f0b6fa2d8bd5ed7ab8fabbdf1a Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:32:59 -0500 Subject: [PATCH 29/39] More README.md adjustments around the tables. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index faf0686..c4f5a63 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Spreadsheet Columns: ## Usage: Script Usage Examples: - |Examples:| + |:Examples| |---| |make-batch-dirs --config_file config.conf --xls-file input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| From ee8e220e581ca85cb5d4aff55e7c410414ed1a11 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:34:11 -0500 Subject: [PATCH 30/39] More README.md adjustments around the tables. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c4f5a63..b126366 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,8 @@ Spreadsheet Columns: ## Usage: Script Usage Examples: - |:Examples| - |---| + |Examples| + |:---| |make-batch-dirs --config_file config.conf --xls-file input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| From 59f71bd38cee5505b51f29bab940a93b91c25a68 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:36:41 -0500 Subject: [PATCH 31/39] More README.md adjustments around the tables. 
--- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b126366..1e14c2f 100644 --- a/README.md +++ b/README.md @@ -40,11 +40,10 @@ Spreadsheet Columns: ## Usage: Script Usage Examples: - |Examples| - |:---| - |make-batch-dirs --config_file config.conf --xls-file -input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| - |make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| + |Type|Example| + |:---|---| + |Spreadsheet|make-batch-dirs --config_file config.conf --xls-file input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| + |Google Sheet|make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| ## Function: From 216ab090d212051669b51b7964633111afd811e7 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:42:08 -0500 Subject: [PATCH 32/39] More README.md adjustments around the tables. --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e14c2f..409ff33 100644 --- a/README.md +++ b/README.md @@ -32,10 +32,14 @@ Spreadsheet Columns: |Parameter |Description| |------------------- |---------------------------------------------------| - |\--config_file |Path to the script config file containing paths to the workbench directory (workbench_path), the scanning directory (scanning_path), and the path to the python executable.| + |\--config-file |Path to the script config file containing paths to the workbench directory (workbench_path), the scanning directory (scanning_path), and the path to the python executable.| |\--xls-file |Path to the spreadsheet to be processed.| |\--batch-name |The Name of the batch that will be created.| |\--log-file |Path to the log file.| + |\--use-google |Set this to true if using Google Sheets.| + |\--google-sheet-id |The Google Sheet Identifier.| + |\--google-sheet-name|The Google Sheet Tab Name.| + 
|\--google-creds-file|The file containing the Google credentials file.| ## Usage: From a5314a075a630528e7d2a659f4302d28ca55fd1c Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:46:52 -0500 Subject: [PATCH 33/39] More README.md adjustments around the tables. --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 409ff33..12f065f 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Spreadsheet Columns: ## Script Parameters: |Parameter |Description| - |------------------- |---------------------------------------------------| + |:--- |:--- | |\--config-file |Path to the script config file containing paths to the workbench directory (workbench_path), the scanning directory (scanning_path), and the path to the python executable.| |\--xls-file |Path to the spreadsheet to be processed.| |\--batch-name |The Name of the batch that will be created.| @@ -45,9 +45,9 @@ Spreadsheet Columns: Script Usage Examples: |Type|Example| - |:---|---| + |:--- |:--- | |Spreadsheet|make-batch-dirs --config_file config.conf --xls-file input_spreadsheet.xls --log-file log.txt --batch-name MyNewBatch| - |Google Sheet|make-batch-dirs --config_file config.conf --log-file log.xt --batch-name MyNewBatch --| + |Google Sheet|make-batch-dirs --config_file config.conf --log-file log.txt --batch-name MyNewBatch --use_google {true\\|false} --google-sheet-id {sheet id} --google-sheet-name {E.g. 'Sheet1'} --google-creds-file {path to credentials file.}| ## Function: From d9712b30fa89ef7c502eb93ca45df956142a8652 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 13:56:20 -0500 Subject: [PATCH 34/39] Adjusting make_dirs_from_df function dataframe -> df. 
--- make-batch-dirs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index 40a1a7e..db7dec2 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -112,7 +112,7 @@ def xls_file_as_df(xls_file:str) -> pd.DataFrame: dataframe = workbook.active return dataframe -def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): +def make_dirs_from_df(df:pd.DataFrame, batch_path): """ read id field of dataframe, and create batch_path/id for non empty and non None ids. @@ -128,7 +128,7 @@ def make_dirs_from_df(dataframe:pd.DataFrame, batch_path): if len(df) == 0: logger.error("Sheet contains no data") exit(1) - rows = dataframe.iterrows() + rows = df.iterrows() for _, row in rows: if (str(row.loc['id']) and (str(row.loc['id']) != "None" )): id = str(row.loc['id']) @@ -196,7 +196,7 @@ def main(): sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name) df = sheet.read() batch_path = create_batch_folder(scanning_path, batch_name) - make_dirs_from_df(df) + make_dirs_from_df(df,batch_path) else: logger.error(f"Error: Google arguments are required when using Google Sheets.") exit() From 9908c714978e0b593f3f64654dd5313e46e50b26 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 14:00:37 -0500 Subject: [PATCH 35/39] Adjusting the make-batch-dirs.conf-sample file. --- make-batch-dirs.conf_sample | 5 ----- 1 file changed, 5 deletions(-) diff --git a/make-batch-dirs.conf_sample b/make-batch-dirs.conf_sample index 01d6dbd..adff3b7 100644 --- a/make-batch-dirs.conf_sample +++ b/make-batch-dirs.conf_sample @@ -2,11 +2,6 @@ # make-batch-dirs config. # -# workbench_path - where is your workbench directory. -workbench_path: - # scanning_path - where is your scanning directory. scanning_path: -# python_exe - where is your python executable. 
-python_exe: /usr/bin/python3 From 153c900269dcede643d51c906ba312d436f1e4ed Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 14:07:10 -0500 Subject: [PATCH 36/39] Adding a config file section to the README.md. --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 12f065f..496715e 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,17 @@ Spreadsheet Columns: |\--google-sheet-name|The Google Sheet Tab Name.| |\--google-creds-file|The file containing the Google credentials file.| +## Config File requirements: + +The config file contains a single option "scanning_path" that points the script to where you would like to build the directory structure for the batch-name you are passing in to the script. This is usually the top-level folder where you would store all your batches. In our example, we would set "scanning_path" to "/scanning" as in the following. + +``` +scanning_path: /scanning +``` + +An example config file can be found in the make-batch-dirs.conf_sample file. + + ## Usage: Script Usage Examples: From 65b046f640d4c7632548037cf497fda2c7317db7 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 14:10:05 -0500 Subject: [PATCH 37/39] Adding a note about the default tab (sheet-name) used, 'Sheet1', to the README.md file. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 496715e..90e95b0 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Spreadsheet Columns: |\--log-file |Path to the log file.| |\--use-google |Set this to true if using Google Sheets.| |\--google-sheet-id |The Google Sheet Identifier.| - |\--google-sheet-name|The Google Sheet Tab Name.| + |\--google-sheet-name|The Google Sheet Tab Name.
This defaults to 'Sheet1' if not included.| |\--google-creds-file|The file containing the Google credentials file.| ## Config File requirements: From 716634510360107ce500b8f664d814306af83420 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 14:48:25 -0500 Subject: [PATCH 38/39] Updated make_dirs_from_df and removed function xls_file_as_df due to pandas call works better. --- make-batch-dirs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/make-batch-dirs b/make-batch-dirs index db7dec2..2ab4ece 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -78,11 +78,11 @@ def copy_xslx_to_batch(batch_path:str): error: if manifest.xslx does not exist, then log and fail """ manifest_path = os.path.sep.join([batch_path,'manifest.xlsx']) - if os.path.isfile(manifest_path): + if not os.path.isfile(manifest_path): logger.info(f"Copying spreadsheet to {manifest_path}") shutil.copyfile(args.xls_file, manifest_path) else: - logger.error(f"Error: {manifest_path} does not exist.") + logger.error(f"Error: {manifest_path} already exists.") exit(1) def save_xslx_as_csv(batch_path:str): @@ -103,15 +103,6 @@ def save_xslx_as_csv(batch_path:str): #for r in sheet.rows: c.writerow([cell.value for cell in row]) -def xls_file_as_df(xls_file:str) -> pd.DataFrame: - """ - read xls file as dataframe - """ - logger.info(f"Reading spreadsheet: {xls_file}") - workbook = openpyxl.load_workbook(xls_file) - dataframe = workbook.active - return dataframe - def make_dirs_from_df(df:pd.DataFrame, batch_path): """ read id field of dataframe, and create batch_path/id @@ -204,11 +195,11 @@ def main(): else: logger.info(f"Using Spreadsheet: {args.xls_file}") logger.info(f"Creating Batch folder: {scanning_path}/{batch_name}") + df = pd.read_excel(args.xls_file) batch_path = create_batch_folder(scanning_path, batch_name) + make_dirs_from_df(df,batch_path) copy_xslx_to_batch(batch_path) save_xslx_as_csv(batch_path) - df = xls_file_as_df(args.xls_file) - 
make_dirs_from_df(df, batch_path) logger.info(f"Batch Path Creation Complete.") From d2ca0f0720e35f9fc10188054a5f294f3c4b192a Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Thu, 12 Feb 2026 14:50:28 -0500 Subject: [PATCH 39/39] Added an additional logging line. --- make-batch-dirs | 1 + 1 file changed, 1 insertion(+) diff --git a/make-batch-dirs b/make-batch-dirs index 2ab4ece..ca2c1ad 100755 --- a/make-batch-dirs +++ b/make-batch-dirs @@ -198,6 +198,7 @@ def main(): df = pd.read_excel(args.xls_file) batch_path = create_batch_folder(scanning_path, batch_name) make_dirs_from_df(df,batch_path) + logger.info(f"Storing local copy of xlsx file and creating csv file from Google Sheet.") copy_xslx_to_batch(batch_path) save_xslx_as_csv(batch_path)