From 038c722525da52a43ad53bc78545895ad11c806d Mon Sep 17 00:00:00 2001 From: bhandarivijay Date: Wed, 4 Mar 2026 17:34:52 +0000 Subject: [PATCH] chore: Migrate gsutil usage to gcloud storage --- beam/util/build-beam-artifacts.sh | 2 +- cloudbuild/run-presubmit-on-k8s.sh | 2 +- datasketches/datasketches.sh | 2 +- gobblin/gobblin.sh | 2 +- gpu/manual-test-runner.sh | 2 +- hbase/test_hbase.py | 4 ++-- hive-lineage/hive-lineage.sh | 2 +- knox/verify_knox.sh | 2 +- push-to-gcs.sh | 2 +- rapids/manual-test-runner.sh | 3 +-- 10 files changed, 11 insertions(+), 12 deletions(-) diff --git a/beam/util/build-beam-artifacts.sh b/beam/util/build-beam-artifacts.sh index ed83f6ed9..576c85c3b 100755 --- a/beam/util/build-beam-artifacts.sh +++ b/beam/util/build-beam-artifacts.sh @@ -13,7 +13,7 @@ readonly BEAM_SOURCE_VERSION="${3:-master}" function build_job_service() { ./gradlew :beam-runners-flink_2.11-job-server:shadowJar - gsutil cp \ + gcloud storage cp \ ./runners/flink/job-server/build/libs/beam-runners-flink_2.11-job-server-*-SNAPSHOT.jar \ ${BEAM_JOB_SERVICE_DESTINATION}/beam-runners-flink_2.11-job-server-${BEAM_SOURCE_VERSION}-SNAPSHOT.jar } diff --git a/cloudbuild/run-presubmit-on-k8s.sh b/cloudbuild/run-presubmit-on-k8s.sh index 5e2001c91..f3c84bbcf 100644 --- a/cloudbuild/run-presubmit-on-k8s.sh +++ b/cloudbuild/run-presubmit-on-k8s.sh @@ -52,7 +52,7 @@ if [[ ${exit_code} != 0 ]]; then LOG_GCS_PATH="gs://${BUCKET}/${BUILD_ID}/logs/${POD_NAME}.log" echo "Attempting to upload logs to ${LOG_GCS_PATH}" - if kubectl logs "${POD_NAME}" | gsutil cp - "${LOG_GCS_PATH}"; then + if kubectl logs "${POD_NAME}" | gcloud storage cp - "${LOG_GCS_PATH}"; then echo "Logs for failed pod ${POD_NAME} uploaded to: ${LOG_GCS_PATH}" else echo "Log upload to ${LOG_GCS_PATH} failed." diff --git a/datasketches/datasketches.sh b/datasketches/datasketches.sh index b0aee99c3..98aa18c74 100755 --- a/datasketches/datasketches.sh +++ b/datasketches/datasketches.sh @@ -57,7 +57,7 @@ function download_libraries() function download_example_jar() { if [[ "${SPARK_VERSION}" < "3.5" ]]; then - gsutil cp "${SPARK_JAVA_EXAMPLE_JAR}" "${DS_LIBPATH}" + gcloud storage cp "${SPARK_JAVA_EXAMPLE_JAR}" "${DS_LIBPATH}" if [ $? -eq 0 ]; then echo "Downloaded "${SPARK_JAVA_EXAMPLE_JAR}" successfully" else diff --git a/gobblin/gobblin.sh b/gobblin/gobblin.sh index 16e50287b..a41ecceb5 100755 --- a/gobblin/gobblin.sh +++ b/gobblin/gobblin.sh @@ -91,7 +91,7 @@ EOF function install_package() { # Download binary. local temp=$(mktemp -d) - gsutil cp "${PACKAGE_URL}" "${temp}/package.tar.gz" + gcloud storage cp "${PACKAGE_URL}" "${temp}/package.tar.gz" tar -xf "${temp}/package.tar.gz" -C "${temp}" # Setup package. diff --git a/gpu/manual-test-runner.sh b/gpu/manual-test-runner.sh index 37982bfe4..481a0b2df 100644 --- a/gpu/manual-test-runner.sh +++ b/gpu/manual-test-runner.sh @@ -48,7 +48,7 @@ function exit_handler() { # TODO: remove any test related resources in the project echo 'Uploading local logs to GCS bucket.' - gsutil -m rsync -r "${log_dir}/" "${gcs_log_dir}/" + gcloud storage rsync --recursive "${log_dir}/" "${gcs_log_dir}/" if [[ -f "${tmp_dir}/tests_success" ]]; then echo -e "${GREEN}Workflow succeeded${NC}, check logs at ${log_dir}/ or ${gcs_log_dir}/" diff --git a/hbase/test_hbase.py b/hbase/test_hbase.py index 2d8d07f19..c8c54b180 100644 --- a/hbase/test_hbase.py +++ b/hbase/test_hbase.py @@ -15,11 +15,11 @@ def setUp(self): super().setUp() self.GCS_BUCKET = "test-hbase-{}-{}".format(self.datetime_str(), self.random_str()) - self.assert_command('gsutil mb -c regional -l {} gs://{}'.format( + self.assert_command('gcloud storage buckets create --default-storage-class=regional --location {} gs://{}'.format( self.REGION, self.GCS_BUCKET)) def tearDown(self): - self.assert_command('gsutil -m rm -rf gs://{}'.format(self.GCS_BUCKET)) + self.assert_command('gcloud storage rm --recursive --continue-on-error gs://{}'.format(self.GCS_BUCKET)) super().tearDown() def verify_instance(self, name): diff --git a/hive-lineage/hive-lineage.sh b/hive-lineage/hive-lineage.sh index 84da22515..b3da0b7e0 100644 --- a/hive-lineage/hive-lineage.sh +++ b/hive-lineage/hive-lineage.sh @@ -48,7 +48,7 @@ function set_hive_lineage_conf() { function install_jars() { echo "Installing openlineage-hive hook" - gsutil cp -P "$INSTALLATION_SOURCE/hive-openlineage-hook-$HIVE_OL_HOOK_VERSION.jar" "$HIVE_LIB_DIR/hive-openlineage-hook.jar" + gcloud storage cp --preserve-posix "$INSTALLATION_SOURCE/hive-openlineage-hook-$HIVE_OL_HOOK_VERSION.jar" "$HIVE_LIB_DIR/hive-openlineage-hook.jar" } function restart_hive_server2_master() { diff --git a/knox/verify_knox.sh b/knox/verify_knox.sh index 5241a1252..fa75bb611 100644 --- a/knox/verify_knox.sh +++ b/knox/verify_knox.sh @@ -43,7 +43,7 @@ function test_installation() { # to test update, we will upload a new topology to gs bucket, and check whether it appears # we assume that knox initialization action is the very first one, /etc/google-dataproc/startup-scripts/dataproc-initialization-script-0 function test_update_new_topology() { - gsutil cp /etc/knox/conf/topologies/example-hive-pii.xml "${KNOX_GW_CONFIG_GCS}/topologies/update_topology.xml" + gcloud storage cp /etc/knox/conf/topologies/example-hive-pii.xml "${KNOX_GW_CONFIG_GCS}/topologies/update_topology.xml" sudo /bin/bash /etc/google-dataproc/startup-scripts/dataproc-initialization-script-0 update test_installation update_topology [[ $? == 1 ]] && return 1 diff --git a/push-to-gcs.sh b/push-to-gcs.sh index 9e4b2f5ca..4a47cba31 100755 --- a/push-to-gcs.sh +++ b/push-to-gcs.sh @@ -51,6 +51,6 @@ for file in "${MODULE}/"*.sh; do fi done -gsutil -m rsync -R -x "__pycache__/.*" "${MODULE}/" "${GCS_FOLDER}" +gcloud storage rsync --recursive --exclude "__pycache__/.*" "${MODULE}/" "${GCS_FOLDER}" echo "Pushed ${MODULE}/ to ${GCS_FOLDER}." diff --git a/rapids/manual-test-runner.sh b/rapids/manual-test-runner.sh index 371917a40..c414be771 100644 --- a/rapids/manual-test-runner.sh +++ b/rapids/manual-test-runner.sh @@ -47,7 +47,7 @@ function exit_handler() { # TODO: remove any test related resources in the project echo 'Uploading local logs to GCS bucket.' - gsutil -m rsync -r "${log_dir}/" "${gcs_log_dir}/" + gcloud storage rsync --recursive "${log_dir}/" "${gcs_log_dir}/" if [[ -f "${tmp_dir}/tests_success" ]]; then echo -e "${GREEN}Workflow succeeded, check logs at ${log_dir}/ or ${gcs_log_dir}/${NC}" @@ -74,4 +74,3 @@ export INTERNAL_IP_SSH="true" screen -US "${session_name}" -c rapids/bazel.screenrc -