-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.base
More file actions
109 lines (93 loc) · 5.73 KB
/
Dockerfile.base
File metadata and controls
109 lines (93 loc) · 5.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Base image for ExampleStudy: R 4.2, Java, DB drivers, Darwin R packages, ODBC, etc.
# Requires extras/SimbaSparkODBC-*-Debian-64bit.zip (Databricks ODBC driver). Always built for linux/amd64.
# Build: docker build -f Dockerfile.base --platform linux/amd64 -t examplestudy-base:latest .
# Preinstalled drivers support get_connection(dbms) for:
# postgres -> RPostgres (env: CDM5_POSTGRESQL_*)
# snowflake -> odbc (env: SNOWFLAKE_*, SNOWFLAKE_DRIVER; image default "SnowflakeDSIIDriver", the [Section] name in /etc/odbcinst.ini)
# sqlserver -> odbc (env: SQL_SERVER_DRIVER e.g. "ODBC Driver 18 for SQL Server", CDM5_SQL_SERVER_*)
# spark -> odbc::databricks() (env: DATABRICKS_HTTPPATH)
# Base image: rocker/rstudio at the 4.2 tag, always resolved for linux/amd64
# (the ODBC driver packages installed further down are amd64/x86_64 builds).
FROM --platform=linux/amd64 rocker/rstudio:4.2
# `org.opencontainers.image.authors` is the key the OCI image spec defines for contact details.
# The original, non-standard `maintainer` key is kept so existing label queries keep working.
LABEL org.opencontainers.image.authors="Adam Black <a.black@darwin-eu.org>" \
      org.opencontainers.image.maintainer="Adam Black <a.black@darwin-eu.org>"
# Install Java and the distro build of rJava, then re-run R's Java configuration.
# - `sudo` stays in the package list so the set of tools available in the image is unchanged,
#   but it is no longer used to invoke javareconf: this step already runs as root
#   (apt-get above would fail otherwise), so the wrapper added nothing.
# - The apt package index is removed in the same layer that downloaded it.
RUN apt-get -y update && apt-get install -y \
        default-jdk \
        r-cran-rjava \
        sudo \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && R CMD javareconf
# Append a CRAN repository override to the site-wide Rprofile.
# The URL is a dated Posit Package Manager path (jammy, 2026-02-01), so package
# installs below resolve against that fixed date rather than the moving CRAN head.
RUN printf '%s\n' \
        'options(repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/jammy/2026-02-01"))' \
        >> "${R_HOME}/etc/Rprofile.site"
# Core R packages. Each package is installed by its own RUN (its own layer), and every
# step deletes /tmp/download_packages/ and /tmp/*.rds in the same layer it created them.
RUN install2.r --error rJava \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

RUN install2.r --error DatabaseConnector \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

# Folder for JDBC driver jars; set before the download call below.
# NOTE(review): presumably downloadJdbcDrivers() reads this variable as its default target - confirm.
ENV DATABASECONNECTOR_JAR_FOLDER="/opt/hades/jdbc_drivers"
RUN R -e "DatabaseConnector::downloadJdbcDrivers('all');"

RUN install2.r --error Andromeda \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

RUN install2.r --error RJSONIO \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

RUN install2.r --error CirceR \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

RUN install2.r --error SqlRender \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

RUN install2.r --error renv \
    && rm -rf /tmp/download_packages/ /tmp/*.rds
# Utility R packages, preceded by the system development headers installed for them
# (XML, TLS, HTTP client, and font/text-shaping libraries).
RUN apt-get -y update \
    && apt-get install -y \
        libcurl4-openssl-dev \
        libfontconfig1-dev \
        libfribidi-dev \
        libharfbuzz-dev \
        libssl-dev \
        libxml2-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/

# HTTP / TLS / XML helpers plus `remotes`.
RUN install2.r --error \
        openssl \
        httr \
        xml2 \
        remotes \
    && rm -rf /tmp/download_packages/ /tmp/*.rds

# duckdb gets its own layer.
RUN install2.r --error duckdb \
    && rm -rf /tmp/download_packages/ /tmp/*.rds
# Install ODBC / PostgreSQL client libraries, then the R database packages built against them.
# - unixodbc, unixodbc-dev, pkg-config: unixODBC + dev headers + pkg-config for the R odbc package
# - libpq-dev: NOTE(review) presumably the PostgreSQL client headers RPostgres builds against - confirm
# - libsasl2-modules-gssapi-mit: dependency for Simba/Databricks ODBC (from extras/ zip)
# - build-essential and CXX=g++: R was built without C++ compiler; odbc's configure invokes ${CXX} -E
# - curl: used by the later driver-download step
#
# `--install-suggests` was dropped: it makes apt install every *suggested* package of the
# listed packages (and of their dependencies), which inflates the image considerably.
# NOTE(review): confirm nothing at runtime relied on a package that was only pulled in that way.
#
# duckdb is no longer reinstalled here; an earlier step in this file already installs it.
RUN apt-get -y update && apt-get install -y \
        build-essential \
        curl \
        libpq-dev \
        libsasl2-modules-gssapi-mit \
        pkg-config \
        unixodbc \
        unixodbc-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && PKG_CONFIG_PATH=/usr/lib/x86_64-linux-gnu/pkgconfig:/usr/lib/pkgconfig \
       CXX=g++ \
       install2.r --error RPostgres odbc \
    && rm -rf /tmp/download_packages/ /tmp/*.rds
# Darwin packages, followed by the study's Imports (dplyr, ggplot2, shiny, plotly).
# All of them are installed by a single install2.r call, i.e. one layer.
RUN install2.r --error \
        omopgenerics CDMConnector \
        IncidencePrevalence PatientProfiles \
        TreatmentPatterns \
        DrugExposureDiagnostics DrugUtilisation \
        dplyr ggplot2 shiny plotly \
    && rm -rf /tmp/download_packages/ /tmp/*.rds
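# R environment defaults appended to R's Renviron file.
# NOTE: this Dockerfile declares no `ARG GITHUB_PAT`, so `docker build --build-arg GITHUB_PAT=xxx` is not
# consumed by any step here. If GitHub installs are added later, prefer a BuildKit secret mount
# (`RUN --mount=type=secret,...`) over a build arg, because build args are visible in `docker history`.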
# Append the JDBC driver folder to R's Renviron (same path as the DATABASECONNECTOR_JAR_FOLDER ENV value).
RUN printf '%s\n' "DATABASECONNECTOR_JAR_FOLDER=/opt/hades/jdbc_drivers" \
        >> /usr/local/lib/R/etc/Renviron
# Append the renv cellar location to the same file.
RUN printf '%s\n' "RENV_PATHS_CELLAR=/opt/renv_cellar" \
        >> /usr/local/lib/R/etc/Renviron
# SQL Server and Snowflake ODBC (amd64 image)
#
# Changes from the previous version of this step:
# - Downloads are written straight to their destination with `curl -o` instead of
#   `curl | sudo tee`. Without `pipefail`, a failed curl inside a pipe is masked by tee's
#   exit status; and `sudo` adds nothing because build steps already run as root.
# - The apt package index fetched here is removed in this same layer.
# - The Snowflake .deb is staged in /tmp rather than the current directory.
#
# Steps:
#   1. Register Microsoft's signing key and the Ubuntu 22.04 package list, install msodbcsql18
#      (ACCEPT_EULA=Y answers the package's licence prompt non-interactively).
#   2. Download and install the Snowflake ODBC 3.1.1 x86_64 package, then delete the .deb.
#   3. In simba.snowflake.ini, rewrite libodbcinst.so.1 references to libodbcinst.so.2 and
#      set ODBCInstLib to the full path of that library.
#   4. Best-effort: if /etc/odbcinst.ini has a [SnowflakeDSIIDriver] section, set its Driver=
#      line to libSnowflake.so; a missing section or failed edit is deliberately ignored (`|| true`).
#   5. Add the Snowflake lib directory to the dynamic linker configuration and refresh its cache.
RUN curl -fsSL -o /etc/apt/trusted.gpg.d/microsoft.asc https://packages.microsoft.com/keys/microsoft.asc \
    && curl -fsSL -o /etc/apt/sources.list.d/mssql-release.list https://packages.microsoft.com/config/ubuntu/22.04/prod.list \
    && apt-get update \
    && ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
    && curl -fsSL -o /tmp/snowflake-odbc-3.1.1.x86_64.deb https://sfc-repo.snowflakecomputing.com/odbc/linux/3.1.1/snowflake-odbc-3.1.1.x86_64.deb \
    && dpkg -i /tmp/snowflake-odbc-3.1.1.x86_64.deb \
    && rm -f /tmp/snowflake-odbc-3.1.1.x86_64.deb \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && sed -i 's/libodbcinst.so.1/libodbcinst.so.2/g' /usr/lib/snowflake/odbc/lib/simba.snowflake.ini \
    && sed -i 's|^ODBCInstLib=.*|ODBCInstLib=/usr/lib/x86_64-linux-gnu/libodbcinst.so.2|' /usr/lib/snowflake/odbc/lib/simba.snowflake.ini \
    && ( grep -q '\[SnowflakeDSIIDriver\]' /etc/odbcinst.ini && sed -i '/\[SnowflakeDSIIDriver\]/,/^\[/ s|^Driver=.*|Driver=/usr/lib/snowflake/odbc/lib/libSnowflake.so|' /etc/odbcinst.ini || true ) \
    && echo /usr/lib/snowflake/odbc/lib > /etc/ld.so.conf.d/snowflake-odbc.conf && ldconfig
# Runtime environment for the ODBC stack, set in a single ENV instruction:
#   LD_LIBRARY_PATH  - lets ODBC drivers (Snowflake, etc.) and unixODBC load shared libs at runtime
#   SNOWFLAKE_DRIVER - default Snowflake ODBC driver name (must match [Section] in /etc/odbcinst.ini)
ENV LD_LIBRARY_PATH=/usr/lib/snowflake/odbc/lib:/usr/lib/x86_64-linux-gnu \
    SNOWFLAKE_DRIVER=SnowflakeDSIIDriver
# Databricks ODBC (for odbc::databricks()). Required: place SimbaSparkODBC-*-Debian-64bit.zip in extras/ before building.
# NOTE(review): because the zip arrives via COPY, it stays in that layer even though the RUN
# below deletes it; a BuildKit bind mount would avoid that if BuildKit can be required for this build.
COPY extras/SimbaSparkODBC-*-Debian-64bit.zip /tmp/databricks-odbc.zip
# Unpack the archive, install the amd64 .deb it contains, and clean up in the same layer.
# - `ls` exits non-zero when the glob matches nothing, which aborts the build; `test -n`
#   additionally guards against an empty result.
# - dpkg is called directly (no sudo): build steps already run as root.
# - The apt package index fetched for `unzip` is removed here rather than left in the layer.
RUN apt-get update && apt-get install -y unzip \
    && unzip -o /tmp/databricks-odbc.zip -d /tmp \
    && DEB="$(ls /tmp/SimbaSparkODBC-*-Debian-64bit/simbaspark_*_amd64.deb)" && test -n "$DEB" && dpkg -i "$DEB" \
    && rm -rf /tmp/databricks-odbc.zip /tmp/SimbaSparkODBC-*-Debian-64bit \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Project/developer tooling R packages, installed together in one layer.
RUN install2.r --error \
        here \
        log4r \
        testthat \
        renv \
        devtools \
        remotes \
        cli \
        rlang \
        uuid \
    && rm -rf /tmp/download_packages/ /tmp/*.rds
# Append the Eunomia data folder setting to R's Renviron, then call CDMConnector's downloader at build time.
# NOTE(review): presumably downloadEunomiaData() writes the example dataset into
# EUNOMIA_DATA_FOLDER so it ships inside the image - confirm against the CDMConnector docs.
RUN printf '%s\n' "EUNOMIA_DATA_FOLDER=/opt/eunomia_data" \
        >> /usr/local/lib/R/etc/Renviron
RUN R -e 'CDMConnector::downloadEunomiaData()'
# Text editor for interactive sessions; apt caches and package lists are cleared in the same layer.
RUN apt-get -y update \
    && apt-get install -y vim \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/

# Create /results at build time.
RUN mkdir /results

# Containers start in /code and run bash by default.
WORKDIR /code
CMD ["bash"]