Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 90 additions & 24 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,20 +1,33 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
###############################
# Project specific .gitignore #
###############################

# Secrets file
secrets.toml

# Log file
log.txt

# Testing Python files
# Debug output folder
debug_output/

# Temporary testing files
test.py

##################################################################
# Python .gitignore #
# https://github.com/github/gitignore/blob/main/Python.gitignore #
# Retrieved 2026-03-17 #
##################################################################

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
Expand All @@ -36,8 +49,8 @@ share/python-wheels/
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

Expand All @@ -55,7 +68,7 @@ htmlcov/
nosetests.xml
coverage.xml
*.cover
*.py,cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
Expand Down Expand Up @@ -101,22 +114,37 @@ ipython_config.py
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# poetry.lock
# poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
Expand All @@ -125,11 +153,25 @@ __pypackages__/
celerybeat-schedule
celerybeat.pid

# Redis
*.rdb
*.aof
*.pid

# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/

# ActiveMQ
activemq-data/

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
Expand Down Expand Up @@ -162,11 +204,35 @@ dmypy.json
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# VSCode settings
.vscode
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
.vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml
69 changes: 19 additions & 50 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ server = "<SqlInstanceName>" # SQL instance containing GIS database
database = "<SqlDatabaseName>" # database within instance containing GIS datasets (GQ/LUDU)

[sql]
staging = "<FolderPath>" # unconditional network folder path visible to SQL instance for BULK INSERT
staging = '<FolderPath>' # unconditional network folder path visible to SQL instance for BULK INSERT
```

## Running
Expand Down Expand Up @@ -47,68 +47,37 @@ mgra = "mgra15"
start_year = 2020

# The last year inclusive to end running with
end_year = 2023
end_year = 2024

# The code version
version = "0.0.0-dev"
version = "1.1.1-dev"

# Additional notes on this run
comments = "Example comment"

# The 'debug' section contains configuration for running a subset of modules of the
# Estimates Program for a given set of years. All parameters must be provided except for
# 'run_id', 'version', and 'comments'. If 'run_id' is -1, then a new 'run_id' will
# be automatically created, similar to 'run' mode
# The `debug` section contains configuration for running a single module for a single
# year based on the input data of an existing complete Estimates run. Output data is not
# written to the database, but is instead saved to the local folder debug_output\, which
# is ignored by .gitignore. No data is saved locally for the "startup" and "staging"
# modules.
[debug]

# Whether to use the 'debug' section. Mutually exclusive with 'run' mode
enabled = false

# (Optional) If provided, then most parameters in the 'debug' section will be pulled
# from '[run].[metadata]'. If not provided, then a new 'run_id' will be automatically
# created. Use -1 to indicate no run_id (TOML doesn't support null)
run_id = -1
# The [run_id] of a fully [complete] Estimates Program run. Input data for debugging
# will be pulled from this [run_id]
run_id = 82 # The run_id for the released v24 Estimates

# The first year inclusive and last year inclusive to run. In the case that...
# * The value of 'run_id' is -1, the values will be loaded into [metadata].[run]
# and will be used as is
# * The value of 'run_id' is not -1, the values will be checked against the values
# already in '[run].[metadata]'
start_year = 2020
end_year = 2023

# (Optional) The code version. If provided, then 'run_id' must be -1
version = "0.0.0-dev"

# (Optional) Additional notes on this run. If provided, then 'run_id' must be -1
comments = ""

# Whether to run the 'startup' module
startup = false

# Whether to run the 'housing_and_households' module. If enabled, then any above
# modules must all be enabled due to module dependencies
housing_and_households = false

# Whether to run the 'population' module. If enabled, then any above modules must all
# be enabled due to module dependencies
population = false

# Whether to run the 'population_by_ase' module. If enabled, then any above modules
# must all be enabled due to module dependencies
population_by_ase = false

# Whether to run the 'household_characteristics' module. If enabled, then any above
# modules must all be enabled due to module dependencies
household_characteristics = false

# Whether to run the 'employment' module. If enabled, then startup module must also be
# enabled due to module dependencies
employment = false
# The year of the Estimates Program to run. This year must be consistent with the stored
# [start_year] and [end_year] associated with the above [run_id] in [metadata].[run]
year = 2020

# Whether to run the 'staging' module. If enabled, then any above modules must all be
# enabled due to module dependencies
staging = false
# The module of the Estimates Program to run. Since only [complete] [run_id]s are
# allowed, this can be any Estimates Program module. Explicitly, the valid inputs
# are "startup", "housing_and_households", "population", "population_by_ase",
# "household_characteristics", "employment", or "staging"
module = ""
```

### Production Database Schema
Expand Down
23 changes: 7 additions & 16 deletions config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,12 @@ end_year = 2024
version = "1.1.1-dev"
comments = "Example comment"

# The `debug` section contains configuration for running a subset of modules of the
# Estimates Program for a given set of years. All parameters must be provided except for
# `run_id` and `comments`. If `run_id` is `null`, then a new `run_id` will be
# automatically created, similar to `run` mode
# The `debug` section contains configuration for running a single module for a single
# year based on the input data of an existing complete Estimates run. The `module`
# value can be any of "startup", "housing_and_households", "population",
# "population_by_ase", "household_characteristics", "employment", or "staging".
[debug]
enabled = false
run_id = -1 # -1 is interpreted as None
start_year = 2022
end_year = 2023
version = "1.1.1-dev"
comments = ""
startup = false
housing_and_households = false
population = false
population_by_ase = false
household_characteristics = false
employment = false
staging = false
run_id = 82 # The run_id for the released v24 Estimates
year = 2020
module = ""
14 changes: 7 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
if utils.RUN_INSTRUCTIONS["startup"]:
utils.display_ascii_art("data/welcome.txt")
logger.info("Running Startup module...\n")
startup.run_startup()
startup.run_startup(debug=utils.DEBUG)

# Loop through the years first
for year in utils.RUN_INSTRUCTIONS["years"]:
Expand All @@ -41,27 +41,27 @@
# Housing and Households module
if utils.RUN_INSTRUCTIONS["housing_and_households"]:
logger.info("Running Housing and Households module...")
hs_hh.run_hs_hh(year)
hs_hh.run_hs_hh(year, debug=utils.DEBUG)

# Population module
if utils.RUN_INSTRUCTIONS["population"]:
logger.info("Running Population module...")
pop.run_pop(year)
pop.run_pop(year, debug=utils.DEBUG)

# Population by Age/Sex/Ethnicity module
if utils.RUN_INSTRUCTIONS["population_by_ase"]:
logger.info("Running Population by Age/Sex/Ethnicity module...")
ase.run_ase(year)
ase.run_ase(year, debug=utils.DEBUG)

# Household Characteristics module
if utils.RUN_INSTRUCTIONS["household_characteristics"]:
logger.info("Running Household Characteristics module...")
hh_characteristics.run_hh_characteristics(year)
hh_characteristics.run_hh_characteristics(year, debug=utils.DEBUG)

# Employment module
if utils.RUN_INSTRUCTIONS["employment"]:
logger.info("Running Employment module...")
employment.run_employment(year)
employment.run_employment(year, debug=utils.DEBUG)

# Diagnostic print for this year
logger.info(f"Finished running {year}\n")
Expand All @@ -70,7 +70,7 @@
# [metadata].[run] table
if utils.RUN_INSTRUCTIONS["staging"]:
logger.info("Running Staging module...")
staging.run_staging()
staging.run_staging(debug=utils.DEBUG)

# Final print for completion
logger.info("Completed")
Loading