diff --git a/.github/workflows/Jafner.dev_hugo.yaml b/.github/workflows/Jafner.dev_hugo.yaml index a00c316a..87f4657a 100644 --- a/.github/workflows/Jafner.dev_hugo.yaml +++ b/.github/workflows/Jafner.dev_hugo.yaml @@ -3,7 +3,7 @@ on: push: branches: [ main ] paths: - - '**/Jafner.dev/**' + - 'sites/Jafner.dev/**' workflow_dispatch: permissions: @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest defaults: run: - working-directory: "active projects/Jafner.dev" + working-directory: "sites/Jafner.dev" env: HUGO_VERSION: 0.121.2 steps: diff --git a/.gitmodules b/.gitmodules index e69de29b..a9feb91a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "sites/Jafner.dev/themes/hello-friend-ng"] + path = sites/Jafner.dev/themes/hello-friend-ng + url = https://github.com/rhazdon/hugo-theme-hello-friend-ng.git diff --git a/Monorepo TODO.md b/Monorepo TODO.md index bad3727e..92bf84ad 100644 --- a/Monorepo TODO.md +++ b/Monorepo TODO.md @@ -68,30 +68,24 @@ That's obviously a lot of steps to handle each repo manually, so let's script it #!/bin/bash { - echo "# 1. Quick reset: started" + echo "# 0. Quick reset: started" rm -rf $HOME/Git/Jafner.net rm -rf $HOME/Git/monorepo-temp rm -rf /tmp/gitleaks cd $HOME/Git - echo "# 1. Quick reset: completed" + echo "# 0. Quick reset: completed" } { - echo "# 2. Configure paths: started" - echo " # 2.1 Configure local path for Git repos. Should not contain any of the git directories involved, as all will be cloned fresh. Consider using a temporary project directory." + echo "# 1. Configure paths and variables: started" + echo " # Configure local paths for Git repos. Should not contain any of the git directories involved, as all will be cloned fresh. Consider using a temporary project directory." MONOREPO_DIR=$HOME/Git/Jafner.net TEMP_CLONE_DIR=$HOME/Git/monorepo-temp mkdir -p "$TEMP_CLONE_DIR" mkdir -p "$MONOREPO_DIR" - cd $TEMP_CLONE_DIR - echo "# 2. Configure paths: completed" -} - -{ - echo "# 3. 
List repositories: started" - echo " # 3.1 First repository in list is parent monorepo." - echo " # 3.2 Note: While we don't need write access to any of the constituent repositories, we do need authenticated access for any private repositories. Use ssh URLs when possible." - echo " # 3.3 Note: The URL of the monorepo repository does not need to exist already. This is asserted idempotently." + echo " # Configure array of repositories to compose into monorepo." + echo " # Note: First repository in list is parent monorepo." + echo " # Note: While we don't need write access to any of the constituent repositories, we do need authenticated access for any private repositories. Use ssh URLs when possible." REPOSITORIES=( "Jafner.net ssh://git@gitea.jafner.tools:2225/Jafner/Jafner.net.git" "homelab ssh://git@gitea.jafner.tools:2225/Jafner/homelab.git" @@ -112,28 +106,48 @@ That's obviously a lot of steps to handle each repo manually, so let's script it "razer-bat git@github.com:Jafner/Razer-BatteryLevelRGB.git" "5etools-docker git@github.com:Jafner/5etools-docker.git" "jafner-homebrew git@github.com:Jafner/jafner-homebrew.git" - ) - cd $TEMP_CLONE_DIR - echo "# 3. List repositories: completed" + ) + cd $TEMP_CLONE_DIR + echo "# 1. Configure paths and variables: completed" } { - echo "# 4. Assert dependencies are installed: started" - echo " # git-filter-repo" - FILTER_REPO_OUTPUT=$(git filter-repo -h) + echo "# 2. Assert dependencies are installed: started" + echo -n " # gitleaks: " + gitleaks version > /dev/null 2>&1 + GITLEAKS_MISSING=$? + if [[ $GITLEAKS_MISSING != "0" ]]; then + echo "missing" + echo " # Attempting to install from https://github.com/gitleaks/gitleaks" + echo " # Installing at ~/.local/bin/gitleaks" + echo " # Note: Building gitleaks will fail if go is not installed." 
+ mkdir -p ~/.local/bin + git clone https://github.com/gitleaks/gitleaks.git /tmp/gitleaks-git + cd /tmp/gitleaks-git + make build + cp gitleaks ~/.local/bin/gitleaks + else + echo "found at $(which gitleaks)" + fi + echo -n " # git-filter-repo: " + git filter-repo -h > /dev/null 2>&1 FILTER_REPO_MISSING=$? - if [[ $FILTER_REPO_MISSING == "1" ]]; then + if [[ $FILTER_REPO_MISSING != "0" ]]; then + echo "missing" echo " # git-filter repo not installed. Attempting to install from https://github.com/newren/git-filter-repo" + echo " # Installing at ~/.local/bin/git-filter-repo" + mkdir -p ~/.local/bin/git-filter-repo curl -o ~/.local/bin/git-filter-repo https://raw.githubusercontent.com/newren/git-filter-repo/main/git-filter-repo chmod +x ~/.local/bin/git-filter-repo else - echo " # git-filter repo found" + echo "found at $(which git-filter-repo)" fi - echo " # BFG Repo-Cleaner" - BFG_OUTPUT=$(bfg --version) + echo -n " # BFG Repo-Cleaner: " + bfg --version > /dev/null 2>&1 BFG_MISSING=$? - if [[ $BFG_MISSING == "1" ]]; then - echo " # BGF Repo-Cleaner not installed. Automated installation not yet implemented." + if [[ $BFG_MISSING != "0" ]]; then + echo "missing" + echo " # Automated installation not yet implemented." echo " # Install BFG Repo-Cleaner by downloading the latest jar from:" echo " # https://rtyley.github.io/bfg-repo-cleaner/ " echo " # Then run:" @@ -141,15 +155,13 @@ That's obviously a lot of steps to handle each repo manually, so let's script it echo " # Exiting..." exit 1 else - echo -n " # BGF Repo-Cleaner found at:" - which bfg + echo "found at: $(which bfg)" fi - cd $TEMP_CLONE_DIR - echo "# 4. Assert dependencies are installed: completed" + echo "# 2. Assert dependencies are installed: completed" } { - echo "# 5. Clone all constituent repositories, assert default branch is main: started" + echo "# 3. 
Clone all constituent repositories, assert default branch is main: started" cd "$TEMP_CLONE_DIR" for repo in "${REPOSITORIES[@]:1}"; do REPO_NAME=$(echo $repo | cut -d' ' -f1) @@ -163,11 +175,11 @@ That's obviously a lot of steps to handle each repo manually, so let's script it cd "$TEMP_CLONE_DIR" done cd $TEMP_CLONE_DIR - echo "# 5. Clone all constituent repositories, assert default branch is main: completed" + echo "# 3. Clone all constituent repositories, assert default branch is main: completed" } { - echo "# 6. Rewrite history (to subdirectory) for each constituent repository: started" + echo "# 4. Rewrite history (to subdirectory) for each constituent repository: started" for repo in $(echo "$TEMP_CLONE_DIR"/*); do REPO_NAME=$(basename $repo) cd "$repo" @@ -176,11 +188,11 @@ That's obviously a lot of steps to handle each repo manually, so let's script it cd "$TEMP_CLONE_DIR" done cd $TEMP_CLONE_DIR - echo "# 6. Rewrite history (to subdirectory) for each constituent repository: completed" + echo "# 4. Rewrite history (to subdirectory) for each constituent repository: completed" } { - echo "# 7. Scan each constituent repository for leaked secrets: started" + echo "# 5. Scan each constituent repository for leaked secrets: started" for repo in $(echo "$TEMP_CLONE_DIR"/*); do REPO_NAME=$(basename $repo) cd "$repo" @@ -190,11 +202,11 @@ That's obviously a lot of steps to handle each repo manually, so let's script it gitleaks detect -l warn --no-banner -r /tmp/gitleaks/$REPO_NAME/gitleaks-report.json && echo "No secrets detected" || COMPROMISED_REPOS+="$REPO_NAME\n" done cd $TEMP_CLONE_DIR - echo "# 7. Scan each constituent repository for leaked secrets: completed" + echo "# 5. Scan each constituent repository for leaked secrets: completed" } { - echo "# 8. Nuke secrets from git history: started" + echo "# 6. 
Nuke secrets from git history: started" for repo in $(echo "$TEMP_CLONE_DIR"/*); do cd $repo REPO_NAME=$(basename $repo) @@ -211,11 +223,11 @@ That's obviously a lot of steps to handle each repo manually, so let's script it rm /tmp/gitleaks/secret.txt done cd $TEMP_CLONE_DIR - echo "# 8. Nuke secrets from git history: completed" + echo "# 6. Nuke secrets from git history: completed" } { - echo "# 9. Verify repository histories are clean of secrets: started" + echo "# 7. Verify repository histories are clean of secrets: started" for repo in $(echo "$TEMP_CLONE_DIR"/*); do REPO_NAME=$(basename $repo) cd "$repo" @@ -225,11 +237,11 @@ That's obviously a lot of steps to handle each repo manually, so let's script it gitleaks detect -l warn --no-banner -r /tmp/gitleaks/$REPO_NAME/gitleaks-report.json && echo "No secrets detected" || echo " # Something didn't work right; clean $REPO_NAME manually" done cd $TEMP_CLONE_DIR - echo "# 9. Verify repository histories are clean of secrets: completed" + echo "# 7. Verify repository histories are clean of secrets: completed" } { - echo "# 10. Init monorepo and add constituent repos: started" + echo "# 8. Init monorepo and add constituent repos: started" cd "$MONOREPO_DIR" git init for repo in $(echo "$TEMP_CLONE_DIR"/*); do @@ -243,7 +255,7 @@ That's obviously a lot of steps to handle each repo manually, so let's script it echo " # Running one more gitleaks scan for sanity." gitleaks detect -v --no-banner cd $TEMP_CLONE_DIR - echo "# 10. Init monorepo and add constituent repos: completed" + echo "# 8. 
Init monorepo and add constituent repos: completed" } { diff --git a/homelab/paladin/image-1.png b/homelab/paladin/image-1.png new file mode 100644 index 00000000..02682f06 Binary files /dev/null and b/homelab/paladin/image-1.png differ diff --git a/homelab/paladin/image.png b/homelab/paladin/image.png new file mode 100644 index 00000000..1632364f Binary files /dev/null and b/homelab/paladin/image.png differ diff --git a/sites/Jafner.dev/config.toml b/sites/Jafner.dev/config.toml index e18f9c01..c34c51cb 100644 --- a/sites/Jafner.dev/config.toml +++ b/sites/Jafner.dev/config.toml @@ -55,7 +55,7 @@ disableHugoGeneratorInject = false enableThemeToggle = false enableSharingButtons = true enableGlobalLanguageMenu = false - customCSS = ["/css/toc-no-underline.css"] + customCSS = ["/css/toc-no-underline.css","/css/code-blocks-no-shadow.css"] customJS = [] justifyContent = false # Set "text-align: justify" to .post-content. [params.author] diff --git a/sites/Jafner.dev/content/homelab/monorepo/index.md b/sites/Jafner.dev/content/homelab/monorepo/index.md index 6a17a7f5..f5a38867 100644 --- a/sites/Jafner.dev/content/homelab/monorepo/index.md +++ b/sites/Jafner.dev/content/homelab/monorepo/index.md @@ -1,23 +1,45 @@ ---- ++++ title = 'Monorepo' description = " " date = 2024-07-17T10:23:25-07:00 aliases = [] author = "Joey Hafner" ogimage = '/img/Jafner.dev.logo.png' -slug = "draft" +slug = "monorepo" draft = true ---- ++++ -## How to consolidate disparate repos into a monorepo. +## How to: Pull all your Git repositories into one big monorepo + +*Table of contents* +{{% toc %}} ### What is a monorepo? -And why would I want it? +Take all your projects across GitHub, GitLab, Gitea, and local. Plop them into one big repo. That's it. +### Why would I want one? *An illustration of the issue* ![graphic design is my passion](monorepo.png) -### Grabbing all the pieces +That's more complicated. Especially for a non-traditional type of monorepo like mine. 
Wikipedia gives a pretty good summary on its [Monorepo](https://en.wikipedia.org/wiki/Monorepo) page, which I'll summarize briefly here: + +- You can re-use code more easily. +- You can centralize management of dependencies. +- You can synchronize work-in-progress across projects very easily. +- You can refactor code across the entire organization at once. + +A lot of those benefits are specific to collaborative environments where a monolithic shared version control system mitigates patterns of divergent work. But I'm just one guy writing mostly scripts and infrastructure-as-code. What do I get out of migrating to a monorepo? + +- *Visibility.* A centralized body of work lets me *see everything* at once without pulling 15 repos. +- *Centralized workflow.* I'm constantly finding ways to improve my workflow. If I add a `.pre-commit-config.yaml` file to stop myself from committing secrets, I won't accidentally forget when I work in another repo. +- *Integration of infrastructure and application.* Most of my work is piles of `docker-compose.yml` files and bash scripts. But every once in a while I actually make something worth running. By keeping applications and infrastructure in the same repo, I can dramatically simplify CI/CD workflows. + +## How to migrate to a monorepo +If you're reading this, you probably know what a monorepo is, and why you might want one. Let's get into the juice. + +I broke down the project into **four stages**: *collect, clean, combine, and commit*. Let's dig into what each of those means. + +### Collect: Grabbing all the pieces The first step in this project for me was to make a list of all the repos I wanted to consolidate. This included public and private repos, and only repos containing my original work. **My list:** @@ -58,38 +80,498 @@ So, the first part is to **pull the repositories we want to consolidate**. And at this point we have all our repos together in one place. Next we need to prep each repo to be consolidated.
But before that, here's the script for the steps described above in this +#### 1. Configure paths and variables ```bash -REPOSITORIES=( - "Jafner.net ssh://git@gitea.jafner.tools:2225/Jafner/Jafner.net.git" - "homelab ssh://git@gitea.jafner.tools:2225/Jafner/homelab.git" - "docker_config git@github.com:Jafner/docker_config.git" - "wiki git@github.com:Jafner/wiki.git" - "cloud_tools git@github.com:Jafner/cloud_tools.git" - "self-hosting git@github.com:Jafner/self-hosting.git" - "Jafner.dev git@github.com:Jafner/Jafner.dev.git" - "dotfiles_gitea ssh://git@gitea.jafner.tools:2225/Jafner/dotfiles.git" - "dotfiles_github git@github.com:Jafner/dotfiles.git" - "nvgm ssh://git@gitea.jafner.tools:2225/Jafner/nvgm.git" - "pamidi_gitea ssh://git@gitea.jafner.tools:2225/Jafner/pamidi.git" - "pamidi_github git@github.com:Jafner/pamidi.git" - "docker-llm-amd ssh://git@gitea.jafner.tools:2225/Jafner/docker-llm-amd.git" - "doradash ssh://git@gitea.jafner.tools:2225/Jafner/doradash.git" - "clip-it-and-ship-it git@github.com:Jafner/clip-it-and-ship-it.git" - "PyClipIt ssh://git@gitea.jafner.tools:2225/Jafner/PyClipIt.git" - "razer-bat git@github.com:Jafner/Razer-BatteryLevelRGB.git" - "5etools-docker git@github.com:Jafner/5etools-docker.git" - "jafner-homebrew git@github.com:Jafner/jafner-homebrew.git" -) +{ + echo "# 1. Configure paths and variables: started" + echo " # Configure local paths for Git repos. Should not contain any of the git directories involved, as all will be cloned fresh. Consider using a temporary project directory." + MONOREPO_DIR=$HOME/Git/Jafner.net + TEMP_CLONE_DIR=$HOME/Git/monorepo-temp + mkdir -p "$TEMP_CLONE_DIR" + mkdir -p "$MONOREPO_DIR" + echo " # Configure array of repositories to compose into monorepo." + echo " # Note: First repository in list is parent monorepo." + echo " # Note: While we don't need write access to any of the constituent repositories, we do need authenticated access for any private repositories. Use ssh URLs when possible." 
+ REPOSITORIES=( + "Jafner.net ssh://git@gitea.jafner.tools:2225/Jafner/Jafner.net.git" + "homelab ssh://git@gitea.jafner.tools:2225/Jafner/homelab.git" + "docker_config git@github.com:Jafner/docker_config.git" + "wiki git@github.com:Jafner/wiki.git" + "cloud_tools git@github.com:Jafner/cloud_tools.git" + "self-hosting git@github.com:Jafner/self-hosting.git" + "Jafner.dev git@github.com:Jafner/Jafner.dev.git" + "dotfiles_gitea ssh://git@gitea.jafner.tools:2225/Jafner/dotfiles.git" + "dotfiles_github git@github.com:Jafner/dotfiles.git" + "nvgm ssh://git@gitea.jafner.tools:2225/Jafner/nvgm.git" + "pamidi_gitea ssh://git@gitea.jafner.tools:2225/Jafner/pamidi.git" + "pamidi_github git@github.com:Jafner/pamidi.git" + "docker-llm-amd ssh://git@gitea.jafner.tools:2225/Jafner/docker-llm-amd.git" + "doradash ssh://git@gitea.jafner.tools:2225/Jafner/doradash.git" + "clip-it-and-ship-it git@github.com:Jafner/clip-it-and-ship-it.git" + "PyClipIt ssh://git@gitea.jafner.tools:2225/Jafner/PyClipIt.git" + "razer-bat git@github.com:Jafner/Razer-BatteryLevelRGB.git" + "5etools-docker git@github.com:Jafner/5etools-docker.git" + "jafner-homebrew git@github.com:Jafner/jafner-homebrew.git" + ) + cd $TEMP_CLONE_DIR + echo "# 1. Configure paths and variables: completed" +} ``` -### Scrubbing clean +#### 2. Assert dependencies are installed +```bash +{ + echo "# 2. Assert dependencies are installed: started" + echo -n " # gitleaks: " + gitleaks version > /dev/null 2>&1 + GITLEAKS_MISSING=$? + if [[ $GITLEAKS_MISSING != "0" ]]; then + echo "missing" + echo " # Attempting to install from https://github.com/gitleaks/gitleaks" + echo " # Installing at ~/.local/bin/gitleaks" + echo " # Note: Building gitleaks will fail if go is not installed." 
+ mkdir -p ~/.local/bin + git clone https://github.com/gitleaks/gitleaks.git /tmp/gitleaks-git + cd /tmp/gitleaks-git + make build + cp gitleaks ~/.local/bin/gitleaks + else + echo "found at $(which gitleaks)" + fi + echo -n " # git-filter-repo: " + git filter-repo -h > /dev/null 2>&1 + FILTER_REPO_MISSING=$? + if [[ $FILTER_REPO_MISSING != "0" ]]; then + echo "missing" + echo " # git-filter repo not installed. Attempting to install from https://github.com/newren/git-filter-repo" + echo " # Installing at ~/.local/bin/git-filter-repo" + mkdir -p ~/.local/bin/git-filter-repo + curl -o ~/.local/bin/git-filter-repo https://raw.githubusercontent.com/newren/git-filter-repo/main/git-filter-repo + chmod +x ~/.local/bin/git-filter-repo + else + echo "found at $(which git-filter-repo)" + fi + echo -n " # BFG Repo-Cleaner: " + bfg --version > /dev/null 2>&1 + BFG_MISSING=$? + if [[ $BFG_MISSING != "0" ]]; then + echo "missing" + echo " # Automated installation not yet implemented." + echo " # Install BFG Repo-Cleaner by downloading the latest jar from:" + echo " # https://rtyley.github.io/bfg-repo-cleaner/ " + echo " # Then run:" + echo ' # sudo cp ~/Downloads/bfg.jar /usr/bin/bfg.jar && echo "java -jar /usr/bin/bfg.jar $@" | sudo tee /usr/bin/bfg && sudo chmod +x /usr/bin/bfg' + echo " # Exiting..." + exit 1 + else + echo "found at: $(which bfg)" + fi + echo "# 2. Assert dependencies are installed: completed" +} +``` -### Reorganization and pruning +#### 3. Clone all constituent repositories, assert default branch is main +```bash +{ + echo "# 3. Clone all constituent repositories, assert default branch is main: started" + cd "$TEMP_CLONE_DIR" + for repo in "${REPOSITORIES[@]:1}"; do + REPO_NAME=$(echo $repo | cut -d' ' -f1) + echo " # Cloning repo $REPO_NAME" + git clone --quiet $(echo "$repo" | cut -d' ' -f2) "$REPO_NAME" > /dev/null + cd "$REPO_NAME" + DEFAULT_BRANCH=$(cat .git/HEAD | cut -d' ' -f2 | xargs basename) + if ! 
[[ $DEFAULT_BRANCH == "main" ]]; then + git branch -m $DEFAULT_BRANCH main + fi + cd "$TEMP_CLONE_DIR" + done + cd $TEMP_CLONE_DIR + echo "# 3. Clone all constituent repositories, assert default branch is main: completed" +} +``` -### Rebuilding automations +### Clean: Scrubbing secrets from our repos and making them play nice with each other +The next stage of the process is to prepare each of our repos to be integrated into our monorepo. I took 4 steps to process my repos: + +4. Rewrite each repo into a self-named subdirectory of itself. + - We do this so that our files don't collide when we merge everything into the monorepo. + - We use `git filter-repo --to-subdirectory-filter` to rewrite the entire history of the repo. + - This looks like moving all of the contents of `~/Git/homelab` to `~/Git/homelab/homelab`. + - The root of the repo doesn't move, so we still have `.git` at `~/Git/homelab/.git`. + - If you wanted to handle this process manually, you could reorganize your repos during this step. For example, rewriting `~/Git/pamidi` to `~/Git/pamidi/projects/pamidi`. +5. Scan each repo's files *and history* for exposed secrets. + - We use gitleaks to scrub through the full history of each repo and create a report of all findings. + - We place the report into a subdirectory of `/tmp`, which helps mitigate the risk of accidentally keeping a list of every API key and password you've ever accidentally leaked sitting somewhere on your system. + - You may want to add custom rules to help gitleaks find secrets matching an unusual pattern. Check the [configuration documentation](https://github.com/gitleaks/gitleaks?tab=readme-ov-file#configuration) for details on how to write your own detection rules. + - You may want to explicitly permit some secrets to remain in a repo, such as example API keys or passwords in documentation.
See [gitleaks' documentation](https://github.com/gitleaks/gitleaks?tab=readme-ov-file#additional-configuration) for guidance on how to handle that situation. Just make sure to re-run this step with the new rules before proceeding to the next step. +6. Nuke secrets from git histories. + - We generate a text file containing all the secrets listed in our gitleaks report. + - We use BFG Repo-Cleaner's `--replace-text` flag to find and replace all historic and present instances of each secret. + - **Warning:** this process is not perfect. Any *multi-line secrets* in the gitleaks report will be passed into BFG *one line at a time*. So if you have a multi-line secret with a *line which matches non-secrets*, BFG will nuke all matches. + - After we're done with the secrets file, we overwrite its contents with random characters before deleting it. +7. Verify repository histories are clean. + - We use gitleaks to scrub through the full history of each repo *again*. This time we only print information to the console and don't store a report file. + - If any repos still contain secrets, we need to address that manually. + +And after that, we've got our repos prepared to consolidate. The next steps are mostly manual and to-taste, rather than prescribed. Before that, here's the script for each of the steps above: + +#### 4. Rewrite history to subdirectory for each constituent repository. +```bash +{ + echo "# 4. Rewrite history (to subdirectory) for each constituent repository: started" + for repo in $(echo "$TEMP_CLONE_DIR"/*); do + REPO_NAME=$(basename $repo) + cd "$repo" + echo " # Rewriting repo $REPO_NAME" + git filter-repo --quiet --to-subdirectory-filter "$REPO_NAME" --force > /dev/null + cd "$TEMP_CLONE_DIR" + done + cd $TEMP_CLONE_DIR + echo "# 4. Rewrite history (to subdirectory) for each constituent repository: completed" +} +``` + +#### 5. Scan each constituent repository for leaked secrets +```bash +{ + echo "# 5. 
Scan each constituent repository for leaked secrets: started" + for repo in $(echo "$TEMP_CLONE_DIR"/*); do + REPO_NAME=$(basename $repo) + cd "$repo" + mkdir -p /tmp/gitleaks/$REPO_NAME/ + echo -n " # Scanning repo $REPO_NAME " + rm -f /tmp/gitleaks/$REPO_NAME/gitleaks-report.json + gitleaks detect -l warn --no-banner -r /tmp/gitleaks/$REPO_NAME/gitleaks-report.json && echo "No secrets detected" || COMPROMISED_REPOS+="$REPO_NAME\n" + done + cd $TEMP_CLONE_DIR + echo "# 5. Scan each constituent repository for leaked secrets: completed" +} +``` + +#### 6. Nuke secrets from git history +```bash +{ + echo "# 6. Nuke secrets from git history: started" + for repo in $(echo "$TEMP_CLONE_DIR"/*); do + cd $repo + REPO_NAME=$(basename $repo) + report=/tmp/gitleaks/$REPO_NAME/gitleaks-report.json + if ! [[ $(cat $report | jq length) > 0 ]]; then + echo " # No exposed secrets in repo $REPO_NAME; Skipping." + continue + fi + echo " # Nuking secrets in repo $REPO_NAME" + cat $report | jq -r '.[].Secret' > /tmp/gitleaks/secret.txt + bfg --replace-text /tmp/gitleaks/secret.txt --no-blob-protection . + git reflog expire --expire=now --all && git gc --prune=now --aggressive + cat /dev/urandom | tr -dc A-Za-z0-9 | head -c1000 > /tmp/gitleaks/secret.txt + rm /tmp/gitleaks/secret.txt + done + cd $TEMP_CLONE_DIR + echo "# 6. Nuke secrets from git history: completed" +} +``` + +#### 7. Verify repository histories are clean of secrets +```bash +{ + echo "# 7. Verify repository histories are clean of secrets: started" + for repo in $(echo "$TEMP_CLONE_DIR"/*); do + REPO_NAME=$(basename $repo) + cd "$repo" + mkdir -p /tmp/gitleaks/$REPO_NAME/ + echo -n " # Scanning repo $REPO_NAME " + rm -f /tmp/gitleaks/$REPO_NAME/gitleaks-report.json + gitleaks detect -l warn --no-banner -r /tmp/gitleaks/$REPO_NAME/gitleaks-report.json && echo "No secrets detected" || echo " # Something didn't work right; clean $REPO_NAME manually" + done + cd $TEMP_CLONE_DIR + echo "# 7. 
Verify repository histories are clean of secrets: completed" +} +``` + +### Combine: Assemble the pieces and glue it together +Our third stage is the last one we can handle programmatically. Just initialize the new monorepo. + +8. Initialize the new monorepo and add constituent repos. + - It's *almost* as simple as `cp -r old-repo new-repo/old-repo`, but not quite. + - First we run a good old-fashioned `git init`. Quick reminder to run whatever `git config --global` commands you need to ensure it initializes properly for your system. + - Next we glom each of our constituent repos onto the new one in four steps: + - Add the local path to the repo as a remote. + - Fetch the remote with tags. + - Merge the changes on the `main` branch. + - Then remove the remote. + - After all that I ran one last gitleaks scan for good measure. This is entirely superstition. + +#### 8. Initialize the monorepo and add constituent repos +```bash +{ + echo "# 8. Init monorepo and add constituent repos: started" + cd "$MONOREPO_DIR" + git init + for repo in $(echo "$TEMP_CLONE_DIR"/*); do + REPO_NAME=$(basename $repo) + echo "Adding $REPO_NAME" + git remote add "$REPO_NAME" "$repo" + git fetch "$REPO_NAME" --tags + git merge --quiet --allow-unrelated-histories -m "Merge $REPO_NAME into $(basename $MONOREPO_DIR)" "$REPO_NAME/main" + git remote remove "$REPO_NAME" + done + echo " # Running one more gitleaks scan for sanity." + gitleaks detect -v --no-banner + cd $TEMP_CLONE_DIR + echo "# 8. Init monorepo and add constituent repos: completed" +} +``` + +### Commit: Reorganizing and publishing the monorepo +The last stage is to tie everything up nicely and push to the upstream. + +9. Reorganize the repo to taste. + - In my case, that meant creating the `archive`, `dotfiles`, `homelab`, `projects`, and `sites` root-level subdirectories, and sorting my other repos into those. +10. Update repo-root configuration files.
+ - Files like `.gitignore` and `.gitattributes` *do still work in subdirectories* (`.gitmodules` does not; Git only reads it from the repo root). But I prefer to consolidate these into one root-level file. + - Similar stuff like `.dockerignore`, `.pre-commit-config.yaml`, and whatever your project has should be considered. + - `.github/workflows`, `.gitea/workflows`, and `.gitlab-ci.yml` all need to be at the repo root in order to work, which will necessitate some refactoring of those jobs. +11. Write a new `README.md` for the newly-created monorepo root. In my case, I chose to populate it with a Map of Contents for the repo. It looks like this: + +![example readme for jafner.net monorepo|400](readme-example.png) + +12. Create and add the remote origin server, and push! + - In my case, I created the repo as private because I wasn't ready to publish it for the entire internet to scan. Once I created my private Gitea repo, I ran: + - `git remote add origin ssh://git@gitea.jafner.tools:2225/Jafner/Jafner.net.git` + - `git push --set-upstream origin main --force` + +And that's it. Well, except for all the stuff that still needs to be done! + +## Closing thoughts and next steps +At this point we probably still have some work to do before our repo is at functional parity with our distributed, self-contained repos. For my repo, these are the last steps before we can mark this migration project as resolved: + +- Reconfigure CI/CD pipelines to work properly with subdirectories. +- Migrate relevant issues from old repos to new repo. I have no idea how to do this, especially when repos are split across multiple platforms. +- Comb through all the cobwebbed projects we've just consolidated and archive or delete as appropriate. +- Read through [korfuri/awesome-monorepo](https://github.com/korfuri/awesome-monorepo), which is a dope page I *really* wish I'd seen before I undertook this project. + +Go forth and consolidate!
## The Script In the interest of ensuring my process was maximally reproducible (so I could wipe and restart every time I made a mistake), I wrote this script. Steps 1, 2, and 3 are hard-coded with parameters specific to the directories and repos I'm working with. After that, everything *should be* agnostic of directories and repos. +Additionally, I wrote it such that the entire thing could be copied in chunks or in full and pasted into a terminal (zero-indent curly brackets `{}` indicate self-contained chunks). I have tested this script in both bash and zsh. + +### `monorepo.sh` +```bash +#!/bin/bash + +{ + echo "# 0. Quick reset: started" + rm -rf $HOME/Git/Jafner.net + rm -rf $HOME/Git/monorepo-temp + rm -rf /tmp/gitleaks + cd $HOME/Git + echo "# 0. Quick reset: completed" +} + +{ + echo "# 1. Configure paths and variables: started" + echo " # Configure local paths for Git repos. Should not contain any of the git directories involved, as all will be cloned fresh. Consider using a temporary project directory." + MONOREPO_DIR=$HOME/Git/Jafner.net + TEMP_CLONE_DIR=$HOME/Git/monorepo-temp + mkdir -p "$TEMP_CLONE_DIR" + mkdir -p "$MONOREPO_DIR" + echo " # Configure array of repositories to compose into monorepo." + echo " # Note: First repository in list is parent monorepo." + echo " # Note: While we don't need write access to any of the constituent repositories, we do need authenticated access for any private repositories. Use ssh URLs when possible." 
+    REPOSITORIES=(  # each entry is "<name> <clone-url>"; later steps split on the single space with cut
+        "Jafner.net ssh://git@gitea.jafner.tools:2225/Jafner/Jafner.net.git"
+        "homelab ssh://git@gitea.jafner.tools:2225/Jafner/homelab.git"
+        "docker_config git@github.com:Jafner/docker_config.git"
+        "wiki git@github.com:Jafner/wiki.git"
+        "cloud_tools git@github.com:Jafner/cloud_tools.git"
+        "self-hosting git@github.com:Jafner/self-hosting.git"
+        "Jafner.dev git@github.com:Jafner/Jafner.dev.git"
+        "dotfiles_gitea ssh://git@gitea.jafner.tools:2225/Jafner/dotfiles.git"
+        "dotfiles_github git@github.com:Jafner/dotfiles.git"
+        "nvgm ssh://git@gitea.jafner.tools:2225/Jafner/nvgm.git"
+        "pamidi_gitea ssh://git@gitea.jafner.tools:2225/Jafner/pamidi.git"
+        "pamidi_github git@github.com:Jafner/pamidi.git"
+        "docker-llm-amd ssh://git@gitea.jafner.tools:2225/Jafner/docker-llm-amd.git"
+        "doradash ssh://git@gitea.jafner.tools:2225/Jafner/doradash.git"
+        "clip-it-and-ship-it git@github.com:Jafner/clip-it-and-ship-it.git"
+        "PyClipIt ssh://git@gitea.jafner.tools:2225/Jafner/PyClipIt.git"
+        "razer-bat git@github.com:Jafner/Razer-BatteryLevelRGB.git"
+        "5etools-docker git@github.com:Jafner/5etools-docker.git"
+        "jafner-homebrew git@github.com:Jafner/jafner-homebrew.git"
+    )
+    cd "$TEMP_CLONE_DIR"
+    echo "# 1. Configure paths and variables: completed"
+}
+
+{  # Step 2: probe for gitleaks, git-filter-repo and bfg; install the first two if absent, abort if bfg is absent.
+    echo "# 2. Assert dependencies are installed: started"
+    echo -n " # gitleaks: "
+    gitleaks version > /dev/null 2>&1
+    GITLEAKS_MISSING=$?  # non-zero exit status of the probe means "not installed"
+    if [[ $GITLEAKS_MISSING != "0" ]]; then
+        echo "missing"
+        echo " # Attempting to install from https://github.com/gitleaks/gitleaks"
+        echo " # Installing at ~/.local/bin/gitleaks"
+        echo " # Note: Building gitleaks will fail if go is not installed."
+        mkdir -p ~/.local/bin
+        git clone https://github.com/gitleaks/gitleaks.git /tmp/gitleaks-git
+        cd /tmp/gitleaks-git
+        make build
+        cp gitleaks ~/.local/bin/gitleaks
+    else
+        echo "found at $(which gitleaks)"
+    fi
+    echo -n " # git-filter-repo: "
+    git filter-repo -h > /dev/null 2>&1
+    FILTER_REPO_MISSING=$?
+    if [[ $FILTER_REPO_MISSING != "0" ]]; then
+        echo "missing"
+        echo " # git-filter repo not installed. Attempting to install from https://github.com/newren/git-filter-repo"
+        echo " # Installing at ~/.local/bin/git-filter-repo"
+        mkdir -p ~/.local/bin  # create only the parent; a directory at the target path would make curl -o fail
+        curl -o ~/.local/bin/git-filter-repo https://raw.githubusercontent.com/newren/git-filter-repo/main/git-filter-repo
+        chmod +x ~/.local/bin/git-filter-repo
+    else
+        echo "found at $(which git-filter-repo)"
+    fi
+    echo -n " # BFG Repo-Cleaner: "
+    bfg --version > /dev/null 2>&1
+    BFG_MISSING=$?
+    if [[ $BFG_MISSING != "0" ]]; then
+        echo "missing"
+        echo " # Automated installation not yet implemented."
+        echo " # Install BFG Repo-Cleaner by downloading the latest jar from:"
+        echo " # https://rtyley.github.io/bfg-repo-cleaner/ "
+        echo " # Then run:"
+        echo " # sudo cp ~/Downloads/bfg.jar /usr/bin/bfg.jar && echo 'java -jar /usr/bin/bfg.jar \"\$@\"' | sudo tee /usr/bin/bfg && sudo chmod +x /usr/bin/bfg"  # single-quoted "$@" so the wrapper forwards its arguments instead of expanding in the user's shell
+        echo " # Exiting..."
+        exit 1
+    else
+        echo "found at: $(which bfg)"
+    fi
+    echo "# 2. Assert dependencies are installed: completed"
+}
+
+{  # Step 3: clone every entry except the first (the parent monorepo) and rename its checked-out branch to main.
+    echo "# 3. Clone all constituent repositories, assert default branch is main: started"
+    cd "$TEMP_CLONE_DIR"
+    for repo in "${REPOSITORIES[@]:1}"; do
+        REPO_NAME=$(echo "$repo" | cut -d' ' -f1)
+        echo " # Cloning repo $REPO_NAME"
+        git clone --quiet "$(echo "$repo" | cut -d' ' -f2)" "$REPO_NAME" > /dev/null
+        cd "$REPO_NAME"
+        DEFAULT_BRANCH=$(git symbolic-ref --short HEAD)  # full branch name, including any '/' that basename would drop
+        if ! [[ $DEFAULT_BRANCH == "main" ]]; then
+            git branch -m "$DEFAULT_BRANCH" main
+        fi
+        cd "$TEMP_CLONE_DIR"
+    done
+    cd "$TEMP_CLONE_DIR"
+    echo "# 3. Clone all constituent repositories, assert default branch is main: completed"
+}
+
+{  # Step 4: move each clone's history under a subdirectory named after the clone.
+    echo "# 4. Rewrite history (to subdirectory) for each constituent repository: started"
+    for repo in "$TEMP_CLONE_DIR"/*; do  # direct glob: no word-splitting of paths
+        REPO_NAME=$(basename "$repo")
+        cd "$repo"
+        echo " # Rewriting repo $REPO_NAME"
+        git filter-repo --quiet --to-subdirectory-filter "$REPO_NAME" --force > /dev/null
+        cd "$TEMP_CLONE_DIR"
+    done
+    cd "$TEMP_CLONE_DIR"
+    echo "# 4. Rewrite history (to subdirectory) for each constituent repository: completed"
+}
+
+{  # Step 5: run gitleaks in each clone, writing a JSON report under /tmp/gitleaks/<name>/.
+    echo "# 5. Scan each constituent repository for leaked secrets: started"
+    for repo in "$TEMP_CLONE_DIR"/*; do
+        REPO_NAME=$(basename "$repo")
+        cd "$repo"
+        mkdir -p "/tmp/gitleaks/$REPO_NAME/"
+        echo -n " # Scanning repo $REPO_NAME "
+        rm -f "/tmp/gitleaks/$REPO_NAME/gitleaks-report.json"  # drop any stale report before scanning
+        gitleaks detect -l warn --no-banner -r "/tmp/gitleaks/$REPO_NAME/gitleaks-report.json" && echo "No secrets detected" || COMPROMISED_REPOS+="$REPO_NAME\n"
+    done
+    cd "$TEMP_CLONE_DIR"
+    echo "# 5. Scan each constituent repository for leaked secrets: completed"
+}
+
+{  # Step 6: for each clone whose report is non-empty, replace the reported secrets in history with bfg. Uses jq, which step 2 does not check for.
+    echo "# 6. Nuke secrets from git history: started"
+    for repo in "$TEMP_CLONE_DIR"/*; do
+        cd "$repo"
+        REPO_NAME=$(basename "$repo")
+        report="/tmp/gitleaks/$REPO_NAME/gitleaks-report.json"
+        if ! [[ $(jq length "$report") -gt 0 ]]; then  # arithmetic comparison; '>' inside [[ ]] compares strings
+            echo " # No exposed secrets in repo $REPO_NAME; Skipping."
+            continue
+        fi
+        echo " # Nuking secrets in repo $REPO_NAME"
+        jq -r '.[].Secret' "$report" > /tmp/gitleaks/secret.txt
+        bfg --replace-text /tmp/gitleaks/secret.txt --no-blob-protection .
+        git reflog expire --expire=now --all && git gc --prune=now --aggressive
+        cat /dev/urandom | tr -dc A-Za-z0-9 | head -c1000 > /tmp/gitleaks/secret.txt  # overwrite the plaintext secrets before deleting the file
+        rm /tmp/gitleaks/secret.txt
+    done
+    cd "$TEMP_CLONE_DIR"
+    echo "# 6. Nuke secrets from git history: completed"
+}
+
+{  # Step 7: rescan every clone; a failing scan is reported but does not stop the script.
+    echo "# 7. Verify repository histories are clean of secrets: started"
+    for repo in "$TEMP_CLONE_DIR"/*; do
+        REPO_NAME=$(basename "$repo")
+        cd "$repo"
+        mkdir -p "/tmp/gitleaks/$REPO_NAME/"
+        echo -n " # Scanning repo $REPO_NAME "
+        rm -f "/tmp/gitleaks/$REPO_NAME/gitleaks-report.json"
+        gitleaks detect -l warn --no-banner -r "/tmp/gitleaks/$REPO_NAME/gitleaks-report.json" && echo "No secrets detected" || echo " # Something didn't work right; clean $REPO_NAME manually"
+    done
+    cd "$TEMP_CLONE_DIR"
+    echo "# 7. Verify repository histories are clean of secrets: completed"
+}
+
+{  # Step 8: init the monorepo and merge each clone's main branch via a temporary remote.
+    echo "# 8. Init monorepo and add constituent repos: started"
+    cd "$MONOREPO_DIR"
+    git init
+    for repo in "$TEMP_CLONE_DIR"/*; do
+        REPO_NAME=$(basename "$repo")
+        echo "Adding $REPO_NAME"
+        git remote add "$REPO_NAME" "$repo"
+        git fetch "$REPO_NAME" --tags
+        git merge --quiet --allow-unrelated-histories -m "Merge $REPO_NAME into $(basename "$MONOREPO_DIR")" "$REPO_NAME/main"
+        git remote remove "$REPO_NAME"  # the remote is only needed for the fetch and merge above
+    done
+    echo " # Running one more gitleaks scan for sanity."
+    gitleaks detect -v --no-banner
+    cd "$TEMP_CLONE_DIR"
+    echo "# 8. Init monorepo and add constituent repos: completed"
+}
+
+{  # Closing banner listing the manual follow-up steps.
+    echo "############################################################"
+    echo "# #"
+    echo "# Next steps: #"
+    echo "# 1. Reorganize the repo to taste #"
+    echo "# 2. Update repo-root configuration files such as: #"
+    echo "# - .gitignore, .gitattributes, .gitmodules #"
+    echo "# - .dockerignore, .pre-commit-config.yaml #"
+    echo "# - .github/workflows, .gitlab-ci.yml, .gitea/workflows #"
+    echo "# - LICENSE, CONTRIBUTING, MAINTAINERS #"
+    echo "# 3. Write a new root-level README.md #"
+    echo "# 4. Add the remote repo as origin with: #"
+    echo "# git remote add origin #"
+    echo "# 5. Push the code to the Git server with: #"
+    echo "# git push --set-upstream origin main #"
+    echo "# #"
+    echo "############################################################"
+}
+```
diff --git a/sites/Jafner.dev/content/homelab/monorepo/readme-example.png b/sites/Jafner.dev/content/homelab/monorepo/readme-example.png
new file mode 100644
index 00000000..103ce11f
Binary files /dev/null and b/sites/Jafner.dev/content/homelab/monorepo/readme-example.png differ
diff --git a/sites/Jafner.dev/static/css/code-blocks-no-shadow.css b/sites/Jafner.dev/static/css/code-blocks-no-shadow.css
new file mode 100644
index 00000000..bdc0a2d4
--- /dev/null
+++ b/sites/Jafner.dev/static/css/code-blocks-no-shadow.css
@@ -0,0 +1,5 @@
+code[class*="language-"],
+pre[class*="language-"] {
+    text-shadow: none;
+    box-shadow: none;
+}
\ No newline at end of file