diff --git a/.gitignore b/.gitignore index 808c3da..04271c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ .env books/* -mindmap/* \ No newline at end of file +# mindmap/* \ No newline at end of file diff --git a/libro.ipynb b/libro.ipynb index 960a365..d4c2ad3 100644 --- a/libro.ipynb +++ b/libro.ipynb @@ -2,33 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, - "id": "9f587bf1", - "metadata": {}, - "outputs": [], - "source": [ - "import { load } from \"jsr:@std/dotenv\";\n", - "import OpenAI from \"jsr:@openai/openai\";\n", - "\n", - "const _ = await load({ export: true });\n", - "const openai = new OpenAI();" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "4650126c", - "metadata": {}, - "outputs": [], - "source": [ - "const safeName = (s: string) => s.replace(/[<>:\"/\\\\|?*\\x00-\\x1F]/g, \"_\").trim();\n", - "const bookName =\n", - " \"Nmap Network Scanning Official Nmap Project Guide to Network Discovery and Security Scanning\";\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "id": "ae701b32", "metadata": {}, "outputs": [], @@ -59,7 +33,66 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, + "id": "9f587bf1", + "metadata": {}, + "outputs": [], + "source": [ + "import { load } from \"jsr:@std/dotenv\";\n", + "import OpenAI from \"jsr:@openai/openai\";\n", + "\n", + "// flush all previous env vars\n", + "/* for (const key of Object.keys(Deno.env.toObject())) {\n", + " Deno.env.delete(key);\n", + "} */\n", + "const _ = await load({ export: true });\n", + "const openai = new OpenAI();" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4650126c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "progit\n" + ] + } + ], + "source": [ + "const safeName = (s: string) => s.replace(/[<>:\"/\\\\|?*\\x00-\\x1F]/g, \"_\").trim();\n", + "const bookName = Deno.env.get(\"BOOK_NAME\");\n", + 
"\n", + "if (!bookName) {\n", + " throw new Error(\"BOOK_NAME environment variable is not set\");\n", + "}\n", + "\n", + "console.log(bookName);\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "211e589f", + "metadata": {}, + "outputs": [], + "source": [ + "// BOOKMARK_LEVEL=1\n", + "// BOOK_FROM=10\n", + "// BOOK_TO=-1\n", + "\n", + "Deno.env.set(\"BOOKMARK_LEVEL\", \"1\");\n", + "Deno.env.set(\"BOOK_FROM\", \"10\");\n", + "Deno.env.set(\"BOOK_TO\", \"-1\");" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "id": "8bee369d", "metadata": {}, "outputs": [ @@ -68,51 +101,33 @@ "output_type": "stream", "text": [ "[\n", - " { title: \"01\", level: 1, page: 27, endpage: 50 },\n", - " { title: \"02\", level: 1, page: 51, endpage: 71 },\n", - " { title: \"03\", level: 1, page: 72, endpage: 97 },\n", - " { title: \"04\", level: 1, page: 98, endpage: 119 },\n", - " { title: \"05\", level: 1, page: 120, endpage: 158 },\n", - " { title: \"06\", level: 1, page: 159, endpage: 168 },\n", - " { title: \"07\", level: 1, page: 169, endpage: 193 },\n", - " { title: \"08\", level: 1, page: 194, endpage: 227 },\n", - " { title: \"09\", level: 1, page: 228, endpage: 278 },\n", - " { title: \"10\", level: 1, page: 279, endpage: 315 },\n", - " { title: \"11\", level: 1, page: 316, endpage: 326 },\n", - " { title: \"12\", level: 1, page: 327, endpage: 356 },\n", - " { title: \"13\", level: 1, page: 357, endpage: 382 },\n", - " { title: \"14\", level: 1, page: 383, endpage: 391 },\n", - " { title: \"15\", level: 1, page: 392, endpage: 392 }\n", - "]\n", - "Processing chapter: 01\n", - "Processing chapter: 02\n", - "Processing chapter: 03\n", - "Processing chapter: 04\n", - "Processing chapter: 05\n", - "Processing chapter: 06\n", - "Processing chapter: 07\n", - "Processing chapter: 08\n", - "Processing chapter: 09\n", - "Processing chapter: 10\n", - "Processing chapter: 11\n", - "Processing chapter: 12\n", - "Processing chapter: 13\n", - "Processing 
chapter: 14\n", - "Processing chapter: 15\n", - "Finished processing chapter: 15\n", - "Finished processing chapter: 11\n", - "Finished processing chapter: 14\n", - "Finished processing chapter: 05\n", - "Finished processing chapter: 07\n", - "Finished processing chapter: 06\n", - "Finished processing chapter: 12\n", - "Finished processing chapter: 04\n", - "Finished processing chapter: 03\n", - "Finished processing chapter: 01\n", - "Finished processing chapter: 13\n", - "Finished processing chapter: 09\n", - "Finished processing chapter: 08\n", - "Finished processing chapter: 02\n" + " { title: \"Git Branching\", level: 1, page: 69, endpage: 110 },\n", + " { title: \"Git on the Server\", level: 1, page: 111, endpage: 131 },\n", + " { title: \"Distributed Git\", level: 1, page: 132, endpage: 172 },\n", + " { title: \"GitHub\", level: 1, page: 173, endpage: 225 },\n", + " { title: \"Git Tools\", level: 1, page: 226, endpage: 348 },\n", + " { title: \"Customizing Git\", level: 1, page: 349, endpage: 380 },\n", + " { title: \"Git and Other Systems\", level: 1, page: 381, endpage: 436 },\n", + " { title: \"Git Internals\", level: 1, page: 437, endpage: 476 },\n", + " {\n", + " title: \"Appendix A: Git in Other Environments\",\n", + " level: 1,\n", + " page: 477,\n", + " endpage: 489\n", + " },\n", + " {\n", + " title: \"Appendix B: Embedding Git in your Applications\",\n", + " level: 1,\n", + " page: 490,\n", + " endpage: 501\n", + " },\n", + " {\n", + " title: \"Appendix C: Git Commands\",\n", + " level: 1,\n", + " page: 502,\n", + " endpage: 502\n", + " }\n", + "]\n" ] } ], @@ -133,7 +148,7 @@ " (res) => new TextDecoder().decode(res.stdout),\n", ").then((data) => {\n", " const lines = data.split(\"\\n\");\n", - " let bookmarks = [];\n", + " let bookmarks: Bookmark[] = [];\n", " let currentBookmark: Partial<Bookmark> | null = null;\n", "\n", " for (const line of lines) {\n", @@ -164,7 +179,12 @@ " bookmarks.push(currentBookmark as Bookmark);\n", " }\n", "\n", - " bookmarks = 
bookmarks.filter((b) => b.level === 1).slice(8);\n", + " bookmarks = bookmarks.filter((b) =>\n", + " b.level === parseInt(Deno.env.get(\"BOOKMARK_LEVEL\") ?? \"1\")\n", + " ).slice(\n", + " parseInt(Deno.env.get(\"BOOK_FROM\") ?? \"0\"),\n", + " parseInt(Deno.env.get(\"BOOK_TO\") ?? String(bookmarks.length)),\n", + " );\n", "\n", " for (let i = 0; i < bookmarks.length; i++) {\n", " const current = bookmarks[i] as Bookmark;\n", @@ -178,8 +198,71 @@ "\n", " return bookmarks;\n", "});\n", - "console.log(bookmarks);\n", "\n", + "if (!bookmarks || bookmarks.length === 0) {\n", + " throw new Error(\"No bookmarks found in the PDF.\");\n", + "} /* else if (bookmarks.length > ordinals.length) {\n", + " throw new Error(\n", + " `Not enough ordinals for the number of chapters: ${bookmarks.length} chapters but only ${ordinals.length} ordinals.`,\n", + " );\n", + "} */\n", + "\n", + "console.log(bookmarks);\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6029d4ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing chapter: Appendix C: Git Commands at resp_04d1e841c5c4dfc2006984fd06079c81949afb817229705523\n", + "Finished processing chapter: Appendix C: Git Commands\n", + "Processing chapter: Customizing Git at resp_0ee8a5d1ba6d78a6006984fd06313c819087c4177391a50101\n", + "Finished processing chapter: Customizing Git\n", + "Processing chapter: Appendix A: Git in Other Environments at resp_00ee07fd3ffdee22006984fd069c7c8193bf192277fb80f4bc\n", + "Finished processing chapter: Appendix A: Git in Other Environments\n", + "Processing chapter: Git Internals at resp_0d312b5627d2306a006984fd0660c081939ebc561caf071506\n", + "Finished processing chapter: Git Internals\n", + "Processing chapter: Git on the Server at resp_07581a2e7f9be083006984fd05d32c8190b87cf1e8d7d68a5c\n", + "Finished processing chapter: Git on the Server\n", + "Processing chapter: GitHub at 
resp_0e4a9edfb24ed6a1006984fd06722081909497d0c7e35bfb1d\n", + "Finished processing chapter: GitHub\n", + "Processing chapter: Appendix B: Embedding Git in your Applications at resp_079044470442fcc0006984fd05fc1881978cacbd612d6e2f30\n", + "Finished processing chapter: Appendix B: Embedding Git in your Applications\n", + "Processing chapter: Distributed Git at resp_0947b861b710eb8a006984fd06918881968e0e1f70b418cc88\n", + "Finished processing chapter: Distributed Git\n", + "Processing chapter: Git Tools at resp_01ec7c1b665f5b12006984fd06b9048196a56f1e376843b55a\n", + "Finished processing chapter: Git Tools\n", + "Processing chapter: Git and Other Systems at resp_0e3a4b42775ae33f006984fd061cf8819389817080ff57b78f\n", + "Finished processing chapter: Git and Other Systems\n", + "Processing chapter: Git Branching at resp_0bb2c9b2b1435efc006984fd0641d08194b3fd55b5e3b3b432\n", + "Finished processing chapter: Git Branching\n" + ] + }, + { + "data": { + "text/plain": [ + "[\n", + " \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n", + " \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n", + " \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n", + " \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n", + " \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n", + " \u001b[90mundefined\u001b[39m\n", + "]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "const promises = [];\n", "for (const [idx, ch] of bookmarks.entries()) {\n", " async function processChapter(idx: number, title: string, file: File) {\n", @@ -215,7 +298,7 @@ "\n", " const mindMapContent = response.output_text;\n", " await Deno.writeTextFile(\n", - " `./mindmaps/${safeName(title)}.md`,\n", + " `./mindmap/${safeName(title)}.md`,\n", " mindMapContent,\n", " );\n", "\n", @@ -266,7 +349,7 @@ { "data": { "text/plain": [ - "{ object: \u001b[32m\"file\"\u001b[39m, deleted: \u001b[33mtrue\u001b[39m, 
id: \u001b[32m\"file-X2CJi3gozhJniBURG2qfsm\"\u001b[39m }" + "{ object: \u001b[32m\"file\"\u001b[39m, deleted: \u001b[33mtrue\u001b[39m, id: \u001b[32m\"file-CxDoeyfgNLkuizajP6EHas\"\u001b[39m }" ] }, "execution_count": 5, diff --git a/mindmap/Appendix A_ Git in Other Environments.md b/mindmap/Appendix A_ Git in Other Environments.md new file mode 100644 index 0000000..0e4f365 --- /dev/null +++ b/mindmap/Appendix A_ Git in Other Environments.md @@ -0,0 +1,495 @@ +```markmap +# Appendix A: Git in Other Environments + +## Purpose / framing +- You’ve learned Git at the command line + - Work with local files + - Connect repositories over a network + - Collaborate effectively +- Git is often part of a larger ecosystem + - Terminal isn’t always the best interface +- Goal of this appendix + - Survey other environments where Git is useful + - Show how other applications work alongside Git + +## Graphical Interfaces (GUIs) +- Git’s native environment: the terminal + - New features appear there first + - Full power is available at the command line +- Why GUIs exist / when they help + - Plain text isn’t best for all tasks + - Visual representations can be essential + - Some users prefer point-and-click workflows +- Tools differ by intended workflow + - Some clients expose only a curated subset of Git + - To support a specific way of working the author considers effective + - “Better” is workflow-dependent + - Tools are more/less fit for purpose, not universally better +- Key constraint + - GUIs can’t do anything the command line client can’t + - Command line = most power and control + +## gitk and git-gui (bundled with Git) + +### gitk (graphical history viewer) +- What it is + - GUI history viewer + - Think: powerful GUI shell over + - `git log` + - `git grep` +- Best used for + - Finding something that happened in the past + - Visualizing project history +- How to start + - In a Git repo directory + - `cd ` + - Run: + - `gitk [git log options]` +- Options + - Accepts many 
command-line options + - Most are passed through to underlying `git log` + - Especially useful: + - `--all` + - Show commits reachable from any ref (not just `HEAD`) +- Interface overview (Figure 151: The gitk history viewer) + - Top area (graph; similar to `git log --graph`) + - Dot = commit + - Lines = parent relationships + - Refs = colored boxes + - Yellow dot = `HEAD` + - Red dot = changes not yet a commit + - Bottom area (selected commit) + - Left = comments + patch + - Right = summary view + - Middle controls + - Searching history controls + +### git-gui (commit crafting tool) +- What it is + - Primarily a tool for crafting commits +- How to start + - Run: + - `git gui` +- Interface overview (Figure 152: The git-gui commit tool) + - Left: index (staging area) + - Unstaged changes: top + - Staged changes: bottom + - Move entire files between states + - Click file icons + - Select a file for viewing + - Click file name + - Top right: diff view + - Shows changes for the selected file + - Stage granular changes + - Stage individual hunks (or lines) by right-clicking + - Bottom right: message + action area + - Enter commit message + - Click “Commit” + - Similar to `git commit` + - Amend last commit workflow + - Select “Amend” radio button + - Updates “Staged Changes” with contents of last commit + - Then + - Stage/unstage changes + - Alter commit message + - Click “Commit” again + - Replaces old commit with a new one + +### Tool style classification +- `gitk` and `git-gui` are **task-oriented tools** + - Tailored to one purpose each + - `gitk`: viewing/searching history + - `git-gui`: creating/amending commits + - Omit features not needed for that task + +## GitHub for macOS and Windows +- What these clients are + - Two **workflow-oriented** Git clients + - One for Windows + - One for macOS + - Workflow-oriented meaning + - Focus on a curated set of commonly used features + - Avoid exposing all of Git’s functionality +- Similarity across platforms + - Designed to 
look and work very much alike + - Treated as a single product here +- Not a full product rundown + - They have their own documentation + - Focus here: “changes” view tour (main work area) + +### “Changes” view: main areas +- Left: repositories list (tracked by client) + - Add repo via “+” icon + - Clone + - Attach local repository +- Center: commit-input area + - Enter commit message + - Select which files to include + - Commit history placement differs + - Windows: directly below + - macOS: separate tab +- Right: diff view + - Shows working directory changes + - Or changes included in selected commit +- Top-right: “Sync” button + - Primary network interaction method +- Important note + - No GitHub account required + - Designed to highlight GitHub service/workflow + - Still works with + - Any repository + - Any Git host (network operations) + +### Installation +- Download locations + - GitHub for Windows: + - GitHub for macOS: +- First run behavior + - Walks through first-time Git setup + - Configure name + - Configure email address + - Sets sane defaults for common config options + - Credential caches + - CRLF behavior +- Update model + - “Evergreen” + - Updates downloaded/installed in background while app is open + - Bundled Git + - Likely no need to manually update Git separately +- Windows addition + - Includes shortcut to launch PowerShell with Posh-git (covered later) +- Add repositories + - Shows list of GitHub repos you have access to + - Clone in one step + - Add existing local repo + - Drag directory from + - Finder (macOS) + - Windows Explorer (Windows) + - Added to repository list on the left + +### Recommended Workflow (“GitHub Flow”) +- Covered elsewhere in more detail (“The GitHub Flow”), gist here + - (a) Commit to a branch + - (b) Sync with a remote repository fairly regularly +- Branch management differences + - macOS + - “Create Branch” button at top of window (Figure 155) + - Windows + - Type new branch name in branch-switching widget (Figure 
156) +- Making commits + - Make changes in working directory + - Client shows changed files + - Enter commit message + - Select files to include + - Click “Commit” + - Shortcut: `Ctrl-Enter` (Windows) or `⌘-Enter` (macOS) +- Network interaction via “Sync” + - Git internally separates + - push / fetch / merge / rebase + - GitHub clients collapse these into one multi-step feature + - Clicking “Sync” does + 1. `git pull --rebase` + - If fails due to merge conflict + - Fall back to `git pull --no-rebase` + 2. `git push` + - Rationale + - Common network sequence in this style + - Squashing into one command saves time + +### GitHub client summary (fit and tradeoffs) +- Strengths + - Well-suited to intended workflow + - Developers and non-developers can collaborate within minutes + - Best practices baked into the tools +- When to choose something else + - Workflow differs + - Want more control over + - How network operations are done + - When network operations are done + - Recommendation + - Use another client or the command line + +## Other GUIs (general landscape) +- Many graphical Git clients exist + - From specialized single-purpose tools + - To apps that try to expose everything Git can do +- Where to find them + - Official Git site curated list: + - More comprehensive list (Git wiki): + - + +## Git in Visual Studio +- Built-in Git tooling + - Starting Visual Studio 2019 version 16.8 +- Supported Git functionality + - Create or clone a repository + - Open and browse repository history + - Create and checkout branches and tags + - Stash, stage, and commit changes + - Fetch, pull, push, or sync commits + - Merge and rebase branches + - Resolve merge conflicts + - View diffs + - “…and more!” +- Next step + - Read official documentation + +## Git in Visual Studio Code +- Built-in Git support +- Requirement + - Git version 2.0.0 (or newer) installed +- Main features + - Diff in the gutter for the file you are editing + - Git Status Bar (lower left) + - Current branch + - 
Dirty indicators + - Incoming commits + - Outgoing commits + - Common operations inside the editor + - Initialize a repository + - Clone a repository + - Create branches and tags + - Stage and commit changes + - Push/pull/sync with a remote branch + - Resolve merge conflicts + - View diffs +- GitHub Pull Requests support (extension) + - +- Official documentation + - + +## Git in JetBrains IDEs (IntelliJ / PyCharm / WebStorm / PhpStorm / RubyMine / others) +- Git Integration plugin + - Ships with JetBrains IDEs +- Provides + - Dedicated IDE view for Git + - Dedicated IDE view for GitHub Pull Requests + - Example: Version Control ToolWindow (Figure 157) +- Dependency + - Relies on command-line `git` + - Requires `git` to be installed +- Official documentation + - + +## Git in Sublime Text +- Availability + - From version 3.2 onwards +- Features + - Sidebar shows git status with a badge/icon + - `.gitignore`-listed files/folders are faded in sidebar + - Status bar shows + - Current git branch + - How many modifications you have made + - Gutter markers show all changes to a file + - Partial integration with Sublime Merge + - Use some Sublime Merge git client functionality from within Sublime Text + - Requires Sublime Merge installed + - Sublime Merge: +- Official documentation + - + +## Git in Bash +- Motivation + - Use shell features to make Git friendlier in Bash +- Note + - Git ships with plugins for several shells + - Not enabled by default + +### Enable tab completion (`git-completion.bash`) +- Get completion script matching your Git version + - Check version: `git version` + - In Git source for that release + - `git checkout tags/vX.Y.Z` (match your installed version) + - Copy file: + - `contrib/completion/git-completion.bash` + - Place somewhere handy (example) + - Home directory +- Enable in `~/.bashrc` + - Add: + - `. 
~/git-completion.bash` +- Use completion + - Inside a Git repository, type + - `git chec` + - Auto-completes to `git checkout` + - Completion coverage + - Git subcommands + - Command-line parameters + - Remotes and ref names (where appropriate) + +### Customize prompt (`git-prompt.sh`) +- Motivation + - Show Git info in prompt + - Current branch + - Working directory status +- Setup + - Copy file from Git source: + - `contrib/completion/git-prompt.sh` + - Place in home directory + - Add to `~/.bashrc` + - `. ~/git-prompt.sh` + - `export GIT_PS1_SHOWDIRTYSTATE=1` + - `export PS1='\w$(__git_ps1 " (%s)")\$ '` +- Meaning of prompt pieces + - `\w` + - Print current working directory + - `\$` + - Print `$` part of the prompt + - `__git_ps1 " (%s)"` + - Calls function provided by `git-prompt.sh` + - Uses formatting argument `" (%s)"` +- Result + - Prompt shows Git context inside Git-controlled projects (Figure 158) +- Further info + - Both scripts have documentation + - Inspect `git-completion.bash` + - Inspect `git-prompt.sh` + +## Git in Zsh + +### Tab completion +- Enable + - Add to `~/.zshrc` + - `autoload -Uz compinit && compinit` +- Interface (more powerful than Bash) + - Example: `git che` + - Shows options with descriptions, e.g. 
+ - `check-attr` — display gitattributes information + - `check-ref-format` — ensure a reference name is well formed + - `checkout` — checkout branch or paths to working tree + - `checkout-index` — copy files from index to working directory + - `cherry` — find commits not merged upstream + - `cherry-pick` — apply changes introduced by an existing commit + - Ambiguous completions + - Not just listed; include helpful descriptions + - Navigate list by repeatedly hitting tab + - Completion scope + - Git commands and arguments + - Refs/remotes and other repo-internal names + - Filenames and standard Zsh completions + +### Prompt integration with `vcs_info` +- Built-in framework + - `vcs_info` provides VCS information for prompts +- Configure branch name in right prompt + - Add to `~/.zshrc` + - `autoload -Uz vcs_info` + - `precmd_vcs_info() { vcs_info }` + - `precmd_functions+=( precmd_vcs_info )` + - `setopt prompt_subst` + - `RPROMPT='${vcs_info_msg_0_}'` + - `# PROMPT='${vcs_info_msg_0_}%# '` + - `zstyle ':vcs_info:git:*' formats '%b'` +- Result + - Branch shown on right side inside Git repos (Figure 159) + - Left-side prompt also possible + - Uncomment the `PROMPT=...` line +- Documentation + - `vcs_info` docs in `zshcontrib(1)` + - Online: + - + +### Alternative: Git’s `git-prompt.sh` +- Option + - Use Git’s `git-prompt.sh` instead of `vcs_info` +- Compatibility + - Works with Bash and Zsh +- Reference + - + +### Zsh framework: oh-my-zsh +- What it is + - A framework to enhance Zsh +- Location + - +- Git-related value + - Powerful git tab completion via plugin system + - Many prompt “themes” showing version-control data + - Example theme shown (Figure 160) + +## Git in PowerShell + +### Why / what +- Limitation + - `cmd.exe` isn’t really capable of a customized Git experience +- If using PowerShell + - `posh-git` provides + - Powerful tab completion + - Enhanced prompt (repository status awareness) + - Works with PowerShell Core on Linux/macOS too + - Project: + - 
Example shown (Figure 161) + +### Installation + +#### Prerequisites (Windows only): ExecutionPolicy +- Requirement + - Set local `ExecutionPolicy` to `RemoteSigned` + - Anything except `Undefined` and `Restricted` +- `AllSigned` vs `RemoteSigned` + - `AllSigned` + - Local scripts (your own) also need digital signatures + - `RemoteSigned` + - Only scripts with `ZoneIdentifier` = Internet (downloaded from web) need signatures + - Others do not +- Scope guidance + - Admin / all users + - `-Scope LocalMachine` + - Normal user / no admin rights + - `-Scope CurrentUser` +- References + - Scopes: + - ExecutionPolicy: +- Set `RemoteSigned` for all users + - `Set-ExecutionPolicy -Scope LocalMachine -ExecutionPolicy RemoteSigned -Force` + +#### Install via PowerShell Gallery +- When available + - PowerShell 5+, or PowerShell 4 with PackageManagement installed +- Reference + - +- Install commands (CurrentUser) + - `Install-Module posh-git -Scope CurrentUser -Force` + - `Install-Module posh-git -Scope CurrentUser -AllowPrerelease -Force` + - Newer beta with PowerShell Core support +- Install for all users + - Use `-Scope AllUsers` + - Run from elevated PowerShell console +- If install fails due to `PowerShellGet` error + - Run first: + - `Install-Module PowerShellGet -Force -SkipPublisherCheck` + - Then retry + - Reason given + - Built-in Windows PowerShell modules are signed with a different publishment certificate + +#### Update PowerShell prompt (load automatically) +- Import now + - `Import-Module posh-git` +- Auto-import every time PowerShell starts + - `Add-PoshGitToProfile -AllHosts` + - Adds import statement to `$profile` script +- Note + - Multiple `$profile` scripts exist + - e.g., console profile vs ISE profile + +#### Install from source +- Download a release + - +- Uncompress +- Import module via full path to `posh-git.psd1` + - `Import-Module \src\posh-git.psd1` +- Add to profile + - `Add-PoshGitToProfile -AllHosts` +- Effect + - Adds proper line to 
`profile.ps1` + - Active next time PowerShell opens + +#### Further references (prompt meaning & customization) +- Git status summary information + - +- Prompt customization variables + - + +## Appendix summary +- You can harness Git from within everyday tools (GUIs, IDEs, editors, shells) +- You can also access Git repositories from your own programs +``` \ No newline at end of file diff --git a/mindmap/Appendix B_ Embedding Git in your Applications.md b/mindmap/Appendix B_ Embedding Git in your Applications.md new file mode 100644 index 0000000..e9248ef --- /dev/null +++ b/mindmap/Appendix B_ Embedding Git in your Applications.md @@ -0,0 +1,695 @@ +# Appendix B: Embedding Git in your Applications + +## Why embed / integrate Git +- Target audience for integration + - Developer-focused applications + - likely benefit from integration with source control + - Non-developer applications + - example: document editors + - can benefit from version-control features +- Why Git specifically + - Git’s model works very well for many different scenarios + +## Two main integration options +- Option A: spawn a shell and call the `git` command-line program +- Option B: embed a Git library into your application +- This appendix covers + - command-line integration + - several of the most popular embeddable Git libraries + +## Command-line Git (calling the `git` CLI) +- What it is + - spawn a shell process + - use the Git command-line tool to do the work +- Benefits + - canonical behavior + - all of Git’s features are supported + - fairly easy to implement + - most runtime environments can invoke a process with command-line arguments +- Downsides + - Output is plain text + - you must parse Git’s output to read progress/results + - Git’s output format can change occasionally + - parsing can be inefficient and error-prone + - Lack of error recovery + - if repository is corrupted + - or user has malformed configuration value + - Git may refuse to perform many operations + - Process 
management complexity + - must maintain a shell environment in a separate process + - coordinating many processes can be challenging + - especially if multiple processes may access the same repository + +## Libgit2 +- What it is + - dependency-free implementation of Git + - focus: a nice API for use within other programs + - website: https://libgit2.org + +### Libgit2 C API (whirlwind tour) +- Example flow shown + - Open a repository + - `git_repository *repo;` + - `int error = git_repository_open(&repo, "/path/to/repository");` + - Dereference `HEAD` to a commit + - `git_object *head_commit;` + - `error = git_revparse_single(&head_commit, repo, "HEAD^{commit}");` + - `git_commit *commit = (git_commit*)head_commit;` + - Print commit properties + - `printf("%s", git_commit_message(commit));` + - `const git_signature *author = git_commit_author(commit);` + - `printf("%s <%s>\n", author->name, author->email);` + - `const git_oid *tree_id = git_commit_tree_id(commit);` + - Cleanup + - `git_commit_free(commit);` + - `git_repository_free(repo);` + +- Repository opening details + - `git_repository` type + - handle to a repository with an in-memory cache + - `git_repository_open` + - simplest method when you know exact path to working directory or `.git` folder + - other APIs mentioned + - `git_repository_open_ext` + - includes options for searching + - `git_clone` (and friends) + - make a local clone of a remote repository + - `git_repository_init` + - create an entirely new repository + +- Dereferencing `HEAD` details + - rev-parse usage + - uses rev-parse syntax + - reference: “see Branch References for more on this” + - return type + - `git_revparse_single` returns a `git_object*` + - represents something that exists in the repository’s Git object database + - `git_object` is a “parent” type for several object kinds + - child types share the same memory layout as `git_object` + - safe to cast to the correct “child” type when appropriate + - cast safety note in this 
example + - `git_object_type(commit)` would return `GIT_OBJ_COMMIT` + - therefore it’s safe to cast to `git_commit*` + +- Commit property access details + - message + - `git_commit_message(commit)` + - author signature + - `git_commit_author(commit)` returns `const git_signature *` + - fields shown + - `author->name` + - `author->email` + - tree id + - `git_commit_tree_id(commit)` returns a `git_oid` + - `git_oid` + - Libgit2 representation for a SHA-1 hash + +### Patterns illustrated by the Libgit2 C sample +- Error-code style + - pattern: declare pointer, pass its address into a Libgit2 call + - return value: integer error code + - `0` = success + - `< 0` = error +- Memory / ownership rules + - if Libgit2 populates a pointer for you + - you must free it + - if Libgit2 returns a `const` pointer + - you don’t free it + - it becomes invalid when the owning object is freed +- Practical note + - “Writing C is a bit painful.” + +### Language bindings (Libgit2 ecosystem) +- Implication of “writing C is painful” + - you’re unlikely to write C when using Libgit2 + - there are language-specific bindings that make integration easier + +#### Ruby bindings: Rugged +- Name: Rugged +- URL: https://github.com/libgit2/rugged +- Example equivalent to the C code + - `repo = Rugged::Repository.new('path/to/repository')` + - `commit = repo.head.target` + - `puts commit.message` + - `puts "#{commit.author[:name]} <#{commit.author[:email]}>" ` + - `tree = commit.tree` +- Why it’s “less cluttered” + - error handling + - Rugged uses exceptions + - examples mentioned: `ConfigError`, `ObjectError` + - resource management + - no explicit freeing + - Ruby is garbage-collected + +- Example: crafting a commit from scratch (Rugged) + - Code sequence shown (with numbered markers) + - ① create a new blob + - `blob_id = repo.write("Blob contents", :blob) ①` + - work with index + - `index = repo.index` + - `index.read_tree(repo.head.target.tree)` + - ② add a new file entry + - `index.add(:path => 
'newfile.txt', :oid => blob_id) ②` + - build a signature hash + - `sig = {` + - ` :email => "bob@example.com",` + - ` :name => "Bob User",` + - ` :time => Time.now,` + - `}` + - create the commit (with parameters) + - `commit_id = Rugged::Commit.create(repo,` + - ` :tree => index.write_tree(repo), ③` + - ` :author => sig,` + - ` :committer => sig, ④` + - ` :message => "Add newfile.txt", ⑤` + - ` :parents => repo.empty? ? [] : [ repo.head.target ].compact, ⑥` + - ` :update_ref => 'HEAD', ⑦` + - `)` + - ⑧ look up the created commit object + - `commit = repo.lookup(commit_id) ⑧` + + - Meaning of each numbered step (①–⑧) + - ① Create a new blob + - contains the contents of a new file + - ② Populate index and add file + - populate index with head commit’s tree + - add the new file at path `newfile.txt` + - ③ Create a new tree in the ODB + - uses it for the new commit + - ④ Author and committer fields + - same signature used for both + - ⑤ Commit message + - `"Add newfile.txt"` + - ⑥ Parents + - when creating a commit, you must specify parents + - uses the tip of `HEAD` for the single parent + - handles empty repository case + - ⑦ Update a ref (optional) + - Rugged (and Libgit2) can optionally update a reference when making a commit + - here it updates `HEAD` + - ⑧ Return value / lookup + - the return value is the SHA-1 hash of the new commit object + - you can use it to get a `Commit` object + +- Performance note + - Ruby code is clean + - Libgit2 does heavy lifting → runs pretty fast +- Pointer to later section + - “If you’re not a rubyist, we touch on some other bindings in Other Bindings.” + +## Advanced Functionality (Libgit2) +- Out-of-core-Git capabilities + - Libgit2 has capabilities outside the scope of core Git +- Example capability: pluggability + - can provide custom “backends” for several operation types + - enables storage in a different way than stock Git + - backend types mentioned + - configuration + - ref storage + - object database + - “among other 
things” + +### Custom backend example: object database (ODB) +- Example source + - from Libgit2 backend examples + - URL: https://github.com/libgit2/libgit2-backends +- Setup shown (with numbered markers) + - ① create ODB “frontend” + - `git_odb *odb;` + - `int error = git_odb_new(&odb); ①` + - meaning: initialize empty ODB frontend container for backends + - ② initialize custom backend + - `git_odb_backend *my_backend;` + - `error = git_odb_backend_mine(&my_backend, /*…*/); ②` + - ③ add backend to frontend + - `error = git_odb_add_backend(odb, my_backend, 1); ③` + - open a repository + - `git_repository *repo;` + - `error = git_repository_open(&repo, "some-path");` + - ④ set repository to use custom ODB + - `error = git_repository_set_odb(repo, odb); ④` + - meaning: repo uses this ODB to look up objects +- Note about the example’s error handling + - errors are captured but not handled + - “We hope your code is better than ours.” + +### Implementing `git_odb_backend_mine` +- What it is + - constructor for your own ODB implementation +- Requirement + - fill in the `git_odb_backend` structure properly +- Example struct layout shown + - `typedef struct {` + - ` git_odb_backend parent;` + - ` // Some other stuff` + - ` void *custom_context;` + - `} my_backend_struct;` +- Subtle memory-layout constraint + - `my_backend_struct`’s first member must be a `git_odb_backend` structure + - ensures Libgit2 sees the memory layout it expects +- Flexibility + - the rest of the struct is arbitrary + - can be as large or small as needed +- Example initialization function responsibilities shown + - allocate + - `backend = calloc(1, sizeof (my_backend_struct));` + - set custom context + - `backend->custom_context = …;` + - fill supported function pointers in `parent` + - `backend->parent.read = &my_backend__read;` + - `backend->parent.read_prefix = &my_backend__read_prefix;` + - `backend->parent.read_header = &my_backend__read_header;` + - `// …` + - return it through output parameter 
+ - `*backend_out = (git_odb_backend *) backend;` + - return success constant + - `return GIT_SUCCESS;` +- Where to find full signatures + - Libgit2 source file: + - `include/git2/sys/odb_backend.h` + - which signatures to implement depends on use case + +## Other Bindings (Libgit2) +- Breadth + - bindings exist for many languages +- Section purpose + - show small examples using a few more complete bindings packages (as of writing) +- Other languages mentioned as having libraries (various maturity) + - C++ + - Go + - Node.js + - Erlang + - JVM +- Official collection of bindings + - browse repos: https://github.com/libgit2 +- Common goal for the code in this section + - return the commit message from the commit eventually pointed to by `HEAD` + - “sort of like `git log -1`” + +### LibGit2Sharp +- For + - .NET or Mono applications +- URL + - https://github.com/libgit2/libgit2sharp +- Characteristics + - bindings written in C# + - wraps raw Libgit2 calls with native-feeling CLR APIs +- Example program (single expression) + - `new Repository(@"C:\path\to\repo").Head.Tip.Message;` +- Desktop Windows note + - NuGet package available to get started quickly + +### objective-git +- Platform context + - Apple platform + - likely using Objective-C as implementation language +- URL + - https://github.com/libgit2/objective-git +- Example program outline + - initialize repo + - `GTRepository *repo =` + - ` [[GTRepository alloc] initWithURL:[NSURL fileURLWithPath: @"/path/to/repo"]` + - `error:NULL];` + - retrieve commit message + - `NSString *msg = [[[repo headReferenceWithError:NULL] resolvedTarget] message];` +- Swift note + - objective-git is fully interoperable with Swift + +### pygit2 +- What it is + - Python bindings for Libgit2 +- URL + - https://www.pygit2.org +- Example program (chained calls) + - `pygit2.Repository("/path/to/repo") # open repository` + - `.head # get the current branch` + - `.peel(pygit2.Commit) # walk down to the commit` + - `.message # read the 
message` + +## Further Reading (Libgit2) +- Scope note + - full treatment of Libgit2 capabilities is outside the scope of the book +- Libgit2 resources + - API documentation: https://libgit2.github.com/libgit2 + - guides: https://libgit2.github.com/docs +- Other bindings + - check bundled README and tests + - often have small tutorials and pointers to further reading + +## JGit +- Purpose + - use Git from within a Java program +- What it is + - fully featured Git library called JGit + - relatively full-featured implementation of Git written natively in Java + - widely used in the Java community + - under the Eclipse umbrella +- Home + - https://www.eclipse.org/jgit/ + +### Getting Set Up (JGit) +- Multiple ways to connect project to JGit +- Easiest path: Maven + - add dependency snippet to `<dependencies>` in `pom.xml` + - `<dependency>` + - ` <groupId>org.eclipse.jgit</groupId>` + - ` <artifactId>org.eclipse.jgit</artifactId>` + - ` <version>3.5.0.201409260305-r</version>` + - `</dependency>` + - version note + - likely advanced by the time you read this + - check updates: + - https://mvnrepository.com/artifact/org.eclipse.jgit/org.eclipse.jgit + - result + - Maven automatically acquires and uses the JGit libraries you need +- Manual dependency management + - pre-built binaries + - https://www.eclipse.org/jgit/download + - compile/run examples + - `javac -cp .:org.eclipse.jgit-3.5.0.201409260305-r.jar App.java` + - `java -cp .:org.eclipse.jgit-3.5.0.201409260305-r.jar App` + +### Plumbing (JGit) +- Two levels of API + - plumbing + - porcelain +- Terminology source: Git itself + - porcelain APIs + - friendly front-end for common user-level actions + - like what a normal user would use the Git command-line tool for + - plumbing APIs + - interact with low-level repository objects directly + +#### Starting point: `Repository` +- Starting point for most JGit sessions + - class: `Repository` +- Creating/opening a filesystem-based repository + - note: JGit also allows other storage models + - Create new repository + - `Repository newlyCreatedRepo = 
FileRepositoryBuilder.create(new File("/tmp/new_repo/.git"));` + - `newlyCreatedRepo.create();` + - Open existing repository + - `Repository existingRepo = new FileRepositoryBuilder()` + - `.setGitDir(new File("my_repo/.git"))` + - `.build();` + +#### `FileRepositoryBuilder` (finding repositories) +- Builder style + - fluent API +- Helps locate a Git repository + - whether or not your program knows exactly where it’s located +- Methods/strategies mentioned + - environment variables + - `.readEnvironment()` + - search starting from working directory + - `.setWorkTree(…).findGitDir()` + - open known `.git` directory + - `.setGitDir(...)` (as in example) + +#### Plumbing API: quick sampling + explanations +- Sampling actions shown (code outline) + - Get a reference + - `Ref master = repo.getRef("master");` + - Get object ID pointed to by reference + - `ObjectId masterTip = master.getObjectId();` + - Rev-parse + - `ObjectId obj = repo.resolve("HEAD^{tree}");` + - Load raw object contents + - `ObjectLoader loader = repo.open(masterTip);` + - `loader.copyTo(System.out);` + - Create a branch + - `RefUpdate createBranch1 = repo.updateRef("refs/heads/branch1");` + - `createBranch1.setNewObjectId(masterTip);` + - `createBranch1.update();` + - Delete a branch + - `RefUpdate deleteBranch1 = repo.updateRef("refs/heads/branch1");` + - `deleteBranch1.setForceUpdate(true);` + - `deleteBranch1.delete();` + - Config + - `Config cfg = repo.getConfig();` + - `String name = cfg.getString("user", null, "name");` + +- Explanation: references (`Ref`) + - `repo.getRef("master")` + - JGit automatically grabs the actual master ref at `refs/heads/master` + - returns a `Ref` object for reading information about the reference + - `Ref` info available + - name: `.getName()` + - direct reference target object: `.getObjectId()` + - symbolic reference target reference: `.getTarget()` + - `Ref` objects also used for + - tag refs + - tag objects + - Tag “peeled” concept + - peeled = points to final 
target of a (potentially long) string of tag objects + +- Explanation: object IDs (`ObjectId`) + - represents SHA-1 hash of an object + - object might or might not exist in the object database + +- Explanation: rev-parse (`repo.resolve(...)`) + - accepts any object specifier Git understands + - returns + - a valid `ObjectId`, or + - `null` + - reference: “see Branch References” + +- Explanation: raw object access (`ObjectLoader`) + - can stream contents + - `ObjectLoader.copyTo(...)` + - other capabilities mentioned + - read type and size of object + - return contents as a byte array + - large object handling + - when `.isLarge()` is `true` + - `.openStream()` returns an InputStream-like object + - reads raw data without pulling everything into memory at once + +- Explanation: creating a branch (`RefUpdate`) + - create `RefUpdate` + - set new object ID + - call `.update()` to trigger change + +- Explanation: deleting a branch + - requires `.setForceUpdate(true)` + - otherwise `.delete()` returns `REJECTED` + - and nothing happens + +- Explanation: config (`Config`) + - get via `repo.getConfig()` + - example value read + - `user.name` via `cfg.getString("user", null, "name")` + - config resolution behavior + - uses repository for local configuration + - automatically detects global and system config files + - reads values from them as well + +- Error handling in JGit (not shown in code sample) + - handled via exceptions + - may throw standard Java exceptions + - example: `IOException` + - also has JGit-specific exceptions (examples) + - `NoRemoteRepositoryException` + - `CorruptObjectException` + - `NoMergeBaseException` + +- Scope note + - this is only a small sampling of the full plumbing API + - many more methods/classes exist + +### Porcelain (JGit) +- Why porcelain exists + - plumbing APIs are rather complete + - but can be cumbersome to string together for common goals + - adding a file to the index + - making a new commit +- Entry point class + - `Git` + - 
construction shown + - `Repository repo;` + - `// construct repo...` + - `Git git = new Git(repo);` + +#### Porcelain command pattern (Git class) +- Pattern + - `Git` methods return a command object + - chain method calls to set parameters + - execute via `.call()` + +#### Example: like `git ls-remote` +- Credentials + - `CredentialsProvider cp = new UsernamePasswordCredentialsProvider("username", "p4ssw0rd");` +- Command chain + - `Collection<Ref> remoteRefs = git.lsRemote()` + - `.setCredentialsProvider(cp)` + - `.setRemote("origin")` + - `.setTags(true)` + - `.setHeads(false)` + - `.call();` +- Output loop + - `for (Ref ref : remoteRefs) {` + - ` System.out.println(ref.getName() + " -> " + ref.getObjectId().name());` + - `}` +- What it requests + - tags from `origin` + - not heads +- Authentication note + - uses a `CredentialsProvider` + +#### Other commands available through `Git` (examples listed) +- add +- blame +- commit +- clean +- push +- rebase +- revert +- reset + +### Further Reading (JGit) +- Official JGit API documentation + - https://www.eclipse.org/jgit/documentation + - standard Javadoc + - JVM IDEs can install locally as well +- JGit Cookbook + - https://github.com/centic9/jgit-cookbook + - many examples of specific tasks + +## go-git +- When to use + - integrate Git into a service written in Golang +- What it is + - pure Go library implementation + - no native dependencies + - not prone to manual memory management errors + - transparent to standard Golang performance analysis tooling + - CPU profilers + - memory profilers + - race detector + - etc. 
+- Focus + - extensibility + - compatibility +- Compatibility / API coverage note + - supports most plumbing APIs + - compatibility documented at: + - https://github.com/go-git/go-git/blob/master/COMPATIBILITY.md + +### Basic go-git example +- Import + - `import "github.com/go-git/go-git/v5"` +- Clone + - `r, err := git.PlainClone("/tmp/foo", false, &git.CloneOptions{` + - ` URL: "https://github.com/go-git/go-git",` + - ` Progress: os.Stdout,` + - `})` + +### After you have a `Repository` instance +- “Access information and perform mutations” +- Example operations shown + - Get branch pointed by `HEAD` + - `ref, err := r.Head()` + - Get commit object pointed by `ref` + - `commit, err := r.CommitObject(ref.Hash())` + - Get commit history + - `history, err := commit.History()` + - Iterate commits and print each + - `for _, c := range history {` + - ` fmt.Println(c)` + - `}` + +### Advanced Functionality (go-git) +- Feature: pluggable storage system + - similar to Libgit2 backends + - default implementation: in-memory storage + - “very fast” + - example: clone into memory storage + - `r, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{` + - ` URL: "https://github.com/go-git/go-git",` + - `})` +- Storage options example + - store references, objects, and configuration in Aerospike + - example location: + - https://github.com/go-git/go-git/tree/master/_examples/storage +- Feature: flexible filesystem abstraction + - uses go-billy `Filesystem` + - https://pkg.go.dev/github.com/go-git/go-billy/v5?tab=doc#Filesystem + - makes it easy to store files differently + - pack all files into a single archive on disk + - keep all files in-memory +- Advanced use-case: fine-tunable HTTP client + - example referenced: + - https://github.com/go-git/go-git/blob/master/_examples/custom_http/main.go + - custom client shown + - `customClient := &http.Client{` + - ` Transport: &http.Transport{ // accept any certificate (might be useful for testing)` + - ` TLSClientConfig: 
&tls.Config{InsecureSkipVerify: true},` + - ` },` + - ` Timeout: 15 * time.Second, // 15 second timeout` + - ` CheckRedirect: func(req *http.Request, via []*http.Request) error {` + - ` return http.ErrUseLastResponse // don't follow redirect` + - ` },` + - `}` + - override protocol handling + - `client.InstallProtocol("https", githttp.NewClient(customClient))` + - purpose: override http(s) default protocol to use custom client + - clone using new client (for `https://`) + - `r, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{URL: url})` + +### Further Reading (go-git) +- Scope note + - full treatment outside scope of the book +- API documentation + - https://pkg.go.dev/github.com/go-git/go-git/v5 +- Usage examples + - https://github.com/go-git/go-git/tree/master/_examples + +## Dulwich +- What it is + - pure-Python Git implementation: Dulwich +- Project hosting / site + - https://www.dulwich.io/ +- Goal + - interface to Git repositories (local and remote) + - does not call out to `git` directly + - uses pure Python instead +- Performance note + - optional C extensions + - significantly improve performance +- API design + - follows Git design + - separates two API levels + - plumbing + - porcelain + +### Dulwich plumbing example (lower-level API) +- Goal + - access the commit message of the last commit +- Code and shown outputs + - `from dulwich.repo import Repo` + - `r = Repo('.')` + - `r.head()` + - `# '57fbe010446356833a6ad1600059d80b1e731e15'` + - `c = r[r.head()]` + - `c` + - `# ` + - `c.message` + - `# 'Add note about encoding.\n'` + +### Dulwich porcelain example (high-level API) +- Goal + - print a commit log using porcelain API +- Code and shown outputs + - `from dulwich import porcelain` + - `porcelain.log('.', max_entries=1)` + - `#commit: 57fbe010446356833a6ad1600059d80b1e731e15` + - `#Author: Jelmer Vernooij ` + - `#Date: Sat Apr 29 2017 23:57:34 +0000` + +### Further Reading (Dulwich) +- Available on official website + - API documentation 
+ - tutorial + - many task-focused examples +- URL + - https://www.dulwich.io/ \ No newline at end of file diff --git a/mindmap/Appendix C_ Git Commands.md b/mindmap/Appendix C_ Git Commands.md new file mode 100644 index 0000000..436b744 --- /dev/null +++ b/mindmap/Appendix C_ Git Commands.md @@ -0,0 +1,85 @@ +```markmap +# Appendix C: Git Commands +## Context / Why this appendix exists +- The book introduces “dozens” of Git commands +- Commands were introduced inside a narrative + - Added “slowly” as the story progressed +- Result of narrative approach + - Examples/usage of commands are “somewhat scattered” throughout the book +- Goal of this appendix + - Go through **all Git commands addressed throughout the book** + - Group them “roughly by what they’re used for” + - For each command + - Explain **very generally** what it does + - Point out **where in the book** it was used + +## Tip / Note: Abbreviating long options +- You can abbreviate long options (when unambiguous) + - Example + - `git commit --a` behaves like `git commit --amend` +- Constraint + - Abbreviation works **only** when the letters after `--` are **unique** among options +- Guidance for scripting + - Use the **full option** when writing scripts + +## Setup and Config +- Two commands used “quite a lot” + - From the **first invocations** of Git + - Through **common every day** tweaking and referencing +- The two commands + - `git config` + - `git help` + +### `git config` +- Core idea + - Git has a **default way** of doing “hundreds of things” + - `git config` lets you change those defaults and set preferences +- What configuration can include (examples given) + - Your **name** + - Your **email address** + - Your **editor** preference + - Specific **terminal color** preferences + - (More generally) “hundreds of things” Git can be told to do differently +- How configuration is stored/applied + - Multiple files are involved + - The command can **read from** and **write to** several files + - Scope 
flexibility + - Set values **globally** + - Or down to **specific repositories** + +#### How often it appears in the book +- “Used in nearly every chapter of the book” + +#### Where the book used `git config` (as listed) +- **First-Time Git Setup** + - Used to specify + - Name + - Email address + - Editor preference + - Noted as happening “before we even got started using Git” +- **Git Aliases** + - Used to create shorthand commands (aliases) + - Aliases expand to long option sequences + - Purpose: avoid typing long sequences every time +- **Rebasing** + - Used to make `--rebase` the default behavior when running `git pull` +- **Credential Storage** + - Used to set up a default store for HTTP passwords +- **Keyword Expansion** + - Used to set up + - “smudge” filters + - “clean” filters + - Purpose: manage content coming **into** and going **out of** Git +- **Git Configuration** + - “Basically the entirety” of that section/chapter is dedicated to `git config` + +### `git config core.editor` (editor configuration) +- Mentioned as a specific configuration focus + - “git config core.editor commands” +- Linked context + - Accompanies the configuration instructions in **Your Editor** +- What is stated + - Many editors can be set using `core.editor` +- Boundary of provided material + - The excerpt ends just before listing the specific editor command examples +``` \ No newline at end of file diff --git a/mindmap/Customizing Git.md b/mindmap/Customizing Git.md new file mode 100644 index 0000000..0013f77 --- /dev/null +++ b/mindmap/Customizing Git.md @@ -0,0 +1,665 @@ +# Customizing Git +## Purpose & scope +- Goal: make Git operate in a more customized fashion (personal/team/company needs) +- Main customization mechanisms covered + - Configuration settings (`git config`) + - Attributes (path-specific behavior via `.gitattributes` / `.git/info/attributes`) + - Hooks (event-driven scripts: client-side + server-side) + +## Git Configuration +### `git config` basics +- Used to 
read/write configuration values +- Common initial setup (examples) + - `git config --global user.name "John Doe"` + - `git config --global user.email johndoe@example.com` + +### Configuration files (“levels”) & precedence +- System level + - File: `[path]/etc/gitconfig` + - Applies to: every user + all repositories on the system + - `git config --system …` reads/writes here +- Global level (user) + - File: `~/.gitconfig` or `~/.config/git/config` + - Applies to: a specific user across repositories + - `git config --global …` reads/writes here +- Local level (repo) + - File: `.git/config` (inside current repository) + - Applies to: current repository only + - `git config --local …` reads/writes here + - Default level if you don’t specify `--system/--global/--local` +- Override rule + - `local` overrides `global` overrides `system` +- Editing note + - Config files are plain text; manual edits work + - Generally easier/safer to use `git config` + +### Client-side vs server-side options +- Options fall into two categories + - Client-side (most options): personal working preferences + - Server-side (fewer): repository receiving/policy behaviors +- Discover all supported options + - `man git-config` + - Reference: `https://git-scm.com/docs/git-config` + +### Basic client configuration (common & useful) +#### `core.editor` +- Purpose: editor used for commit/tag messages +- Default selection order + - `$VISUAL` or `$EDITOR` environment variables + - fallback: `vi` +- Set example + - `git config --global core.editor emacs` + +#### `commit.template` +- Purpose: provide an initial commit message template +- Use cases + - Remind yourself/team of message structure and policy + - Encourage consistent subject length + body + ticket references +- Example template content (concepts) + - Subject line guidance (e.g., keep under ~50 chars for `git log --oneline`) + - Multi-line description + - Optional ticket marker (e.g., `[Ticket: X]`) +- Set + behavior + - `git config --global 
commit.template ~/.gitmessage.txt` + - `git commit` opens editor pre-filled with the template + comment lines + +#### `core.pager` +- Purpose: pager for long output (e.g., `log`, `diff`) +- Default: usually `less` +- Disable paging + - `git config --global core.pager ''` + - Effect: output is printed directly (no pager), regardless of length + +#### `user.signingkey` +- Purpose: simplify signing annotated tags (GPG) +- Set + - `git config --global user.signingkey <gpg-key-id>` +- Use afterward + - `git tag -s <tag-name>` (no need to specify key each time) + +#### `core.excludesfile` +- Purpose: global ignore patterns (applies to all repositories for that user) +- Use cases (examples) + - macOS: `.DS_Store` + - editors: Emacs backups `*~`, Vim swap files `.*.swp` +- Example workflow + - Create `~/.gitignore_global` with patterns like + - `*~` + - `.*.swp` + - `.DS_Store` + - Configure + - `git config --global core.excludesfile ~/.gitignore_global` + +#### `help.autocorrect` +- Problem: mistyped commands are suggested but not run +- Set behavior: auto-run a likely intended command after a delay +- Setting semantics + - Integer in tenths of a second + - `1` → 0.1s delay + - `50` → 5s delay +- Example + - `git config --global help.autocorrect 1` +- Runtime behavior + - Shows warning + countdown-like delay, then runs corrected command + +## Colors in Git +### `color.ui` (master switch) +- Purpose: enable/disable default colored terminal output +- Values + - `false` → no color + - `auto` (default) → color only when writing to a terminal; no color codes when piped/redirected + - `always` → always emit color codes (rarely desired) +- Per-command override + - Use `--color` flag on specific Git commands if you want forced coloring in redirected output + +### `color.*` (command-specific control) +- Per-area switches (each: `true`, `false`, or `always`) + - `color.branch` + - `color.diff` + - `color.interactive` + - `color.status` +- Fine-grained subsettings (override specific parts) + - Example: 
diff “meta” styling + - `git config --global color.diff.meta "blue black bold"` +- Supported colors + - `normal`, `black`, `red`, `green`, `yellow`, `blue`, `magenta`, `cyan`, `white` +- Supported attributes + - `bold`, `dim`, `ul` (underline), `blink`, `reverse` + +## External Merge and Diff Tools +### Why use external tools +- Git has built-in diff/merge, but you can: + - Use external diff viewers + - Use GUI merge tools for conflict resolution +- Example tool used in chapter + - P4Merge (Perforce Visual Merge Tool): graphical + free + cross-platform + +### Wrapper-script approach (example: P4Merge) +- Platform note + - Example paths are macOS/Linux-style + - On Windows, replace `/usr/local/bin` with an executable path in your environment + +#### `extMerge` wrapper +- Purpose: call the GUI merge tool with all passed arguments +- Example content (conceptual) + - Shell script that runs: `p4merge $*` + - macOS example path to binary: + - `/Applications/p4merge.app/Contents/MacOS/p4merge $*` + +#### `extDiff` wrapper +- Purpose: adapt Git’s diff-program arguments to what your merge viewer needs +- Git passes 7 arguments to external diff programs (concept) + - `path old-file old-hex old-mode new-file new-hex new-mode` +- Wrapper logic + - Ensure 7 args exist + - Invoke merge tool on the *old file* and *new file* only + - Uses `$2` (old-file) and `$5` (new-file) + +#### Make wrappers executable +- `sudo chmod +x /usr/local/bin/extMerge` +- `sudo chmod +x /usr/local/bin/extDiff` + +### Configure Git to use wrappers +- Key settings involved + - `merge.tool` → selects merge tool name + - `mergetool.<tool>.cmd` → how to invoke tool (with `$BASE`, `$LOCAL`, `$REMOTE`, `$MERGED`) + - `mergetool.<tool>.trustExitCode` → whether tool’s exit code indicates success + - `diff.external` → command for external diffs +- Example config commands + - `git config --global merge.tool extMerge` + - `git config --global mergetool.extMerge.cmd 'extMerge "$BASE" "$LOCAL" "$REMOTE" "$MERGED"'` + - `git 
config --global mergetool.extMerge.trustExitCode false` + - `git config --global diff.external extDiff` +- Equivalent `.gitconfig` blocks (concept) + - `[merge] tool = extMerge` + - `[mergetool "extMerge"] cmd = … ; trustExitCode = false` + - `[diff] external = extDiff` + +### Using the configured tools +- External diff example + - `git diff ` opens GUI instead of printing to terminal + - (Figure reference in chapter: P4Merge screenshot) +- Merge conflicts + - `git mergetool` launches GUI tool to resolve conflicts + +### Switching tools easily +- Benefit of wrapper design + - Change the underlying tool by editing `extMerge` + - `extDiff` continues calling `extMerge` +- Example: switch to KDiff3 by changing the binary invoked by `extMerge` + +### Built-in mergetool presets +- Git supports many merge tools without custom `cmd` +- List supported tools + - `git mergetool --tool-help` +- Environment caveat + - Windowed tools require a GUI; terminal-only sessions may fail + +### Using a tool only for merges (not diffs) +- If tool command is in `PATH` (example: `kdiff3`) + - `git config --global merge.tool kdiff3` +- Result + - Merge resolution uses KDiff3 + - Diffs remain Git’s normal diff output + +## Formatting and Whitespace +### Problems addressed +- Cross-platform line endings (Windows vs macOS/Linux) +- Subtle whitespace edits introduced by editors/tools + +### `core.autocrlf` (line ending normalization) +- Background + - Windows newline: CRLF (`\r\n`) + - macOS/Linux newline: LF (`\n`) +- Behavior: auto-convert at boundaries + - On add/commit: convert as configured into repository-friendly form + - On checkout: convert as configured into working-tree-friendly form +- Recommended settings by environment + - Windows + cross-platform collaboration + - `git config --global core.autocrlf true` + - Checkout uses CRLF; repo stores LF + - macOS/Linux (LF) but want to “clean up” accidental CRLF commits + - `git config --global core.autocrlf input` + - Convert CRLF→LF on 
commit; do not convert on checkout + - Windows-only project, want CRLF stored as-is + - `git config --global core.autocrlf false` + +### `core.whitespace` (detect/fix whitespace issues) +- Six primary whitespace issues +- Enabled by default (can be disabled) + - `blank-at-eol` (spaces at end of line) + - `blank-at-eof` (blank lines at end of file) + - `space-before-tab` (spaces before tabs in indentation) +- Disabled by default (can be enabled) + - `indent-with-non-tab` (indent begins with spaces; uses `tabwidth`) + - `tab-in-indent` (tabs in indentation portion) + - `cr-at-eol` (treat CR at EOL as acceptable) +- How to set + - Comma-separated list + - Disable an option by prefixing with `-` + - Omit options to keep defaults +- Shorthand + - `trailing-space` = `blank-at-eol` + `blank-at-eof` +- Example intent from chapter + - Enable most checks, disable `space-before-tab`, and enable the three disabled-by-default checks +- Where it’s used + - `git diff` highlights whitespace problems + - `git apply` uses it for patch application + - Warn: `git apply --whitespace=warn ` + - Fix: `git apply --whitespace=fix ` + - `git rebase` can also fix while rewriting patches + - `git rebase --whitespace=fix` + +## Server Configuration +### General note +- Fewer server-side config options, but some are important for integrity and policy + +### `receive.fsckObjects` +- Purpose: validate object integrity during push reception + - Check SHA-1 checksums + - Ensure objects point to valid objects +- Tradeoff: expensive; can slow pushes (especially large repos/pushes) +- Enable + - `git config --system receive.fsckObjects true` +- Benefit + - Helps prevent corrupt or malicious objects being introduced + +### `receive.denyNonFastForwards` +- Purpose: refuse non-fast-forward updates (blocks most force-pushes) +- Typical scenario + - Rebase already-pushed commits, then attempt to push rewritten history +- Enable + - `git config --system receive.denyNonFastForwards true` +- 
Alternative/enhancement + - Server-side hooks can enforce this with per-user/per-ref logic + +### `receive.denyDeletes` +- Purpose: prevent deletion of branches/tags on the server + - Stops the “delete and recreate” workaround to bypass non-FF restrictions +- Enable + - `git config --system receive.denyDeletes true` +- Effect + - No user can delete branches/tags via push + - Must remove ref files manually on server (or via ACLs/policy hooks) + +## Git Attributes +### What attributes are +- Path-specific settings controlling Git behavior for subsets of files +- Where to define them + - `.gitattributes` (committed, shared with the project) + - `.git/info/attributes` (local-only, not committed) +- Typical uses + - Choose merge strategies per file/directory + - Teach Git how to diff “non-text” formats + - Filter content on check-in/check-out (clean/smudge filters) + +### Binary Files +#### Identifying binary-like files +- Motivation: some “text” is effectively binary for Git operations (diff/merge not meaningful) +- Example from chapter + - Xcode `*.pbxproj` (UTF-8 text, but acts like machine-managed DB) + - Diffs/merges are not helpful; conflicts are not realistically resolvable by humans +- Attribute + - In `.gitattributes`: `*.pbxproj binary` +- Effects + - Avoid CRLF conversions/fixes for those paths + - Avoid computing/printing diffs for those files + +#### Diffing binary files via text conversion (`textconv`) +- Core idea + - Convert binary content to a text representation, then use normal diff on that representation + +##### Microsoft Word (`.docx`) diffing +- Attribute mapping + - `.gitattributes`: `*.docx diff=word` +- Define the `word` diff “driver” with `textconv` + - Install `docx2txt` (chapter references SourceForge project + INSTALL instructions) + - Create wrapper script named `docx2txt` in `PATH` (concept) + - Calls `docx2txt.pl "$1" -` to emit text to stdout + - Make executable (`chmod a+x docx2txt`) + - Configure Git + - `git config diff.word.textconv 
docx2txt` +- Result + - `git diff` shows added/removed text instead of “Binary files differ” +- Limitation noted + - Formatting-only changes may not be represented perfectly + +##### Image metadata diffing (EXIF) +- Attribute mapping + - `.gitattributes`: `*.png diff=exif` +- Tool + - Install `exiftool` +- Configure Git + - `git config diff.exif.textconv exiftool` +- Result + - `git diff` shows textual metadata differences (e.g., file size, width/height) + +### Keyword Expansion (CVS/SVN-style substitutions) +#### Why it’s tricky in Git +- Git hashes file content (blobs); modifying file contents “after commit” would change the hash +- Solution pattern + - Inject content on checkout + - Remove/normalize before staging/commit + +#### Built-in `ident` attribute (`$Id$`) +- Attribute + - `.gitattributes`: `*.txt ident` +- Behavior + - On checkout, replaces `$Id$` with `$Id: $` + - Note: uses blob SHA-1 (not commit SHA-1) +- Limitation + - Blob SHA-1 isn’t a human-friendly timestamp/ordering signal + +#### Custom clean/smudge filters +- Terminology + - **smudge**: runs on checkout (into working directory) + - **clean**: runs when staging (into index) + - (Figure references in chapter: smudge-on-checkout and clean-on-stage diagrams) + +##### Example: auto-format C code using `indent` +- `.gitattributes` + - `*.c filter=indent` +- Config filter behavior + - Clean (before staging): `git config --global filter.indent.clean indent` + - Smudge (on checkout): `git config --global filter.indent.smudge cat` (no-op) +- Effect + - Code is run through `indent` before being committed + +##### Example: `$Date$` expansion (RCS-like) +- Smudge script (concept) + - Reads stdin + - Computes last commit date: `git log --pretty=format:"%ad" -1` + - Replaces `$Date$` → `$Date: $` + - Script name in chapter: `expand_date` (Ruby), placed in `PATH` +- Configure the filter “driver” (named `dater`) + - Smudge: `git config filter.dater.smudge expand_date` + - Clean: `git config filter.dater.clean 
'perl -pe "s/\\\$Date[^\\\$]*\\\$/\\\$Date\\\$/"'` + - Strips expanded date back to literal `$Date$` before storing +- Apply to files + - `.gitattributes`: `date*.txt filter=dater` +- Demonstrated workflow + - Create file containing `$Date$` + - Commit + - Remove + checkout again + - Observe expanded date in working directory +- Portability caveat + - `.gitattributes` is shared with the repo + - Filter scripts/config are not automatically shared + - Filters should fail gracefully so project still works without them + +### Exporting Your Repository (archives) +#### `export-ignore` +- Purpose: exclude files/dirs from `git archive` output while still tracking them in Git +- Example + - `.gitattributes`: `test/ export-ignore` +- Result + - `git archive` tarball omits `test/` + +#### `export-subst` +- Purpose: apply `git log` formatting/keyword-style substitutions during `git archive` +- Mark file(s) + - `.gitattributes`: `LAST_COMMIT export-subst` +- Embed placeholders in file content (concept) + - Example pattern: `$Format:%cd by %aN$` +- Behavior on archive + - `git archive` injects metadata (date/author/etc.) 
into exported file + - Can include commit message, git notes, and word-wrapped formatting (chapter shows `%+w(...)` usage) +- Important limitation + - Exported archive is suitable for deployment + - Not suitable for continued development like a full Git checkout + +### Merge Strategies (per-path) +- Goal: apply special merge behavior for specific files +- Example: keep “our” version of a config-like file + - `.gitattributes`: `database.xml merge=ours` + - Configure merge driver + - `git config --global merge.ours.driver true` (dummy driver; always “succeeds” taking ours) +- Result when merging + - Git uses current branch version for that path, avoiding manual conflict resolution for that file + +## Git Hooks +### What hooks are +- Custom scripts triggered by Git events +- Two groups + - Client-side: local operations (commit, rebase, merge, checkout, push initiation, etc.) + - Server-side: network operations (receiving pushes) + +### Installing a hook +- Location + - `.git/hooks` in a repository +- Defaults + - `git init` creates example hook scripts (typically `*.sample`) +- Enabling a hook + - Create/rename a file with the proper hook name (no extension) + - Make it executable +- Implementation language + - Any executable script works (shell, Perl, Ruby, Python, …) + +### Client-side hooks +- Critical distribution note + - Client-side hooks are **not** transferred when cloning + - To **enforce** policy, prefer server-side hooks (client-side can only assist) + +#### Committing-workflow hooks +- `pre-commit` + - Runs: before commit message entry + - Use: inspect staged snapshot, run tests/lint, detect trailing whitespace, verify docs, etc. 
+ - Abort rule: non-zero exit cancels commit + - Bypass: `git commit --no-verify` +- `prepare-commit-msg` + - Runs: after default message is created, before editor opens + - Inputs (parameters) + - Commit message file path + - Commit type + - Commit SHA-1 (for amended commits) + - Use: adjust auto-generated messages (merge commits, squashes, amended commits, template-based flows) +- `commit-msg` + - Runs: after message is written, before commit is finalized + - Input: commit message file path + - Use: validate message format / required patterns +- `post-commit` + - Runs: after commit completes + - No parameters + - Use: notifications; can identify last commit via `git log -1 HEAD` + +#### Email workflow hooks (for `git am`) +- Scope note + - Only relevant if using email patch workflows (`git format-patch` → `git am`) +- `applypatch-msg` + - Runs: first + - Input: temp file with proposed commit message + - Abort rule: non-zero cancels patch application + - Use: validate/normalize commit messages (can edit file in place) +- `pre-applypatch` + - Runs: after patch applied, before commit is made + - Use: inspect snapshot; run tests; abort `git am` if failures occur +- `post-applypatch` + - Runs: after commit is made + - Use: notify author/team that patch was applied + - Cannot stop the patching process + +#### Other client hooks +- `pre-rebase` + - Runs: before rebase + - Abort rule: non-zero cancels rebase + - Use: prevent rebasing commits that have already been pushed (sample hook attempts this) +- `post-rewrite` + - Triggered by: commands that replace commits (`git commit --amend`, `git rebase`; not `git filter-branch`) + - Input: argument naming the triggering command; rewrite list on stdin + - Use: similar to post-checkout/post-merge automation/notifications +- `post-checkout` + - Runs: after successful `git checkout` + - Use: project environment setup (populate large binaries not tracked, generate docs, etc.) 
+- `post-merge` + - Runs: after successful merge + - Use: restore non-tracked working-tree data (e.g., permissions), validate external dependencies +- `pre-push` + - Runs: during `git push` after remote refs updated but before objects transferred + - Inputs + - Parameters: remote name + remote location + - stdin: refs to be updated + - Abort rule: non-zero cancels push + - Use: validate ref updates before transferring objects +- `pre-auto-gc` + - Runs: before automatic garbage collection (`git gc --auto`) + - Use: notify user or abort GC if inconvenient + +### Server-side hooks +- Admin-focused: enforce policies on pushes +- Pre hooks can reject pushes + - Exit non-zero to reject + - Print message to stdout to show error to client + +#### `pre-receive` +- Runs: first during push handling +- Input: list of refs on stdin +- Reject behavior + - Non-zero exit rejects **all** refs in the push +- Use cases + - Block non-fast-forward updates globally + - Access control across refs and paths being modified + +#### `update` +- Similar role to `pre-receive`, but: + - Runs **once per branch/ref** being updated + - Inputs (arguments) + - Ref name + - Old SHA-1 + - New SHA-1 +- Reject behavior + - Non-zero exit rejects **only that ref**; other refs can still update + +#### `post-receive` +- Runs: after push process completes +- Input: same stdin data as `pre-receive` +- Use cases + - Notify services/users (email lists, CI, ticket trackers) + - Parse commit messages for automation +- Performance note + - Cannot stop push; client waits until hook finishes + - Avoid long-running tasks or offload them + +#### Hook scripting tip (from chapter) +- Prefer long-form command-line flags in scripts for readability/maintainability + +## An Example Git-Enforced Policy +### Goals +- Enforce commit message format (must include a ticket/reference token) +- Enforce user-based access control (who can change which directories/files) +- Provide client-side hooks to warn users early (reduce 
rejected pushes) + +### Implementation language in chapter +- Ruby (chosen for readability), but any scripting language works + +### Server-side enforcement (in `hooks/update`) +#### Update hook inputs & environment +- Runs once per branch being pushed +- Arguments + - `refname` (ref being updated) + - `oldrev` (old SHA-1) + - `newrev` (new SHA-1) +- User identification assumption + - User available in `$USER` + - SSH single-user setups may need a wrapper to map public keys to a user and set env var +- Hook prints an “Enforcing Policies…” banner + - Anything printed to stdout is relayed to the pushing client + +#### Policy 1: Enforce commit message format +- Requirement: each commit message must contain something like `[ref: 1234]` +- Identify commits included in the push + - `git rev-list oldrev..newrev` (lists new commits by SHA-1) +- Extract commit message for each commit + - `git cat-file commit <sha>` gives raw commit object + - Message content begins after first blank line + - Use `sed '1,/^$/d'` to print message portion +- Validate messages + - Regex (concept): `/\[ref: (\d+)\]/` + - If any commit lacks the pattern + - Print policy message + - `exit 1` → reject push + +#### Policy 2: Enforce directory/file ACL (user-based permissions) +- ACL file location (server-side) + - `acl` file stored in the bare repository +- ACL format (CVS-like) + - Lines: `avail|user1,user2|path` + - Pipe `|` delimits fields + - Blank `path` means access to everything + - (Example also mentions `unavail`, but the sample enforcement only handles `avail`) +- Example intent + - Admin users: full access + - Doc writers: only `doc/` + - Limited dev: only `lib/` and `tests/` +- Parse ACL into structure + - Map: `user -> [allowed_paths]` + - `nil` path denotes “allowed everywhere” +- Determine what files are modified by pushed commits + - For each new commit: `git log -1 --name-only --pretty=format:'' <sha>` +- Validate each changed path against user’s allowed paths + - Allowed if + - user has a 
`nil` access path (full access), or + - file path starts with an allowed directory prefix +- On violation + - Print `[POLICY] You do not have access to push to <path>` + - `exit 1` to reject + +#### Testing behavior (server-side) +- Enable hook: `chmod u+x .git/hooks/update` +- Pushing with a bad commit message + - Hook prints policy banner + error + - Git reports hook failure and rejects the ref update +- Pushing unauthorized file edits + - Similar rejection, specifying the disallowed path +- Outcome + - Repo never accepts commits missing the required reference pattern + - Users are sandboxed to allowed paths + +### Client-side helper hooks (reduce “last-minute” rejections) +#### Distribution limitation +- Hooks don’t clone with the repository +- Must distribute scripts separately and have users install them into `.git/hooks/` and make executable + +#### Client policy 1: commit message check (`commit-msg` hook) +- Runs before commit finalization +- Input: commit message file path (`ARGV[0]`) +- Enforces same regex pattern as server policy +- Behavior + - Non-matching message → print policy message → exit non-zero → commit aborted + - Matching message → commit proceeds + +#### Client policy 2: ACL check before commit (`pre-commit` hook) +- Requires local copy of ACL file + - Expected at: `.git/acl` +- Key differences vs server-side ACL enforcement + - Uses staging area (index) instead of commit history + - File list command + - `git diff-index --cached --name-only HEAD` +- Same core permission logic + - If staged changes include a disallowed path, abort commit +- Identity caveat + - Assumes local `$USER` matches the user used when pushing to the server; otherwise set user explicitly + +#### Client policy 3: prevent rebasing already-pushed commits (`pre-rebase` hook) +- Motivation + - Server likely already denies non-fast-forward updates (`receive.denyNonFastForwards`) and deletes + - Client hook helps prevent accidental rebases that rewrite already-pushed commits +- 
Script logic (concept) + - Determine base branch + topic branch (`HEAD` default) + - List commits to be rewritten: `git rev-list base..topic` + - List remote refs: `git branch -r` + - For each commit SHA, check if reachable from any remote ref + - Uses revision syntax `sha^@` (all parents) + - Uses `git rev-list ^<sha>^@ refs/remotes/<remote-ref>` to test reachability + - If any commit already exists remotely, abort rebase with policy message +- Tradeoffs + - Can be slow + - Often unnecessary unless you were going to force-push + - Still a useful preventative exercise + +## Summary (chapter wrap-up) +- Customization categories mastered + - Config settings (client + server) + - Attributes (path-specific diff/merge/filter/export behavior) + - Hooks (client assistance + server enforcement) +- Practical outcome + - Git can be shaped to match nearly any workflow, including enforceable policies and automation \ No newline at end of file diff --git a/mindmap/Distributed Git.md b/mindmap/Distributed Git.md new file mode 100644 index 0000000..02cb3c4 --- /dev/null +++ b/mindmap/Distributed Git.md @@ -0,0 +1,926 @@ +```markdown +# Distributed Git +## Context & Goals (Why this chapter) +- Starting point + - A remote Git repository already exists as a shared focal point + - You already know basic local Git commands/workflow +- What “distributed” enables + - Git supports distributed collaboration patterns beyond centralized VCSs +- Two roles you’ll learn to operate in + - Contributor + - Contribute code successfully + - Make it easy for you + project maintainer + - Integrator / Maintainer + - Maintain a project with many contributors + - Integrate others’ work sustainably and clearly + +## Distributed Workflows (Collaboration models) +### Git vs Centralized Version Control Systems (CVCSs) +- CVCS mental model + - Central hub/repository is the primary “source” + - Developers are “nodes” consuming/syncing with that hub +- Git mental model (distributed) + - Every developer is potentially both: + - 
A node (contributor to others) + - A hub (maintains a public repo others can base work on) + - Result: many possible workflow designs + - You can choose one workflow + - Or mix-and-match features + +### Centralized Workflow (Single shared hub) +- Core idea + - One central repository (“hub”) accepts code + - Everyone synchronizes with that central location +- What happens when changes overlap in time + - Two developers clone from hub and both change things + - First developer to push succeeds + - Second developer must first merge the upstream work locally + - Prevents overwriting the first developer’s work + - True in Git just like Subversion (or any CVCS) +- Why people like it + - Familiar paradigm for many teams +- Git-specific note + - You can keep using this model in Git + - Give everyone push access + - Git prevents overwriting (rejects non-fast-forward pushes) +- Scale note + - Not limited to small teams + - Git branching makes it possible for hundreds of devs to work across dozens of branches + +- Figure 53: Centralized workflow + - Hub repository with multiple developers syncing to it + +### Integration-Manager Workflow (Multiple repos + canonical “official” repo) +- Core idea + - Each developer: + - Has write access to their own public repository + - Has read access to others’ repositories + - There is a canonical (“official”) repository +- Contribution mechanics (typical steps) + 1. Maintainer pushes to their public repository + 2. Contributor clones that repository and makes changes + 3. Contributor pushes to their own public copy + 4. Contributor requests maintainer to pull changes (often email) + 5. Maintainer adds contributor repo as remote, tests locally, merges locally + 6. 
Maintainer pushes merged result to canonical repo +- Where it’s common + - Hub-based hosting (e.g., GitHub/GitLab-style forking) +- Key advantage emphasized + - Asynchronous pace + - Contributor keeps working without waiting for acceptance + - Maintainer can pull in changes whenever ready + +- Figure 54: Integration-manager workflow + - Blessed/canonical repo + integration manager + contributor public/private repos + +### Dictator and Lieutenants Workflow (Large hierarchical projects) +- What it is + - Variant of multi-repo workflow + - Used in huge projects with hundreds of collaborators (example: Linux kernel) +- Roles + - Lieutenants + - Integration managers for specific subsystems/areas + - Benevolent dictator + - Single final integration manager + - Pushes to reference (“blessed”) repository +- Process (typical steps) + 1. Regular developers + - Work on topic branches + - Rebase their work on top of `master` + - `master` here = reference repo’s `master` that dictator pushes to + 2. Lieutenants + - Merge developers’ topic branches into lieutenants’ `master` + 3. Dictator + - Merges lieutenants’ `master` branches into dictator’s `master` + 4. 
Dictator pushes to reference repository + - Other developers rebase on that updated `master` +- When it helps + - Very large projects + - Highly hierarchical environments + - Delegates integration work; collects large subsets before final integration +- Noted as + - Not common overall + - But useful in specific contexts + +- Figure 55: Benevolent dictator workflow + - Dictator integrates from lieutenants; pushes to blessed repository + +### Patterns for Managing Source Code Branches (external guide callout) +- Reference + - Martin Fowler guide: “Patterns for Managing Source Code Branches” + - Link: https://martinfowler.com/articles/branching-patterns.html +- What the guide covers (as noted) + - Common Git workflows + - How/when to use them + - Comparison of high vs low integration frequencies + +### Workflows Summary (transition) +- Distributed Git allows many variations +- Choose a workflow (or combination) that fits reality +- Next: specific contributing patterns and use cases + +## Contributing to a Project (How to contribute effectively) +### Why it’s hard to prescribe one “right” way +- Git is flexible → many real-world collaboration styles +- Every project differs + +### Key variables that change the “best” approach +- Active contributor count (and activity level) + - Small/dormant: 2–3 devs, few commits/day (or less) + - Large: thousands of devs, hundreds/thousands of commits/day + - Practical risk with larger activity + - Your changes may not apply cleanly later + - Your changes may become obsolete/broken before merging + - Need to keep work up to date and commits valid +- Workflow in use + - Centralized (everyone can push to main line) + - Integration manager / maintainer gatekeeping + - Peer review requirements + - Lieutenant system (submit to subsystem maintainers first) +- Your commit access + - Write access vs no write access changes everything + - If no access + - How does project accept contributions (policy/method)? 
+- Contribution size & frequency + - How much you contribute at a time + - How often you contribute + +### Approach in chapter +- Presents a series of use cases (simple → complex) +- Goal + - Let you construct the workflow you need in practice + +### Commit Guidelines (preparing high-quality contributions) +- Source of guidance + - Git project’s tips: `Documentation/SubmittingPatches` +- Guideline 1: Avoid whitespace errors + - Check before committing: + - `git diff --check` + - Purpose + - Identify whitespace problems early + - Prevent annoying reviewers/maintainers + - Figure 56: Output of `git diff --check` +- Guideline 2: Make commits logically separate changesets + - Keep changes digestible + - Don’t bundle multiple unrelated issues into one massive commit + - Use staging area to split work + - Even if you did work across multiple issues before committing + - If changes overlap in same file + - Use partial staging: + - `git add --patch` + - (Referenced as covered in “Interactive Staging”) + - Key point emphasized + - Final snapshot can be identical whether you do 1 commit or 5 + - But review/revert is much easier with clean, separated commits + - Benefit + - Easier to later pull out/revert one changeset + - Tooling reference + - “Rewriting History” provides techniques for crafting clean history +- Guideline 3: Write high-quality commit messages + - Why + - Makes using Git + collaborating easier + - Recommended structure + - Summary line + - ~50 characters or less + - Concise description of changeset + - Blank line (important) + - Tools (e.g., rebase) can get confused without it + - Body (optional, but recommended when needed) + - More detailed explanation + - Wrap around ~72 characters + - Include motivation for change + - Contrast implementation with previous behavior (Git project requirement) + - Style rules + - Imperative mood + - “Fix bug” (not “Fixed bug” / “Fixes bug”) + - Matches messages auto-generated by `git merge` and `git revert` + - Formatting 
details + - Multiple paragraphs separated by blank lines + - Bullet points are okay + - Hyphen/asterisk + single space + - Hanging indent + - Conventions vary + - Example quality reference + - Look at Git project history: + - `git log --no-merges` +- Callout: “Do as we say, not as we do” + - Book examples often use `git commit -m` for brevity + - Not meant as best-practice formatting + +## Use Case 1: Private Small Team (simple shared repo) +### Setting +- Private = closed-source, not public +- 1–2 other developers +- Everyone has push access to the shared repository +- Workflow resembles centralized systems (e.g., Subversion), but with Git advantages + - Offline commits + - Easier branching/merging + - Key difference vs Subversion: merges happen client-side + +### Example scenario: John & Jessica pushing to shared repo +- John: clone, edit, commit + - `git clone john@githost:simplegit.git` + - edit `lib/simplegit.rb` + - `git commit -am 'Remove invalid default value'` +- Jessica: clone, edit, commit + - `git clone jessica@githost:simplegit.git` + - edit `TODO` + - `git commit -am 'Add reset task'` +- Jessica pushes successfully + - `git push origin master` + - Push output explained (last line format) + - `<oldref>..<newref> 
fromref → toref` + - `oldref`: previous remote reference + - `newref`: updated remote reference + - `fromref`: local ref being pushed + - `toref`: remote ref being updated + - Reference: `git-push` documentation +- John tries to push and is rejected + - `git push origin master` + - Error: rejected (non-fast forward) +- Key lesson vs Subversion + - Even if they edited different files + - Git requires John to fetch + merge locally before pushing + - Subversion might do server-side merge; Git does not + +### John resolves the non-fast-forward +- Step 1: Fetch upstream changes + - `git fetch origin` + - Fetch downloads changes but does not merge them + - Figure 57: John’s divergent history (local master vs `origin/master`) +- Step 2: Merge fetched upstream + - `git merge origin/master` + - Merge strategy shown: `recursive` + - Figure 58: John’s repository after merging `origin/master` +- Step 3: Test merged code (recommended) +- Step 4: Push merged result + - `git push origin master` + - Figure 59: John’s history after pushing to origin + +### Jessica’s parallel work: topic branch + later integration +- Jessica created topic branch `issue54` + - 3 commits on that branch + - She hadn’t fetched John’s updates yet + - Figure 60: Jessica’s topic branch +- Jessica fetches new work + - `git fetch origin` + - Figure 61: Jessica’s history after fetching John’s changes +- Jessica determines what new commits exist on `origin/master` + - `git log --no-merges issue54..origin/master` + - Meaning of `issue54..origin/master` + - Show commits on `origin/master` that are not on `issue54` + - Note: range syntax referenced as covered later in “Commit Ranges” +- Jessica integrates (order doesn’t matter for final snapshot) + - Switch to master + - `git checkout master` + - Message may indicate behind `origin/master` and fast-forwardable + - Merge topic branch to master (chosen first) + - `git merge issue54` + - Result: fast-forward merge (no new merge commit) + - Merge John’s upstream work 
+ - `git merge origin/master` + - Result: merge commit created via recursive strategy + - Figure 62: Jessica’s history after merging John’s changes +- Jessica pushes + - `git push origin master` + - Figure 63: Jessica’s history after pushing all changes + +### General “simple multi-developer” sequence (as summarized) +- Work locally (often in a topic branch) +- Merge topic branch into `master` when ready +- Before sharing + - Fetch and merge `origin/master` if it changed +- Push `master` to server + +- Figure 64: General sequence of events for this simple workflow + +## Use Case 2: Private Managed Team (team branches + integrators) +### Setting +- Larger private group +- Small groups collaborate on features +- Integrators (a subset of engineers) merge into mainline +- Only integrators can update `master` of main repo +- Team collaboration happens on shared feature branches + +### Scenario setup +- FeatureA: John + Jessica +- FeatureB: Jessica + Josie +- Work happens on team-based branches; integrators pull together later + +### Jessica works on FeatureA (with John) +- Create feature branch + - `git checkout -b featureA` +- Work + commit + - `git commit -am 'Add limit to log function'` +- Share with John + - Push feature branch (no `master` push permission) + - `git push -u origin featureA` + - `-u` / `--set-upstream` sets upstream tracking for easier push/pull +- Notify John (email) + +### Jessica works on FeatureB (with Josie) +- Base new branch off server `master` + - `git fetch origin` + - `git checkout -b featureB origin/master` +- Work + commits + - `git commit -am 'Make ls-tree function recursive'` + - `git commit -am 'Add ls-files'` +- Figure 65: Jessica’s initial commit history (featureA and featureB in progress) + +### Josie already started an upstream branch for FeatureB +- Josie pushed initial work as branch `featureBee` +- Jessica fetches + - `git fetch origin` + - New remote-tracking branch: `origin/featureBee` +- Jessica merges Josie’s work into her 
local `featureB` + - `git merge origin/featureBee` +- Jessica pushes merged result back to the shared upstream branch + - Uses a refspec: + - `git push -u origin featureB:featureBee` + - Refspec concept + - Push local `featureB` to remote branch `featureBee` + - Reference: “The Refspec” + - `-u` sets upstream for simpler future pushes/pulls + +### John updates FeatureA; Jessica reviews and merges +- Fetch updates (includes John’s latest on featureA) + - `git fetch origin` +- See what John added (compare local vs fetched) + - `git log featureA..origin/featureA` +- Merge it in + - `git checkout featureA` + - `git merge origin/featureA` (fast-forward in example) +- Add minor tweaks + - `git commit -am 'Add small tweak to merged content'` +- Push featureA back to server + - `git push` +- Figure 66: Jessica’s history after committing on a feature branch + +### Integrators merge FeatureA and FeatureBee into mainline +- Team informs integrators the branches are ready +- Integrators merge into mainline +- After a fetch, Jessica sees merge commit(s) + - Figure 67: Jessica’s history after integrators merged both topic branches + +### Benefits emphasized +- Multiple teams can work in parallel +- Late merging of independent lines of work +- Remote branches let subgroups collaborate without blocking entire team + +- Figure 68: Basic sequence of the managed-team workflow + +## Use Case 3: Forked Public Project (contribute via forks & pull requests) +### Why this differs +- Public project: you typically cannot push to the official repo +- Need a different path to get work to maintainers + +### Typical fork-based flow +- Clone main repository + - `git clone <url>` +- Create a topic branch for your work + - `git checkout -b featureA` +- Commit as you work + - `git commit` (repeat as needed) +- Optional cleanup for review + - Use interactive rebase: + - `rebase -i` + - Goals: squash commits, reorder, make review easier + - Reference: “Rewriting History” + +### Fork + push topic branch to 
your fork +- Fork via hosting site (“Fork” button) → writable fork +- Add fork as remote + - `git remote add myfork <url>` +- Push only the topic branch (recommended) + - `git push -u myfork featureA` +- Why avoid merging into your `master` before pushing? + - If rejected or cherry-picked, you don’t need to rewind your master + - Maintainers may merge/rebase/cherry-pick; you’ll receive it later by pulling upstream + +### Notify maintainers: Pull request / request-pull +- “Pull request” can be created: + - Through the website (e.g., GitHub mechanism) + - Or manually via `git request-pull` + email +- `git request-pull` purpose + - Produces a summary of changes being requested for pull +- Inputs + - Base branch to pull into (e.g., `origin/master`) + - Repo URL to pull from (your fork) +- Output includes (as shown) + - Base commit reference (“changes since commit …”) + - Where to pull from (URL + branch) + - Commit list + diffstat summary + +### Best practice for multiple contributions +- Keep a local `master` tracking `origin/master` +- Work in topic branches + - Easy to discard if rejected + - Easy to rebase if upstream moves + +### Starting a second topic: don’t stack topics on old branches +- Start new branch from current upstream master + - `git checkout -b featureB origin/master` +- Work + commit +- Push + - `git push myfork featureB` +- Request pull + - `git request-pull origin/master myfork` +- Update your view of upstream + - `git fetch origin` +- Resulting structure + - Topics become separate silos (patch-queue-like) +- Figure 69: Initial commit history with featureB work + +### Scenario: maintainer can’t merge your featureA cleanly anymore +- Cause + - Upstream `origin/master` moved; your topic doesn’t apply cleanly +- Fix + - Rebase your topic onto current upstream + - `git checkout featureA` + - `git rebase origin/master` + - Force push updated branch to your fork + - `git push -f myfork featureA` +- Why `-f` is required + - Rebase rewrites history + - New 
commits may not be descendants of the remote branch tip +- Alternative mentioned + - Push to a new branch (e.g., `featureAv2`) instead of force-updating +- Figure 70: Commit history after rebasing featureA work + +### Scenario: maintainer likes featureB but requests implementation changes +- Goal + - Re-base on current `origin/master` + - Provide revised branch version +- Workflow + - Create new branch from current upstream + - `git checkout -b featureBv2 origin/master` + - Squash merge old feature branch changes + - `git merge --squash featureB` + - Make requested implementation changes + - Commit + - `git commit` + - Push new branch + - `git push myfork featureBv2` +- Meaning of `--squash` + - Combines all changes into one changeset + - Produces final state as if merged, but without a merge commit + - New commit has only one parent + - Lets you add more edits before recording the final commit +- Extra option callout + - `--no-commit` can delay commit in default merge process +- Figure 71: Commit history after featureBv2 work + +## Use Case 4: Public Project over Email (patch series via mailing list) +### When this is used +- Many older/larger projects accept patches via mailing lists +- Each project has specific procedures → you must check their rules + +### High-level flow +- Create a topic branch per patch series +- Instead of forking/pushing + - Generate email-ready patches + - Email to developer mailing list + +### Create commits on a topic branch +- `git checkout -b topicA` +- Work + commit + - `git commit` (repeat) + +### Generate mbox-formatted patch emails: `git format-patch` +- Command example + - `git format-patch -M origin/master` +- What it produces + - One `*.patch` file per commit + - Each patch file = one email message + - Subject = first line of commit message + - Body = remainder of message + the diff +- Why it’s nice + - Applying patches generated this way preserves commit info properly +- Option noted + - `-M` makes Git detect renames + +### 
Patch file structure (what maintainers/reviewers see) +- Email-like headers + - `From <commit SHA-1> Mon Sep 17 00:00:00 2001` + - `From: <author name and email>` + - `Date: <author date>` + - `Subject: [PATCH x/y] <commit summary line>` +- Commit message body text +- Separator + - `---` +- Patch begins + - `diff --git ...` +- Version footer (as shown in example) + +### Adding extra explanation without changing commit message +- You can edit patch files +- Place extra notes between: + - the `---` line + - and the `diff --git` line +- These notes + - are readable by developers + - are ignored by patch application + +### Sending patches without breaking formatting +- Copy/paste into email client can break whitespace/newlines +- Git-provided tools to send properly formatted patches + +#### Option A: IMAP (drafts workflow) with `git imap-send` +- Setup `~/.gitconfig` `[imap]` section (example values shown) + - `folder` (e.g., `[Gmail]/Drafts`) + - `host` (e.g., `imaps://imap.gmail.com`) + - `user` + - `pass` + - `port` (e.g., `993`) + - `sslverify = false` +- SSL note + - If IMAP server doesn’t use SSL + - last lines may be unnecessary + - host uses `imap://` not `imaps://` +- Send patches to Drafts folder + - `cat *.patch | git imap-send` +- Then in email client + - Set `To:` mailing list + - Possibly CC maintainer/area owner + - Send + +#### Option B: SMTP with `git send-email` +- Setup `~/.gitconfig` `[sendemail]` section (example values shown) + - `smtpencryption = tls` + - `smtpserver = smtp.gmail.com` + - `smtpuser` + - `smtpserverport = 587` +- Send patch files + - `git send-email *.patch` +- Interactive prompts noted + - “From” identity + - Recipients + - Message-ID / In-Reply-To for threading +- Output includes per-patch send logs and headers + +- Tip resource + - Configuration help + sandbox for trial patches: + - https://git-send-email.io + +## Contribution Summary (end of “Contributing” portion) +- Covered + - Multiple workflows (private vs public) + - How to handle merges in those workflows + - Commit hygiene + - whitespace 
checks + - logically separated commits + - strong commit messages + - Patch generation and emailing +- Transition + - Next: maintaining a project (integration/maintainer side) + +## Maintaining a Project (Integrator/Maintainer perspective) +### What “maintaining” involves +- Accepting and applying patches from email + - Often produced by `format-patch` +- Integrating changes from remote branches + - From repos you add as remotes +- Applies whether you + - maintain a canonical repository + - or help by verifying/approving patches +- Goal + - Accept work in a way that is clear for contributors + - Sustainable long-term + +### Working in Topic Branches (safe integration practice) +- Best practice + - Try new contributions in a temporary topic branch +- Why + - Easy to test and tweak + - Easy to abandon temporarily and return later +- Naming guidance + - Use descriptive theme-based names (e.g., `ruby_client`) + - Git maintainer convention: + - namespace: `sc/ruby_client` + - `sc` = contributor shorthand +- Create topic branch from `master` + - Create only: + - `git branch sc/ruby_client master` + - Create and switch immediately: + - `git checkout -b sc/ruby_client master` + +### Applying Patches from Email (two main tools) +- Two methods + - `git apply` + - `git am` + +#### Applying with `git apply` (for raw diffs) +- When to use + - Patch generated from `git diff` or generic Unix diff (not recommended if `format-patch` available) +- Apply a patch file + - `git apply /tmp/patch-ruby-client.patch` +- What it does + - Modifies files in working directory + - Similar to `patch -p1`, but: + - More paranoid (fewer fuzzy matches) + - Understands Git diff format adds/deletes/renames (patch tool may not) + - “Apply all or abort all” (atomic) + - Unlike `patch`, which can partially apply and leave a messy state +- Important limitation + - Does not create a commit + - You must stage and commit manually afterward +- Preflight check + - `git apply --check ` + - Behavior + - No 
output → should apply cleanly + - Non-zero exit status on failure → script-friendly + +#### Applying with `git am` (for `format-patch` / mbox) +- When to use (preferred) + - Contributor used `git format-patch` + - Patch includes author info and commit message +- Meaning / concept + - `am` = apply patches from a mailbox + - Reads mbox format (plain-text emails in one file) +- Apply a patch file generated by `format-patch` + - `git am 0001-limit-log-function.patch` +- What it does automatically + - Creates commits for you + - Uses email headers/body to populate: + - Author info: From + Date + - Commit message: Subject + body (before diff) + - Committer info becomes the applier + apply time +- Inspecting result (example command shown) + - `git log --pretty=fuller -1` + - Distinction highlighted + - Author vs Committer (applier) + +- If patch fails to apply cleanly + - Common causes noted + - Your branch diverged too far + - Patch depends on another patch not applied yet + - Failure behavior + - Stops and shows options: + - Continue after fixing: `git am --resolved` + - Skip patch: `git am --skip` + - Abort and restore original branch: `git am --abort` + - Adds conflict markers to files (like merge/rebase) + - Manual conflict workflow + - Fix file(s) + - Stage: + - `git add <file>` + - Continue: + - `git am --resolved` + +- Smarter conflict handling option: `-3` + - `git am -3 <patch-file>` + - What it does + - Attempts a three-way merge + - Caveat + - Doesn’t work if the base commit referenced by patch isn’t in your repo + - When it works well + - Patch based on a public commit you have + - Example behavior shown + - Can detect “Patch already applied” when appropriate + +- Interactive mode for patch series + - `git am -3 -i <mbox>` + - Stops at each patch and asks: + - yes / no / edit / view patch / accept all + - Useful when + - You have many patches saved + - You want to preview or skip already-applied patches + +### Checking Out Remote Branches (pulling history from contributor repos) +-
When to use + - Contributor provides: + - repository URL + - branch name containing their changes +- One-time setup + local testing + - Add remote: + - `git remote add jessica git://github.com/jessica/myproject.git` + - Fetch: + - `git fetch jessica` + - Checkout local branch from remote-tracking branch: + - `git checkout -b rubyclient jessica/ruby-client` +- Ongoing benefit + - If same contributor sends more branches + - you can fetch/checkout without re-adding remote +- Pros emphasized + - You get the full commit history + - You know where it’s based → proper three-way merges by default + - Avoid needing `-3` guesswork +- Cons / practicality + - Not efficient to maintain hundreds of remotes for occasional contributors + - For one-off patches, email may be easier + - Scripts/hosted services may change the trade-off + +- One-time pull without saving a remote + - `git pull <url>` + - Does not store the remote in your config + +### Determining What Is Introduced (reviewing a topic branch) +- Review commits unique to topic branch + - Exclude master commits: + - `git log contrib --not master` + - Equivalent idea to `master..contrib` +- Review changes per commit + - `git log -p ...` to append diffs + +- Review overall diff of what merging would introduce + - Pitfall + - `git diff master` can be misleading if histories diverged + - It compares tip snapshots and may make it look like topic removes master-only changes + - Correct intention + - Diff topic tip vs common ancestor with master + - Compute common ancestor explicitly + - `git merge-base contrib master` + - Then: + - `git diff <merge-base-sha>` + - or `git diff $(git merge-base contrib master)` + - Shorthand: triple-dot diff + - `git diff master...contrib` + - Shows only changes introduced on topic branch since divergence + +### Integrating Contributed Work (strategies) +#### Merging Workflows (merge-based integration) +- Simple merge-into-master workflow + - `master` contains stable code + - For each completed/verified topic branch +
- merge into `master` + - delete topic branch + - repeat + - Figures + - Figure 72: History with several topic branches (`ruby_client`, `php_client`) + - Figure 73: After merging topic branches + +- Two-phase merge cycle (master + develop) + - Two long-running branches + - `master` = only updated on stable releases + - `develop` = integration branch for new code + - Both pushed to public repository + - Process + - Merge topic branches into `develop` + - When ready to release + - tag release + - fast-forward `master` to `develop` + - Figures + - Figure 74: Before topic merge + - Figure 75: After topic merge (into develop) + - Figure 76: After project release (master fast-forward) + - User-facing implication + - Users can choose: + - `master` for stable builds + - `develop` for cutting-edge + +- Extension: add `integrate` branch + - `integrate` collects work together + - When stable + tests pass + - merge into `develop` + - After `develop` proves stable + - fast-forward `master` + +#### Large-Merging Workflows (Git project example) +- Git project long-running branches + - `master` + - `next` + - `seen` (formerly `pu` = proposed updates) + - `maint` (maintenance backports) +- Workflow + - New contributions collected as topic branches + - Topics evaluated + - Safe/ready → merge into `next` and push for wider testing + - Need work → merge into `seen` + - Totally stable → re-merge into `master` + - After master updates + - `next` and `seen` rebuilt from `master` + - Behavior noted + - `master` moves forward steadily + - `next` rebased occasionally + - `seen` rebased more often + - Topic branches removed after they reach `master` +- `maint` branch purpose + - Forked from last release for backports / maintenance releases +- Figures + - Figure 77: Managing many parallel contributed topic branches + - Figure 78: Merging topics into `next`/`seen` and re-merging to `master` +- Note + - Specialized; refer to Git Maintainer’s guide for full clarity + +#### Rebasing and 
Cherry-Picking Workflows (linear history preference) +- Rebase-based integration + - Maintainer rebases topic branch on top of current `master` (or `develop`) + - If successful + - fast-forward `master` + - Outcome + - Mostly linear history +- Cherry-pick-based integration + - Cherry-pick = reapply the patch from a single commit onto current branch + - Useful when + - You want only some commits from a topic branch + - Or topic branch contains only one commit + - Example + - Before: Figure 79 (commit `e43a6` on topic branch) + - Command: + - `git cherry-pick e43a6` + - After: Figure 80 (new commit SHA because applied at different time) + - After cherry-picking + - remove topic branch / drop unwanted commits + +### Rerere (Reuse Recorded Resolution) +- When it helps + - Lots of merges/rebases + - Long-lived topic branches +- Meaning + - “reuse recorded resolution” +- What it does + - Records successful conflict resolutions (pre/post images) + - Reapplies the same resolution automatically if conflict repeats +- Enable (recommended global) + - `git config --global rerere.enabled true` +- Interacting with rerere + - `git rerere` command + - With no args: + - attempts to match current conflicts to recorded resolutions + - (automatic if enabled) + - Subcommands mentioned + - show what will be recorded + - erase specific resolutions + - clear entire cache +- Reference + - Covered in more detail later in “Rerere” + +### Tagging Your Releases +- Purpose + - Mark releases so they can be recreated later +- Signed tagging example + - `git tag -s v1.5 -m 'my signed 1.5 tag'` + - Requires PGP key + passphrase +- Distributing public PGP key (for verifying signed tags) + - Problem + - Others need your public key to verify signatures + - Git project’s approach + - Store public key in repo as a blob + - Add a tag pointing directly to that blob + - Steps + 1. Find the key + - `gpg --list-keys` + 2. 
Export key and write to Git object database (blob) + - `gpg -a --export <key-id> | git hash-object -w --stdin` + - Output is the blob SHA-1 + 3. Tag that blob + - `git tag -a maintainer-pgp-pub <blob-sha>` + 4. Share tag(s) + - `git push --tags` + 5. Users import key from repo + - `git show maintainer-pgp-pub | gpg --import` + 6. Users verify signed tags using imported key + - Extra note + - Put verification instructions in tag message + - `git show <tag>` displays them + +### Generating a Build Number (human-readable commit identifier) +- Problem + - Git doesn’t provide monotonically increasing build numbers per commit +- Solution + - `git describe <commit>` +- Output format + - `<tag>-<commits-since-tag>-g<abbrev-sha>` + - `g` indicates Git +- Example + - `git describe master` → `v1.6.2-rc1-20-g8c5b85c` +- Behavior notes + - If commit itself is tagged + - Output is just the tag name + - Default requires annotated tags (`-a` or `-s`) + - Include lightweight tags with `--tags` + - Usability note + - Can use describe string with `git checkout` / `git show` + - But relies on abbreviated SHA → may become invalid if abbreviation length changes + - Example mentioned: Linux kernel increased abbrev length (8 → 10) for uniqueness + +### Preparing a Release (archives for non-Git users) +- Tool + - `git archive` +- Create tar.gz snapshot (example) + - `git archive master --prefix='project/' | gzip > $(git describe master).tar.gz` +- Create zip snapshot (example) + - `git archive master --prefix='project/' --format=zip > $(git describe master).zip` +- Resulting archive contents + - Latest snapshot + - Under a top-level directory prefix (e.g., `project/`) + +### The Shortlog (release notes / mailing list summary) +- Purpose + - Quick changelog summary since last release (or last email) +- Tool + - `git shortlog` +- Example + - `git shortlog --no-merges master --not v1.0.1` +- Output properties + - Groups commits by author + - Lists commit summaries + - Excludes merge commits with `--no-merges` + +## Chapter Summary (end) +- You should now be
comfortable + - Contributing using multiple distributed workflows + - Maintaining/integrating contributed work +- Next chapter preview + - GitHub (major Git hosting service) +``` \ No newline at end of file diff --git a/mindmap/Git Branching.md b/mindmap/Git Branching.md new file mode 100644 index 0000000..b984e3f --- /dev/null +++ b/mindmap/Git Branching.md @@ -0,0 +1,798 @@ +```markmap +# Git Branching + +## Why branching matters (and why Git is different) +- Branching (general VCS concept) + - Meaning: diverge from the main line of development + - Goal: keep working without disturbing the main line +- Traditional VCS branching (typical tradeoff) + - Often “expensive” + - May require creating a full new copy of the source directory + - Large projects → branching can take a long time +- Git branching “killer feature” + - Incredibly lightweight model + - Branch operations are (nearly) instantaneous + - Switching branches is typically just as fast + - Encourages frequent branching + merging (even multiple times per day) + - Mastering branching can significantly change how you develop + +## Branches in a Nutshell (how Git’s model works) +- Why you must understand Git’s storage model + - Branching is “just pointers” in Git, but that only makes sense once you know what commits are + - Reference: earlier concept “What is Git?” (snapshots, SHA-1, objects) + +### Git stores snapshots, not diffs/changesets +- Git’s core model + - Instead of storing a sequence of diffs, Git stores a sequence of snapshots + - Each commit represents the state of the project at that point + +### Commit objects: what a commit contains +- Commit object includes + - Pointer to the snapshot you committed (via a tree object) + - Metadata + - author name + email + - commit message + - Parent commit pointer(s) + - 0 parents → initial commit + - 1 parent → normal commit + - 2+ parents → merge commit (merging 2+ branches) + +### Example: first commit with 3 files (blobs + tree + commit) +- Scenario + - 
Working directory contains 3 files + - You stage all and commit +- Staging step (`git add …`) + - Example: + - `git add README test.rb LICENSE` + - Git computes a checksum (SHA-1) for each file version + - Git stores each file version as a **blob** object + - The staging area (index) records the blob checksums for what’s staged +- Commit step (`git commit …`) + - Example: + - `git commit -m "Initial commit"` + - Git checksums each directory (here: project root) and stores a **tree** object + - Tree object + - Lists directory contents + - Maps filenames → blob IDs + - (And subdirectories → subtree IDs) + - Git creates a **commit** object + - Contains metadata + pointer to the root tree +- Object count after this commit (in this example) + - 3 blobs (file contents) + - 1 tree (directory listing + blob references) + - 1 commit (metadata + pointer to root tree) + +### Commit history: parents create a graph +- Each new commit typically + - Points to a new snapshot (tree) + - Points to its direct parent commit (previous tip) +- Merge commits + - Have multiple parents + - Represent snapshots produced by merges + +### What a branch is (Git definition) +- Branch = lightweight movable pointer to a commit (the tip) +- Default starting branch name + - `master` (historical default name) + - Moves forward automatically as you commit on it + +#### Note: “master” is not special +- It’s identical to any other branch +- It’s common because + - `git init` creates it by default + - many repos never rename it + +### Creating a new branch (pointer creation only) +- Command + - `git branch testing` +- Effect + - Creates a new pointer named `testing` + - Points to the same commit you’re currently on + - Does **not** switch your working branch + +### HEAD: how Git tracks “current branch” +- HEAD in Git + - Special pointer to the local branch you currently have checked out +- Difference from other VCSs (conceptual) + - In Git, HEAD is a pointer to the current local branch (not just “latest 
revision” in a repo) + +### Seeing branch pointers in `git log` +- Useful visualization option + - `git log --oneline --decorate` +- What `--decorate` shows + - Labels like `HEAD -> master` + - Other branch pointers (e.g., `testing`) attached to commits + +### Switching branches (checkout) +- Switch to an existing branch + - `git checkout testing` +- Effect + - Moves HEAD to point to `testing` + - Your next commit will advance `testing` (because HEAD points to it) + +### Committing advances only the checked-out branch pointer +- If you commit while on `testing` + - `testing` pointer moves forward + - `master` pointer stays behind (unchanged) + +### Switching back updates pointers and your working directory +- Switch back + - `git checkout master` +- Checkout does two major things + - Moves HEAD to `master` + - Resets working directory to match the snapshot at `master`’s tip +- Result + - Your future work on `master` diverges from the commit you left behind on `testing` + +#### Note: `git log` doesn’t show all branches by default +- Default behavior + - `git log` shows the history reachable from the currently checked-out branch +- To see another branch’s history explicitly + - `git log testing` +- To see all branches + - `git log --all` (often paired with `--graph` and `--decorate`) + +#### Note: switching branches changes working directory files +- On branch switch, Git may + - add files + - remove files + - modify files +- Safety rule + - If Git can’t switch cleanly (because it would overwrite conflicting uncommitted changes), it will block the checkout + +### Divergent history and visualization +- When both branches get new commits after diverging + - History becomes a DAG with multiple “tips” +- View divergence clearly + - `git log --oneline --decorate --graph --all` + +### Why Git branches are cheap +- A branch is stored as a simple reference (a file) containing + - the 40-character SHA-1 of the commit it points to + - plus a newline → ~41 bytes written +- 
Consequences + - Create/delete branches instantly + - Switching is fast because it’s mostly pointer movement + updating working directory snapshot +- Contrast: older VCS branching + - Often implemented by copying the entire project directory + - Can take seconds/minutes depending on repo size +- Merge support benefit + - Git records parent pointers in commits + - Merge-base detection for merges is typically automatic and easy + +### Creating a branch and switching immediately +- Common pattern + - `git checkout -b <new-branch>` +- Git ≥ 2.23 alternative: `git switch` + - switch to existing: `git switch <branch>` + - create + switch: `git switch -c <new-branch>` (or `--create`) + - return to previous branch: `git switch -` + +## Basic Branching and Merging (realistic workflow) +- Example workflow goal: develop features while handling urgent production fixes +- High-level steps (website scenario) + - Work on site + - Create branch for a user story + - Work on that branch + - Urgent hotfix appears + - switch to production branch + - create hotfix branch + - test hotfix + - merge hotfix and deploy + - return to user story branch + +### Basic Branching example (issue branch + hotfix branch) +- Starting assumption + - You already have a few commits on `master` + +#### Create and work on a topic branch (issue #53) +- Create + switch + - `git checkout -b iss53` + - Shorthand for + - `git branch iss53` + - `git checkout iss53` +- Do work and commit + - edit `index.html` + - `git commit -a -m "Create new footer [issue 53]"` +- Result + - `iss53` advances (HEAD points to it) + +#### Interrupt with urgent hotfix (without mixing in feature work) +- Key rule before switching branches + - If working directory or staging area has uncommitted changes that would conflict, Git blocks switching + - Best practice: keep a clean working state when switching + - Mentioned workarounds (covered later): stashing, commit amending +- Switch back to production/stable branch + - `git checkout master` +- What you gain + -
working directory restored to `master` snapshot (pre-issue work) + - you can focus on hotfix cleanly + +#### Create and finish the hotfix +- Create + switch to hotfix branch + - `git checkout -b hotfix` +- Fix and commit + - edit `index.html` + - `git commit -a -m "Fix broken email address"` + +#### Merge hotfix into master (fast-forward) +- Merge steps + - `git checkout master` + - `git merge hotfix` +- Why it’s a “fast-forward” merge + - hotfix tip commit is directly ahead of master tip commit + - No divergence to reconcile + - Git simply moves the `master` pointer forward +- Deployment outcome + - master now points to a commit whose snapshot includes the hotfix + - you can deploy production fix + +#### Delete completed hotfix branch +- Delete (safe when merged) + - `git branch -d hotfix` +- Rationale + - master already contains the hotfix work + +#### Return to feature branch (iss53) and continue +- Switch back + - `git checkout iss53` +- Continue work and commit + - `git commit -a -m "Finish the new footer [issue 53]"` +- Important note: hotfix isn’t in `iss53` automatically + - Options if needed + - merge master into iss53: `git merge master` + - or wait until iss53 is merged back into master + +### Basic Merging (merge feature branch into master) +- When issue #53 is done + - `git checkout master` + - `git merge iss53` +- Why this merge differs from the hotfix merge + - histories diverged earlier + - master tip is not an ancestor of iss53 tip +- Git performs a three-way merge + - Inputs + - snapshot at master tip + - snapshot at iss53 tip + - snapshot at their common ancestor +- Output + - a new merged snapshot + - a new merge commit + - “special” because it has more than one parent +- Merge strategy note (as shown in output) + - merge made by the `recursive` strategy (typical default for two heads) + +#### Clean up merged branch +- Delete iss53 after merge + - `git branch -d iss53` + +### Basic Merge Conflicts (when Git cannot auto-merge) +- When conflicts 
occur + - both branches changed the same part of the same file differently +- What `git merge` does on conflict + - stops and reports conflict(s) + - does NOT create the merge commit yet + - requires manual resolution + +#### Identify unmerged paths +- Use + - `git status` +- Status shows + - you are in a merging state + - list of “unmerged paths” + - hints to: + - fix conflicts + - `git add` files to mark resolution + - then `git commit` to conclude merge + +#### Conflict markers inserted into files +- Git writes markers like + - `<<<<<<<` (start of conflict block) + - `=======` (separator) + - `>>>>>>>` (end of block) +- Meaning + - Top section = HEAD version (current branch at merge time, e.g., master) + - Bottom section = incoming branch version (e.g., iss53) + +#### Resolve and mark as resolved +- Manual resolution workflow + - edit file(s) + - choose one side or combine them + - remove all markers +- Mark resolution + - `git add ` for each conflicted file + - staging indicates conflict resolved in Git + +#### Using a merge tool (optional) +- Run + - `git mergetool` +- Behavior + - opens a visual merge tool + - helps walk through conflict resolution +- If not configured + - Git warns `merge.tool` isn’t configured + - offers possible tool choices (platform dependent) + - you can specify an alternative tool name + +#### Finalize the merge +- Verify state + - `git status` + - typically indicates “all conflicts fixed” but merge still in progress +- Conclude + - `git commit` +- Merge commit message details + - default message mentions merged branch + - often lists conflicts + - note in message references merge metadata (e.g., `.git/MERGE_HEAD`) + - you may edit message to explain how/why conflicts were resolved +- Reference for deeper conflict handling + - “Advanced Merging” (mentioned as later coverage) + +## Branch Management (everyday utilities) +- `git branch` does more than create/delete + - provides multiple views and filters of branch state + +### Listing 
branches +- `git branch` + - lists local branches + - `*` shows current branch (HEAD points here) + +### See last commit on each branch +- `git branch -v` + - shows branch tip commit SHA + message summary + +### Filter by merge status +- `git branch --merged` + - branches already merged into current branch + - usually safe to delete those (except the current `*` branch) +- `git branch --no-merged` + - branches not merged into current branch + - deletion safety + - `git branch -d ` fails if not fully merged + - `git branch -D ` forces deletion (discarding unmerged work) + +#### Note: merge-status filters are relative to a base +- Default base + - current branch (if no argument given) +- You can compare relative to a different branch without checking it out + - `git branch --no-merged master` + +## Changing a branch name (rename) +- Safety warning + - do not rename branches still used by other collaborators + - do not rename default branches (master/main/etc.) without reading next section + +### Rename locally +- `git branch --move bad-branch-name corrected-branch-name` +- Effect + - preserves history + - changes only your local ref name initially + +### Publish the renamed branch and set upstream +- `git push --set-upstream origin corrected-branch-name` +- Effect + - creates the new remote branch name + - configures tracking + +### Remove the old remote branch name +- `git push origin --delete bad-branch-name` +- Effect + - fully replaces the bad remote name with the corrected one + +### Verification +- `git branch --all` + - shows local branches and `remotes/origin/...` remote-tracking refs + +## Changing the master branch name (e.g., `master` → `main`) +- High-impact warning + - renaming default branch can break + - integrations/services + - helper utilities + - build/release scripts + - any references in code, configs, docs + - consult collaborators + - search/update all references to the old name + +### Local rename +- `git branch --move master main` +- Result + 
- local `master` ref no longer exists + - local `main` points to the same commit tip + +### Push and set upstream +- `git push --set-upstream origin main` +- Result + - remote now has `main` + - remote may still have `master` + - remote HEAD may still point to `origin/master` until host settings change + +### Migration checklist (must update external references) +- Dependent projects + - update code/config referencing old branch +- Test runner configs + - update any branch-name assumptions +- Build/release scripts + - update target branch names +- Repo host settings + - default branch + - merge rules / protections + - other branch-name-based settings +- Documentation + - update old references +- Pull requests + - close/merge/retarget PRs aimed at old branch + +### Delete old remote branch after transition +- `git push origin --delete master` + +## Branching Workflows (patterns enabled by lightweight branches) +- Goal + - choose a branching strategy that matches team/release needs +- Key enabler + - easy repeated three-way merges over time + +### Long-Running Branches (progressive stability) +- Concept + - keep multiple always-open branches for different stability levels + - merge “upwards” as code becomes stable +- Common pattern + - `master`: only stable/released (or release-candidate) code + - `develop` / `next`: integration/testing branch; can be unstable + - topic branches merged into develop/next for testing before master +- How to think about “stability” + - linear commit history view + - stable branches are “behind” (older, tested commits) + - bleeding-edge branches are “ahead” (newer, less proven commits) + - “silo” view + - commits graduate to more stable silos once fully tested +- Multi-level stability in large projects + - additional branches like `proposed` / `pu` (proposed updates) + - idea: not everything is ready for `next` or `master` immediately +- Note + - not required, but often helpful for large/complex projects + +### Topic Branches 
(short-lived branches) +- Definition + - branch created for a single feature/bugfix/experiment + - typically merged and deleted after completion +- Why Git makes this common + - branch creation/merging is cheap → can do it many times a day +- Benefits + - clean context switching (work isolated by topic) + - easier code review (topic’s commits grouped) + - flexible integration timing (minutes, days, months later) + - can merge in any order regardless of creation order +- Example topology from the chapter + - work on `master` + - branch `iss91` (issue work) + - branch `iss91v2` off `iss91` (alternate approach) + - return to `master` and continue other work + - branch `dumbidea` off `master` (experimental idea) + - outcome + - discard `iss91` if inferior + - merge `iss91v2` and `dumbidea` if chosen +- Reminder: local operations + - branching/merging is local-only until you fetch/push/pull +- Reference mention + - more workflow discussion later in “Distributed Git” + +## Remote Branches (remote references + remote-tracking branches) +### Remote references overview +- Remote repos contain references (pointers) to + - branches + - tags + - other refs +- Ways to inspect + - `git ls-remote <remote>` (full list of remote refs) + - `git remote show <remote>` (focus on remote branches + info) + +### Remote-tracking branches +- Definition + - local references that record the state of remote branches + - you can’t move them yourself + - Git updates them during network communication +- Naming + - `<remote>/<branch>` + - Examples + - `origin/master` + - `origin/iss53` +- Mental model + - bookmarks showing where a remote branch was last time you connected + +### Clone example (how origin/master appears) +- When cloning from a server + - Git names the remote `origin` by default + - downloads data + - creates `origin/master` (remote-tracking) + - creates your local `master` starting at same commit as origin’s master + +#### Note: “origin” is not special +- It’s just the default name created by `git clone` +- You
can rename the default remote at clone time + - `git clone -o booyah ...` → remote-tracking branch becomes `booyah/master` + +### Divergence between local and remote +- If you commit locally and someone else pushes to the remote + - histories diverge + - `origin/master` does not move until you communicate + +### Fetching updates remote-tracking branches +- `git fetch origin` + - contacts remote + - downloads objects you don’t have + - updates pointers like `origin/master` to newer commits + +### Multiple remotes +- Add another remote + - `git remote add teamone <url>` +- Fetch it + - `git fetch teamone` +- Possible outcome + - if teamone has only a subset of commits you already have from origin: + - fetch downloads no new objects + - still updates `teamone/master` pointer to match teamone’s master tip + +## Pushing (sharing branches) +### Why pushing is explicit +- Local branches do not automatically sync to remotes +- Benefit + - you can keep private local branches + - push only branches you intend to share/collaborate on + +### Push a branch +- Pattern + - `git push <remote> <branch>` +- Example + - `git push origin serverfix` +- What Git expands it to (conceptual) + - `refs/heads/serverfix:refs/heads/serverfix` +- Push local branch to a different remote branch name + - `git push origin serverfix:awesomebranch` + +### Authentication convenience (HTTPS) +- HTTPS push commonly prompts for username/password +- To avoid typing credentials repeatedly + - credential cache example: + - `git config --global credential.helper cache` + - reference mentioned: “Credential Storage” (for other options) + +### After someone else fetches +- Fetching a pushed branch + - `git fetch origin` +- Result + - creates/updates a remote-tracking ref (e.g., `origin/serverfix`) + - does NOT create a local editable branch automatically + +### Using fetched remote-tracking branch work +- Merge directly into current branch + - `git merge origin/serverfix` +- Create a local branch based on it (editable) and track it
+ - `git checkout -b serverfix origin/serverfix` + +## Tracking Branches (local branches that track upstream) +### Definitions +- Tracking branch + - local branch tied to a remote-tracking branch +- Upstream branch + - remote-tracking branch the local branch tracks + +### Why tracking matters +- On a tracking branch, `git pull` can automatically + - fetch from the right remote + - merge the right branch + +### How tracking branches are created +- Common creation form + - `git checkout -b /` +- Shorthand + - `git checkout --track origin/serverfix` +- Extra shortcut + - `git checkout serverfix` + - works if + - local `serverfix` doesn’t exist, and + - exactly one remote has `serverfix` +- Different local name than remote branch + - `git checkout -b sf origin/serverfix` + - local `sf` tracks `origin/serverfix` + +### Set or change upstream later +- `git branch -u origin/serverfix` + - also available as `--set-upstream-to` + +### Upstream shorthand in commands +- `@{upstream}` or `@{u}` + - references the upstream branch of the current branch +- Example + - `git merge @{u}` instead of `git merge origin/master` (when master tracks origin/master) + +### Inspect tracking status and ahead/behind +- `git branch -vv` + - shows local branches + - indicates upstream tracking target + - shows ahead/behind counts +- Interpreting counts + - ahead N → N local commits not pushed + - behind N → N remote commits not merged locally +- Cache caveat + - ahead/behind shown is from last fetch; command doesn’t contact server +- To refresh counts + - `git fetch --all; git branch -vv` + +## Pulling (fetch + merge convenience) +- `git fetch` + - downloads new data + - does not modify working directory + - leaves integration to you (merge/rebase) +- `git pull` + - in most cases = `fetch` immediately followed by `merge` + - uses tracking (upstream) info to pick remote + branch +- Guidance from the chapter + - explicit `fetch` + `merge` is often clearer than the “magic” of `pull` + +## Deleting 
Remote Branches +- When a remote branch is no longer needed + - merged into mainline/stable branch on the server +- Delete remote branch pointer + - `git push origin --delete serverfix` +- Effect + - removes the branch pointer on the server + - server may keep underlying objects until garbage collection + - accidental deletions can often be recovered before GC runs + +## Rebasing (the other integration strategy) +- Two main ways to integrate changes between branches + - `merge` + - `rebase` + +### The Basic Rebase (replaying commits) +- Starting situation + - branches diverged; each has unique commits +- Merge recap (already covered earlier) + - three-way merge of: + - tip snapshot A + - tip snapshot B + - common ancestor snapshot + - creates a new snapshot + merge commit +- Rebase concept + - take the patch introduced by commits on one branch + - reapply them on top of another branch’s tip +- Example commands + - `git checkout experiment` + - `git rebase master` +- Internal steps (conceptual) + - find common ancestor between current branch and target branch + - compute diffs for each commit on current branch since ancestor + - save diffs temporarily + - reset current branch to target tip + - apply diffs sequentially (creating new commits with new SHAs) +- After rebase + - integrate by fast-forward merge + - `git checkout master` + - `git merge experiment` +- Result comparison + - final snapshot content is the same as with merge + - history is different + - rebase → linear-looking history + - merge → preserves the true parallel shape +- Common use case (contributing workflow) + - rebase your work onto `origin/master` before submitting patches + - maintainer can integrate via fast-forward / clean apply +- Core conceptual distinction + - rebase: replay changes in order introduced + - merge: combine endpoints and record a merge + +### More Interesting Rebases (rebasing a branch off another topic branch) +- Scenario + - topic branch `server` created from master; 
commits added + - topic branch `client` created from `server`; commits added + - later additional commits added to `server` +- Goal + - ship client changes now (merge into master) + - delay server changes until tested +- Use `--onto` + - `git rebase --onto master server client` + - Meaning + - take commits on `client` that are not on `server` + - replay them as if `client` started from `master` +- Integrate client quickly + - `git checkout master` + - `git merge client` (fast-forward) +- Integrate server later without manual checkout + - `git rebase master server` + - checks out `server` and replays onto master + - `git checkout master` + - `git merge server` (fast-forward) +- Cleanup + - delete topic branches once integrated + - `git branch -d client` + - `git branch -d server` + +### The Perils of Rebasing (rewriting published history) +- The one-line rule + - Do not rebase commits that exist outside your repository and that people may have based work on +- Why rebasing public commits is dangerous + - rebase abandons existing commits and creates new ones + - new commits have different SHAs + - collaborators who based work on old SHAs must reconcile mismatched history +- Example failure pattern (from the chapter) + - you clone and do work + - someone else pushes a merge to the central server + - later they rebase their work and `push --force` (rewriting server history) + - you fetch new commits + - if you `git pull` normally, you may create a merge combining old + new lines + - can lead to duplicate-looking commits (same message/author/date) with different IDs + - pushing that back can reintroduce commits the other dev tried to eliminate +- Social consequence emphasized + - if you rewrite shared history, teammates will have to re-merge and untangle confusion + +### Rebase When You Rebase (recovering after a force-push) +- Problem after force-push + - determine which commits are uniquely yours vs rewritten copies +- Patch-id concept + - besides commit SHA-1, Git 
can compute a checksum based on the patch content (“patch-id”) +- How rebase helps + - rebasing onto the updated target can let Git: + - identify which commits are already represented (same patch) + - replay only the unique commits +- Example approach + - `git rebase teamone/master` +- What Git may compute during this recovery rebase (as described) + - determine commits unique to your branch + - exclude merge commits from replay + - detect commits that were rewritten but represent the same patch in the target + - apply remaining unique commits on top of the updated branch +- Limitation noted + - works best if rewritten commits are almost the same patch + - otherwise Git may not detect duplication and may reapply a similar patch (possibly failing) +- Convenience options + - `git pull --rebase` instead of normal pull + - or manual: `git fetch` then `git rebase <remote>/<branch>` + - configure default: + - `git config --global pull.rebase true` +- Safety guideline recap + - safe: rebase commits that never left your machine + - generally ok: rebase pushed commits if nobody based work on them + - risky: rebase publicly shared commits → coordinate + warn others to use `pull --rebase` + +### Rebase vs. 
Merge (choosing based on what “history” means) +- Two viewpoints on commit history + - History as a factual record + - commit history documents what actually happened + - rewriting is “lying” about events + - merge commits reflect real parallel work + - History as a curated story + - raw development includes missteps and dead ends + - before mainline, rewrite history to tell a clearer story + - tools mentioned: `rebase`, `filter-branch` +- Conclusion + - no universal best choice; depends on team/project +- Practical “best of both worlds” guideline + - rebase local changes before pushing (clean up) + - never rebase anything you’ve pushed somewhere shared/public + +## Summary (skills this chapter expects you to have now) +- Branch creation and switching + - create branches, move between them + - understand HEAD as “current branch pointer” +- Merging + - fast-forward merges + - three-way merges and merge commits (multiple parents) + - resolve conflicts (markers, `status`, `add`, `mergetool`, final `commit`) +- Branch management + - list branches and identify current branch + - inspect branch tips (`-v`) + - find merged/unmerged branches (`--merged`, `--no-merged`) + - delete safely (`-d`) or forcibly (`-D`) + - rename branches (local + remote cleanup) + - rename default branch (master/main) with ecosystem updates +- Collaboration with remotes + - remote-tracking branches, fetch/push/pull behaviors + - create tracking branches and set upstream + - delete remote branches +- Rebasing + - what rebase does and why it can make history linear + - advanced rebase (`--onto`) + - when rebasing is dangerous and how to mitigate with `pull --rebase` +- Next topic preview (mentioned) + - how to run your own Git repository-hosting server +``` \ No newline at end of file diff --git a/mindmap/Git Internals.md b/mindmap/Git Internals.md new file mode 100644 index 0000000..13e03c2 --- /dev/null +++ b/mindmap/Git Internals.md @@ -0,0 +1,667 @@ +```markmap +# Git Internals (Chapter 8) +## 
Why this chapter exists / positioning in the book +- Can be read early (curiosity) or late (after learning porcelain) +- Understanding internals helps explain *why* Git behaves as it does +- Tradeoff: powerful insight vs. potential complexity for beginners +- Core premise + - Git = **content-addressable filesystem** + **VCS user interface** layered on top +- Historical note + - Early Git (mostly pre-1.5) UI emphasized filesystem concepts → felt complex + - Modern Git UI refined; early “complex Git” stereotype lingers +- Chapter flow + - Content-addressable storage layer (objects) first + - Then transports (protocols) + - Then maintenance + recovery tasks + +## Plumbing and Porcelain +- Porcelain commands (high-level UX) + - Examples: `checkout`, `branch`, `remote`, … + - Most of the book focuses on these +- Plumbing commands (low-level toolkit) + - Designed to be chained (UNIX-style) or used from scripts/tools + - Used here to expose internals and demonstrate implementation + - Often not meant for humans to type frequently + +## The `.git` directory (what Git stores/manipulates) +- Created by `git init` +- Backups/clones + - Copying `.git/` elsewhere gives *nearly everything* needed +- Fresh repo typical contents + - `config` + - Project-specific configuration + - `description` + - Used by GitWeb only + - `HEAD` + - Points to current branch (or object in detached HEAD) + - `hooks/` + - Client/server hook scripts (covered elsewhere) + - `info/` + - Global excludes (patterns you don’t want in `.gitignore`) + - `objects/` + - Object database (content store) + - `refs/` + - Pointers into commits (branches, tags, remotes, …) + - `index` (not shown initially) + - Staging area data (created when needed) +- “Core” pieces emphasized here + - `objects/` — all stored content + - `refs/` — names/pointers into commit graph + - `HEAD` — what’s checked out + - `index` — staging area snapshot used to build trees/commits + +## Git Objects (content-addressable store) +### Concept: a 
key–value database +- Insert arbitrary data → receive a unique key → retrieve later +- Key is a checksum (SHA-1 in these examples) of: + - a header + the content (details later) + +### Creating a blob object with `git hash-object` +- What it does + - hashes content + - optionally writes object into `.git/objects/` + - returns the object id (40 hex chars = SHA-1) +- Key options + - `-w` — write object to object database + - `--stdin` — read content from stdin (otherwise expects a filename) +- Object storage layout on disk (loose objects) + - Path: `.git/objects/<first-2-chars>/<remaining-38-chars>` + - Directory name = first 2 chars of SHA-1 + - Filename = remaining 38 chars +- Inspecting an object + - `git cat-file -p <sha>` — pretty-print content (auto-detect type) + - `git cat-file -t <sha>` — print object type +- Blob objects + - store *only content* (no filename) + - example: versions of `test.txt` stored as different blobs + +### Retrieving content +- You can “recreate” a file from a blob by redirecting `cat-file` output + - `git cat-file -p <sha> > test.txt` +- Limitations of blobs alone + - Must remember SHA-1 per version + - No filenames or directory structure + +## Tree Objects (filenames + directories + grouping) +### What a tree is +- Stores a directory listing-like structure +- Entries contain + - mode + - type (`blob` or `tree`) + - SHA-1 of target object + - filename +- Conceptual model (simplified UNIX-like) + - tree ↔ directory entries + - blob ↔ file contents + +### Inspecting trees +- `git cat-file -p master^{tree}` + - shows top-level tree for the last commit on `master` + - example entries include blobs (files) and trees (subdirectories) +- Subtrees + - a directory entry points to another tree object +- Shell quoting pitfalls for `master^{tree}` + - Windows CMD: `^` is escape → use `master^^{tree}` + - PowerShell: quote braces → `git cat-file -p 'master^{tree}'` + - ZSH: `^` globbing → quote expression → `git cat-file -p "master^{tree}"` + +### Building trees manually (via the index) +- Normal 
Git behavior + - Creates trees from the staging area (index) +- Plumbing commands used + - `git update-index` + - manipulate index entries + - `--add` required if path not in index yet + - `--cacheinfo` used when content isn’t in working tree (already in DB) + - requires: ` ` + - valid file modes for blobs + - `100644` normal file + - `100755` executable + - `120000` symlink + - `git write-tree` + - writes current index to a tree object + - `git read-tree` + - reads a tree into index + - `--prefix=/` stages it as a subtree + +### Example sequence (three trees) +- Tree 1: `test.txt` v1 + - stage blob via `update-index --add --cacheinfo 100644 test.txt` + - `write-tree` → tree1 (contains `test.txt` → blob v1) +- Tree 2: `test.txt` v2 + `new.txt` + - update index to point `test.txt` to blob v2 + - add `new.txt` + - `write-tree` → tree2 (two file entries) +- Tree 3: include Tree 1 under `bak/` + - `read-tree --prefix=bak ` + - `write-tree` → tree3 + - tree3 contains + - `bak/` → tree1 + - `new.txt` → blob + - `test.txt` → blob v2 + +## Commit Objects (snapshots + history + metadata) +### Why commits exist +- Trees represent snapshots but: + - SHA-1s are not memorable + - need who/when/why metadata + - need parent links to form history + +### Creating commits with `git commit-tree` +- Inputs + - a tree SHA-1 (snapshot) + - optional parent commit SHA-1(s) + - message from stdin +- Commit object fields + - `tree ` + - `parent ` (none for first commit) + - `author ...` (from `user.name`, `user.email`, timestamp) + - `committer ...` (same source) + - blank line + - commit message +- Note about hashes in book + - commit hashes differ due to timestamps/author data; use your own + +### Example history +- Commit 1 points to tree1 (no parent) +- Commit 2 points to tree2, parent = commit1 +- Commit 3 points to tree3, parent = commit2 +- View history + - `git log --stat ` +- Key takeaway + - Porcelain `git add`/`git commit` do essentially: + - write blobs for changed content + - 
update index + - write tree(s) + - write commit referencing tree + parent + +## Object Storage (how objects are actually stored) +### Common storage recipe +- Each object stored as: + - header + content +- Header format + - `<type> <size>\0` + - type: `blob`, `tree`, `commit`, `tag` + - size: bytes in content + - null byte terminator +- Object id + - SHA-1 of (header + content) +- Compression + - zlib-compressed before writing to disk + +### Ruby walk-through (blob example) +- Build content string +- Build header (`"blob #{bytesize}\0"`) +- Concatenate and hash with SHA-1 + - matches `git hash-object` (use `echo -n` to avoid newline) +- Compress with zlib +- Write to `.git/objects/<first-2-chars>/<remaining-38-chars>` +- Validate with `git cat-file -p <sha>` + +## Git References (refs) — naming commits/objects +### What refs are +- Human-friendly names → files containing SHA-1s +- Stored under `.git/refs/` + - `refs/heads/` — branches + - `refs/tags/` — tags + - (later) `refs/remotes/` — remote-tracking refs + +### Creating/updating refs +- Direct edit possible but discouraged + - `echo <sha> > .git/refs/heads/master` +- Safer: `git update-ref` + - `git update-ref refs/heads/master <sha>` +- Branch meaning + - A branch is a ref that points to the tip commit of a line of work +- Example: create branch at older commit + - `git update-ref refs/heads/test <older-sha>` + - `git log test` shows only commits reachable from that ref + +## `HEAD` — what you have checked out +### Symbolic reference (usual case) +- `.git/HEAD` commonly contains + - `ref: refs/heads/<branch>` +- On checkout, Git updates `HEAD` to point at chosen branch ref +- Commit parent determination + - `git commit` uses commit pointed to by ref that `HEAD` references + +### Detached HEAD (special case) +- Sometimes `HEAD` contains a raw SHA-1 +- Happens when checking out + - a tag + - a commit + - a remote-tracking branch + +### Managing HEAD safely +- `git symbolic-ref HEAD` — read where HEAD points +- `git symbolic-ref HEAD refs/heads/test` — set symbolic HEAD +- Constraint + - 
cannot point outside `refs/` namespace + +## Tags (lightweight vs annotated) +### Tag object +- Fourth object type: `tag` +- Similar to commit object (tagger/date/message/pointer) +- Usually points to a commit, but can tag any object (blob/tree/commit) + +### Lightweight tags +- Just a ref under `refs/tags/` pointing directly to an object + - `git update-ref refs/tags/v1.0 ` +- Never moves (unlike branch tips) + +### Annotated tags +- Create a tag object and a ref that points to it + - `git tag -a v1.1 -m '...'` +- `.git/refs/tags/v1.1` contains SHA-1 of the *tag object* +- Tag object content includes + - `object ` + - `type ` + - `tag ` + - `tagger ...` + - message +- Examples mentioned + - Tagging a maintainer’s GPG key stored as a blob + - Kernel repo has an early tag pointing at an initial tree + +## Remotes (remote-tracking references) +### What they are +- Refs under `refs/remotes//...` +- Store last known state of remote branches after communicating + +### Example +- After `git remote add origin ...` and `git push origin master` + - `.git/refs/remotes/origin/master` stores last known remote SHA-1 + +### Key characteristics +- Read-only from user standpoint +- You can checkout one, but Git won’t set `HEAD` as symbolic ref to it +- They act as bookmarks managed by Git for remote state + +## Packfiles (space-efficient object storage) +### Loose objects vs packed objects +- Loose object: one zlib file per object +- Packfile: + - single `.pack` containing many objects + - `.idx` index mapping SHA-1 → offsets + +### When packing happens +- Automatically when: + - many loose objects + - many packfiles +- Manually via `git gc` +- Often during push to a server + +### Demonstration scenario (why deltas matter) +- Add large file (`repo.rb`, ~22K) and commit + - file stored as blob +- Modify it slightly and commit again + - creates a whole new blob + - two near-identical large blobs now exist + +### `git gc` effects +- Creates pack + index +- Removes many loose objects 
(reachable ones) +- Leaves dangling/unreachable blobs loose (not in pack) + +### Inspecting what’s packed +- `git verify-pack -v <pack>.idx` + - shows objects, sizes, offsets, delta bases +- Delta storage behavior shown + - newer version often stored in full + - older version stored as delta against newer + - optimized for fast access to most recent version +- Repacking + - can happen automatically + - can be triggered any time via `git gc` + +## Refspec (ref mapping rules for fetch/push) +### Where it appears +- `.git/config` remote section created by `git remote add` + - `fetch = +refs/heads/*:refs/remotes/origin/*` + +### Syntax +- `(+)?<src>:<dst>` + - optional `+` forces update even if not fast-forward + - `<src>`: refs on remote + - `<dst>`: local tracking refs + +### Default fetch behavior +- Fetch all remote branches (`refs/heads/*`) +- Track locally as `refs/remotes/origin/*` +- Equivalent references + - `origin/master` + - `remotes/origin/master` + - `refs/remotes/origin/master` + +### Custom fetch examples +- Fetch only master always + - `fetch = +refs/heads/master:refs/remotes/origin/master` +- One-time fetch to a different local name + - `git fetch origin master:refs/remotes/origin/mymaster` +- Multiple refspecs + - CLI or multiple `fetch =` lines in config +- Fast-forward enforcement and overrides + - non-FF rejected unless `+` used +- Partial globs (Git ≥ 2.6.0) + - `qa*` patterns for multiple branches +- Namespaces/directories for teams + - e.g., `refs/heads/qa/*` → `refs/remotes/origin/qa/*` + +## Pushing refspecs & deleting remote refs +### Pushing into a namespace +- Push local `master` to remote `qa/master` + - `git push origin master:refs/heads/qa/master` +- Configure default push mapping + - `push = refs/heads/master:refs/heads/qa/master` + +### Deleting remote references +- Old refspec deletion form + - `git push origin :topic` +- Newer explicit flag (Git ≥ 1.7.0) + - `git push origin --delete topic` + +### Note/limitation +- Refspecs can’t fetch from one repo and 
push to another (as a single refspec trick) + +## Transfer Protocols (moving data between repositories) +### Two major approaches +- Dumb protocol + - simple, HTTP read-only, no Git server-side logic + - inefficient, hard to secure/private; rarely used now +- Smart protocol + - Git-aware server process + - negotiates what data is needed + - supports pushes + +### Dumb protocol (HTTP) — conceptual clone walkthrough +- `git clone http://server/.git` +- Fetch refs list (requires server-generated metadata) + - `GET info/refs` + - generated by `update-server-info` (often via post-receive hook) +- Fetch HEAD to determine default branch + - `GET HEAD` → `ref: refs/heads/master` +- Walk objects starting from advertised commit SHA + - `GET objects//` for loose objects + - parse commit → learn `tree` + `parent` +- If tree object not found as loose (404) + - check alternates + - `GET objects/info/http-alternates` + - check available packfiles + - `GET objects/info/packs` + - `GET objects/pack/pack-....idx` + - `GET objects/pack/pack-....pack` +- Once required objects are fetched + - checkout working tree for branch pointed to by downloaded `HEAD` + +### Smart protocol — overview +- Upload (push): `send-pack` (client) ↔ `receive-pack` (server) +- Download (fetch/clone): `fetch-pack` (client) ↔ `upload-pack` (server) + +#### Uploading data (push) +- SSH transport + - client runs remote command (conceptually) + - `ssh ... 
"git-receive-pack '.git'"` + - server advertises + - current refs + SHA-1s + - capabilities appended on the first line after a NUL separator + - pkt-line framing + - each chunk begins with 4 hex chars = length (including those 4 chars) + - `0000` indicates end + - client sends per-ref updates + - ` ` + - all zeros on left = create ref + - all zeros on right = delete ref + - client sends a packfile of objects server lacks + - server replies success/failure + - e.g., `unpack ok` +- HTTP(S) transport + - discovery + - `GET .../info/refs?service=git-receive-pack` + - push + - `POST .../git-receive-pack` with update commands + packfile + - note: HTTP may wrap in chunked transfer encoding + +#### Downloading data (fetch/clone) +- SSH transport + - client runs remote command + - `ssh ... "git-upload-pack '.git'"` + - server advertises + - refs and capabilities + - `symref=HEAD:refs/heads/master` so client knows default branch + - negotiation + - client sends `want ` + - client sends `have ` + - client sends `done` to request packfile generation + - server returns packfile (optionally multiplexing progress via side-band) +- HTTP(S) transport + - discovery + - `GET .../info/refs?service=git-upload-pack` + - negotiation/data request + - `POST .../git-upload-pack` with want/have data + - response includes packfile + +### Protocols summary note +- Only the high-level handshake is covered +- Many capabilities/features (e.g., `multi_ack`, `side-band`) exist beyond this chapter’s scope + +## Maintenance and Data Recovery +### Maintenance (`gc`, packing, pruning) +- Auto maintenance + - Git may run `auto gc` occasionally + - Usually no-op unless thresholds exceeded +- What `git gc` does + - packs loose objects into packfiles + - consolidates packfiles + - removes unreachable objects older than a few months +- Trigger thresholds (approx) + - ~7000 loose objects + - >50 packfiles +- Config knobs + - `gc.auto` + - `gc.autopacklimit` +- Manual auto-gc run + - `git gc --auto` (often 
does nothing) + +### Packing refs into `packed-refs` +- Before gc: refs stored as many small files + - `.git/refs/heads/*`, `.git/refs/tags/*`, … +- After gc: moved for efficiency into `.git/packed-refs` + - format lines: ` ` + - annotated tags include a “peeled” line starting with `^` + - indicates the commit the tag ultimately points to +- Updating a ref after packing + - Git writes a new loose ref file under `.git/refs/...` + - doesn’t edit `packed-refs` +- Lookup behavior + - Git checks loose refs first, then `packed-refs` fallback + +### Data Recovery (finding lost commits) +#### Common loss causes +- force-delete a branch containing work you later want +- `git reset --hard` moving a branch tip back, abandoning newer commits + +#### Reflog-based recovery +- Reflog records where `HEAD` pointed whenever it changes + - commits, branch switches, resets + - also updated by `git update-ref` (reason to prefer it over manual ref edits) +- Useful commands + - `git reflog` — concise HEAD history + - `git log -g` — reflog shown as a log +- Recovery technique + - find lost commit SHA-1 in reflog + - create a ref/branch pointing to it + - `git branch recover-branch ` + +#### Recovery without reflog +- If reflog is missing (e.g., `.git/logs/` removed) +- Use integrity checker + - `git fsck --full` + - shows dangling/unreachable objects + - `dangling commit ` +- Recover similarly + - create a new branch ref pointing to the dangling commit + +### Removing objects (purging big files from history) +#### Problem statement +- Git clones fetch full history +- A huge file added once remains in history forever if reachable + - even if deleted next commit +- Especially painful in imported repos (SVN/Perforce) + +#### Strong warning +- Destructive: rewrites commit history (new commit IDs) +- Must coordinate contributors (rebase onto rewritten history) + +#### Workflow to locate and remove large objects +- Confirm repo size after packing + - `git gc` + - `git count-objects -v` (check 
`size-pack`) +- Find largest packed objects + - `git verify-pack -v .idx | sort -k 3 -n | tail -3` + - third field in output is object size +- Map blob SHA to filename + - `git rev-list --objects --all | grep ` +- Identify commits that touched the path + - `git log --oneline --branches -- ` +- Rewrite history to remove the file from every tree + - `git filter-branch --index-filter 'git rm --ignore-unmatch --cached ' -- ^..` + - `--index-filter` is fast (no full checkout per commit) + - `git rm --cached` removes from index/tree, not just working dir +- Remove pointers to old history + - `rm -Rf .git/refs/original` + - `rm -Rf .git/logs/` +- Repack/clean + - `git gc` + - optionally remove remaining loose objects + - `git prune --expire now` + +## Environment Variables (controlling Git behavior) +> Chapter note: not exhaustive; highlights the most useful + +### Global behavior +- `GIT_EXEC_PATH` + - where Git finds sub-programs (e.g., `git-commit`, `git-diff`) + - inspect via `git --exec-path` +- `HOME` + - where Git finds global config + - can be overridden for portable Git setups +- `PREFIX` + - system-wide config path: `$PREFIX/etc/gitconfig` +- `GIT_CONFIG_NOSYSTEM` + - disable system-wide config +- Output paging/editing + - `GIT_PAGER` (fallback `PAGER`) + - `GIT_EDITOR` (fallback `EDITOR`) + +### Repository locations +- `GIT_DIR` + - where `.git` directory is + - if unset, Git walks up directory tree searching +- `GIT_CEILING_DIRECTORIES` + - stops upward search early (useful for slow filesystems) +- `GIT_WORK_TREE` + - working tree root for non-bare repos +- `GIT_INDEX_FILE` + - alternate index path +- Object database + - `GIT_OBJECT_DIRECTORY` — override `.git/objects` + - `GIT_ALTERNATE_OBJECT_DIRECTORIES` + - colon-separated additional object stores (share objects across repos) + +### Pathspecs (path matching rules) +- Pathspecs used in `.gitignore` and CLI patterns (e.g., `git add *.c`) +- Wildcard behavior toggles + - `GIT_GLOB_PATHSPECS=1` — wildcards 
enabled (default) + - `GIT_NOGLOB_PATHSPECS=1` — wildcards literal (e.g., `*.c` matches file named `*.c`) +- Per-argument overrides + - prefix with `:(glob)` or `:(literal)` +- `GIT_LITERAL_PATHSPECS` + - disables wildcard matching and override prefixes +- `GIT_ICASE_PATHSPECS` + - case-insensitive pathspec matching + +### Committing (author/committer identity) +- Used primarily by `git-commit-tree` (then falls back to config) +- Author fields + - `GIT_AUTHOR_NAME` + - `GIT_AUTHOR_EMAIL` + - `GIT_AUTHOR_DATE` +- Committer fields + - `GIT_COMMITTER_NAME` + - `GIT_COMMITTER_EMAIL` + - `GIT_COMMITTER_DATE` +- `EMAIL` + - fallback email if `user.email` is unset + +### Networking (HTTP behavior) +- `GIT_CURL_VERBOSE` + - emit libcurl debug messages +- `GIT_SSL_NO_VERIFY` + - skip SSL cert verification (self-signed/setup scenarios) +- Low-speed abort settings + - `GIT_HTTP_LOW_SPEED_LIMIT` + - `GIT_HTTP_LOW_SPEED_TIME` + - override `http.lowSpeedLimit` / `http.lowSpeedTime` +- `GIT_HTTP_USER_AGENT` + - override user-agent string + +### Diffing and merging +- `GIT_DIFF_OPTS` + - only supports unified context count: `-u` / `--unified=` +- `GIT_EXTERNAL_DIFF` + - program invoked instead of built-in diff +- Batch diff metadata for external diff tool + - `GIT_DIFF_PATH_COUNTER` + - `GIT_DIFF_PATH_TOTAL` +- `GIT_MERGE_VERBOSITY` (recursive merge) + - 0: only errors + - 1: conflicts only + - 2: + file changes (default) + - 3: + skipped unchanged + - 4: + all processed paths + - 5+: deep debug + +### Debugging/tracing (observability) +- Output destinations + - `"true"`, `"1"`, `"2"` → stderr + - absolute path `/...` → write to file +- `GIT_TRACE` + - general tracing (alias expansion, sub-program exec) +- `GIT_TRACE_PACK_ACCESS` + - pack access tracing: packfile + offset +- `GIT_TRACE_PACKET` + - packet-level tracing for network operations +- `GIT_TRACE_PERFORMANCE` + - timing for each internal step/subcommand +- `GIT_TRACE_SETUP` + - shows discovered repo paths (`git_dir`, 
`worktree`, `cwd`, `prefix`, ...) + +### Miscellaneous +- `GIT_SSH` + - program used instead of `ssh` + - invoked as: `$GIT_SSH [user@]host [-p ] ` + - wrapper script often needed for extra args; `~/.ssh/config` may be easier +- `GIT_ASKPASS` + - program to prompt for credentials (returns answer on stdout) +- `GIT_NAMESPACE` + - namespaced refs (like `--namespace`), often server-side +- `GIT_FLUSH` + - stdout buffering + - `1` flush frequently; `0` buffer +- `GIT_REFLOG_ACTION` + - custom text written to reflog entries (action descriptor) + +## Summary (what you should now understand) +- Git internals = object database + refs + a UI on top +- Main object types + - blob (content), tree (directories), commit (history + metadata), tag (named pointer + metadata) +- Refs and `HEAD` provide human-friendly naming and current-state tracking +- Packfiles optimize storage through compression and deltas +- Refspecs control fetch/push mappings and enable namespaced workflows +- Transfer protocols + - dumb: simple HTTP reads (rare) + - smart: negotiated pack exchange (common) for fetch/push +- Maintenance/recovery tools + - `gc`, `packed-refs`, `reflog`, `fsck`, `filter-branch`, `prune` +- Environment variables provide control, portability, and deep debugging capabilities +``` \ No newline at end of file diff --git a/mindmap/Git Tools.md b/mindmap/Git Tools.md new file mode 100644 index 0000000..38ea19a --- /dev/null +++ b/mindmap/Git Tools.md @@ -0,0 +1,1122 @@ +```markmap +# Git Tools + +## Purpose / Context +- You already know day-to-day Git workflows + - track + commit files + - staging area + - topic branching + merging +- This chapter: powerful/advanced tools you might not use every day, but will eventually need + +## Revision Selection +- Git can refer to: + - a single commit + - a set of commits + - a range of commits +- References can be: + - hashes (full/short) + - branch names + - reflog entries + - ancestry expressions + - range expressions + +### Single Revisions 
+- Full SHA-1 + - 40-character commit hash (e.g., from `git log`) +- Short SHA-1 (abbreviated hash) + - Git accepts a prefix of the SHA-1 if: + - at least 4 characters + - unambiguous among all objects in the object database + - Inspect a commit (examples; any unique prefix works) + - `git show ` + - `git show ` + - Generate abbreviated commits in log output + - `git log --abbrev-commit --pretty=oneline` + - defaults to 7 characters; lengthens as needed to remain unique + - Practical uniqueness + - often 8–10 chars enough within a repo + - example note: very large repos still have unique prefixes (Linux kernel cited) +- Note: SHA-1 collision concerns (and Git’s direction) + - SHA-1 digest: 20 bytes / 160 bits + - Random collisions are astronomically unlikely + - 50% collision probability requires about 2^80 randomly-hashed objects + - probability formula cited: `p = (n(n-1)/2) * (1/2^160)` + - If a collision happened organically: + - Git would reuse the first object with that hash (you’d always get first object’s data) + - Deliberate, synthesized collisions are possible (e.g., shattered.io, Feb 2017) + - Git is moving toward SHA-256 as the default hash algorithm + - more resilient to collision attacks + - mitigation code exists, but cannot fully eliminate attacks +- Branch References + - If a commit is the tip of a branch, you can refer to it by branch name + - `git show ` + - equivalent to `git show ` + - Plumbing tool to resolve refs → SHA-1: `git rev-parse` + - example: `git rev-parse topic1` + - purpose: lower-level operations (not typical day-to-day), but useful for “what is this ref really?” +- Reflog Shortnames + - Git records a reflog (local history of where HEAD/refs have pointed) + - View reflog + - `git reflog` + - shows entries like `HEAD@{0}`, `HEAD@{1}`, … + - Refer to older values + - `git show HEAD@{5}` (the 5th prior HEAD value in reflog) + - Time-based reflog syntax + - `git show master@{yesterday}` + - Log-format reflog output + - `git log -g ` 
(e.g., `git log -g master`) + - Important properties / limitations + - reflog is **strictly local** + - not shared; differs from other clones + - freshly cloned repo starts with empty reflog (no local activity yet) + - retention is limited (typically a few months) + - time lookups only work while data remains in reflog + - Mental model + - reflog ≈ “shell history” for Git refs (personal/session-local) + - PowerShell gotcha: escaping braces `{ }` + - `git show HEAD@{0}` (won’t work) + - ``git show HEAD@`{0`}`` (OK) + - `git show "HEAD@{0}"` (OK) +- Ancestry References + - Caret `^` (parent selection) + - `ref^` = parent of `ref` + - example: `HEAD^` = parent of HEAD + - Windows cmd.exe gotcha: escaping `^` + - `git show "HEAD^"` or `git show HEAD^^` + - Selecting merge parents + - `ref^2` = second parent (merge commits only) + - first parent: branch you were on when merging (often `master`) + - second parent: branch being merged in (topic branch) + - Tilde `~` (first-parent traversal) + - `ref~` ≡ `ref^` (first parent) + - `ref~2` = first-parent-of-first-parent (grandparent) + - repeated tildes: `HEAD~~~` ≡ `HEAD~3` + - Combining ancestry operators + - example: `HEAD~3^2` = second parent of the commit found via `HEAD~3` (if that commit is a merge) + +### Commit Ranges +- Motivation / questions answered + - “What work is on this branch that hasn’t been merged into main?” + - “What am I about to push?” + - “What’s unique between two lines of development?” + +#### Double Dot (`A..B`) +- Meaning + - commits reachable from `B` **but not** reachable from `A` +- Example uses + - “what’s in experiment not in master?” + - `git log master..experiment` + - opposite direction (what’s in master not in experiment) + - `git log experiment..master` + - “what am I about to push?” + - `git log origin/master..HEAD` +- Omitted side defaults to `HEAD` + - `git log origin/master..` ≡ `git log origin/master..HEAD` + +#### Multiple Points (`^` / `--not`) +- Double-dot is shorthand for a 
common two-point case +- Equivalent forms + - `git log refA..refB` + - `git log ^refA refB` + - `git log refB --not refA` +- Advantage: can exclude multiple refs + - “reachable from refA or refB, but not from refC” + - `git log refA refB ^refC` + - `git log refA refB --not refC` + +#### Triple Dot (`A...B`) +- Meaning (symmetric difference) + - commits reachable from either `A` or `B` **but not both** +- Example + - `git log master...experiment` +- Often paired with `--left-right` + - `git log --left-right master...experiment` + - marks which side each commit is from (`<` vs `>`) + +## Interactive Staging +- Goal + - craft commits that contain only certain combinations/parts of changes + - split large messy changes into focused, reviewable commits + +### Interactive add mode +- Start + - `git add -i` / `git add --interactive` +- What it shows + - staged vs unstaged changes per path (like `git status`, but compact) +- Core commands menu (as shown) + - `s` status + - `u` update (stage files) + - `r` revert (unstage files) + - `a` add untracked + - `p` patch (stage hunks) + - `d` diff (review staged diff) + - `q` quit + - `h` help + +### Staging and unstaging files (interactive) +- Stage files + - `u` / `update` + - select by numbers (comma-separated) + - `*` indicates selected items + - press Enter on an empty prompt to stage everything selected +- Unstage files + - `r` / `revert` + - select paths to remove from index +- Review staged diff + - `d` / `diff` + - select file(s) to see + - comparable to `git diff --cached` + +### Staging patches (partial-file staging) +- Enter patch selection + - from interactive prompt: `p` / `patch` + - from command line: `git add -p` / `git add --patch` +- Git presents hunks and asks whether to stage each +- Hunk prompt options (as listed) + - `y` stage this hunk + - `n` do not stage this hunk + - `a` stage this and all remaining hunks in file + - `d` do not stage this hunk nor any remaining hunks in file + - `g` select a hunk to go to + 
- `/` search for a hunk matching a regex + - `j` leave this hunk undecided, go to next undecided hunk + - `J` leave this hunk undecided, go to next hunk + - `k` leave this hunk undecided, go to previous undecided hunk + - `K` leave this hunk undecided, go to previous hunk + - `s` split current hunk into smaller hunks + - `e` manually edit the current hunk + - `?` help +- Result + - a file can be partially staged (some staged, some unstaged) + - exit and `git commit` will commit staged parts only +- Patch mode appears in other commands too + - `git reset --patch` (partial unstage/reset) + - `git checkout --patch` (partial checkout/revert) + - `git stash save --patch` (stash parts; mentioned as further detail later) + +## Stashing and Cleaning + +### Stash: why and what it does +- Problem + - need to switch branches while work is half-done + - don’t want to commit unfinished work +- `git stash` saves: + - modified tracked files (working directory) + - staged changes (index) +- Stores changes on a stack; can reapply later (even on different branch) +- Note: migration to `git stash push` + - `git stash save` discussed as being deprecated in favor of `git stash push` + - key reason: `push` supports stashing selected pathspecs + +### Stashing your work (basic flow) +- Observe dirty state + - `git status` shows staged + unstaged changes +- Create stash + - `git stash` or `git stash push` + - working directory becomes clean +- List stashes + - `git stash list` (e.g., `stash@{0}`, `stash@{1}`, …) +- Apply stash + - most recent: `git stash apply` + - specific: `git stash apply stash@{2}` + - can apply on different branch + - conflicts possible if changes don’t apply cleanly +- Restore staged state too + - `git stash apply --index` +- Remove stashes + - drop by name: `git stash drop stash@{0}` + - apply + drop: `git stash pop` + +### Creative stashing (useful options) +- Keep staged changes in index + - `git stash --keep-index` + - stashes everything else, but leaves index 
intact +- Include untracked files + - `git stash -u` / `git stash --include-untracked` +- Include ignored files too + - `git stash --all` / `git stash -a` +- Patch stashing (stash some hunks, keep others) + - `git stash --patch` + - interactive hunk selection (prompt options include `y,n,q,a,d,/,e,?` per stash prompt) + +### Create a branch from a stash +- Use case + - stash is old; applying on current branch causes conflicts +- Command + - `git stash branch <new-branch>` +- Behavior + - creates a new branch at the commit you were on when stashing + - checks it out + - reapplies stash there + - drops stash if it applies successfully + +### Cleaning your working directory (`git clean`) +- Purpose + - remove untracked files/dirs (“cruft”) + - remove build artifacts for clean build +- Caution + - removes files not tracked by Git + - often no way to recover + - safer alternative when unsure: `git stash --all` +- Common usage + - preview only: `git clean -n` / `git clean --dry-run` + - remove untracked files + empty dirs: + - `git clean -f -d` + - `-f` required unless `clean.requireForce=false` +- Ignored files + - default: ignored files are NOT removed + - remove ignored too: `git clean -x` +- Interactive cleaning + - `git clean -x -i` + - interactive commands shown: + - clean + - filter by pattern + - select by numbers + - ask each + - quit + - help +- Quirk (nested Git repos) + - directories containing other Git repos may require extra force + - may need a second `-f` (e.g., `git clean -ffd`) + +## Signing Your Work (GPG) +- Git is cryptographically secure (hashing), but not foolproof for trust +- When consuming work from others, signing helps verify authorship/integrity + +### GPG setup +- List keys: `gpg --list-keys` +- Generate key: `gpg --gen-key` +- Configure Git signing key + - `git config --global user.signingkey <key-id>` + +### Signing tags +- Create signed tag + - `git tag -s <tag> -m '<message>'` (instead of `-a`) +- View signature + - `git show <tag>` +- Passphrase may be required to unlock key 
+ +### Verifying tags +- Verify signed tag + - `git tag -v <tag>` +- Requires signer’s public key in your keyring + - otherwise: “public key not found” / cannot verify + +### Signing commits +- Sign a commit (Git v1.7.9+) + - `git commit -S ...` +- View/check signatures + - `git log --show-signature -1` + - signature status in custom format: `git log --pretty="format:%h %G? %aN %s"` + - example statuses shown in chapter: + - `G` = good/valid signature + - `N` = no signature + +### Enforcing signatures in merges/pulls (Git v1.8.3+) +- Verify signatures during merge/pull + - `git merge --verify-signatures <branch>` + - merge fails if commits are unsigned/untrusted +- Verify + sign resulting merge commit + - `git merge --verify-signatures -S <branch>` + +### Workflow consideration: everyone must sign +- If you require signing: + - ensure all contributors know how to do it + - otherwise you’ll spend time helping rewrite commits to signed versions +- Understand GPG + benefits before adopting as standard workflow + +## Searching + +### `git grep` (search code) +- Search targets + - working directory (default) + - committed trees + - index (staging area) +- Useful options + - line numbers: `-n` / `--line-number` + - per-file match counts: `-c` / `--count` + - show enclosing function: `-p` / `--show-function` +- Complex queries + - combine expressions on same line with `--and` + - multiple `-e <pattern>` expressions + - can search historical trees (example in chapter uses tag `v1.8.0`) + - output readability helpers: `--break`, `--heading` +- Advantages vs external tools (grep/ack) + - very fast + - can search any Git tree, not just current checkout + +### `git log` searching (by content) +- Find when a string was introduced/changed (diff-based search) +- Pickaxe (`-S`) + - `git log -S <string>` + - shows commits that changed number of occurrences of the string +- Regex diff search (`-G`) + - `git log -G <regex>` + +### Line history search (`git log -L`) +- Show history of a function/line range as patches +- Function 
syntax + - `git log -L :<funcname>:<file>` +- Regex/range alternatives if function parsing fails + - regex + end pattern: `git log -L '/<regex>/',/^}/:<file>` + - explicit line ranges or a single line number also supported (noted) + +## Rewriting History + +### Why rewrite history (locally) +- Make history reflect logical, reviewable changes + - reorder commits + - rewrite messages + - modify commit contents + - squash/split commits + - remove commits entirely +- Cardinal rule + - don’t push until you’re happy + - rewriting pushed history confuses collaborators (treat pushed as final unless strong reason) + +### Changing the last commit +- Amend message and/or content + - `git commit --amend` +- Common patterns + - fix message only: amend, edit message in editor + - fix content: + - edit files → stage changes → `git commit --amend` +- Caution + - amending changes SHA-1 (like small rebase) + - don’t amend a commit that’s already pushed +- Tip: avoid editor if message unchanged + - `git commit --amend --no-edit` +- Note: commit message may need updating if content changes substantially + +### Changing multiple commit messages (interactive rebase) +- Tool: interactive rebase + - `git rebase -i <parent-of-oldest-commit>` +- Choosing the range + - specify the parent of the oldest commit you want to edit + - example for last 3 commits: `git rebase -i HEAD~3` +- Warning + - rewrites every commit in selected range and descendants + - avoid rewriting commits already pushed +- Interactive todo list properties + - commits listed oldest→newest (reverse of typical `git log` output) + - Git replays commits top→bottom +- Todo commands shown + - `pick` use commit + - `reword` use commit, edit message + - `edit` stop for amending + - `squash` meld into previous, edit combined message + - `fixup` like squash, discard this commit message + - `exec` run shell command + - `break` stop here, continue later with `git rebase --continue` + - `drop` remove commit + - `label` label current HEAD + - `reset` reset HEAD to a label + - `merge` 
create merge commit (with options to keep/reword message) + - notes shown in template: + - lines can be re-ordered + - removing a line loses that commit + - removing everything aborts rebase + - empty commits commented out + +### Reordering commits (interactive rebase) +- Reorder lines in todo file +- Save + exit + - Git rewinds branch to parent of the todo range + - replays commits in new order + +### Removing commits (interactive rebase) +- Delete the line or mark it `drop` +- Effects + - rewriting a commit rewrites all following commits’ SHA-1s + - can cause conflicts if later commits depend on removed one + +### Squashing commits +- Mark subsequent commits as `squash` (or `fixup`) +- Git: + - applies changes together + - opens editor to combine messages (except fixup discards message) +- Outcome + - a single commit replacing multiple commits + +### Splitting a commit +- Mark target commit as `edit` in rebase todo +- When rebase stops at that commit + - undo that commit while keeping its changes in the working tree (unstaged) + - `git reset HEAD^` (mixed reset) + - stage and commit portions into multiple commits + - continue rebase + - `git rebase --continue` +- Reminder + - rewriting changes SHA-1s of affected commit and subsequent commits + - avoid if any are pushed + +### Aborting or recovering +- Abort in-progress rebase + - `git rebase --abort` +- After completing, recover earlier state + - use reflog (chapter references this as Data Recovery elsewhere) + +### The nuclear option: `filter-branch` +- Purpose + - scriptable rewriting across many commits + - examples: + - remove file from every commit + - change email globally + - rewrite project root from subdirectory +- Warning callout + - `git filter-branch` has many pitfalls; no longer recommended + - prefer `git-filter-repo` (Python) for most use cases +- Common uses shown + - Remove a file from every commit (e.g., secrets/huge binaries) + - `git filter-branch --tree-filter 'rm -f passwords.txt' HEAD` + - 
`--tree-filter` runs command after each checkout; recommits results + - can use patterns (e.g., `rm -f *~`) + - to run across all branches: `--all` + - recommended: test in a branch, then hard-reset master if satisfied + - Make a subdirectory the new root + - `git filter-branch --subdirectory-filter trunk HEAD` + - auto-removes commits that didn’t affect the subdirectory + - Change email addresses globally (only yours) + - `git filter-branch --commit-filter '