Add comprehensive GitHub guide covering account setup, collaboration, and API usage

This commit is contained in:
2026-02-05 21:00:02 -06:00
parent e8f4979ff3
commit ea00a7be82
13 changed files with 8495 additions and 79 deletions

2
.gitignore vendored
View File

@@ -1,3 +1,3 @@
.env
books/*
mindmap/*
# mindmap/*

View File

@@ -2,33 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"id": "9f587bf1",
"metadata": {},
"outputs": [],
"source": [
"import { load } from \"jsr:@std/dotenv\";\n",
"import OpenAI from \"jsr:@openai/openai\";\n",
"\n",
"const _ = await load({ export: true });\n",
"const openai = new OpenAI();"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4650126c",
"metadata": {},
"outputs": [],
"source": [
"const safeName = (s: string) => s.replace(/[<>:\"/\\\\|?*\\x00-\\x1F]/g, \"_\").trim();\n",
"const bookName =\n",
" \"Nmap Network Scanning Official Nmap Project Guide to Network Discovery and Security Scanning\";\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 2,
"id": "ae701b32",
"metadata": {},
"outputs": [],
@@ -59,7 +33,66 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "9f587bf1",
"metadata": {},
"outputs": [],
"source": [
"import { load } from \"jsr:@std/dotenv\";\n",
"import OpenAI from \"jsr:@openai/openai\";\n",
"\n",
"// flush all previous env vars\n",
"/* for (const key of Object.keys(Deno.env.toObject())) {\n",
" Deno.env.delete(key);\n",
"} */\n",
"const _ = await load({ export: true });\n",
"const openai = new OpenAI();"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4650126c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"progit\n"
]
}
],
"source": [
"const safeName = (s: string) => s.replace(/[<>:\"/\\\\|?*\\x00-\\x1F]/g, \"_\").trim();\n",
"const bookName = Deno.env.get(\"BOOK_NAME\");\n",
"\n",
"if (!bookName) {\n",
" throw new Error(\"BOOK_NAME environment variable is not set\");\n",
"}\n",
"\n",
"console.log(bookName);\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "211e589f",
"metadata": {},
"outputs": [],
"source": [
"// BOOKMARK_LEVEL=1\n",
"// BOOK_FROM=10\n",
"// BOOK_TO=-1\n",
"\n",
"Deno.env.set(\"BOOKMARK_LEVEL\", \"1\");\n",
"Deno.env.set(\"BOOK_FROM\", \"10\");\n",
"Deno.env.set(\"BOOK_TO\", \"-1\");"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8bee369d",
"metadata": {},
"outputs": [
@@ -68,51 +101,33 @@
"output_type": "stream",
"text": [
"[\n",
" { title: \"01\", level: 1, page: 27, endpage: 50 },\n",
" { title: \"02\", level: 1, page: 51, endpage: 71 },\n",
" { title: \"03\", level: 1, page: 72, endpage: 97 },\n",
" { title: \"04\", level: 1, page: 98, endpage: 119 },\n",
" { title: \"05\", level: 1, page: 120, endpage: 158 },\n",
" { title: \"06\", level: 1, page: 159, endpage: 168 },\n",
" { title: \"07\", level: 1, page: 169, endpage: 193 },\n",
" { title: \"08\", level: 1, page: 194, endpage: 227 },\n",
" { title: \"09\", level: 1, page: 228, endpage: 278 },\n",
" { title: \"10\", level: 1, page: 279, endpage: 315 },\n",
" { title: \"11\", level: 1, page: 316, endpage: 326 },\n",
" { title: \"12\", level: 1, page: 327, endpage: 356 },\n",
" { title: \"13\", level: 1, page: 357, endpage: 382 },\n",
" { title: \"14\", level: 1, page: 383, endpage: 391 },\n",
" { title: \"15\", level: 1, page: 392, endpage: 392 }\n",
"]\n",
"Processing chapter: 01\n",
"Processing chapter: 02\n",
"Processing chapter: 03\n",
"Processing chapter: 04\n",
"Processing chapter: 05\n",
"Processing chapter: 06\n",
"Processing chapter: 07\n",
"Processing chapter: 08\n",
"Processing chapter: 09\n",
"Processing chapter: 10\n",
"Processing chapter: 11\n",
"Processing chapter: 12\n",
"Processing chapter: 13\n",
"Processing chapter: 14\n",
"Processing chapter: 15\n",
"Finished processing chapter: 15\n",
"Finished processing chapter: 11\n",
"Finished processing chapter: 14\n",
"Finished processing chapter: 05\n",
"Finished processing chapter: 07\n",
"Finished processing chapter: 06\n",
"Finished processing chapter: 12\n",
"Finished processing chapter: 04\n",
"Finished processing chapter: 03\n",
"Finished processing chapter: 01\n",
"Finished processing chapter: 13\n",
"Finished processing chapter: 09\n",
"Finished processing chapter: 08\n",
"Finished processing chapter: 02\n"
" { title: \"Git Branching\", level: 1, page: 69, endpage: 110 },\n",
" { title: \"Git on the Server\", level: 1, page: 111, endpage: 131 },\n",
" { title: \"Distributed Git\", level: 1, page: 132, endpage: 172 },\n",
" { title: \"GitHub\", level: 1, page: 173, endpage: 225 },\n",
" { title: \"Git Tools\", level: 1, page: 226, endpage: 348 },\n",
" { title: \"Customizing Git\", level: 1, page: 349, endpage: 380 },\n",
" { title: \"Git and Other Systems\", level: 1, page: 381, endpage: 436 },\n",
" { title: \"Git Internals\", level: 1, page: 437, endpage: 476 },\n",
" {\n",
" title: \"Appendix A: Git in Other Environments\",\n",
" level: 1,\n",
" page: 477,\n",
" endpage: 489\n",
" },\n",
" {\n",
" title: \"Appendix B: Embedding Git in your Applications\",\n",
" level: 1,\n",
" page: 490,\n",
" endpage: 501\n",
" },\n",
" {\n",
" title: \"Appendix C: Git Commands\",\n",
" level: 1,\n",
" page: 502,\n",
" endpage: 502\n",
" }\n",
"]\n"
]
}
],
@@ -133,7 +148,7 @@
" (res) => new TextDecoder().decode(res.stdout),\n",
").then((data) => {\n",
" const lines = data.split(\"\\n\");\n",
" let bookmarks = [];\n",
" let bookmarks: Bookmark[] = [];\n",
" let currentBookmark: Partial<Bookmark> | null = null;\n",
"\n",
" for (const line of lines) {\n",
@@ -164,7 +179,12 @@
" bookmarks.push(currentBookmark as Bookmark);\n",
" }\n",
"\n",
" bookmarks = bookmarks.filter((b) => b.level === 1).slice(8);\n",
" bookmarks = bookmarks.filter((b) =>\n",
" b.level === parseInt(Deno.env.get(\"BOOKMARK_LEVEL\") ?? \"1\")\n",
" ).slice(\n",
" parseInt(Deno.env.get(\"BOOK_FROM\") ?? \"0\"),\n",
" parseInt(Deno.env.get(\"BOOK_TO\") ?? String(bookmarks.length)),\n",
" );\n",
"\n",
" for (let i = 0; i < bookmarks.length; i++) {\n",
" const current = bookmarks[i] as Bookmark;\n",
@@ -178,8 +198,71 @@
"\n",
" return bookmarks;\n",
"});\n",
"console.log(bookmarks);\n",
"\n",
"if (!bookmarks || bookmarks.length === 0) {\n",
" throw new Error(\"No bookmarks found in the PDF.\");\n",
"} /* else if (bookmarks.length > ordinals.length) {\n",
" throw new Error(\n",
" `Not enough ordinals for the number of chapters: ${bookmarks.length} chapters but only ${ordinals.length} ordinals.`,\n",
" );\n",
"} */\n",
"\n",
"console.log(bookmarks);\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6029d4ac",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing chapter: Appendix C: Git Commands at resp_04d1e841c5c4dfc2006984fd06079c81949afb817229705523\n",
"Finished processing chapter: Appendix C: Git Commands\n",
"Processing chapter: Customizing Git at resp_0ee8a5d1ba6d78a6006984fd06313c819087c4177391a50101\n",
"Finished processing chapter: Customizing Git\n",
"Processing chapter: Appendix A: Git in Other Environments at resp_00ee07fd3ffdee22006984fd069c7c8193bf192277fb80f4bc\n",
"Finished processing chapter: Appendix A: Git in Other Environments\n",
"Processing chapter: Git Internals at resp_0d312b5627d2306a006984fd0660c081939ebc561caf071506\n",
"Finished processing chapter: Git Internals\n",
"Processing chapter: Git on the Server at resp_07581a2e7f9be083006984fd05d32c8190b87cf1e8d7d68a5c\n",
"Finished processing chapter: Git on the Server\n",
"Processing chapter: GitHub at resp_0e4a9edfb24ed6a1006984fd06722081909497d0c7e35bfb1d\n",
"Finished processing chapter: GitHub\n",
"Processing chapter: Appendix B: Embedding Git in your Applications at resp_079044470442fcc0006984fd05fc1881978cacbd612d6e2f30\n",
"Finished processing chapter: Appendix B: Embedding Git in your Applications\n",
"Processing chapter: Distributed Git at resp_0947b861b710eb8a006984fd06918881968e0e1f70b418cc88\n",
"Finished processing chapter: Distributed Git\n",
"Processing chapter: Git Tools at resp_01ec7c1b665f5b12006984fd06b9048196a56f1e376843b55a\n",
"Finished processing chapter: Git Tools\n",
"Processing chapter: Git and Other Systems at resp_0e3a4b42775ae33f006984fd061cf8819389817080ff57b78f\n",
"Finished processing chapter: Git and Other Systems\n",
"Processing chapter: Git Branching at resp_0bb2c9b2b1435efc006984fd0641d08194b3fd55b5e3b3b432\n",
"Finished processing chapter: Git Branching\n"
]
},
{
"data": {
"text/plain": [
"[\n",
" \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n",
" \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n",
" \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n",
" \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n",
" \u001b[90mundefined\u001b[39m, \u001b[90mundefined\u001b[39m,\n",
" \u001b[90mundefined\u001b[39m\n",
"]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"const promises = [];\n",
"for (const [idx, ch] of bookmarks.entries()) {\n",
" async function processChapter(idx: number, title: string, file: File) {\n",
@@ -215,7 +298,7 @@
"\n",
" const mindMapContent = response.output_text;\n",
" await Deno.writeTextFile(\n",
" `./mindmaps/${safeName(title)}.md`,\n",
" `./mindmap/${safeName(title)}.md`,\n",
" mindMapContent,\n",
" );\n",
"\n",
@@ -266,7 +349,7 @@
{
"data": {
"text/plain": [
"{ object: \u001b[32m\"file\"\u001b[39m, deleted: \u001b[33mtrue\u001b[39m, id: \u001b[32m\"file-X2CJi3gozhJniBURG2qfsm\"\u001b[39m }"
"{ object: \u001b[32m\"file\"\u001b[39m, deleted: \u001b[33mtrue\u001b[39m, id: \u001b[32m\"file-CxDoeyfgNLkuizajP6EHas\"\u001b[39m }"
]
},
"execution_count": 5,

View File

@@ -0,0 +1,495 @@
```markmap
# Appendix A: Git in Other Environments
## Purpose / framing
- You've learned Git at the command line
- Work with local files
- Connect repositories over a network
- Collaborate effectively
- Git is often part of a larger ecosystem
- Terminal isn't always the best interface
- Goal of this appendix
- Survey other environments where Git is useful
- Show how other applications work alongside Git
## Graphical Interfaces (GUIs)
- Git's native environment: the terminal
- New features appear there first
- Full power is available at the command line
- Why GUIs exist / when they help
- Plain text isn't best for all tasks
- Visual representations can be essential
- Some users prefer point-and-click workflows
- Tools differ by intended workflow
- Some clients expose only a curated subset of Git
- To support a specific way of working the author considers effective
- “Better” is workflow-dependent
- Tools are more/less fit for purpose, not universally better
- Key constraint
- GUIs can't do anything the command line client can't
- Command line = most power and control
## gitk and git-gui (bundled with Git)
### gitk (graphical history viewer)
- What it is
- GUI history viewer
- Think: powerful GUI shell over
- `git log`
- `git grep`
- Best used for
- Finding something that happened in the past
- Visualizing project history
- How to start
- In a Git repo directory
- `cd <repo>`
- Run:
- `gitk [git log options]`
- Options
- Accepts many command-line options
- Most are passed through to underlying `git log`
- Especially useful:
- `--all`
- Show commits reachable from any ref (not just `HEAD`)
- Interface overview (Figure 151: The gitk history viewer)
- Top area (graph; similar to `git log --graph`)
- Dot = commit
- Lines = parent relationships
- Refs = colored boxes
- Yellow dot = `HEAD`
- Red dot = changes not yet a commit
- Bottom area (selected commit)
- Left = comments + patch
- Right = summary view
- Middle controls
- Searching history controls
### git-gui (commit crafting tool)
- What it is
- Primarily a tool for crafting commits
- How to start
- Run:
- `git gui`
- Interface overview (Figure 152: The git-gui commit tool)
- Left: index (staging area)
- Unstaged changes: top
- Staged changes: bottom
- Move entire files between states
- Click file icons
- Select a file for viewing
- Click file name
- Top right: diff view
- Shows changes for the selected file
- Stage granular changes
- Stage individual hunks (or lines) by right-clicking
- Bottom right: message + action area
- Enter commit message
- Click “Commit”
- Similar to `git commit`
- Amend last commit workflow
- Select “Amend” radio button
- Updates “Staged Changes” with contents of last commit
- Then
- Stage/unstage changes
- Alter commit message
- Click “Commit” again
- Replaces old commit with a new one
### Tool style classification
- `gitk` and `git-gui` are **task-oriented tools**
- Tailored to one purpose each
- `gitk`: viewing/searching history
- `git-gui`: creating/amending commits
- Omit features not needed for that task
## GitHub for macOS and Windows
- What these clients are
- Two **workflow-oriented** Git clients
- One for Windows
- One for macOS
- Workflow-oriented meaning
- Focus on a curated set of commonly used features
- Avoid exposing all of Git's functionality
- Similarity across platforms
- Designed to look and work very much alike
- Treated as a single product here
- Not a full product rundown
- They have their own documentation
- Focus here: “changes” view tour (main work area)
### “Changes” view: main areas
- Left: repositories list (tracked by client)
- Add repo via “+” icon
- Clone
- Attach local repository
- Center: commit-input area
- Enter commit message
- Select which files to include
- Commit history placement differs
- Windows: directly below
- macOS: separate tab
- Right: diff view
- Shows working directory changes
- Or changes included in selected commit
- Top-right: “Sync” button
- Primary network interaction method
- Important note
- No GitHub account required
- Designed to highlight GitHub service/workflow
- Still works with
- Any repository
- Any Git host (network operations)
### Installation
- Download locations
- GitHub for Windows: <https://windows.github.com>
- GitHub for macOS: <https://mac.github.com>
- First run behavior
- Walks through first-time Git setup
- Configure name
- Configure email address
- Sets sane defaults for common config options
- Credential caches
- CRLF behavior
- Update model
- “Evergreen”
- Updates downloaded/installed in background while app is open
- Bundled Git
- Likely no need to manually update Git separately
- Windows addition
- Includes shortcut to launch PowerShell with Posh-git (covered later)
- Add repositories
- Shows list of GitHub repos you have access to
- Clone in one step
- Add existing local repo
- Drag directory from
- Finder (macOS)
- Windows Explorer (Windows)
- Added to repository list on the left
### Recommended Workflow (“GitHub Flow”)
- Covered elsewhere in more detail (“The GitHub Flow”), gist here
- (a) Commit to a branch
- (b) Sync with a remote repository fairly regularly
- Branch management differences
- macOS
- “Create Branch” button at top of window (Figure 155)
- Windows
- Type new branch name in branch-switching widget (Figure 156)
- Making commits
- Make changes in working directory
- Client shows changed files
- Enter commit message
- Select files to include
- Click “Commit”
- Shortcut: `Ctrl-Enter` (Windows) or `⌘-Enter` (macOS)
- Network interaction via “Sync”
- Git internally separates
- push / fetch / merge / rebase
- GitHub clients collapse these into one multi-step feature
- Clicking “Sync” does
1. `git pull --rebase`
- If fails due to merge conflict
- Fall back to `git pull --no-rebase`
2. `git push`
- Rationale
- Common network sequence in this style
- Squashing into one command saves time
### GitHub client summary (fit and tradeoffs)
- Strengths
- Well-suited to intended workflow
- Developers and non-developers can collaborate within minutes
- Best practices baked into the tools
- When to choose something else
- Workflow differs
- Want more control over
- How network operations are done
- When network operations are done
- Recommendation
- Use another client or the command line
## Other GUIs (general landscape)
- Many graphical Git clients exist
- From specialized single-purpose tools
- To apps that try to expose everything Git can do
- Where to find them
- Official Git site curated list: <https://git-scm.com/downloads/guis>
- More comprehensive list (Git wiki):
- <https://git.wiki.kernel.org/index.php/Interfaces,_frontends,_and_tools#Graphical_Interfaces>
## Git in Visual Studio
- Built-in Git tooling
- Starting with Visual Studio 2019 version 16.8
- Supported Git functionality
- Create or clone a repository
- Open and browse repository history
- Create and checkout branches and tags
- Stash, stage, and commit changes
- Fetch, pull, push, or sync commits
- Merge and rebase branches
- Resolve merge conflicts
- View diffs
- “…and more!”
- Next step
- Read official documentation
## Git in Visual Studio Code
- Built-in Git support
- Requirement
- Git version 2.0.0 (or newer) installed
- Main features
- Diff in the gutter for the file you are editing
- Git Status Bar (lower left)
- Current branch
- Dirty indicators
- Incoming commits
- Outgoing commits
- Common operations inside the editor
- Initialize a repository
- Clone a repository
- Create branches and tags
- Stage and commit changes
- Push/pull/sync with a remote branch
- Resolve merge conflicts
- View diffs
- GitHub Pull Requests support (extension)
- <https://marketplace.visualstudio.com/items?itemName=GitHub.vscode-pull-request-github>
- Official documentation
- <https://code.visualstudio.com/Docs/editor/versioncontrol>
## Git in JetBrains IDEs (IntelliJ / PyCharm / WebStorm / PhpStorm / RubyMine / others)
- Git Integration plugin
- Ships with JetBrains IDEs
- Provides
- Dedicated IDE view for Git
- Dedicated IDE view for GitHub Pull Requests
- Example: Version Control ToolWindow (Figure 157)
- Dependency
- Relies on command-line `git`
- Requires `git` to be installed
- Official documentation
- <https://www.jetbrains.com/help/idea/using-git-integration.html>
## Git in Sublime Text
- Availability
- From version 3.2 onwards
- Features
- Sidebar shows git status with a badge/icon
- `.gitignore`-listed files/folders are faded in sidebar
- Status bar shows
- Current git branch
- How many modifications you have made
- Gutter markers show all changes to a file
- Partial integration with Sublime Merge
- Use some Sublime Merge git client functionality from within Sublime Text
- Requires Sublime Merge installed
- Sublime Merge: <https://www.sublimemerge.com/>
- Official documentation
- <https://www.sublimetext.com/docs/3/git_integration.html>
## Git in Bash
- Motivation
- Use shell features to make Git friendlier in Bash
- Note
- Git ships with plugins for several shells
- Not enabled by default
### Enable tab completion (`git-completion.bash`)
- Get completion script matching your Git version
- Check version: `git version`
- In Git source for that release
- `git checkout tags/vX.Y.Z` (match your installed version)
- Copy file:
- `contrib/completion/git-completion.bash`
- Place somewhere handy (example)
- Home directory
- Enable in `~/.bashrc`
- Add:
- `. ~/git-completion.bash`
- Use completion
- Inside a Git repository, type
- `git chec<tab>`
- Auto-completes to `git checkout`
- Completion coverage
- Git subcommands
- Command-line parameters
- Remotes and ref names (where appropriate)
### Customize prompt (`git-prompt.sh`)
- Motivation
- Show Git info in prompt
- Current branch
- Working directory status
- Setup
- Copy file from Git source:
- `contrib/completion/git-prompt.sh`
- Place in home directory
- Add to `~/.bashrc`
- `. ~/git-prompt.sh`
- `export GIT_PS1_SHOWDIRTYSTATE=1`
- `export PS1='\w$(__git_ps1 " (%s)")\$ '`
- Meaning of prompt pieces
- `\w`
- Print current working directory
- `\$`
- Print `$` part of the prompt
- `__git_ps1 " (%s)"`
- Calls function provided by `git-prompt.sh`
- Uses formatting argument `" (%s)"`
- Result
- Prompt shows Git context inside Git-controlled projects (Figure 158)
- Further info
- Both scripts have documentation
- Inspect `git-completion.bash`
- Inspect `git-prompt.sh`
## Git in Zsh
### Tab completion
- Enable
- Add to `~/.zshrc`
- `autoload -Uz compinit && compinit`
- Interface (more powerful than Bash)
- Example: `git che<tab>`
- Shows options with descriptions, e.g.
- `check-attr` — display gitattributes information
- `check-ref-format` — ensure a reference name is well formed
- `checkout` — checkout branch or paths to working tree
- `checkout-index` — copy files from index to working directory
- `cherry` — find commits not merged upstream
- `cherry-pick` — apply changes introduced by an existing commit
- Ambiguous completions
- Not just listed; include helpful descriptions
- Navigate list by repeatedly hitting tab
- Completion scope
- Git commands and arguments
- Refs/remotes and other repo-internal names
- Filenames and standard Zsh completions
### Prompt integration with `vcs_info`
- Built-in framework
- `vcs_info` provides VCS information for prompts
- Configure branch name in right prompt
- Add to `~/.zshrc`
- `autoload -Uz vcs_info`
- `precmd_vcs_info() { vcs_info }`
- `precmd_functions+=( precmd_vcs_info )`
- `setopt prompt_subst`
- `RPROMPT='${vcs_info_msg_0_}'`
- `# PROMPT='${vcs_info_msg_0_}%# '`
- `zstyle ':vcs_info:git:*' formats '%b'`
- Result
- Branch shown on right side inside Git repos (Figure 159)
- Left-side prompt also possible
- Uncomment the `PROMPT=...` line
- Documentation
- `vcs_info` docs in `zshcontrib(1)`
- Online:
- <http://zsh.sourceforge.net/Doc/Release/User-Contributions.html#Version-Control-Information>
### Alternative: Git's `git-prompt.sh`
- Option
- Use Git's `git-prompt.sh` instead of `vcs_info`
- Compatibility
- Works with Bash and Zsh
- Reference
- <https://github.com/git/git/blob/master/contrib/completion/git-prompt.sh>
### Zsh framework: oh-my-zsh
- What it is
- A framework to enhance Zsh
- Location
- <https://github.com/robbyrussell/oh-my-zsh>
- Git-related value
- Powerful git tab completion via plugin system
- Many prompt “themes” showing version-control data
- Example theme shown (Figure 160)
## Git in PowerShell
### Why / what
- Limitation
- `cmd.exe` isn't really capable of a customized Git experience
- If using PowerShell
- `posh-git` provides
- Powerful tab completion
- Enhanced prompt (repository status awareness)
- Works with PowerShell Core on Linux/macOS too
- Project: <https://github.com/dahlbyk/posh-git>
- Example shown (Figure 161)
### Installation
#### Prerequisites (Windows only): ExecutionPolicy
- Requirement
- Set local `ExecutionPolicy` to `RemoteSigned`
- Anything except `Undefined` and `Restricted`
- `AllSigned` vs `RemoteSigned`
- `AllSigned`
- Local scripts (your own) also need digital signatures
- `RemoteSigned`
- Only scripts with `ZoneIdentifier` = Internet (downloaded from web) need signatures
- Others do not
- Scope guidance
- Admin / all users
- `-Scope LocalMachine`
- Normal user / no admin rights
- `-Scope CurrentUser`
- References
- Scopes: <https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_scopes>
- ExecutionPolicy: <https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.security/set-executionpolicy>
- Set `RemoteSigned` for all users
- `Set-ExecutionPolicy -Scope LocalMachine -ExecutionPolicy RemoteSigned -Force`
#### Install via PowerShell Gallery
- When available
- PowerShell 5+, or PowerShell 4 with PackageManagement installed
- Reference
- <https://docs.microsoft.com/en-us/powershell/scripting/gallery/overview>
- Install commands (CurrentUser)
- `Install-Module posh-git -Scope CurrentUser -Force`
- `Install-Module posh-git -Scope CurrentUser -AllowPrerelease -Force`
- Newer beta with PowerShell Core support
- Install for all users
- Use `-Scope AllUsers`
- Run from elevated PowerShell console
- If install fails due to `PowerShellGet` error
- Run first:
- `Install-Module PowerShellGet -Force -SkipPublisherCheck`
- Then retry
- Reason given
- Built-in Windows PowerShell modules are signed with a different publishment certificate
#### Update PowerShell prompt (load automatically)
- Import now
- `Import-Module posh-git`
- Auto-import every time PowerShell starts
- `Add-PoshGitToProfile -AllHosts`
- Adds import statement to `$profile` script
- Note
- Multiple `$profile` scripts exist
- e.g., console profile vs ISE profile
#### Install from source
- Download a release
- <https://github.com/dahlbyk/posh-git/releases>
- Uncompress
- Import module via full path to `posh-git.psd1`
- `Import-Module <path-to-uncompress-folder>\src\posh-git.psd1`
- Add to profile
- `Add-PoshGitToProfile -AllHosts`
- Effect
- Adds proper line to `profile.ps1`
- Active next time PowerShell opens
#### Further references (prompt meaning & customization)
- Git status summary information
- <https://github.com/dahlbyk/posh-git/blob/master/README.md#git-status-summary-information>
- Prompt customization variables
- <https://github.com/dahlbyk/posh-git/blob/master/README.md#customization-variables>
## Appendix summary
- You can harness Git from within everyday tools (GUIs, IDEs, editors, shells)
- You can also access Git repositories from your own programs
```

View File

@@ -0,0 +1,695 @@
# Appendix B: Embedding Git in your Applications
## Why embed / integrate Git
- Target audience for integration
- Developer-focused applications
- likely benefit from integration with source control
- Non-developer applications
- example: document editors
- can benefit from version-control features
- Why Git specifically
- Git's model works very well for many different scenarios
## Two main integration options
- Option A: spawn a shell and call the `git` command-line program
- Option B: embed a Git library into your application
- This appendix covers
- command-line integration
- several of the most popular embeddable Git libraries
## Command-line Git (calling the `git` CLI)
- What it is
- spawn a shell process
- use the Git command-line tool to do the work
- Benefits
- canonical behavior
- all of Git's features are supported
- fairly easy to implement
- most runtime environments can invoke a process with command-line arguments
- Downsides
- Output is plain text
- you must parse Git's output to read progress/results
- Git's output format can change occasionally
- parsing can be inefficient and error-prone
- Lack of error recovery
- if repository is corrupted
- or user has malformed configuration value
- Git may refuse to perform many operations
- Process management complexity
- must maintain a shell environment in a separate process
- coordinating many processes can be challenging
- especially if multiple processes may access the same repository
## Libgit2
- What it is
- dependency-free implementation of Git
- focus: a nice API for use within other programs
- website: https://libgit2.org
### Libgit2 C API (whirlwind tour)
- Example flow shown
- Open a repository
- `git_repository *repo;`
- `int error = git_repository_open(&repo, "/path/to/repository");`
- Dereference `HEAD` to a commit
- `git_object *head_commit;`
- `error = git_revparse_single(&head_commit, repo, "HEAD^{commit}");`
- `git_commit *commit = (git_commit*)head_commit;`
- Print commit properties
- `printf("%s", git_commit_message(commit));`
- `const git_signature *author = git_commit_author(commit);`
- `printf("%s <%s>\n", author->name, author->email);`
- `const git_oid *tree_id = git_commit_tree_id(commit);`
- Cleanup
- `git_commit_free(commit);`
- `git_repository_free(repo);`
- Repository opening details
- `git_repository` type
- handle to a repository with an in-memory cache
- `git_repository_open`
- simplest method when you know exact path to working directory or `.git` folder
- other APIs mentioned
- `git_repository_open_ext`
- includes options for searching
- `git_clone` (and friends)
- make a local clone of a remote repository
- `git_repository_init`
- create an entirely new repository
- Dereferencing `HEAD` details
- rev-parse usage
- uses rev-parse syntax
- reference: “see Branch References for more on this”
- return type
- `git_revparse_single` returns a `git_object*`
- represents something that exists in the repository's Git object database
- `git_object` is a “parent” type for several object kinds
- child types share the same memory layout as `git_object`
- safe to cast to the correct “child” type when appropriate
- cast safety note in this example
- `git_object_type(commit)` would return `GIT_OBJ_COMMIT`
- therefore it's safe to cast to `git_commit*`
- Commit property access details
- message
- `git_commit_message(commit)`
- author signature
- `git_commit_author(commit)` returns `const git_signature *`
- fields shown
- `author->name`
- `author->email`
- tree id
- `git_commit_tree_id(commit)` returns a `const git_oid *`
- `git_oid`
- Libgit2 representation for a SHA-1 hash
### Patterns illustrated by the Libgit2 C sample
- Error-code style
- pattern: declare pointer, pass its address into a Libgit2 call
- return value: integer error code
- `0` = success
- `< 0` = error
- Memory / ownership rules
- if Libgit2 populates a pointer for you
- you must free it
- if Libgit2 returns a `const` pointer
- you don't free it
- it becomes invalid when the owning object is freed
- Practical note
- “Writing C is a bit painful.”
### Language bindings (Libgit2 ecosystem)
- Implication of “writing C is painful”
- you're unlikely to write C when using Libgit2
- there are language-specific bindings that make integration easier
#### Ruby bindings: Rugged
- Name: Rugged
- URL: https://github.com/libgit2/rugged
- Example equivalent to the C code
- `repo = Rugged::Repository.new('path/to/repository')`
- `commit = repo.head.target`
- `puts commit.message`
- `puts "#{commit.author[:name]} <#{commit.author[:email]}>" `
- `tree = commit.tree`
- Why it's “less cluttered”
- error handling
- Rugged uses exceptions
- examples mentioned: `ConfigError`, `ObjectError`
- resource management
- no explicit freeing
- Ruby is garbage-collected
- Example: crafting a commit from scratch (Rugged)
- Code sequence shown (with numbered markers)
- ① create a new blob
- `blob_id = repo.write("Blob contents", :blob) ①`
- work with index
- `index = repo.index`
- `index.read_tree(repo.head.target.tree)`
- ② add a new file entry
- `index.add(:path => 'newfile.txt', :oid => blob_id) ②`
- build a signature hash
- `sig = {`
- ` :email => "bob@example.com",`
- ` :name => "Bob User",`
- ` :time => Time.now,`
- `}`
- create the commit (with parameters)
- `commit_id = Rugged::Commit.create(repo,`
- ` :tree => index.write_tree(repo), ③`
- ` :author => sig,`
- ` :committer => sig, ④`
- ` :message => "Add newfile.txt", ⑤`
- ` :parents => repo.empty? ? [] : [ repo.head.target ].compact, ⑥`
- ` :update_ref => 'HEAD', ⑦`
- `)`
- ⑧ look up the created commit object
- `commit = repo.lookup(commit_id) ⑧`
- Meaning of each numbered step (①–⑧)
- ① Create a new blob
- contains the contents of a new file
- ② Populate index and add file
- populate index with head commit's tree
- add the new file at path `newfile.txt`
- ③ Create a new tree in the ODB
- uses it for the new commit
- ④ Author and committer fields
- same signature used for both
- ⑤ Commit message
- `"Add newfile.txt"`
- ⑥ Parents
- when creating a commit, you must specify parents
- uses the tip of `HEAD` for the single parent
- handles empty repository case
- ⑦ Update a ref (optional)
- Rugged (and Libgit2) can optionally update a reference when making a commit
- here it updates `HEAD`
- ⑧ Return value / lookup
- the return value is the SHA-1 hash of the new commit object
- you can use it to get a `Commit` object
- Performance note
- Ruby code is clean
- Libgit2 does heavy lifting → runs pretty fast
- Pointer to later section
- “If you're not a rubyist, we touch on some other bindings in Other Bindings.”
## Advanced Functionality (Libgit2)
- Out-of-core-Git capabilities
- Libgit2 has capabilities outside the scope of core Git
- Example capability: pluggability
- can provide custom “backends” for several operation types
- enables storage in a different way than stock Git
- backend types mentioned
- configuration
- ref storage
- object database
- “among other things”
### Custom backend example: object database (ODB)
- Example source
- from Libgit2 backend examples
- URL: https://github.com/libgit2/libgit2-backends
- Setup shown (with numbered markers)
- ① create ODB “frontend”
- `git_odb *odb;`
- `int error = git_odb_new(&odb); ①`
- meaning: initialize empty ODB frontend container for backends
- ② initialize custom backend
- `git_odb_backend *my_backend;`
- `error = git_odb_backend_mine(&my_backend, /*…*/); ②`
- ③ add backend to frontend
- `error = git_odb_add_backend(odb, my_backend, 1); ③`
- open a repository
- `git_repository *repo;`
- `error = git_repository_open(&repo, "some-path");`
- ④ set repository to use custom ODB
- `error = git_repository_set_odb(repo, odb); ④`
- meaning: repo uses this ODB to look up objects
- Note about the example’s error handling
- errors are captured but not handled
- “We hope your code is better than ours.”
### Implementing `git_odb_backend_mine`
- What it is
- constructor for your own ODB implementation
- Requirement
- fill in the `git_odb_backend` structure properly
- Example struct layout shown
- `typedef struct {`
- ` git_odb_backend parent;`
- ` // Some other stuff`
- ` void *custom_context;`
- `} my_backend_struct;`
- Subtle memory-layout constraint
- `my_backend_struct`’s first member must be a `git_odb_backend` structure
- ensures Libgit2 sees the memory layout it expects
- Flexibility
- the rest of the struct is arbitrary
- can be as large or small as needed
- Example initialization function responsibilities shown
- allocate
- `backend = calloc(1, sizeof (my_backend_struct));`
- set custom context
- `backend->custom_context = …;`
- fill supported function pointers in `parent`
- `backend->parent.read = &my_backend__read;`
- `backend->parent.read_prefix = &my_backend__read_prefix;`
- `backend->parent.read_header = &my_backend__read_header;`
- `// …`
- return it through output parameter
- `*backend_out = (git_odb_backend *) backend;`
- return success constant
- `return GIT_SUCCESS;`
- Where to find full signatures
- Libgit2 source file:
- `include/git2/sys/odb_backend.h`
- which signatures to implement depends on use case
## Other Bindings (Libgit2)
- Breadth
- bindings exist for many languages
- Section purpose
- show small examples using a few more complete bindings packages (as of writing)
- Other languages mentioned as having libraries (various maturity)
- C++
- Go
- Node.js
- Erlang
- JVM
- Official collection of bindings
- browse repos: https://github.com/libgit2
- Common goal for the code in this section
- return the commit message from the commit eventually pointed to by `HEAD`
- “sort of like `git log -1`”
### LibGit2Sharp
- For
- .NET or Mono applications
- URL
- https://github.com/libgit2/libgit2sharp
- Characteristics
- bindings written in C#
- wraps raw Libgit2 calls with native-feeling CLR APIs
- Example program (single expression)
- `new Repository(@"C:\path\to\repo").Head.Tip.Message;`
- Desktop Windows note
- NuGet package available to get started quickly
### objective-git
- Platform context
- Apple platform
- likely using Objective-C as implementation language
- URL
- https://github.com/libgit2/objective-git
- Example program outline
- initialize repo
- `GTRepository *repo =`
- ` [[GTRepository alloc] initWithURL:[NSURL fileURLWithPath: @"/path/to/repo"]`
- `error:NULL];`
- retrieve commit message
- `NSString *msg = [[[repo headReferenceWithError:NULL] resolvedTarget] message];`
- Swift note
- objective-git is fully interoperable with Swift
### pygit2
- What it is
- Python bindings for Libgit2
- URL
- https://www.pygit2.org
- Example program (chained calls)
- `pygit2.Repository("/path/to/repo") # open repository`
- `.head # get the current branch`
- `.peel(pygit2.Commit) # walk down to the commit`
- `.message # read the message`
## Further Reading (Libgit2)
- Scope note
- full treatment of Libgit2 capabilities is outside the scope of the book
- Libgit2 resources
- API documentation: https://libgit2.github.com/libgit2
- guides: https://libgit2.github.com/docs
- Other bindings
- check bundled README and tests
- often have small tutorials and pointers to further reading
## JGit
- Purpose
- use Git from within a Java program
- What it is
- fully featured Git library called JGit
- relatively full-featured implementation of Git written natively in Java
- widely used in the Java community
- under the Eclipse umbrella
- Home
- https://www.eclipse.org/jgit/
### Getting Set Up (JGit)
- Multiple ways to connect project to JGit
- Easiest path: Maven
- add dependency snippet to `<dependencies>` in `pom.xml`
- `<dependency>`
- ` <groupId>org.eclipse.jgit</groupId>`
- ` <artifactId>org.eclipse.jgit</artifactId>`
- ` <version>3.5.0.201409260305-r</version>`
- `</dependency>`
- version note
- likely advanced by the time you read this
- check updates:
- https://mvnrepository.com/artifact/org.eclipse.jgit/org.eclipse.jgit
- result
- Maven automatically acquires and uses the JGit libraries you need
- Manual dependency management
- pre-built binaries
- https://www.eclipse.org/jgit/download
- compile/run examples
- `javac -cp .:org.eclipse.jgit-3.5.0.201409260305-r.jar App.java`
- `java -cp .:org.eclipse.jgit-3.5.0.201409260305-r.jar App`
### Plumbing (JGit)
- Two levels of API
- plumbing
- porcelain
- Terminology source: Git itself
- porcelain APIs
- friendly front-end for common user-level actions
- like what a normal user would use the Git command-line tool for
- plumbing APIs
- interact with low-level repository objects directly
#### Starting point: `Repository`
- Starting point for most JGit sessions
- class: `Repository`
- Creating/opening a filesystem-based repository
- note: JGit also allows other storage models
- Create new repository
- `Repository newlyCreatedRepo = FileRepositoryBuilder.create(new File("/tmp/new_repo/.git"));`
- `newlyCreatedRepo.create();`
- Open existing repository
- `Repository existingRepo = new FileRepositoryBuilder()`
- `.setGitDir(new File("my_repo/.git"))`
- `.build();`
#### `FileRepositoryBuilder` (finding repositories)
- Builder style
- fluent API
- Helps locate a Git repository
- whether or not your program knows exactly where it’s located
- Methods/strategies mentioned
- environment variables
- `.readEnvironment()`
- search starting from working directory
- `.setWorkTree(…).findGitDir()`
- open known `.git` directory
- `.setGitDir(...)` (as in example)
#### Plumbing API: quick sampling + explanations
- Sampling actions shown (code outline)
- Get a reference
- `Ref master = repo.getRef("master");`
- Get object ID pointed to by reference
- `ObjectId masterTip = master.getObjectId();`
- Rev-parse
- `ObjectId obj = repo.resolve("HEAD^{tree}");`
- Load raw object contents
- `ObjectLoader loader = repo.open(masterTip);`
- `loader.copyTo(System.out);`
- Create a branch
- `RefUpdate createBranch1 = repo.updateRef("refs/heads/branch1");`
- `createBranch1.setNewObjectId(masterTip);`
- `createBranch1.update();`
- Delete a branch
- `RefUpdate deleteBranch1 = repo.updateRef("refs/heads/branch1");`
- `deleteBranch1.setForceUpdate(true);`
- `deleteBranch1.delete();`
- Config
- `Config cfg = repo.getConfig();`
- `String name = cfg.getString("user", null, "name");`
- Explanation: references (`Ref`)
- `repo.getRef("master")`
- JGit automatically grabs the actual master ref at `refs/heads/master`
- returns a `Ref` object for reading information about the reference
- `Ref` info available
- name: `.getName()`
- direct reference target object: `.getObjectId()`
- symbolic reference target reference: `.getTarget()`
- `Ref` objects also used for
- tag refs
- tag objects
- Tag “peeled” concept
- peeled = points to final target of a (potentially long) string of tag objects
- Explanation: object IDs (`ObjectId`)
- represents SHA-1 hash of an object
- object might or might not exist in the object database
- Explanation: rev-parse (`repo.resolve(...)`)
- accepts any object specifier Git understands
- returns
- a valid `ObjectId`, or
- `null`
- reference: “see Branch References”
- Explanation: raw object access (`ObjectLoader`)
- can stream contents
- `ObjectLoader.copyTo(...)`
- other capabilities mentioned
- read type and size of object
- return contents as a byte array
- large object handling
- when `.isLarge()` is `true`
- `.openStream()` returns an InputStream-like object
- reads raw data without pulling everything into memory at once
- Explanation: creating a branch (`RefUpdate`)
- create `RefUpdate`
- set new object ID
- call `.update()` to trigger change
- Explanation: deleting a branch
- requires `.setForceUpdate(true)`
- otherwise `.delete()` returns `REJECTED`
- and nothing happens
- Explanation: config (`Config`)
- get via `repo.getConfig()`
- example value read
- `user.name` via `cfg.getString("user", null, "name")`
- config resolution behavior
- uses repository for local configuration
- automatically detects global and system config files
- reads values from them as well
- Error handling in JGit (not shown in code sample)
- handled via exceptions
- may throw standard Java exceptions
- example: `IOException`
- also has JGit-specific exceptions (examples)
- `NoRemoteRepositoryException`
- `CorruptObjectException`
- `NoMergeBaseException`
- Scope note
- this is only a small sampling of the full plumbing API
- many more methods/classes exist
### Porcelain (JGit)
- Why porcelain exists
- plumbing APIs are rather complete
- but can be cumbersome to string together for common goals
- adding a file to the index
- making a new commit
- Entry point class
- `Git`
- construction shown
- `Repository repo;`
- `// construct repo...`
- `Git git = new Git(repo);`
#### Porcelain command pattern (Git class)
- Pattern
- `Git` methods return a command object
- chain method calls to set parameters
- execute via `.call()`
#### Example: like `git ls-remote`
- Credentials
- `CredentialsProvider cp = new UsernamePasswordCredentialsProvider("username", "p4ssw0rd");`
- Command chain
- `Collection<Ref> remoteRefs = git.lsRemote()`
- `.setCredentialsProvider(cp)`
- `.setRemote("origin")`
- `.setTags(true)`
- `.setHeads(false)`
- `.call();`
- Output loop
- `for (Ref ref : remoteRefs) {`
- ` System.out.println(ref.getName() + " -> " + ref.getObjectId().name());`
- `}`
- What it requests
- tags from `origin`
- not heads
- Authentication note
- uses a `CredentialsProvider`
#### Other commands available through `Git` (examples listed)
- add
- blame
- commit
- clean
- push
- rebase
- revert
- reset
### Further Reading (JGit)
- Official JGit API documentation
- https://www.eclipse.org/jgit/documentation
- standard Javadoc
- JVM IDEs can install locally as well
- JGit Cookbook
- https://github.com/centic9/jgit-cookbook
- many examples of specific tasks
## go-git
- When to use
- integrate Git into a service written in Golang
- What it is
- pure Go library implementation
- no native dependencies
- not prone to manual memory management errors
- transparent to standard Golang performance analysis tooling
- CPU profilers
- memory profilers
- race detector
- etc.
- Focus
- extensibility
- compatibility
- Compatibility / API coverage note
- supports most plumbing APIs
- compatibility documented at:
- https://github.com/go-git/go-git/blob/master/COMPATIBILITY.md
### Basic go-git example
- Import
- `import "github.com/go-git/go-git/v5"`
- Clone
- `r, err := git.PlainClone("/tmp/foo", false, &git.CloneOptions{`
- ` URL: "https://github.com/go-git/go-git",`
- ` Progress: os.Stdout,`
- `})`
### After you have a `Repository` instance
- “Access information and perform mutations”
- Example operations shown
- Get branch pointed to by `HEAD`
- `ref, err := r.Head()`
- Get commit object pointed to by `ref`
- `commit, err := r.CommitObject(ref.Hash())`
- Get commit history
- `history, err := commit.History()`
- Iterate commits and print each
- `for _, c := range history {`
- ` fmt.Println(c)`
- `}`
### Advanced Functionality (go-git)
- Feature: pluggable storage system
- similar to Libgit2 backends
- default implementation: in-memory storage
- “very fast”
- example: clone into memory storage
- `r, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{`
- ` URL: "https://github.com/go-git/go-git",`
- `})`
- Storage options example
- store references, objects, and configuration in Aerospike
- example location:
- https://github.com/go-git/go-git/tree/master/_examples/storage
- Feature: flexible filesystem abstraction
- uses go-billy `Filesystem`
- https://pkg.go.dev/github.com/go-git/go-billy/v5?tab=doc#Filesystem
- makes it easy to store files differently
- pack all files into a single archive on disk
- keep all files in-memory
- Advanced use-case: fine-tunable HTTP client
- example referenced:
- https://github.com/go-git/go-git/blob/master/_examples/custom_http/main.go
- custom client shown
- `customClient := &http.Client{`
- ` Transport: &http.Transport{ // accept any certificate (might be useful for testing)`
- ` TLSClientConfig: &tls.Config{InsecureSkipVerify: true},`
- ` },`
- ` Timeout: 15 * time.Second, // 15 second timeout`
- ` CheckRedirect: func(req *http.Request, via []*http.Request) error {`
- ` return http.ErrUseLastResponse // don't follow redirect`
- ` },`
- `}`
- override protocol handling
- `client.InstallProtocol("https", githttp.NewClient(customClient))`
- purpose: override http(s) default protocol to use custom client
- clone using new client (for `https://`)
- `r, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{URL: url})`
### Further Reading (go-git)
- Scope note
- full treatment outside scope of the book
- API documentation
- https://pkg.go.dev/github.com/go-git/go-git/v5
- Usage examples
- https://github.com/go-git/go-git/tree/master/_examples
## Dulwich
- What it is
- pure-Python Git implementation: Dulwich
- Project hosting / site
- https://www.dulwich.io/
- Goal
- interface to Git repositories (local and remote)
- does not call out to `git` directly
- uses pure Python instead
- Performance note
- optional C extensions
- significantly improve performance
- API design
- follows Git design
- separates two API levels
- plumbing
- porcelain
### Dulwich plumbing example (lower-level API)
- Goal
- access the commit message of the last commit
- Code and shown outputs
- `from dulwich.repo import Repo`
- `r = Repo('.')`
- `r.head()`
- `# '57fbe010446356833a6ad1600059d80b1e731e15'`
- `c = r[r.head()]`
- `c`
- `# <Commit 015fc1267258458901a94d228e39f0a378370466>`
- `c.message`
- `# 'Add note about encoding.\n'`
### Dulwich porcelain example (high-level API)
- Goal
- print a commit log using porcelain API
- Code and shown outputs
- `from dulwich import porcelain`
- `porcelain.log('.', max_entries=1)`
- `#commit: 57fbe010446356833a6ad1600059d80b1e731e15`
- `#Author: Jelmer Vernooij <jelmer@jelmer.uk>`
- `#Date: Sat Apr 29 2017 23:57:34 +0000`
### Further Reading (Dulwich)
- Available on official website
- API documentation
- tutorial
- many task-focused examples
- URL
- https://www.dulwich.io/

View File

@@ -0,0 +1,85 @@
```markmap
# Appendix C: Git Commands
## Context / Why this appendix exists
- The book introduces “dozens” of Git commands
- Commands were introduced inside a narrative
- Added “slowly” as the story progressed
- Result of narrative approach
- Examples/usage of commands are “somewhat scattered” throughout the book
- Goal of this appendix
- Go through **all Git commands addressed throughout the book**
- Group them “roughly by what they’re used for”
- For each command
- Explain **very generally** what it does
- Point out **where in the book** it was used
## Tip / Note: Abbreviating long options
- You can abbreviate long options (when unambiguous)
- Example
- `git commit --a` behaves like `git commit --amend`
- Constraint
- Abbreviation works **only** when the letters after `--` are **unique** among options
- Guidance for scripting
- Use the **full option** when writing scripts
## Setup and Config
- Two commands used “quite a lot”
- From the **first invocations** of Git
- Through **common every day** tweaking and referencing
- The two commands
- `git config`
- `git help`
### `git config`
- Core idea
- Git has a **default way** of doing “hundreds of things”
- `git config` lets you change those defaults and set preferences
- What configuration can include (examples given)
- Your **name**
- Your **email address**
- Your **editor** preference
- Specific **terminal color** preferences
- (More generally) “hundreds of things” Git can be told to do differently
- How configuration is stored/applied
- Multiple files are involved
- The command can **read from** and **write to** several files
- Scope flexibility
- Set values **globally**
- Or down to **specific repositories**
#### How often it appears in the book
- “Used in nearly every chapter of the book”
#### Where the book used `git config` (as listed)
- **First-Time Git Setup**
- Used to specify
- Name
- Email address
- Editor preference
- Noted as happening “before we even got started using Git”
- **Git Aliases**
- Used to create shorthand commands (aliases)
- Aliases expand to long option sequences
- Purpose: avoid typing long sequences every time
- **Rebasing**
- Used to make `--rebase` the default behavior when running `git pull`
- **Credential Storage**
- Used to set up a default store for HTTP passwords
- **Keyword Expansion**
- Used to set up
- “smudge” filters
- “clean” filters
- Purpose: manage content coming **into** and going **out of** Git
- **Git Configuration**
- “Basically the entirety” of that section/chapter is dedicated to `git config`
### `git config core.editor` (editor configuration)
- Mentioned as a specific configuration focus
- “git config core.editor commands”
- Linked context
- Accompanies the configuration instructions in **Your Editor**
- What is stated
- Many editors can be set using `core.editor`
- Boundary of provided material
- The excerpt ends just before listing the specific editor command examples
```

665
mindmap/Customizing Git.md Normal file
View File

@@ -0,0 +1,665 @@
# Customizing Git
## Purpose & scope
- Goal: make Git operate in a more customized fashion (personal/team/company needs)
- Main customization mechanisms covered
- Configuration settings (`git config`)
- Attributes (path-specific behavior via `.gitattributes` / `.git/info/attributes`)
- Hooks (event-driven scripts: client-side + server-side)
## Git Configuration
### `git config` basics
- Used to read/write configuration values
- Common initial setup (examples)
- `git config --global user.name "John Doe"`
- `git config --global user.email johndoe@example.com`
### Configuration files (“levels”) & precedence
- System level
- File: `[path]/etc/gitconfig`
- Applies to: every user + all repositories on the system
- `git config --system …` reads/writes here
- Global level (user)
- File: `~/.gitconfig` or `~/.config/git/config`
- Applies to: a specific user across repositories
- `git config --global …` reads/writes here
- Local level (repo)
- File: `.git/config` (inside current repository)
- Applies to: current repository only
- `git config --local …` reads/writes here
- Default level if you don’t specify `--system/--global/--local`
- Override rule
- `local` overrides `global` overrides `system`
- Editing note
- Config files are plain text; manual edits work
- Generally easier/safer to use `git config`
### Client-side vs server-side options
- Options fall into two categories
- Client-side (most options): personal working preferences
- Server-side (fewer): repository receiving/policy behaviors
- Discover all supported options
- `man git-config`
- Reference: `https://git-scm.com/docs/git-config`
### Basic client configuration (common & useful)
#### `core.editor`
- Purpose: editor used for commit/tag messages
- Default selection order
- `$VISUAL` or `$EDITOR` environment variables
- fallback: `vi`
- Set example
- `git config --global core.editor emacs`
#### `commit.template`
- Purpose: provide an initial commit message template
- Use cases
- Remind yourself/team of message structure and policy
- Encourage consistent subject length + body + ticket references
- Example template content (concepts)
- Subject line guidance (e.g., keep under ~50 chars for `git log --oneline`)
- Multi-line description
- Optional ticket marker (e.g., `[Ticket: X]`)
- Set + behavior
- `git config --global commit.template ~/.gitmessage.txt`
- `git commit` opens editor pre-filled with the template + comment lines
#### `core.pager`
- Purpose: pager for long output (e.g., `log`, `diff`)
- Default: usually `less`
- Disable paging
- `git config --global core.pager ''`
- Effect: output is printed directly (no pager), regardless of length
#### `user.signingkey`
- Purpose: simplify signing annotated tags (GPG)
- Set
- `git config --global user.signingkey <gpg-key-id>`
- Use afterward
- `git tag -s <tag-name>` (no need to specify key each time)
#### `core.excludesfile`
- Purpose: global ignore patterns (applies to all repositories for that user)
- Use cases (examples)
- macOS: `.DS_Store`
- editors: Emacs backups `*~`, Vim swap files `.*.swp`
- Example workflow
- Create `~/.gitignore_global` with patterns like
- `*~`
- `.*.swp`
- `.DS_Store`
- Configure
- `git config --global core.excludesfile ~/.gitignore_global`
#### `help.autocorrect`
- Problem: mistyped commands are suggested but not run
- Set behavior: auto-run a likely intended command after a delay
- Setting semantics
- Integer in tenths of a second
- `1` → 0.1s delay
- `50` → 5s delay
- Example
- `git config --global help.autocorrect 1`
- Runtime behavior
- Shows warning + countdown-like delay, then runs corrected command
## Colors in Git
### `color.ui` (master switch)
- Purpose: enable/disable default colored terminal output
- Values
- `false` → no color
- `auto` (default) → color only when writing to a terminal; no color codes when piped/redirected
- `always` → always emit color codes (rarely desired)
- Per-command override
- Use `--color` flag on specific Git commands if you want forced coloring in redirected output
### `color.*` (command-specific control)
- Per-area switches (each: `true`, `false`, or `always`)
- `color.branch`
- `color.diff`
- `color.interactive`
- `color.status`
- Fine-grained subsettings (override specific parts)
- Example: diff “meta” styling
- `git config --global color.diff.meta "blue black bold"`
- Supported colors
- `normal`, `black`, `red`, `green`, `yellow`, `blue`, `magenta`, `cyan`, `white`
- Supported attributes
- `bold`, `dim`, `ul` (underline), `blink`, `reverse`
## External Merge and Diff Tools
### Why use external tools
- Git has built-in diff/merge, but you can:
- Use external diff viewers
- Use GUI merge tools for conflict resolution
- Example tool used in chapter
- P4Merge (Perforce Visual Merge Tool): graphical + free + cross-platform
### Wrapper-script approach (example: P4Merge)
- Platform note
- Example paths are macOS/Linux-style
- On Windows, replace `/usr/local/bin` with an executable path in your environment
#### `extMerge` wrapper
- Purpose: call the GUI merge tool with all passed arguments
- Example content (conceptual)
- Shell script that runs: `p4merge $*`
- macOS example path to binary:
- `/Applications/p4merge.app/Contents/MacOS/p4merge $*`
#### `extDiff` wrapper
- Purpose: adapt Git’s diff-program arguments to what your merge viewer needs
- Git passes 7 arguments to external diff programs (concept)
- `path old-file old-hex old-mode new-file new-hex new-mode`
- Wrapper logic
- Ensure 7 args exist
- Invoke merge tool on the *old file* and *new file* only
- Uses `$2` (old-file) and `$5` (new-file)
#### Make wrappers executable
- `sudo chmod +x /usr/local/bin/extMerge`
- `sudo chmod +x /usr/local/bin/extDiff`
### Configure Git to use wrappers
- Key settings involved
- `merge.tool` → selects merge tool name
- `mergetool.<tool>.cmd` → how to invoke tool (with `$BASE`, `$LOCAL`, `$REMOTE`, `$MERGED`)
- `mergetool.<tool>.trustExitCode` → whether the tool’s exit code indicates success
- `diff.external` → command for external diffs
- Example config commands
- `git config --global merge.tool extMerge`
- `git config --global mergetool.extMerge.cmd 'extMerge "$BASE" "$LOCAL" "$REMOTE" "$MERGED"'`
- `git config --global mergetool.extMerge.trustExitCode false`
- `git config --global diff.external extDiff`
- Equivalent `.gitconfig` blocks (concept)
- `[merge] tool = extMerge`
- `[mergetool "extMerge"] cmd = … ; trustExitCode = false`
- `[diff] external = extDiff`
### Using the configured tools
- External diff example
- `git diff <rev1> <rev2>` opens GUI instead of printing to terminal
- (Figure reference in chapter: P4Merge screenshot)
- Merge conflicts
- `git mergetool` launches GUI tool to resolve conflicts
### Switching tools easily
- Benefit of wrapper design
- Change the underlying tool by editing `extMerge`
- `extDiff` continues calling `extMerge`
- Example: switch to KDiff3 by changing the binary invoked by `extMerge`
### Built-in mergetool presets
- Git supports many merge tools without custom `cmd`
- List supported tools
- `git mergetool --tool-help`
- Environment caveat
- Windowed tools require a GUI; terminal-only sessions may fail
### Using a tool only for merges (not diffs)
- If tool command is in `PATH` (example: `kdiff3`)
- `git config --global merge.tool kdiff3`
- Result
- Merge resolution uses KDiff3
- Diffs remain Git’s normal diff output
## Formatting and Whitespace
### Problems addressed
- Cross-platform line endings (Windows vs macOS/Linux)
- Subtle whitespace edits introduced by editors/tools
### `core.autocrlf` (line ending normalization)
- Background
- Windows newline: CRLF (`\r\n`)
- macOS/Linux newline: LF (`\n`)
- Behavior: auto-convert at boundaries
- On add/commit: convert as configured into repository-friendly form
- On checkout: convert as configured into working-tree-friendly form
- Recommended settings by environment
- Windows + cross-platform collaboration
- `git config --global core.autocrlf true`
- Checkout uses CRLF; repo stores LF
- macOS/Linux (LF) but want to “clean up” accidental CRLF commits
- `git config --global core.autocrlf input`
- Convert CRLF→LF on commit; do not convert on checkout
- Windows-only project, want CRLF stored as-is
- `git config --global core.autocrlf false`
### `core.whitespace` (detect/fix whitespace issues)
- Six primary whitespace issues
- Enabled by default (can be disabled)
- `blank-at-eol` (spaces at end of line)
- `blank-at-eof` (blank lines at end of file)
- `space-before-tab` (spaces before tabs in indentation)
- Disabled by default (can be enabled)
- `indent-with-non-tab` (indent begins with spaces; uses `tabwidth`)
- `tab-in-indent` (tabs in indentation portion)
- `cr-at-eol` (treat CR at EOL as acceptable)
- How to set
- Comma-separated list
- Disable an option by prefixing with `-`
- Omit options to keep defaults
- Shorthand
- `trailing-space` = `blank-at-eol` + `blank-at-eof`
- Example intent from chapter
- Enable most checks, disable `space-before-tab`, and enable the three disabled-by-default checks
- Where it’s used
- `git diff` highlights whitespace problems
- `git apply` uses it for patch application
- Warn: `git apply --whitespace=warn <patch>`
- Fix: `git apply --whitespace=fix <patch>`
- `git rebase` can also fix while rewriting patches
- `git rebase --whitespace=fix`
## Server Configuration
### General note
- Fewer server-side config options, but some are important for integrity and policy
### `receive.fsckObjects`
- Purpose: validate object integrity during push reception
- Check SHA-1 checksums
- Ensure objects point to valid objects
- Tradeoff: expensive; can slow pushes (especially large repos/pushes)
- Enable
- `git config --system receive.fsckObjects true`
- Benefit
- Helps prevent corrupt or malicious objects being introduced
### `receive.denyNonFastForwards`
- Purpose: refuse non-fast-forward updates (blocks most force-pushes)
- Typical scenario
- Rebase already-pushed commits, then attempt to push rewritten history
- Enable
- `git config --system receive.denyNonFastForwards true`
- Alternative/enhancement
- Server-side hooks can enforce this with per-user/per-ref logic
### `receive.denyDeletes`
- Purpose: prevent deletion of branches/tags on the server
- Stops the “delete and recreate” workaround to bypass non-FF restrictions
- Enable
- `git config --system receive.denyDeletes true`
- Effect
- No user can delete branches/tags via push
- Must remove ref files manually on server (or via ACLs/policy hooks)
## Git Attributes
### What attributes are
- Path-specific settings controlling Git behavior for subsets of files
- Where to define them
- `.gitattributes` (committed, shared with the project)
- `.git/info/attributes` (local-only, not committed)
- Typical uses
- Choose merge strategies per file/directory
- Teach Git how to diff “non-text” formats
- Filter content on check-in/check-out (clean/smudge filters)
### Binary Files
#### Identifying binary-like files
- Motivation: some “text” is effectively binary for Git operations (diff/merge not meaningful)
- Example from chapter
- Xcode `*.pbxproj` (UTF-8 text, but acts like machine-managed DB)
- Diffs/merges are not helpful; conflicts are not realistically resolvable by humans
- Attribute
- In `.gitattributes`: `*.pbxproj binary`
- Effects
- Avoid CRLF conversions/fixes for those paths
- Avoid computing/printing diffs for those files
#### Diffing binary files via text conversion (`textconv`)
- Core idea
- Convert binary content to a text representation, then use normal diff on that representation
##### Microsoft Word (`.docx`) diffing
- Attribute mapping
- `.gitattributes`: `*.docx diff=word`
- Define the `word` diff “driver” with `textconv`
- Install `docx2txt` (chapter references SourceForge project + INSTALL instructions)
- Create wrapper script named `docx2txt` in `PATH` (concept)
- Calls `docx2txt.pl "$1" -` to emit text to stdout
- Make executable (`chmod a+x docx2txt`)
- Configure Git
- `git config diff.word.textconv docx2txt`
- Result
- `git diff` shows added/removed text instead of “Binary files differ”
- Limitation noted
- Formatting-only changes may not be represented perfectly
##### Image metadata diffing (EXIF)
- Attribute mapping
- `.gitattributes`: `*.png diff=exif`
- Tool
- Install `exiftool`
- Configure Git
- `git config diff.exif.textconv exiftool`
- Result
- `git diff` shows textual metadata differences (e.g., file size, width/height)
### Keyword Expansion (CVS/SVN-style substitutions)
#### Why it’s tricky in Git
- Git hashes file content (blobs); modifying file contents “after commit” would change the hash
- Solution pattern
- Inject content on checkout
- Remove/normalize before staging/commit
#### Built-in `ident` attribute (`$Id$`)
- Attribute
- `.gitattributes`: `*.txt ident`
- Behavior
- On checkout, replaces `$Id$` with `$Id: <blob-sha1> $`
- Note: uses blob SHA-1 (not commit SHA-1)
- Limitation
- Blob SHA-1 isn’t a human-friendly timestamp/ordering signal
#### Custom clean/smudge filters
- Terminology
- **smudge**: runs on checkout (into working directory)
- **clean**: runs when staging (into index)
- (Figure references in chapter: smudge-on-checkout and clean-on-stage diagrams)
##### Example: auto-format C code using `indent`
- `.gitattributes`
- `*.c filter=indent`
- Config filter behavior
- Clean (before staging): `git config --global filter.indent.clean indent`
- Smudge (on checkout): `git config --global filter.indent.smudge cat` (no-op)
- Effect
- Code is run through `indent` before being committed
##### Example: `$Date$` expansion (RCS-like)
- Smudge script (concept)
- Reads stdin
- Computes last commit date: `git log --pretty=format:"%ad" -1`
- Replaces `$Date$` → `$Date: <last_date>$`
- Script name in chapter: `expand_date` (Ruby), placed in `PATH`
- Configure the filter “driver” (named `dater`)
- Smudge: `git config filter.dater.smudge expand_date`
- Clean: `git config filter.dater.clean 'perl -pe "s/\\\$Date[^\\\$]*\\\$/\\\$Date\\\$/"'`
- Strips expanded date back to literal `$Date$` before storing
- Apply to files
- `.gitattributes`: `date*.txt filter=dater`
- Demonstrated workflow
- Create file containing `$Date$`
- Commit
- Remove + checkout again
- Observe expanded date in working directory
- Portability caveat
- `.gitattributes` is shared with the repo
- Filter scripts/config are not automatically shared
- Filters should fail gracefully so project still works without them
### Exporting Your Repository (archives)
#### `export-ignore`
- Purpose: exclude files/dirs from `git archive` output while still tracking them in Git
- Example
- `.gitattributes`: `test/ export-ignore`
- Result
- `git archive` tarball omits `test/`
#### `export-subst`
- Purpose: apply `git log` formatting/keyword-style substitutions during `git archive`
- Mark file(s)
- `.gitattributes`: `LAST_COMMIT export-subst`
- Embed placeholders in file content (concept)
- Example pattern: `$Format:%cd by %aN$`
- Behavior on archive
- `git archive` injects metadata (date/author/etc.) into exported file
- Can include commit message, git notes, and word-wrapped formatting (chapter shows `%+w(...)` usage)
- Important limitation
- Exported archive is suitable for deployment
- Not suitable for continued development like a full Git checkout
### Merge Strategies (per-path)
- Goal: apply special merge behavior for specific files
- Example: keep “our” version of a config-like file
- `.gitattributes`: `database.xml merge=ours`
- Configure merge driver
- `git config --global merge.ours.driver true` (dummy driver; always “succeeds” taking ours)
- Result when merging
- Git uses current branch version for that path, avoiding manual conflict resolution for that file
## Git Hooks
### What hooks are
- Custom scripts triggered by Git events
- Two groups
- Client-side: local operations (commit, rebase, merge, checkout, push initiation, etc.)
- Server-side: network operations (receiving pushes)
### Installing a hook
- Location
- `.git/hooks` in a repository
- Defaults
- `git init` creates example hook scripts (typically `*.sample`)
- Enabling a hook
- Create/rename a file with the proper hook name (no extension)
- Make it executable
- Implementation language
- Any executable script works (shell, Perl, Ruby, Python, …)
### Client-side hooks
- Critical distribution note
- Client-side hooks are **not** transferred when cloning
- To **enforce** policy, prefer server-side hooks (client-side can only assist)
#### Committing-workflow hooks
- `pre-commit`
- Runs: before commit message entry
- Use: inspect staged snapshot, run tests/lint, detect trailing whitespace, verify docs, etc.
- Abort rule: non-zero exit cancels commit
- Bypass: `git commit --no-verify`
- `prepare-commit-msg`
- Runs: after default message is created, before editor opens
- Inputs (parameters)
- Commit message file path
- Commit type
- Commit SHA-1 (for amended commits)
- Use: adjust auto-generated messages (merge commits, squashes, amended commits, template-based flows)
- `commit-msg`
- Runs: after message is written, before commit is finalized
- Input: commit message file path
- Use: validate message format / required patterns
- `post-commit`
- Runs: after commit completes
- No parameters
- Use: notifications; can identify last commit via `git log -1 HEAD`
#### Email workflow hooks (for `git am`)
- Scope note
  - Only relevant if using email patch workflows (`git format-patch` → `git am`)
- `applypatch-msg`
- Runs: first
- Input: temp file with proposed commit message
- Abort rule: non-zero cancels patch application
- Use: validate/normalize commit messages (can edit file in place)
- `pre-applypatch`
- Runs: after patch applied, before commit is made
- Use: inspect snapshot; run tests; abort `git am` if failures occur
- `post-applypatch`
- Runs: after commit is made
- Use: notify author/team that patch was applied
- Cannot stop the patching process
#### Other client hooks
- `pre-rebase`
- Runs: before rebase
- Abort rule: non-zero cancels rebase
- Use: prevent rebasing commits that have already been pushed (sample hook attempts this)
- `post-rewrite`
- Triggered by: commands that replace commits (`git commit --amend`, `git rebase`; not `git filter-branch`)
- Input: argument naming the triggering command; rewrite list on stdin
- Use: similar to post-checkout/post-merge automation/notifications
- `post-checkout`
- Runs: after successful `git checkout`
- Use: project environment setup (populate large binaries not tracked, generate docs, etc.)
- `post-merge`
- Runs: after successful merge
- Use: restore non-tracked working-tree data (e.g., permissions), validate external dependencies
- `pre-push`
- Runs: during `git push` after remote refs updated but before objects transferred
- Inputs
- Parameters: remote name + remote location
- stdin: refs to be updated
- Abort rule: non-zero cancels push
- Use: validate ref updates before transferring objects
- `pre-auto-gc`
- Runs: before automatic garbage collection (`git gc --auto`)
- Use: notify user or abort GC if inconvenient
### Server-side hooks
- Admin-focused: enforce policies on pushes
- Pre hooks can reject pushes
- Exit non-zero to reject
- Print message to stdout to show error to client
#### `pre-receive`
- Runs: first during push handling
- Input: list of refs on stdin
- Reject behavior
- Non-zero exit rejects **all** refs in the push
- Use cases
- Block non-fast-forward updates globally
- Access control across refs and paths being modified
#### `update`
- Similar role to `pre-receive`, but:
- Runs **once per branch/ref** being updated
- Inputs (arguments)
- Ref name
- Old SHA-1
- New SHA-1
- Reject behavior
- Non-zero exit rejects **only that ref**; other refs can still update
#### `post-receive`
- Runs: after push process completes
- Input: same stdin data as `pre-receive`
- Use cases
- Notify services/users (email lists, CI, ticket trackers)
- Parse commit messages for automation
- Performance note
- Cannot stop push; client waits until hook finishes
- Avoid long-running tasks or offload them
#### Hook scripting tip (from chapter)
- Prefer long-form command-line flags in scripts for readability/maintainability
## An Example Git-Enforced Policy
### Goals
- Enforce commit message format (must include a ticket/reference token)
- Enforce user-based access control (who can change which directories/files)
- Provide client-side hooks to warn users early (reduce rejected pushes)
### Implementation language in chapter
- Ruby (chosen for readability), but any scripting language works
### Server-side enforcement (in `hooks/update`)
#### Update hook inputs & environment
- Runs once per branch being pushed
- Arguments
- `refname` (ref being updated)
- `oldrev` (old SHA-1)
- `newrev` (new SHA-1)
- User identification assumption
- User available in `$USER`
- SSH single-user setups may need a wrapper to map public keys to a user and set env var
- Hook prints an “Enforcing Policies…” banner
- Anything printed to stdout is relayed to the pushing client
#### Policy 1: Enforce commit message format
- Requirement: each commit message must contain something like `[ref: 1234]`
- Identify commits included in the push
- `git rev-list oldrev..newrev` (lists new commits by SHA-1)
- Extract commit message for each commit
- `git cat-file commit <sha>` gives raw commit object
- Message content begins after first blank line
- Use `sed '1,/^$/d'` to print message portion
- Validate messages
- Regex (concept): `/\[ref: (\d+)\]/`
- If any commit lacks the pattern
- Print policy message
- `exit 1` → reject push
#### Policy 2: Enforce directory/file ACL (user-based permissions)
- ACL file location (server-side)
- `acl` file stored in the bare repository
- ACL format (CVS-like)
- Lines: `avail|user1,user2|path`
- Pipe `|` delimits fields
- Blank `path` means access to everything
- (Example also mentions `unavail`, but the sample enforcement only handles `avail`)
- Example intent
- Admin users: full access
- Doc writers: only `doc/`
- Limited dev: only `lib/` and `tests/`
- Parse ACL into structure
- Map: `user -> [allowed_paths]`
- `nil` path denotes “allowed everywhere”
- Determine what files are modified by pushed commits
- For each new commit: `git log -1 --name-only --pretty=format:'' <rev>`
- Validate each changed path against the user's allowed paths
- Allowed if
- user has a `nil` access path (full access), or
- file path starts with an allowed directory prefix
- On violation
- Print `[POLICY] You do not have access to push to <path>`
- `exit 1` to reject
#### Testing behavior (server-side)
- Enable hook: `chmod u+x .git/hooks/update`
- Pushing with a bad commit message
- Hook prints policy banner + error
- Git reports hook failure and rejects the ref update
- Pushing unauthorized file edits
- Similar rejection, specifying the disallowed path
- Outcome
- Repo never accepts commits missing the required reference pattern
- Users are sandboxed to allowed paths
### Client-side helper hooks (reduce “last-minute” rejections)
#### Distribution limitation
- Hooks don't clone with the repository
- Must distribute scripts separately and have users install them into `.git/hooks/` and make executable
#### Client policy 1: commit message check (`commit-msg` hook)
- Runs before commit finalization
- Input: commit message file path (`ARGV[0]`)
- Enforces same regex pattern as server policy
- Behavior
- Non-matching message → print policy message → exit non-zero → commit aborted
- Matching message → commit proceeds
#### Client policy 2: ACL check before commit (`pre-commit` hook)
- Requires local copy of ACL file
- Expected at: `.git/acl`
- Key differences vs server-side ACL enforcement
- Uses staging area (index) instead of commit history
- File list command
- `git diff-index --cached --name-only HEAD`
- Same core permission logic
- If staged changes include a disallowed path, abort commit
- Identity caveat
- Assumes local `$USER` matches the user used when pushing to the server; otherwise set user explicitly
#### Client policy 3: prevent rebasing already-pushed commits (`pre-rebase` hook)
- Motivation
- Server likely already denies non-fast-forward updates (`receive.denyNonFastForwards`) and deletes
- Client hook helps prevent accidental rebases that rewrite already-pushed commits
- Script logic (concept)
- Determine base branch + topic branch (`HEAD` default)
- List commits to be rewritten: `git rev-list base..topic`
- List remote refs: `git branch -r`
- For each commit SHA, check if reachable from any remote ref
- Uses revision syntax `sha^@` (all parents)
- Uses `git rev-list ^<sha>^@ refs/remotes/<remote_ref>` to test reachability
- If any commit already exists remotely, abort rebase with policy message
- Tradeoffs
- Can be slow
- Often unnecessary unless you were going to force-push
- Still a useful preventative exercise
## Summary (chapter wrap-up)
- Customization categories mastered
- Config settings (client + server)
- Attributes (path-specific diff/merge/filter/export behavior)
- Hooks (client assistance + server enforcement)
- Practical outcome
- Git can be shaped to match nearly any workflow, including enforceable policies and automation

926
mindmap/Distributed Git.md Normal file
View File

@@ -0,0 +1,926 @@
```markdown
# Distributed Git
## Context & Goals (Why this chapter)
- Starting point
- A remote Git repository already exists as a shared focal point
- You already know basic local Git commands/workflow
- What “distributed” enables
- Git supports distributed collaboration patterns beyond centralized VCSs
- Two roles you'll learn to operate in
- Contributor
- Contribute code successfully
- Make it easy for you + project maintainer
- Integrator / Maintainer
- Maintain a project with many contributors
    - Integrate others' work sustainably and clearly
## Distributed Workflows (Collaboration models)
### Git vs Centralized Version Control Systems (CVCSs)
- CVCS mental model
- Central hub/repository is the primary “source”
- Developers are “nodes” consuming/syncing with that hub
- Git mental model (distributed)
- Every developer is potentially both:
- A node (contributor to others)
- A hub (maintains a public repo others can base work on)
- Result: many possible workflow designs
- You can choose one workflow
- Or mix-and-match features
### Centralized Workflow (Single shared hub)
- Core idea
- One central repository (“hub”) accepts code
- Everyone synchronizes with that central location
- What happens when changes overlap in time
- Two developers clone from hub and both change things
- First developer to push succeeds
- Second developer must first merge the upstream work locally
    - Prevents overwriting the first developer's work
- True in Git just like Subversion (or any CVCS)
- Why people like it
- Familiar paradigm for many teams
- Git-specific note
- You can keep using this model in Git
- Give everyone push access
- Git prevents overwriting (rejects non-fast-forward pushes)
- Scale note
- Not limited to small teams
- Git branching makes it possible for hundreds of devs to work across dozens of branches
- Figure 53: Centralized workflow
- Hub repository with multiple developers syncing to it
### Integration-Manager Workflow (Multiple repos + canonical “official” repo)
- Core idea
- Each developer:
- Has write access to their own public repository
    - Has read access to others' repositories
- There is a canonical (“official”) repository
- Contribution mechanics (typical steps)
1. Maintainer pushes to their public repository
2. Contributor clones that repository and makes changes
3. Contributor pushes to their own public copy
4. Contributor requests maintainer to pull changes (often email)
5. Maintainer adds contributor repo as remote, tests locally, merges locally
6. Maintainer pushes merged result to canonical repo
- Where it's common
- Hub-based hosting (e.g., GitHub/GitLab-style forking)
- Key advantage emphasized
- Asynchronous pace
- Contributor keeps working without waiting for acceptance
- Maintainer can pull in changes whenever ready
- Figure 54: Integration-manager workflow
- Blessed/canonical repo + integration manager + contributor public/private repos
### Dictator and Lieutenants Workflow (Large hierarchical projects)
- What it is
- Variant of multi-repo workflow
- Used in huge projects with hundreds of collaborators (example: Linux kernel)
- Roles
- Lieutenants
- Integration managers for specific subsystems/areas
- Benevolent dictator
- Single final integration manager
- Pushes to reference (“blessed”) repository
- Process (typical steps)
1. Regular developers
- Work on topic branches
- Rebase their work on top of `master`
       - `master` here = reference repo's `master` that dictator pushes to
  2. Lieutenants
     - Merge developers' topic branches into lieutenant's `master`
  3. Dictator
     - Merges lieutenants' `master` branches into dictator's `master`
4. Dictator pushes to reference repository
- Other developers rebase on that updated `master`
- When it helps
- Very large projects
- Highly hierarchical environments
- Delegates integration work; collects large subsets before final integration
- Noted as
- Not common overall
- But useful in specific contexts
- Figure 55: Benevolent dictator workflow
- Dictator integrates from lieutenants; pushes to blessed repository
### Patterns for Managing Source Code Branches (external guide callout)
- Reference
- Martin Fowler guide: “Patterns for Managing Source Code Branches”
- Link: https://martinfowler.com/articles/branching-patterns.html
- What the guide covers (as noted)
- Common Git workflows
- How/when to use them
- Comparison of high vs low integration frequencies
### Workflows Summary (transition)
- Distributed Git allows many variations
- Choose a workflow (or combination) that fits reality
- Next: specific contributing patterns and use cases
## Contributing to a Project (How to contribute effectively)
### Why it's hard to prescribe one “right” way
- Git is flexible → many real-world collaboration styles
- Every project differs
### Key variables that change the “best” approach
- Active contributor count (and activity level)
  - Small/dormant: 2–3 devs, few commits/day (or less)
- Large: thousands of devs, hundreds/thousands of commits/day
- Practical risk with larger activity
- Your changes may not apply cleanly later
- Your changes may become obsolete/broken before merging
- Need to keep work up to date and commits valid
- Workflow in use
- Centralized (everyone can push to main line)
- Integration manager / maintainer gatekeeping
- Peer review requirements
- Lieutenant system (submit to subsystem maintainers first)
- Your commit access
- Write access vs no write access changes everything
- If no access
- How does project accept contributions (policy/method)?
- Contribution size & frequency
- How much you contribute at a time
- How often you contribute
### Approach in chapter
- Presents a series of use cases (simple → complex)
- Goal
- Let you construct the workflow you need in practice
### Commit Guidelines (preparing high-quality contributions)
- Source of guidance
  - Git project's tips: `Documentation/SubmittingPatches`
- Guideline 1: Avoid whitespace errors
- Check before committing:
- `git diff --check`
- Purpose
- Identify whitespace problems early
- Prevent annoying reviewers/maintainers
- Figure 56: Output of `git diff --check`
- Guideline 2: Make commits logically separate changesets
- Keep changes digestible
  - Don't bundle multiple unrelated issues into one massive commit
- Use staging area to split work
- Even if you did work across multiple issues before committing
- If changes overlap in same file
- Use partial staging:
- `git add --patch`
- (Referenced as covered in “Interactive Staging”)
- Key point emphasized
- Final snapshot can be identical whether you do 1 commit or 5
- But review/revert is much easier with clean, separated commits
- Benefit
- Easier to later pull out/revert one changeset
- Tooling reference
- “Rewriting History” provides techniques for crafting clean history
- Guideline 3: Write high-quality commit messages
- Why
- Makes using Git + collaborating easier
- Recommended structure
- Summary line
- ~50 characters or less
- Concise description of changeset
- Blank line (important)
- Tools (e.g., rebase) can get confused without it
- Body (optional, but recommended when needed)
- More detailed explanation
- Wrap around ~72 characters
- Include motivation for change
- Contrast implementation with previous behavior (Git project requirement)
- Style rules
- Imperative mood
- “Fix bug” (not “Fixed bug” / “Fixes bug”)
- Matches messages auto-generated by `git merge` and `git revert`
- Formatting details
- Multiple paragraphs separated by blank lines
- Bullet points are okay
- Hyphen/asterisk + single space
- Hanging indent
- Conventions vary
- Example quality reference
- Look at Git project history:
- `git log --no-merges`
- Callout: “Do as we say, not as we do”
- Book examples often use `git commit -m` for brevity
- Not meant as best-practice formatting
## Use Case 1: Private Small Team (simple shared repo)
### Setting
- Private = closed-source, not public
- 1–2 other developers
- Everyone has push access to the shared repository
- Workflow resembles centralized systems (e.g., Subversion), but with Git advantages
- Offline commits
- Easier branching/merging
- Key difference vs Subversion: merges happen client-side
### Example scenario: John & Jessica pushing to shared repo
- John: clone, edit, commit
- `git clone john@githost:simplegit.git`
- edit `lib/simplegit.rb`
- `git commit -am 'Remove invalid default value'`
- Jessica: clone, edit, commit
- `git clone jessica@githost:simplegit.git`
- edit `TODO`
- `git commit -am 'Add reset task'`
- Jessica pushes successfully
- `git push origin master`
- Push output explained (last line format)
- `<oldref>..<newref> fromref → toref`
- `oldref`: previous remote reference
- `newref`: updated remote reference
- `fromref`: local ref being pushed
- `toref`: remote ref being updated
- Reference: `git-push` documentation
- John tries to push and is rejected
- `git push origin master`
- Error: rejected (non-fast forward)
- Key lesson vs Subversion
- Even if they edited different files
- Git requires John to fetch + merge locally before pushing
- Subversion might do server-side merge; Git does not
### John resolves the non-fast-forward
- Step 1: Fetch upstream changes
- `git fetch origin`
- Fetch downloads changes but does not merge them
  - Figure 57: John's divergent history (local master vs `origin/master`)
- Step 2: Merge fetched upstream
- `git merge origin/master`
- Merge strategy shown: `recursive`
  - Figure 58: John's repository after merging `origin/master`
- Step 3: Test merged code (recommended)
- Step 4: Push merged result
- `git push origin master`
  - Figure 59: John's history after pushing to origin
### Jessica's parallel work: topic branch + later integration
- Jessica created topic branch `issue54`
  - 3 commits on that branch
  - She hadn't fetched John's updates yet
  - Figure 60: Jessica's topic branch
- Jessica fetches new work
- `git fetch origin`
  - Figure 61: Jessica's history after fetching John's changes
- Jessica determines what new commits exist on `origin/master`
- `git log --no-merges issue54..origin/master`
- Meaning of `issue54..origin/master`
- Show commits on `origin/master` that are not on `issue54`
- Note: range syntax referenced as covered later in “Commit Ranges”
- Jessica integrates (order doesn't matter for final snapshot)
- Switch to master
- `git checkout master`
- Message may indicate behind `origin/master` and fast-forwardable
- Merge topic branch to master (chosen first)
- `git merge issue54`
- Result: fast-forward merge (no new merge commit)
  - Merge John's upstream work
    - `git merge origin/master`
    - Result: merge commit created via recursive strategy
  - Figure 62: Jessica's history after merging John's changes
- Jessica pushes
  - `git push origin master`
  - Figure 63: Jessica's history after pushing all changes
### General “simple multi-developer” sequence (as summarized)
- Work locally (often in a topic branch)
- Merge topic branch into `master` when ready
- Before sharing
- Fetch and merge `origin/master` if it changed
- Push `master` to server
- Figure 64: General sequence of events for this simple workflow
## Use Case 2: Private Managed Team (team branches + integrators)
### Setting
- Larger private group
- Small groups collaborate on features
- Integrators (a subset of engineers) merge into mainline
- Only integrators can update `master` of main repo
- Team collaboration happens on shared feature branches
### Scenario setup
- FeatureA: John + Jessica
- FeatureB: Jessica + Josie
- Work happens on team-based branches; integrators pull together later
### Jessica works on FeatureA (with John)
- Create feature branch
- `git checkout -b featureA`
- Work + commit
- `git commit -am 'Add limit to log function'`
- Share with John
- Push feature branch (no `master` push permission)
- `git push -u origin featureA`
- `-u` / `--set-upstream` sets upstream tracking for easier push/pull
- Notify John (email)
### Jessica works on FeatureB (with Josie)
- Base new branch off server `master`
- `git fetch origin`
- `git checkout -b featureB origin/master`
- Work + commits
- `git commit -am 'Make ls-tree function recursive'`
- `git commit -am 'Add ls-files'`
- Figure 65: Jessica's initial commit history (featureA and featureB in progress)
### Josie already started an upstream branch for FeatureB
- Josie pushed initial work as branch `featureBee`
- Jessica fetches
- `git fetch origin`
- New remote-tracking branch: `origin/featureBee`
- Jessica merges Josie's work into her local `featureB`
- `git merge origin/featureBee`
- Jessica pushes merged result back to the shared upstream branch
- Uses a refspec:
- `git push -u origin featureB:featureBee`
- Refspec concept
- Push local `featureB` to remote branch `featureBee`
- Reference: “The Refspec”
- `-u` sets upstream for simpler future pushes/pulls
### John updates FeatureA; Jessica reviews and merges
- Fetch updates (includes John's latest on featureA)
- `git fetch origin`
- See what John added (compare local vs fetched)
- `git log featureA..origin/featureA`
- Merge it in
- `git checkout featureA`
- `git merge origin/featureA` (fast-forward in example)
- Add minor tweaks
- `git commit -am 'Add small tweak to merged content'`
- Push featureA back to server
- `git push`
- Figure 66: Jessica's history after committing on a feature branch
### Integrators merge FeatureA and FeatureBee into mainline
- Team informs integrators the branches are ready
- Integrators merge into mainline
- After a fetch, Jessica sees merge commit(s)
- Figure 67: Jessica's history after integrators merged both topic branches
### Benefits emphasized
- Multiple teams can work in parallel
- Late merging of independent lines of work
- Remote branches let subgroups collaborate without blocking entire team
- Figure 68: Basic sequence of the managed-team workflow
## Use Case 3: Forked Public Project (contribute via forks & pull requests)
### Why this differs
- Public project: you typically cannot push to the official repo
- Need a different path to get work to maintainers
### Typical fork-based flow
- Clone main repository
- `git clone <url>`
- Create a topic branch for your work
- `git checkout -b featureA`
- Commit as you work
- `git commit` (repeat as needed)
- Optional cleanup for review
- Use interactive rebase:
- `rebase -i`
- Goals: squash commits, reorder, make review easier
- Reference: “Rewriting History”
### Fork + push topic branch to your fork
- Fork via hosting site (“Fork” button) → writable fork
- Add fork as remote
- `git remote add myfork <url>`
- Push only the topic branch (recommended)
- `git push -u myfork featureA`
- Why avoid merging into your `master` before pushing?
  - If rejected or cherry-picked, you don't need to rewind your master
  - Maintainers may merge/rebase/cherry-pick; you'll receive it later by pulling upstream
### Notify maintainers: Pull request / request-pull
- “Pull request” can be created:
- Through the website (e.g., GitHub mechanism)
- Or manually via `git request-pull` + email
- `git request-pull` purpose
- Produces a summary of changes being requested for pull
- Inputs
- Base branch to pull into (e.g., `origin/master`)
- Repo URL to pull from (your fork)
- Output includes (as shown)
- Base commit reference (“changes since commit …”)
- Where to pull from (URL + branch)
- Commit list + diffstat summary
### Best practice for multiple contributions
- Keep a local `master` tracking `origin/master`
- Work in topic branches
- Easy to discard if rejected
- Easy to rebase if upstream moves
### Starting a second topic: don't stack topics on old branches
- Start new branch from current upstream master
- `git checkout -b featureB origin/master`
- Work + commit
- Push
- `git push myfork featureB`
- Request pull
- `git request-pull origin/master myfork`
- Update your view of upstream
- `git fetch origin`
- Resulting structure
- Topics become separate silos (patch-queue-like)
- Figure 69: Initial commit history with featureB work
### Scenario: maintainer can't merge your featureA cleanly anymore
- Cause
  - Upstream `origin/master` moved; your topic doesn't apply cleanly
- Fix
- Rebase your topic onto current upstream
- `git checkout featureA`
- `git rebase origin/master`
- Force push updated branch to your fork
- `git push -f myfork featureA`
- Why `-f` is required
- Rebase rewrites history
- New commits may not be descendants of the remote branch tip
- Alternative mentioned
- Push to a new branch (e.g., `featureAv2`) instead of force-updating
- Figure 70: Commit history after rebasing featureA work
### Scenario: maintainer likes featureB but requests implementation changes
- Goal
- Re-base on current `origin/master`
- Provide revised branch version
- Workflow
- Create new branch from current upstream
- `git checkout -b featureBv2 origin/master`
- Squash merge old feature branch changes
- `git merge --squash featureB`
- Make requested implementation changes
- Commit
- `git commit`
- Push new branch
- `git push myfork featureBv2`
- Meaning of `--squash`
- Combines all changes into one changeset
- Produces final state as if merged, but without a merge commit
- New commit has only one parent
- Lets you add more edits before recording the final commit
- Extra option callout
- `--no-commit` can delay commit in default merge process
- Figure 71: Commit history after featureBv2 work
## Use Case 4: Public Project over Email (patch series via mailing list)
### When this is used
- Many older/larger projects accept patches via mailing lists
- Each project has specific procedures → you must check their rules
### High-level flow
- Create a topic branch per patch series
- Instead of forking/pushing
- Generate email-ready patches
- Email to developer mailing list
### Create commits on a topic branch
- `git checkout -b topicA`
- Work + commit
- `git commit` (repeat)
### Generate mbox-formatted patch emails: `git format-patch`
- Command example
- `git format-patch -M origin/master`
- What it produces
- One `*.patch` file per commit
- Each patch file = one email message
- Subject = first line of commit message
- Body = remainder of message + the diff
- Why it's nice
- Applying patches generated this way preserves commit info properly
- Option noted
- `-M` makes Git detect renames
### Patch file structure (what maintainers/reviewers see)
- Email-like headers
- `From <sha> Mon Sep 17 00:00:00 2001`
- `From: <author>`
- `Date: <date>`
- `Subject: [PATCH x/y] <summary>`
- Commit message body text
- Separator
- `---`
- Patch begins
- `diff --git ...`
- Version footer (as shown in example)
### Adding extra explanation without changing commit message
- You can edit patch files
- Place extra notes between:
- the `---` line
- and the `diff --git` line
- These notes
- are readable by developers
- are ignored by patch application
### Sending patches without breaking formatting
- Copy/paste into email client can break whitespace/newlines
- Git-provided tools to send properly formatted patches
#### Option A: IMAP (drafts workflow) with `git imap-send`
- Setup `~/.gitconfig` `[imap]` section (example values shown)
- `folder` (e.g., `[Gmail]/Drafts`)
- `host` (e.g., `imaps://imap.gmail.com`)
- `user`
- `pass`
- `port` (e.g., `993`)
- `sslverify = false`
- SSL note
  - If IMAP server doesn't use SSL
- last lines may be unnecessary
- host uses `imap://` not `imaps://`
- Send patches to Drafts folder
- `cat *.patch | git imap-send`
- Then in email client
- Set `To:` mailing list
- Possibly CC maintainer/area owner
- Send
#### Option B: SMTP with `git send-email`
- Setup `~/.gitconfig` `[sendemail]` section (example values shown)
- `smtpencryption = tls`
- `smtpserver = smtp.gmail.com`
- `smtpuser`
- `smtpserverport = 587`
- Send patch files
- `git send-email *.patch`
- Interactive prompts noted
- “From” identity
- Recipients
- Message-ID / In-Reply-To for threading
- Output includes per-patch send logs and headers
- Tip resource
- Configuration help + sandbox for trial patches:
- https://git-send-email.io
## Contribution Summary (end of “Contributing” portion)
- Covered
- Multiple workflows (private vs public)
- How to handle merges in those workflows
- Commit hygiene
- whitespace checks
- logically separated commits
- strong commit messages
- Patch generation and emailing
- Transition
- Next: maintaining a project (integration/maintainer side)
## Maintaining a Project (Integrator/Maintainer perspective)
### What “maintaining” involves
- Accepting and applying patches from email
- Often produced by `format-patch`
- Integrating changes from remote branches
- From repos you add as remotes
- Applies whether you
- maintain a canonical repository
- or help by verifying/approving patches
- Goal
- Accept work in a way that is clear for contributors
- Sustainable long-term
### Working in Topic Branches (safe integration practice)
- Best practice
- Try new contributions in a temporary topic branch
- Why
- Easy to test and tweak
- Easy to abandon temporarily and return later
- Naming guidance
- Use descriptive theme-based names (e.g., `ruby_client`)
- Git maintainer convention:
- namespace: `sc/ruby_client`
- `sc` = contributor shorthand
- Create topic branch from `master`
- Create only:
- `git branch sc/ruby_client master`
- Create and switch immediately:
- `git checkout -b sc/ruby_client master`
### Applying Patches from Email (two main tools)
- Two methods
- `git apply`
- `git am`
#### Applying with `git apply` (for raw diffs)
- When to use
- Patch generated from `git diff` or generic Unix diff (not recommended if `format-patch` available)
- Apply a patch file
- `git apply /tmp/patch-ruby-client.patch`
- What it does
- Modifies files in working directory
- Similar to `patch -p1`, but:
- More paranoid (fewer fuzzy matches)
- Understands Git diff format adds/deletes/renames (patch tool may not)
- “Apply all or abort all” (atomic)
- Unlike `patch`, which can partially apply and leave a messy state
- Important limitation
- Does not create a commit
- You must stage and commit manually afterward
- Preflight check
- `git apply --check <patch>`
- Behavior
- No output → should apply cleanly
- Non-zero exit status on failure → script-friendly
#### Applying with `git am` (for `format-patch` / mbox)
- When to use (preferred)
- Contributor used `git format-patch`
- Patch includes author info and commit message
- Meaning / concept
- `am` = apply patches from a mailbox
- Reads mbox format (plain-text emails in one file)
- Apply a patch file generated by `format-patch`
- `git am 0001-limit-log-function.patch`
- What it does automatically
- Creates commits for you
- Uses email headers/body to populate:
- Author info: From + Date
- Commit message: Subject + body (before diff)
- Committer info becomes the applier + apply time
- Inspecting result (example command shown)
- `git log --pretty=fuller -1`
- Distinction highlighted
- Author vs Committer (applier)
- If patch fails to apply cleanly
- Common causes noted
- Your branch diverged too far
- Patch depends on another patch not applied yet
- Failure behavior
- Stops and shows options:
- Continue after fixing: `git am --resolved`
- Skip patch: `git am --skip`
- Abort and restore original branch: `git am --abort`
- Adds conflict markers to files (like merge/rebase)
- Manual conflict workflow
- Fix file(s)
- Stage:
- `git add <file>`
- Continue:
- `git am --resolved`
- Smarter conflict handling option: `-3`
- `git am -3 <patch>`
- What it does
- Attempts a three-way merge
- Caveat
    - Doesn't work if the base commit referenced by patch isn't in your repo
- When it works well
- Patch based on a public commit you have
- Example behavior shown
- Can detect “Patch already applied” when appropriate
- Interactive mode for patch series
- `git am -3 -i <mbox>`
- Stops at each patch and asks:
- yes / no / edit / view patch / accept all
- Useful when
- You have many patches saved
- You want to preview or skip already-applied patches
### Checking Out Remote Branches (pulling history from contributor repos)
- When to use
- Contributor provides:
- repository URL
- branch name containing their changes
- One-time setup + local testing
- Add remote:
- `git remote add jessica git://github.com/jessica/myproject.git`
- Fetch:
- `git fetch jessica`
- Checkout local branch from remote-tracking branch:
- `git checkout -b rubyclient jessica/ruby-client`
- Ongoing benefit
- If same contributor sends more branches
- you can fetch/checkout without re-adding remote
- Pros emphasized
- You get the full commit history
  - You know where it's based → proper three-way merges by default
- Avoid needing `-3` guesswork
- Cons / practicality
- Not efficient to maintain hundreds of remotes for occasional contributors
- For one-off patches, email may be easier
- Scripts/hosted services may change the trade-off
- One-time pull without saving a remote
- `git pull <url>`
- Does not store the remote in your config
### Determining What Is Introduced (reviewing a topic branch)
- Review commits unique to topic branch
- Exclude master commits:
- `git log contrib --not master`
- Equivalent idea to `master..contrib`
- Review changes per commit
- `git log -p ...` to append diffs
- Review overall diff of what merging would introduce
- Pitfall
- `git diff master` can be misleading if histories diverged
- It compares tip snapshots and may make it look like topic removes master-only changes
- Correct intention
- Diff topic tip vs common ancestor with master
- Compute common ancestor explicitly
- `git merge-base contrib master`
- Then:
- `git diff <merge-base-sha>`
- or `git diff $(git merge-base contrib master)`
- Shorthand: triple-dot diff
- `git diff master...contrib`
- Shows only changes introduced on topic branch since divergence
### Integrating Contributed Work (strategies)
#### Merging Workflows (merge-based integration)
- Simple merge-into-master workflow
- `master` contains stable code
- For each completed/verified topic branch
- merge into `master`
- delete topic branch
- repeat
- Figures
- Figure 72: History with several topic branches (`ruby_client`, `php_client`)
- Figure 73: After merging topic branches
- Two-phase merge cycle (master + develop)
- Two long-running branches
- `master` = only updated on stable releases
- `develop` = integration branch for new code
- Both pushed to public repository
- Process
- Merge topic branches into `develop`
- When ready to release
- tag release
- fast-forward `master` to `develop`
- Figures
- Figure 74: Before topic merge
- Figure 75: After topic merge (into develop)
- Figure 76: After project release (master fast-forward)
- User-facing implication
- Users can choose:
- `master` for stable builds
- `develop` for cutting-edge
- Extension: add `integrate` branch
- `integrate` collects work together
- When stable + tests pass
- merge into `develop`
- After `develop` proves stable
- fast-forward `master`
#### Large-Merging Workflows (Git project example)
- Git project long-running branches
- `master`
- `next`
- `seen` (formerly `pu` = proposed updates)
- `maint` (maintenance backports)
- Workflow
- New contributions collected as topic branches
- Topics evaluated
- Safe/ready → merge into `next` and push for wider testing
- Need work → merge into `seen`
- Totally stable → re-merge into `master`
- After master updates
- `next` and `seen` rebuilt from `master`
- Behavior noted
- `master` moves forward steadily
- `next` rebased occasionally
- `seen` rebased more often
- Topic branches removed after they reach `master`
- `maint` branch purpose
- Forked from last release for backports / maintenance releases
- Figures
- Figure 77: Managing many parallel contributed topic branches
- Figure 78: Merging topics into `next`/`seen` and re-merging to `master`
- Note
  - Specialized; refer to the Git Maintainer's guide for full clarity
#### Rebasing and Cherry-Picking Workflows (linear history preference)
- Rebase-based integration
- Maintainer rebases topic branch on top of current `master` (or `develop`)
- If successful
- fast-forward `master`
- Outcome
- Mostly linear history
- Cherry-pick-based integration
- Cherry-pick = reapply the patch from a single commit onto current branch
- Useful when
- You want only some commits from a topic branch
- Or topic branch contains only one commit
- Example
- Before: Figure 79 (commit `e43a6` on topic branch)
- Command:
- `git cherry-pick e43a6`
- After: Figure 80 (new commit SHA because applied at different time)
- After cherry-picking
- remove topic branch / drop unwanted commits
### Rerere (Reuse Recorded Resolution)
- When it helps
- Lots of merges/rebases
- Long-lived topic branches
- Meaning
- “reuse recorded resolution”
- What it does
- Records successful conflict resolutions (pre/post images)
- Reapplies the same resolution automatically if conflict repeats
- Enable (recommended global)
- `git config --global rerere.enabled true`
- Interacting with rerere
- `git rerere` command
- With no args:
- attempts to match current conflicts to recorded resolutions
- (automatic if enabled)
- Subcommands mentioned
- show what will be recorded
- erase specific resolutions
- clear entire cache
- Reference
- Covered in more detail later in “Rerere”
### Tagging Your Releases
- Purpose
- Mark releases so they can be recreated later
- Signed tagging example
- `git tag -s v1.5 -m 'my signed 1.5 tag'`
- Requires PGP key + passphrase
- Distributing public PGP key (for verifying signed tags)
- Problem
- Others need your public key to verify signatures
  - Git project's approach
- Store public key in repo as a blob
- Add a tag pointing directly to that blob
- Steps
1. Find the key
- `gpg --list-keys`
2. Export key and write to Git object database (blob)
- `gpg -a --export <KEYID> | git hash-object -w --stdin`
- Output is the blob SHA-1
3. Tag that blob
- `git tag -a maintainer-pgp-pub <blob-sha>`
4. Share tag(s)
- `git push --tags`
5. Users import key from repo
- `git show maintainer-pgp-pub | gpg --import`
6. Users verify signed tags using imported key
- Extra note
- Put verification instructions in tag message
- `git show <tag>` displays them
### Generating a Build Number (human-readable commit identifier)
- Problem
  - Git doesn't provide monotonically increasing build numbers per commit
- Solution
- `git describe <commit>`
- Output format
- `<most recent tag>-<commits since tag>-g<abbrev sha>`
- `g` indicates Git
- Example
  - `git describe master` → `v1.6.2-rc1-20-g8c5b85c`
- Behavior notes
- If commit itself is tagged
- Output is just the tag name
- Default requires annotated tags (`-a` or `-s`)
- Include lightweight tags with `--tags`
- Usability note
- Can use describe string with `git checkout` / `git show`
- But relies on abbreviated SHA → may become invalid if abbreviation length changes
- Example mentioned: Linux kernel increased abbrev length (8 → 10) for uniqueness
### Preparing a Release (archives for non-Git users)
- Tool
- `git archive`
- Create tar.gz snapshot (example)
  - ``git archive master --prefix='project/' | gzip > `git describe master`.tar.gz``
- Create zip snapshot (example)
  - ``git archive master --prefix='project/' --format=zip > `git describe master`.zip``
- Resulting archive contents
- Latest snapshot
- Under a top-level directory prefix (e.g., `project/`)
### The Shortlog (release notes / mailing list summary)
- Purpose
- Quick changelog summary since last release (or last email)
- Tool
- `git shortlog`
- Example
- `git shortlog --no-merges master --not v1.0.1`
- Output properties
- Groups commits by author
- Lists commit summaries
- Excludes merge commits with `--no-merges`
## Chapter Summary (end)
- You should now be comfortable
- Contributing using multiple distributed workflows
- Maintaining/integrating contributed work
- Next chapter preview
- GitHub (major Git hosting service)
```

798
mindmap/Git Branching.md Normal file
View File

@@ -0,0 +1,798 @@
```markmap
# Git Branching
## Why branching matters (and why Git is different)
- Branching (general VCS concept)
- Meaning: diverge from the main line of development
- Goal: keep working without disturbing the main line
- Traditional VCS branching (typical tradeoff)
- Often “expensive”
- May require creating a full new copy of the source directory
- Large projects → branching can take a long time
- Git branching “killer feature”
- Incredibly lightweight model
- Branch operations are (nearly) instantaneous
- Switching branches is typically just as fast
- Encourages frequent branching + merging (even multiple times per day)
- Mastering branching can significantly change how you develop
## Branches in a Nutshell (how Git's model works)
- Why you must understand Git's storage model
- Branching is “just pointers” in Git, but that only makes sense once you know what commits are
- Reference: earlier concept “What is Git?” (snapshots, SHA-1, objects)
### Git stores snapshots, not diffs/changesets
- Git's core model
- Instead of storing a sequence of diffs, Git stores a sequence of snapshots
- Each commit represents the state of the project at that point
### Commit objects: what a commit contains
- Commit object includes
- Pointer to the snapshot you committed (via a tree object)
- Metadata
- author name + email
- commit message
- Parent commit pointer(s)
- 0 parents → initial commit
- 1 parent → normal commit
- 2+ parents → merge commit (merging 2+ branches)
### Example: first commit with 3 files (blobs + tree + commit)
- Scenario
- Working directory contains 3 files
- You stage all and commit
- Staging step (`git add …`)
- Example:
- `git add README test.rb LICENSE`
- Git computes a checksum (SHA-1) for each file version
- Git stores each file version as a **blob** object
  - The staging area (index) records the blob checksums for what's staged
- Commit step (`git commit …`)
- Example:
- `git commit -m "Initial commit"`
- Git checksums each directory (here: project root) and stores a **tree** object
- Tree object
- Lists directory contents
- Maps filenames → blob IDs
- (And subdirectories → subtree IDs)
- Git creates a **commit** object
- Contains metadata + pointer to the root tree
- Object count after this commit (in this example)
- 3 blobs (file contents)
- 1 tree (directory listing + blob references)
- 1 commit (metadata + pointer to root tree)
### Commit history: parents create a graph
- Each new commit typically
- Points to a new snapshot (tree)
- Points to its direct parent commit (previous tip)
- Merge commits
- Have multiple parents
- Represent snapshots produced by merges
### What a branch is (Git definition)
- Branch = lightweight movable pointer to a commit (the tip)
- Default starting branch name
- `master` (historical default name)
- Moves forward automatically as you commit on it
#### Note: “master” is not special
- It's identical to any other branch
- It's common because
- `git init` creates it by default
- many repos never rename it
### Creating a new branch (pointer creation only)
- Command
- `git branch testing`
- Effect
- Creates a new pointer named `testing`
  - Points to the same commit you're currently on
- Does **not** switch your working branch
### HEAD: how Git tracks “current branch”
- HEAD in Git
- Special pointer to the local branch you currently have checked out
- Difference from other VCSs (conceptual)
- In Git, HEAD is a pointer to the current local branch (not just “latest revision” in a repo)
### Seeing branch pointers in `git log`
- Useful visualization option
- `git log --oneline --decorate`
- What `--decorate` shows
- Labels like `HEAD -> master`
- Other branch pointers (e.g., `testing`) attached to commits
### Switching branches (checkout)
- Switch to an existing branch
- `git checkout testing`
- Effect
- Moves HEAD to point to `testing`
- Your next commit will advance `testing` (because HEAD points to it)
### Committing advances only the checked-out branch pointer
- If you commit while on `testing`
- `testing` pointer moves forward
- `master` pointer stays behind (unchanged)
### Switching back updates pointers and your working directory
- Switch back
- `git checkout master`
- Checkout does two major things
- Moves HEAD to `master`
  - Resets working directory to match the snapshot at `master`'s tip
- Result
- Your future work on `master` diverges from the commit you left behind on `testing`
#### Note: `git log` doesn't show all branches by default
- Default behavior
- `git log` shows the history reachable from the currently checked-out branch
- To see another branch's history explicitly
- `git log testing`
- To see all branches
- `git log --all` (often paired with `--graph` and `--decorate`)
#### Note: switching branches changes working directory files
- On branch switch, Git may
- add files
- remove files
- modify files
- Safety rule
  - If Git can't switch cleanly (because it would overwrite conflicting uncommitted changes), it will block the checkout
### Divergent history and visualization
- When both branches get new commits after diverging
- History becomes a DAG with multiple “tips”
- View divergence clearly
- `git log --oneline --decorate --graph --all`
### Why Git branches are cheap
- A branch is stored as a simple reference (a file) containing
- the 40-character SHA-1 of the commit it points to
- plus a newline → ~41 bytes written
- Consequences
- Create/delete branches instantly
  - Switching is fast because it's mostly pointer movement + updating working directory snapshot
- Contrast: older VCS branching
- Often implemented by copying the entire project directory
- Can take seconds/minutes depending on repo size
- Merge support benefit
- Git records parent pointers in commits
- Merge-base detection for merges is typically automatic and easy
### Creating a branch and switching immediately
- Common pattern
- `git checkout -b <newbranch>`
- Git ≥ 2.23 alternative: `git switch`
- switch to existing: `git switch <branch>`
- create + switch: `git switch -c <newbranch>` (or `--create`)
- return to previous branch: `git switch -`
## Basic Branching and Merging (realistic workflow)
- Example workflow goal: develop features while handling urgent production fixes
- High-level steps (website scenario)
- Work on site
- Create branch for a user story
- Work on that branch
- Urgent hotfix appears
- switch to production branch
- create hotfix branch
- test hotfix
- merge hotfix and deploy
- return to user story branch
### Basic Branching example (issue branch + hotfix branch)
- Starting assumption
- You already have a few commits on `master`
#### Create and work on a topic branch (issue #53)
- Create + switch
- `git checkout -b iss53`
- Shorthand for
- `git branch iss53`
- `git checkout iss53`
- Do work and commit
- edit `index.html`
- `git commit -a -m "Create new footer [issue 53]"`
- Result
- `iss53` advances (HEAD points to it)
#### Interrupt with urgent hotfix (without mixing in feature work)
- Key rule before switching branches
- If working directory or staging area has uncommitted changes that would conflict, Git blocks switching
- Best practice: keep a clean working state when switching
- Mentioned workarounds (covered later): stashing, commit amending
- Switch back to production/stable branch
- `git checkout master`
- What you gain
- working directory restored to `master` snapshot (pre-issue work)
- you can focus on hotfix cleanly
#### Create and finish the hotfix
- Create + switch to hotfix branch
- `git checkout -b hotfix`
- Fix and commit
- edit `index.html`
- `git commit -a -m "Fix broken email address"`
#### Merge hotfix into master (fast-forward)
- Merge steps
- `git checkout master`
- `git merge hotfix`
- Why it's a “fast-forward” merge
- hotfix tip commit is directly ahead of master tip commit
- No divergence to reconcile
- Git simply moves the `master` pointer forward
- Deployment outcome
- master now points to a commit whose snapshot includes the hotfix
- you can deploy production fix
#### Delete completed hotfix branch
- Delete (safe when merged)
- `git branch -d hotfix`
- Rationale
- master already contains the hotfix work
#### Return to feature branch (iss53) and continue
- Switch back
- `git checkout iss53`
- Continue work and commit
- `git commit -a -m "Finish the new footer [issue 53]"`
- Important note: hotfix isn't in `iss53` automatically
- Options if needed
- merge master into iss53: `git merge master`
- or wait until iss53 is merged back into master
### Basic Merging (merge feature branch into master)
- When issue #53 is done
- `git checkout master`
- `git merge iss53`
- Why this merge differs from the hotfix merge
- histories diverged earlier
- master tip is not an ancestor of iss53 tip
- Git performs a three-way merge
- Inputs
- snapshot at master tip
- snapshot at iss53 tip
- snapshot at their common ancestor
- Output
- a new merged snapshot
- a new merge commit
- “special” because it has more than one parent
- Merge strategy note (as shown in output)
- merge made by the `recursive` strategy (typical default for two heads)
#### Clean up merged branch
- Delete iss53 after merge
- `git branch -d iss53`
### Basic Merge Conflicts (when Git cannot auto-merge)
- When conflicts occur
- both branches changed the same part of the same file differently
- What `git merge` does on conflict
- stops and reports conflict(s)
- does NOT create the merge commit yet
- requires manual resolution
#### Identify unmerged paths
- Use
- `git status`
- Status shows
- you are in a merging state
- list of “unmerged paths”
- hints to:
- fix conflicts
- `git add` files to mark resolution
- then `git commit` to conclude merge
#### Conflict markers inserted into files
- Git writes markers like
- `<<<<<<<` (start of conflict block)
- `=======` (separator)
- `>>>>>>>` (end of block)
- Meaning
- Top section = HEAD version (current branch at merge time, e.g., master)
- Bottom section = incoming branch version (e.g., iss53)
#### Resolve and mark as resolved
- Manual resolution workflow
- edit file(s)
- choose one side or combine them
- remove all markers
- Mark resolution
- `git add <file>` for each conflicted file
- staging indicates conflict resolved in Git
#### Using a merge tool (optional)
- Run
- `git mergetool`
- Behavior
- opens a visual merge tool
- helps walk through conflict resolution
- If not configured
  - Git warns `merge.tool` isn't configured
- offers possible tool choices (platform dependent)
- you can specify an alternative tool name
#### Finalize the merge
- Verify state
- `git status`
- typically indicates “all conflicts fixed” but merge still in progress
- Conclude
- `git commit`
- Merge commit message details
- default message mentions merged branch
- often lists conflicts
- note in message references merge metadata (e.g., `.git/MERGE_HEAD`)
- you may edit message to explain how/why conflicts were resolved
- Reference for deeper conflict handling
- “Advanced Merging” (mentioned as later coverage)
## Branch Management (everyday utilities)
- `git branch` does more than create/delete
- provides multiple views and filters of branch state
### Listing branches
- `git branch`
- lists local branches
- `*` shows current branch (HEAD points here)
### See last commit on each branch
- `git branch -v`
- shows branch tip commit SHA + message summary
### Filter by merge status
- `git branch --merged`
- branches already merged into current branch
- usually safe to delete those (except the current `*` branch)
- `git branch --no-merged`
- branches not merged into current branch
- deletion safety
- `git branch -d <branch>` fails if not fully merged
- `git branch -D <branch>` forces deletion (discarding unmerged work)
#### Note: merge-status filters are relative to a base
- Default base
- current branch (if no argument given)
- You can compare relative to a different branch without checking it out
- `git branch --no-merged master`
## Changing a branch name (rename)
- Safety warning
- do not rename branches still used by other collaborators
- do not rename default branches (master/main/etc.) without reading next section
### Rename locally
- `git branch --move bad-branch-name corrected-branch-name`
- Effect
- preserves history
- changes only your local ref name initially
### Publish the renamed branch and set upstream
- `git push --set-upstream origin corrected-branch-name`
- Effect
- creates the new remote branch name
- configures tracking
### Remove the old remote branch name
- `git push origin --delete bad-branch-name`
- Effect
- fully replaces the bad remote name with the corrected one
### Verification
- `git branch --all`
- shows local branches and `remotes/origin/...` remote-tracking refs
## Changing the master branch name (e.g., `master` → `main`)
- High-impact warning
- renaming default branch can break
- integrations/services
- helper utilities
- build/release scripts
- any references in code, configs, docs
- consult collaborators
- search/update all references to the old name
### Local rename
- `git branch --move master main`
- Result
- local `master` ref no longer exists
- local `main` points to the same commit tip
### Push and set upstream
- `git push --set-upstream origin main`
- Result
- remote now has `main`
- remote may still have `master`
- remote HEAD may still point to `origin/master` until host settings change
### Migration checklist (must update external references)
- Dependent projects
- update code/config referencing old branch
- Test runner configs
- update any branch-name assumptions
- Build/release scripts
- update target branch names
- Repo host settings
- default branch
- merge rules / protections
- other branch-name-based settings
- Documentation
- update old references
- Pull requests
- close/merge/retarget PRs aimed at old branch
### Delete old remote branch after transition
- `git push origin --delete master`
## Branching Workflows (patterns enabled by lightweight branches)
- Goal
- choose a branching strategy that matches team/release needs
- Key enabler
- easy repeated three-way merges over time
### Long-Running Branches (progressive stability)
- Concept
- keep multiple always-open branches for different stability levels
- merge “upwards” as code becomes stable
- Common pattern
- `master`: only stable/released (or release-candidate) code
- `develop` / `next`: integration/testing branch; can be unstable
- topic branches merged into develop/next for testing before master
- How to think about “stability”
- linear commit history view
- stable branches are “behind” (older, tested commits)
- bleeding-edge branches are “ahead” (newer, less proven commits)
- “silo” view
- commits graduate to more stable silos once fully tested
- Multi-level stability in large projects
- additional branches like `proposed` / `pu` (proposed updates)
- idea: not everything is ready for `next` or `master` immediately
- Note
- not required, but often helpful for large/complex projects
### Topic Branches (short-lived branches)
- Definition
- branch created for a single feature/bugfix/experiment
- typically merged and deleted after completion
- Why Git makes this common
- branch creation/merging is cheap → can do it many times a day
- Benefits
- clean context switching (work isolated by topic)
  - easier code review (topic's commits grouped)
- flexible integration timing (minutes, days, months later)
- can merge in any order regardless of creation order
- Example topology from the chapter
- work on `master`
- branch `iss91` (issue work)
- branch `iss91v2` off `iss91` (alternate approach)
- return to `master` and continue other work
- branch `dumbidea` off `master` (experimental idea)
- outcome
- discard `iss91` if inferior
- merge `iss91v2` and `dumbidea` if chosen
- Reminder: local operations
- branching/merging is local-only until you fetch/push/pull
- Reference mention
- more workflow discussion later in “Distributed Git”
## Remote Branches (remote references + remote-tracking branches)
### Remote references overview
- Remote repos contain references (pointers) to
- branches
- tags
- other refs
- Ways to inspect
- `git ls-remote <remote>` (full list of remote refs)
- `git remote show <remote>` (focus on remote branches + info)
### Remote-tracking branches
- Definition
- local references that record the state of remote branches
  - you can't move them yourself
- Git updates them during network communication
- Naming
- `<remote>/<branch>`
- Examples
- `origin/master`
- `origin/iss53`
- Mental model
- bookmarks showing where a remote branch was last time you connected
### Clone example (how origin/master appears)
- When cloning from a server
- Git names the remote `origin` by default
- downloads data
- creates `origin/master` (remote-tracking)
  - creates your local `master` starting at the same commit as origin's `master`
#### Note: “origin” is not special
- It's just the default name created by `git clone`
- You can rename the default remote at clone time
- `git clone -o booyah ...` → remote-tracking branch becomes `booyah/master`
### Divergence between local and remote
- If you commit locally and someone else pushes to the remote
- histories diverge
- `origin/master` does not move until you communicate
### Fetching updates remote-tracking branches
- `git fetch origin`
- contacts remote
  - downloads objects you don't have
- updates pointers like `origin/master` to newer commits
### Multiple remotes
- Add another remote
- `git remote add teamone <url>`
- Fetch it
- `git fetch teamone`
- Possible outcome
- if teamone has only a subset of commits you already have from origin:
- fetch downloads no new objects
    - still updates `teamone/master` pointer to match teamone's `master` tip
## Pushing (sharing branches)
### Why pushing is explicit
- Local branches do not automatically sync to remotes
- Benefit
- you can keep private local branches
- push only branches you intend to share/collaborate on
### Push a branch
- Pattern
- `git push <remote> <branch>`
- Example
- `git push origin serverfix`
- What Git expands it to (conceptual)
- `refs/heads/serverfix:refs/heads/serverfix`
- Push local branch to a different remote branch name
- `git push origin serverfix:awesomebranch`
### Authentication convenience (HTTPS)
- HTTPS push commonly prompts for username/password
- To avoid typing credentials repeatedly
- credential cache example:
- `git config --global credential.helper cache`
- reference mentioned: “Credential Storage” (for other options)
### After someone else fetches
- Fetching a pushed branch
- `git fetch origin`
- Result
- creates/updates a remote-tracking ref (e.g., `origin/serverfix`)
- does NOT create a local editable branch automatically
### Using fetched remote-tracking branch work
- Merge directly into current branch
- `git merge origin/serverfix`
- Create a local branch based on it (editable) and track it
- `git checkout -b serverfix origin/serverfix`
## Tracking Branches (local branches that track upstream)
### Definitions
- Tracking branch
- local branch tied to a remote-tracking branch
- Upstream branch
- remote-tracking branch the local branch tracks
### Why tracking matters
- On a tracking branch, `git pull` can automatically
- fetch from the right remote
- merge the right branch
### How tracking branches are created
- Common creation form
- `git checkout -b <branch> <remote>/<branch>`
- Shorthand
- `git checkout --track origin/serverfix`
- Extra shortcut
- `git checkout serverfix`
- works if
    - local `serverfix` doesn't exist, and
- exactly one remote has `serverfix`
- Different local name than remote branch
- `git checkout -b sf origin/serverfix`
- local `sf` tracks `origin/serverfix`
### Set or change upstream later
- `git branch -u origin/serverfix`
- also available as `--set-upstream-to`
### Upstream shorthand in commands
- `@{upstream}` or `@{u}`
- references the upstream branch of the current branch
- Example
- `git merge @{u}` instead of `git merge origin/master` (when master tracks origin/master)
### Inspect tracking status and ahead/behind
- `git branch -vv`
- shows local branches
- indicates upstream tracking target
- shows ahead/behind counts
- Interpreting counts
- ahead N → N local commits not pushed
- behind N → N remote commits not merged locally
- Cache caveat
  - ahead/behind shown is from last fetch; command doesn't contact server
- To refresh counts
- `git fetch --all; git branch -vv`
## Pulling (fetch + merge convenience)
- `git fetch`
- downloads new data
- does not modify working directory
- leaves integration to you (merge/rebase)
- `git pull`
- in most cases = `fetch` immediately followed by `merge`
- uses tracking (upstream) info to pick remote + branch
- Guidance from the chapter
- explicit `fetch` + `merge` is often clearer than the “magic” of `pull`
## Deleting Remote Branches
- When a remote branch is no longer needed
- merged into mainline/stable branch on the server
- Delete remote branch pointer
- `git push origin --delete serverfix`
- Effect
- removes the branch pointer on the server
- server may keep underlying objects until garbage collection
- accidental deletions can often be recovered before GC runs
## Rebasing (the other integration strategy)
- Two main ways to integrate changes between branches
- `merge`
- `rebase`
### The Basic Rebase (replaying commits)
- Starting situation
- branches diverged; each has unique commits
- Merge recap (already covered earlier)
- three-way merge of:
- tip snapshot A
- tip snapshot B
- common ancestor snapshot
- creates a new snapshot + merge commit
- Rebase concept
- take the patch introduced by commits on one branch
- reapply them on top of another branch's tip
- Example commands
- `git checkout experiment`
- `git rebase master`
- Internal steps (conceptual)
- find common ancestor between current branch and target branch
- compute diffs for each commit on current branch since ancestor
- save diffs temporarily
- reset current branch to target tip
- apply diffs sequentially (creating new commits with new SHAs)
- After rebase
- integrate by fast-forward merge
- `git checkout master`
- `git merge experiment`
- Result comparison
- final snapshot content is the same as with merge
- history is different
- rebase → linear-looking history
- merge → preserves the true parallel shape
- Common use case (contributing workflow)
- rebase your work onto `origin/master` before submitting patches
- maintainer can integrate via fast-forward / clean apply
- Core conceptual distinction
- rebase: replay changes in order introduced
- merge: combine endpoints and record a merge
### More Interesting Rebases (rebasing a branch off another topic branch)
- Scenario
- topic branch `server` created from master; commits added
- topic branch `client` created from `server`; commits added
- later additional commits added to `server`
- Goal
- ship client changes now (merge into master)
- delay server changes until tested
- Use `--onto`
- `git rebase --onto master server client`
- Meaning
- take commits on `client` that are not on `server`
- replay them as if `client` started from `master`
- Integrate client quickly
- `git checkout master`
- `git merge client` (fast-forward)
- Integrate server later without manual checkout
- `git rebase master server`
- checks out `server` and replays onto master
- `git checkout master`
- `git merge server` (fast-forward)
- Cleanup
- delete topic branches once integrated
- `git branch -d client`
- `git branch -d server`
### The Perils of Rebasing (rewriting published history)
- The one-line rule
- Do not rebase commits that exist outside your repository and that people may have based work on
- Why rebasing public commits is dangerous
- rebase abandons existing commits and creates new ones
- new commits have different SHAs
- collaborators who based work on old SHAs must reconcile mismatched history
- Example failure pattern (from the chapter)
- you clone and do work
- someone else pushes a merge to the central server
- later they rebase their work and `push --force` (rewriting server history)
- you fetch new commits
- if you `git pull` normally, you may create a merge combining old + new lines
- can lead to duplicate-looking commits (same message/author/date) with different IDs
- pushing that back can reintroduce commits the other dev tried to eliminate
- Social consequence emphasized
- if you rewrite shared history, teammates will have to re-merge and untangle confusion
### Rebase When You Rebase (recovering after a force-push)
- Problem after force-push
- determine which commits are uniquely yours vs rewritten copies
- Patch-id concept
- besides commit SHA-1, Git can compute a checksum based on the patch content (“patch-id”)
- How rebase helps
- rebasing onto the updated target can let Git:
- identify which commits are already represented (same patch)
- replay only the unique commits
- Example approach
- `git rebase teamone/master`
- What Git may compute during this recovery rebase (as described)
- determine commits unique to your branch
- exclude merge commits from replay
- detect commits that were rewritten but represent the same patch in the target
- apply remaining unique commits on top of the updated branch
- Limitation noted
- works best if rewritten commits are almost the same patch
- otherwise Git may not detect duplication and may reapply a similar patch (possibly failing)
- Convenience options
- `git pull --rebase` instead of normal pull
- or manual: `git fetch` then `git rebase <remote>/<branch>`
- configure default:
- `git config --global pull.rebase true`
- Safety guideline recap
- safe: rebase commits that never left your machine
- generally ok: rebase pushed commits if nobody based work on them
- risky: rebase publicly shared commits → coordinate + warn others to use `pull --rebase`
### Rebase vs. Merge (choosing based on what “history” means)
- Two viewpoints on commit history
- History as a factual record
- commit history documents what actually happened
- rewriting is “lying” about events
- merge commits reflect real parallel work
- History as a curated story
- raw development includes missteps and dead ends
- before mainline, rewrite history to tell a clearer story
- tools mentioned: `rebase`, `filter-branch`
- Conclusion
- no universal best choice; depends on team/project
- Practical “best of both worlds” guideline
- rebase local changes before pushing (clean up)
- never rebase anything you've pushed somewhere shared/public
## Summary (skills this chapter expects you to have now)
- Branch creation and switching
- create branches, move between them
- understand HEAD as “current branch pointer”
- Merging
- fast-forward merges
- three-way merges and merge commits (multiple parents)
- resolve conflicts (markers, `status`, `add`, `mergetool`, final `commit`)
- Branch management
- list branches and identify current branch
- inspect branch tips (`-v`)
- find merged/unmerged branches (`--merged`, `--no-merged`)
- delete safely (`-d`) or forcibly (`-D`)
- rename branches (local + remote cleanup)
- rename default branch (master/main) with ecosystem updates
- Collaboration with remotes
- remote-tracking branches, fetch/push/pull behaviors
- create tracking branches and set upstream
- delete remote branches
- Rebasing
- what rebase does and why it can make history linear
- advanced rebase (`--onto`)
- when rebasing is dangerous and how to mitigate with `pull --rebase`
- Next topic preview (mentioned)
- how to run your own Git repository-hosting server
```

667
mindmap/Git Internals.md Normal file
View File

@@ -0,0 +1,667 @@
```markmap
# Git Internals (Chapter 8)
## Why this chapter exists / positioning in the book
- Can be read early (curiosity) or late (after learning porcelain)
- Understanding internals helps explain *why* Git behaves as it does
- Tradeoff: powerful insight vs. potential complexity for beginners
- Core premise
- Git = **content-addressable filesystem** + **VCS user interface** layered on top
- Historical note
- Early Git (mostly pre-1.5) UI emphasized filesystem concepts → felt complex
- Modern Git UI refined; early “complex Git” stereotype lingers
- Chapter flow
- Content-addressable storage layer (objects) first
- Then transports (protocols)
- Then maintenance + recovery tasks
## Plumbing and Porcelain
- Porcelain commands (high-level UX)
- Examples: `checkout`, `branch`, `remote`, …
- Most of the book focuses on these
- Plumbing commands (low-level toolkit)
- Designed to be chained (UNIX-style) or used from scripts/tools
- Used here to expose internals and demonstrate implementation
- Often not meant for humans to type frequently
## The `.git` directory (what Git stores/manipulates)
- Created by `git init`
- Backups/clones
- Copying `.git/` elsewhere gives *nearly everything* needed
- Fresh repo typical contents
- `config`
- Project-specific configuration
- `description`
- Used by GitWeb only
- `HEAD`
- Points to current branch (or object in detached HEAD)
- `hooks/`
- Client/server hook scripts (covered elsewhere)
- `info/`
- Global excludes (patterns you don't want in `.gitignore`)
- `objects/`
- Object database (content store)
- `refs/`
- Pointers into commits (branches, tags, remotes, …)
- `index` (not shown initially)
- Staging area data (created when needed)
- “Core” pieces emphasized here
- `objects/` — all stored content
- `refs/` — names/pointers into commit graph
- `HEAD` — what's checked out
- `index` — staging area snapshot used to build trees/commits
## Git Objects (content-addressable store)
### Concept: a key-value database
- Insert arbitrary data → receive a unique key → retrieve later
- Key is a checksum (SHA-1 in these examples) of:
- a header + the content (details later)
### Creating a blob object with `git hash-object`
- What it does
- hashes content
- optionally writes object into `.git/objects/`
- returns the object id (40 hex chars = SHA-1)
- Key options
- `-w` — write object to object database
- `--stdin` — read content from stdin (otherwise expects a filename)
- Object storage layout on disk (loose objects)
- Path: `.git/objects/<first2>/<remaining38>`
- Directory name = first 2 chars of SHA-1
- Filename = remaining 38 chars
- Inspecting an object
- `git cat-file -p <sha>` — pretty-print content (auto-detect type)
- `git cat-file -t <sha>` — print object type
- Blob objects
- store *only content* (no filename)
- example: versions of `test.txt` stored as different blobs
### Retrieving content
- You can “recreate” a file from a blob by redirecting `cat-file` output
- `git cat-file -p <sha> > test.txt`
- Limitations of blobs alone
- Must remember SHA-1 per version
- No filenames or directory structure
## Tree Objects (filenames + directories + grouping)
### What a tree is
- Stores a directory listing-like structure
- Entries contain
- mode
- type (`blob` or `tree`)
- SHA-1 of target object
- filename
- Conceptual model (simplified UNIX-like)
- tree ↔ directory entries
- blob ↔ file contents
### Inspecting trees
- `git cat-file -p master^{tree}`
- shows top-level tree for the last commit on `master`
- example entries include blobs (files) and trees (subdirectories)
- Subtrees
- a directory entry points to another tree object
- Shell quoting pitfalls for `master^{tree}`
- Windows CMD: `^` is escape → use `master^^{tree}`
- PowerShell: quote braces → `git cat-file -p 'master^{tree}'`
- ZSH: `^` globbing → quote expression → `git cat-file -p "master^{tree}"`
### Building trees manually (via the index)
- Normal Git behavior
- Creates trees from the staging area (index)
- Plumbing commands used
- `git update-index`
- manipulate index entries
- `--add` required if path not in index yet
- `--cacheinfo` used when content isn't in working tree (already in DB)
- requires: `<mode> <sha> <path>`
- valid file modes for blobs
- `100644` normal file
- `100755` executable
- `120000` symlink
- `git write-tree`
- writes current index to a tree object
- `git read-tree`
- reads a tree into index
- `--prefix=<dir>/` stages it as a subtree
### Example sequence (three trees)
- Tree 1: `test.txt` v1
- stage blob via `update-index --add --cacheinfo 100644 <sha_v1> test.txt`
- `write-tree` → tree1 (contains `test.txt` → blob v1)
- Tree 2: `test.txt` v2 + `new.txt`
- update index to point `test.txt` to blob v2
- add `new.txt`
- `write-tree` → tree2 (two file entries)
- Tree 3: include Tree 1 under `bak/`
- `read-tree --prefix=bak <tree1>`
- `write-tree` → tree3
- tree3 contains
- `bak/` → tree1
- `new.txt` → blob
- `test.txt` → blob v2
## Commit Objects (snapshots + history + metadata)
### Why commits exist
- Trees represent snapshots but:
- SHA-1s are not memorable
- need who/when/why metadata
- need parent links to form history
### Creating commits with `git commit-tree`
- Inputs
- a tree SHA-1 (snapshot)
- optional parent commit SHA-1(s)
- message from stdin
- Commit object fields
- `tree <tree_sha>`
- `parent <parent_sha>` (none for first commit)
- `author ...` (from `user.name`, `user.email`, timestamp)
- `committer ...` (same source)
- blank line
- commit message
- Note about hashes in book
- commit hashes differ due to timestamps/author data; use your own
### Example history
- Commit 1 points to tree1 (no parent)
- Commit 2 points to tree2, parent = commit1
- Commit 3 points to tree3, parent = commit2
- View history
- `git log --stat <commit3_sha>`
- Key takeaway
- Porcelain `git add`/`git commit` do essentially:
- write blobs for changed content
- update index
- write tree(s)
- write commit referencing tree + parent
## Object Storage (how objects are actually stored)
### Common storage recipe
- Each object stored as:
- header + content
- Header format
- `<type> <size>\0`
- type: `blob`, `tree`, `commit`, `tag`
- size: bytes in content
- null byte terminator
- Object id
- SHA-1 of (header + content)
- Compression
- zlib-compressed before writing to disk
### Ruby walk-through (blob example)
- Build content string
- Build header (`"blob #{bytesize}\0"`)
- Concatenate and hash with SHA-1
- matches `git hash-object` (use `echo -n` to avoid newline)
- Compress with zlib
- Write to `.git/objects/<sha[0,2]>/<sha[2,38]>`
- Validate with `git cat-file -p <sha>`
## Git References (refs) — naming commits/objects
### What refs are
- Human-friendly names → files containing SHA-1s
- Stored under `.git/refs/`
- `refs/heads/` — branches
- `refs/tags/` — tags
- (later) `refs/remotes/` — remote-tracking refs
### Creating/updating refs
- Direct edit possible but discouraged
- `echo <sha> > .git/refs/heads/master`
- Safer: `git update-ref`
- `git update-ref refs/heads/master <sha>`
- Branch meaning
- A branch is a ref that points to the tip commit of a line of work
- Example: create branch at older commit
- `git update-ref refs/heads/test <sha_of_commit2>`
- `git log test` shows only commits reachable from that ref
## `HEAD` — what you have checked out
### Symbolic reference (usual case)
- `.git/HEAD` commonly contains
- `ref: refs/heads/<branch>`
- On checkout, Git updates `HEAD` to point at chosen branch ref
- Commit parent determination
- `git commit` uses commit pointed to by ref that `HEAD` references
### Detached HEAD (special case)
- Sometimes `HEAD` contains a raw SHA-1
- Happens when checking out
- a tag
- a commit
- a remote-tracking branch
### Managing HEAD safely
- `git symbolic-ref HEAD` — read where HEAD points
- `git symbolic-ref HEAD refs/heads/test` — set symbolic HEAD
- Constraint
- cannot point outside `refs/` namespace
## Tags (lightweight vs annotated)
### Tag object
- Fourth object type: `tag`
- Similar to commit object (tagger/date/message/pointer)
- Usually points to a commit, but can tag any object (blob/tree/commit)
### Lightweight tags
- Just a ref under `refs/tags/` pointing directly to an object
- `git update-ref refs/tags/v1.0 <commit_sha>`
- Never moves (unlike branch tips)
### Annotated tags
- Create a tag object and a ref that points to it
- `git tag -a v1.1 <commit_sha> -m '...'`
- `.git/refs/tags/v1.1` contains SHA-1 of the *tag object*
- Tag object content includes
- `object <target_sha>`
- `type <target_type>`
- `tag <name>`
- `tagger ...`
- message
- Examples mentioned
- Tagging a maintainer's GPG key stored as a blob
- Kernel repo has an early tag pointing at an initial tree
## Remotes (remote-tracking references)
### What they are
- Refs under `refs/remotes/<remote>/...`
- Store last known state of remote branches after communicating
### Example
- After `git remote add origin ...` and `git push origin master`
- `.git/refs/remotes/origin/master` stores last known remote SHA-1
### Key characteristics
- Read-only from user standpoint
- You can checkout one, but Git won't set `HEAD` as symbolic ref to it
- They act as bookmarks managed by Git for remote state
## Packfiles (space-efficient object storage)
### Loose objects vs packed objects
- Loose object: one zlib file per object
- Packfile:
- single `.pack` containing many objects
- `.idx` index mapping SHA-1 → offsets
### When packing happens
- Automatically when:
- many loose objects
- many packfiles
- Manually via `git gc`
- Often during push to a server
### Demonstration scenario (why deltas matter)
- Add large file (`repo.rb`, ~22K) and commit
- file stored as blob
- Modify it slightly and commit again
- creates a whole new blob
- two near-identical large blobs now exist
### `git gc` effects
- Creates pack + index
- Removes many loose objects (reachable ones)
- Leaves dangling/unreachable blobs loose (not in pack)
### Inspecting what's packed
- `git verify-pack -v <pack>.idx`
- shows objects, sizes, offsets, delta bases
- Delta storage behavior shown
- newer version often stored in full
- older version stored as delta against newer
- optimized for fast access to most recent version
- Repacking
- can happen automatically
- can be triggered any time via `git gc`
## Refspec (ref mapping rules for fetch/push)
### Where it appears
- `.git/config` remote section created by `git remote add`
- `fetch = +refs/heads/*:refs/remotes/origin/*`
### Syntax
- `(+)?<src>:<dst>`
- optional `+` forces update even if not fast-forward
- `<src>`: refs on remote
- `<dst>`: local tracking refs
### Default fetch behavior
- Fetch all remote branches (`refs/heads/*`)
- Track locally as `refs/remotes/origin/*`
- Equivalent references
- `origin/master`
- `remotes/origin/master`
- `refs/remotes/origin/master`
### Custom fetch examples
- Fetch only master always
- `fetch = +refs/heads/master:refs/remotes/origin/master`
- One-time fetch to a different local name
- `git fetch origin master:refs/remotes/origin/mymaster`
- Multiple refspecs
- CLI or multiple `fetch =` lines in config
- Fast-forward enforcement and overrides
- non-FF rejected unless `+` used
- Partial globs (Git ≥ 2.6.0)
- `qa*` patterns for multiple branches
- Namespaces/directories for teams
- e.g., `refs/heads/qa/*` → `refs/remotes/origin/qa/*`
## Pushing refspecs & deleting remote refs
### Pushing into a namespace
- Push local `master` to remote `qa/master`
- `git push origin master:refs/heads/qa/master`
- Configure default push mapping
- `push = refs/heads/master:refs/heads/qa/master`
### Deleting remote references
- Old refspec deletion form
- `git push origin :topic`
- Newer explicit flag (Git ≥ 1.7.0)
- `git push origin --delete topic`
### Note/limitation
- Refspecs can't fetch from one repo and push to another (as a single refspec trick)
## Transfer Protocols (moving data between repositories)
### Two major approaches
- Dumb protocol
- simple, HTTP read-only, no Git server-side logic
- inefficient, hard to secure/private; rarely used now
- Smart protocol
- Git-aware server process
- negotiates what data is needed
- supports pushes
### Dumb protocol (HTTP) — conceptual clone walkthrough
- `git clone http://server/<repo>.git`
- Fetch refs list (requires server-generated metadata)
- `GET info/refs`
- generated by `update-server-info` (often via post-receive hook)
- Fetch HEAD to determine default branch
- `GET HEAD` → `ref: refs/heads/master`
- Walk objects starting from advertised commit SHA
- `GET objects/<sha_prefix>/<sha_rest>` for loose objects
- parse commit → learn `tree` + `parent`
- If tree object not found as loose (404)
- check alternates
- `GET objects/info/http-alternates`
- check available packfiles
- `GET objects/info/packs`
- `GET objects/pack/pack-....idx`
- `GET objects/pack/pack-....pack`
- Once required objects are fetched
- checkout working tree for branch pointed to by downloaded `HEAD`
### Smart protocol — overview
- Upload (push): `send-pack` (client) ↔ `receive-pack` (server)
- Download (fetch/clone): `fetch-pack` (client) ↔ `upload-pack` (server)
#### Uploading data (push)
- SSH transport
- client runs remote command (conceptually)
- `ssh ... "git-receive-pack '<repo>.git'"`
- server advertises
- current refs + SHA-1s
- capabilities appended on the first line after a NUL separator
- pkt-line framing
- each chunk begins with 4 hex chars = length (including those 4 chars)
- `0000` indicates end
- client sends per-ref updates
- `<old_sha> <new_sha> <refname>`
- all zeros on left = create ref
- all zeros on right = delete ref
- client sends a packfile of objects server lacks
- server replies success/failure
- e.g., `unpack ok`
- HTTP(S) transport
- discovery
- `GET .../info/refs?service=git-receive-pack`
- push
- `POST .../git-receive-pack` with update commands + packfile
- note: HTTP may wrap in chunked transfer encoding
#### Downloading data (fetch/clone)
- SSH transport
- client runs remote command
- `ssh ... "git-upload-pack '<repo>.git'"`
- server advertises
- refs and capabilities
- `symref=HEAD:refs/heads/master` so client knows default branch
- negotiation
- client sends `want <sha>`
- client sends `have <sha>`
- client sends `done` to request packfile generation
- server returns packfile (optionally multiplexing progress via side-band)
- HTTP(S) transport
- discovery
- `GET .../info/refs?service=git-upload-pack`
- negotiation/data request
- `POST .../git-upload-pack` with want/have data
- response includes packfile
### Protocols summary note
- Only the high-level handshake is covered
- Many capabilities/features (e.g., `multi_ack`, `side-band`) exist beyond this chapter's scope
## Maintenance and Data Recovery
### Maintenance (`gc`, packing, pruning)
- Auto maintenance
- Git may run `auto gc` occasionally
- Usually no-op unless thresholds exceeded
- What `git gc` does
- packs loose objects into packfiles
- consolidates packfiles
- removes unreachable objects older than a few months
- Trigger thresholds (approx)
- ~7000 loose objects
- >50 packfiles
- Config knobs
- `gc.auto`
- `gc.autopacklimit`
- Manual auto-gc run
- `git gc --auto` (often does nothing)
### Packing refs into `packed-refs`
- Before gc: refs stored as many small files
- `.git/refs/heads/*`, `.git/refs/tags/*`, …
- After gc: moved for efficiency into `.git/packed-refs`
- format lines: `<sha> <refname>`
- annotated tags include a “peeled” line starting with `^`
- indicates the commit the tag ultimately points to
- Updating a ref after packing
- Git writes a new loose ref file under `.git/refs/...`
- doesn't edit `packed-refs`
- Lookup behavior
- Git checks loose refs first, then `packed-refs` fallback
### Data Recovery (finding lost commits)
#### Common loss causes
- force-delete a branch containing work you later want
- `git reset --hard` moving a branch tip back, abandoning newer commits
#### Reflog-based recovery
- Reflog records where `HEAD` pointed whenever it changes
- commits, branch switches, resets
- also updated by `git update-ref` (reason to prefer it over manual ref edits)
- Useful commands
- `git reflog` — concise HEAD history
- `git log -g` — reflog shown as a log
- Recovery technique
- find lost commit SHA-1 in reflog
- create a ref/branch pointing to it
- `git branch recover-branch <sha>`
#### Recovery without reflog
- If reflog is missing (e.g., `.git/logs/` removed)
- Use integrity checker
- `git fsck --full`
- shows dangling/unreachable objects
- `dangling commit <sha>`
- Recover similarly
- create a new branch ref pointing to the dangling commit
### Removing objects (purging big files from history)
#### Problem statement
- Git clones fetch full history
- A huge file added once remains in history forever if reachable
- even if deleted next commit
- Especially painful in imported repos (SVN/Perforce)
#### Strong warning
- Destructive: rewrites commit history (new commit IDs)
- Must coordinate contributors (rebase onto rewritten history)
#### Workflow to locate and remove large objects
- Confirm repo size after packing
- `git gc`
- `git count-objects -v` (check `size-pack`)
- Find largest packed objects
- `git verify-pack -v <pack>.idx | sort -k 3 -n | tail -3`
- third field in output is object size
- Map blob SHA to filename
- `git rev-list --objects --all | grep <blob_sha_prefix>`
- Identify commits that touched the path
- `git log --oneline --branches -- <file>`
- Rewrite history to remove the file from every tree
- `git filter-branch --index-filter 'git rm --ignore-unmatch --cached <file>' -- <bad_commit>^..`
- `--index-filter` is fast (no full checkout per commit)
- `git rm --cached` removes from index/tree, not just working dir
- Remove pointers to old history
- `rm -Rf .git/refs/original`
- `rm -Rf .git/logs/`
- Repack/clean
- `git gc`
- optionally remove remaining loose objects
- `git prune --expire now`
## Environment Variables (controlling Git behavior)
> Chapter note: not exhaustive; highlights the most useful
### Global behavior
- `GIT_EXEC_PATH`
- where Git finds sub-programs (e.g., `git-commit`, `git-diff`)
- inspect via `git --exec-path`
- `HOME`
- where Git finds global config
- can be overridden for portable Git setups
- `PREFIX`
- system-wide config path: `$PREFIX/etc/gitconfig`
- `GIT_CONFIG_NOSYSTEM`
- disable system-wide config
- Output paging/editing
- `GIT_PAGER` (fallback `PAGER`)
- `GIT_EDITOR` (fallback `EDITOR`)
### Repository locations
- `GIT_DIR`
- where `.git` directory is
- if unset, Git walks up directory tree searching
- `GIT_CEILING_DIRECTORIES`
- stops upward search early (useful for slow filesystems)
- `GIT_WORK_TREE`
- working tree root for non-bare repos
- `GIT_INDEX_FILE`
- alternate index path
- Object database
- `GIT_OBJECT_DIRECTORY` — override `.git/objects`
- `GIT_ALTERNATE_OBJECT_DIRECTORIES`
- colon-separated additional object stores (share objects across repos)
### Pathspecs (path matching rules)
- Pathspecs used in `.gitignore` and CLI patterns (e.g., `git add *.c`)
- Wildcard behavior toggles
- `GIT_GLOB_PATHSPECS=1` — wildcards enabled (default)
- `GIT_NOGLOB_PATHSPECS=1` — wildcards literal (e.g., `*.c` matches file named `*.c`)
- Per-argument overrides
- prefix with `:(glob)` or `:(literal)`
- `GIT_LITERAL_PATHSPECS`
- disables wildcard matching and override prefixes
- `GIT_ICASE_PATHSPECS`
- case-insensitive pathspec matching
### Committing (author/committer identity)
- Used primarily by `git-commit-tree` (then falls back to config)
- Author fields
- `GIT_AUTHOR_NAME`
- `GIT_AUTHOR_EMAIL`
- `GIT_AUTHOR_DATE`
- Committer fields
- `GIT_COMMITTER_NAME`
- `GIT_COMMITTER_EMAIL`
- `GIT_COMMITTER_DATE`
- `EMAIL`
- fallback email if `user.email` is unset
### Networking (HTTP behavior)
- `GIT_CURL_VERBOSE`
- emit libcurl debug messages
- `GIT_SSL_NO_VERIFY`
- skip SSL cert verification (self-signed/setup scenarios)
- Low-speed abort settings
- `GIT_HTTP_LOW_SPEED_LIMIT`
- `GIT_HTTP_LOW_SPEED_TIME`
- override `http.lowSpeedLimit` / `http.lowSpeedTime`
- `GIT_HTTP_USER_AGENT`
- override user-agent string
### Diffing and merging
- `GIT_DIFF_OPTS`
- only supports unified context count: `-u<n>` / `--unified=<n>`
- `GIT_EXTERNAL_DIFF`
- program invoked instead of built-in diff
- Batch diff metadata for external diff tool
- `GIT_DIFF_PATH_COUNTER`
- `GIT_DIFF_PATH_TOTAL`
- `GIT_MERGE_VERBOSITY` (recursive merge)
- 0: only errors
- 1: conflicts only
- 2: + file changes (default)
- 3: + skipped unchanged
- 4: + all processed paths
- 5+: deep debug
### Debugging/tracing (observability)
- Output destinations
- `"true"`, `"1"`, `"2"` → stderr
- absolute path `/...` → write to file
- `GIT_TRACE`
- general tracing (alias expansion, sub-program exec)
- `GIT_TRACE_PACK_ACCESS`
- pack access tracing: packfile + offset
- `GIT_TRACE_PACKET`
- packet-level tracing for network operations
- `GIT_TRACE_PERFORMANCE`
- timing for each internal step/subcommand
- `GIT_TRACE_SETUP`
- shows discovered repo paths (`git_dir`, `worktree`, `cwd`, `prefix`, ...)
### Miscellaneous
- `GIT_SSH`
- program used instead of `ssh`
- invoked as: `$GIT_SSH [user@]host [-p <port>] <command>`
- wrapper script often needed for extra args; `~/.ssh/config` may be easier
- `GIT_ASKPASS`
- program to prompt for credentials (returns answer on stdout)
- `GIT_NAMESPACE`
- namespaced refs (like `--namespace`), often server-side
- `GIT_FLUSH`
- stdout buffering
- `1` flush frequently; `0` buffer
- `GIT_REFLOG_ACTION`
- custom text written to reflog entries (action descriptor)
## Summary (what you should now understand)
- Git internals = object database + refs + a UI on top
- Main object types
- blob (content), tree (directories), commit (history + metadata), tag (named pointer + metadata)
- Refs and `HEAD` provide human-friendly naming and current-state tracking
- Packfiles optimize storage through compression and deltas
- Refspecs control fetch/push mappings and enable namespaced workflows
- Transfer protocols
- dumb: simple HTTP reads (rare)
- smart: negotiated pack exchange (common) for fetch/push
- Maintenance/recovery tools
- `gc`, `packed-refs`, `reflog`, `fsck`, `filter-branch`, `prune`
- Environment variables provide control, portability, and deep debugging capabilities
```

1122
mindmap/Git Tools.md Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,774 @@
```markmap
# Git on the Server
## Purpose & context
- Current baseline
- You should now be able to handle most day-to-day Git tasks
- Why a remote repository is needed for collaboration
- Collaboration requires a remote Git repository
- While you *can* push/pull directly between individuals' repositories:
- It's discouraged (easy to confuse what others are working on)
- Collaborators need access even if your computer is offline
- Preferred approach
- Set up an *intermediate* repository that everyone can access
- Everyone pushes to / pulls from that common repository
- Running a Git server (high-level)
- Choose which protocols your server should support
- Chapter structure
- Protocol options + pros/cons
- Typical setups and how to get them running
- Hosted options (if you don't want to self-host)
- If you don't want to run your own server
- Skip to “Third Party Hosted Options”
- Then continue to the next chapter about distributed workflows
- What a “remote repository” usually is
- Generally a **bare repository**
- No working directory
- Used only as a collaboration point
- Bare repository in simplest terms
- Contents of your project's `.git` directory
- And nothing else
## The Protocols (ways Git transfers data)
- Four distinct protocols
- **Local**
- **HTTP** (Smart + Dumb)
- **SSH**
- **Git** (git://)
- Selection depends on
- Authentication requirements
- Public vs private access needs
- Firewall constraints
- Ease of setup/maintenance
- Performance expectations
### Local Protocol
- Definition
- Remote repository is just another directory on the **same host**
- Typical use cases
- Team shares a filesystem (e.g., **NFS mount**)
- Less likely: everyone logs into the **same computer**
- Not ideal: all repos on one machine → catastrophic loss risk higher
- How to use (clone / add remote)
- Clone using a filesystem path
- `git clone /srv/git/project.git`
- Clone explicitly using `file://`
- `git clone file:///srv/git/project.git`
- Add as a remote to an existing project
- `git remote add local_proj /srv/git/project.git`
- Then push/pull using `local_proj` like a network remote
- Path vs `file://` behavior
- Plain path (no `file://`)
- Git tries to use **hardlinks** or directly **copies** needed files
- With `file://`
- Git uses network-style transfer processes
- Generally **less efficient**
- Why ever use `file://`?
- To get a “clean” copy (leave out extraneous refs/objects)
- Often after importing from another VCS (maintenance tasks noted in “Git Internals”)
- Recommended in this chapter
- Use the normal path (almost always faster)
- Pros
- Simple to set up
- Uses existing filesystem permissions and access
- Easy when you already have a shared filesystem
- Put a bare repo where everyone can access it
- Set read/write permissions like any shared directory
- Convenient for quick sharing from someone's working repo
- e.g., `git pull /home/john/project` can be simpler than using a server
- Cons
- Shared filesystem access can be harder from multiple locations than network access
- Example: pushing from home may require mounting a remote disk (slow/difficult)
- Performance caveat on shared mounts
- “Local” is fast only if data access is fast
- NFS can be slower than SSH (even to same server) because SSH lets Git use local disks
- Risk of accidental repository damage
- Users have full shell access to the “remote” directory
- Nothing prevents deleting/modifying internal Git files → corruption risk
### HTTP Protocols
- Two modes
- **Dumb HTTP**
- Older (pre Git 1.6.6)
- Simple, generally read-only
- **Smart HTTP**
- Introduced in Git 1.6.6
- More capable: negotiates transfers intelligently (similar to SSH)
- Has become very popular due to usability + efficiency
#### Smart HTTP
- How it works (conceptually)
- Similar to SSH/Git protocols but runs over standard **HTTPS ports**
- Can use multiple HTTP authentication mechanisms
- Often easier than SSH key setup
- Supports username/password prompting
- Usability benefits
- Single URL can support
- Anonymous reads (like `git://`)
- Authenticated/encrypted pushes (like SSH)
- Client behavior
- If push requires auth → server prompts for username/password
- Same for reads if configured
- Example behavior (hosted services)
- For GitHub-like services
- The web URL (e.g., `https://github.com/...`) can also be used to clone/push (if authorized)
#### Dumb HTTP
- When used
- Git client falls back to Dumb HTTP if server doesn't respond with a smart HTTP service
- Expectations
- Bare Git repository is served as static files by a web server
- Setup (read access)
- Place bare repo under HTTP document root + enable a `post-update` hook
- Example sequence
- `cd /var/www/htdocs/`
- `git clone --bare /path/to/git_project gitproject.git`
- `cd gitproject.git`
- `mv hooks/post-update.sample hooks/post-update`
- `chmod a+x hooks/post-update`
- Why the hook matters
- Default `post-update` runs `git update-server-info`
- That makes HTTP fetching/cloning work properly
- Typically triggered when someone pushes to the repo (often over SSH)
- Cloning from Dumb HTTP
- `git clone https://example.com/gitproject.git`
- Server flexibility
- Apache example uses `/var/www/htdocs`
- Any static web server works if it can serve the bare repo files
- “Git Internals” referenced for details of how the data is served
- Common deployment choice
- Usually you run either:
- Smart HTTP read/write, **or**
- Dumb HTTP read-only
- Rare to run a mix
#### HTTP Pros (focus on Smart HTTP)
- Single URL for all access types
- Server prompts only when authentication is needed
- Username/password authentication
- Avoids SSH key generation/upload steps
- Especially useful for less sophisticated users or environments where SSH is uncommon
- Performance
- Fast and efficient (comparable to SSH)
- Security options
- Serve read-only over HTTPS with encrypted transfer
- Can require signed SSL client certificates (stronger client auth)
- Firewall friendliness
- HTTP/HTTPS ports are commonly allowed through corporate firewalls
#### HTTP Cons
- Setup complexity
- HTTPS Git can be trickier to set up than SSH on some servers
- Otherwise, other protocols offer little advantage over Smart HTTP for serving Git content
- Credential handling for authenticated pushes
- Sometimes more complicated than SSH keys
- Mitigations (credential caching tools)
- Keychain access (macOS)
- Credential Manager (Windows)
- “Credential Storage” referenced for secure password caching setup
### SSH Protocol
- Why it's common for self-hosting
- SSH is often already installed/configured on servers
- If not, it's generally easy to set up
- Provides authenticated, encrypted transport
- Clone URL forms
- Explicit SSH URL
- `git clone ssh://[user@]server/project.git`
- scp-like shorthand
- `git clone [user@]server:project.git`
- Username handling
- If not specified, Git assumes your current local username
- Pros
- Easy to set up (SSH daemon is common; admins often know it; OS tools exist)
- Secure
- Encrypted + authenticated transfer
- Efficient
- Data compacted before transfer (like HTTPS/Git/Local)
- Cons
- No anonymous access
- Users need SSH access even for read-only cloning
- Not ideal for open source “browse/clone without accounts”
- If you want anonymous read-only + authenticated writes
- Use SSH for push
- Add another protocol for public fetch (e.g., HTTPS or git://)
### Git Protocol (git://)
- What it is
- A Git-provided daemon
- Listens on port **9418**
- Similar transfer behavior to SSH but:
- **No authentication**
- (No encryption/auth overhead)
- Repository export control
- Repo must contain a file named `git-daemon-export-ok`
- Without it, the daemon won't serve the repository
- Push behavior
- Typically no pushing
- You *can* enable pushes, but it's insecure:
- Anyone who discovers the URL could push
- Rare in practice
- Pros
- Often the fastest network transfer option
- Useful for
- high-traffic public projects, or
- very large projects
- when no user authentication is required for read access
- Uses SSH-like transfer mechanism without encryption/auth overhead
- Cons
- No authentication (major downside)
- Usually should not be the only access method
- Common pairing
- Developers: SSH or HTTPS (write access)
- Everyone else: `git://` (read-only)
- Harder to set up than others
- Requires its own daemon
- Needs system integration (xinetd, systemd, etc.)
- Firewall constraints
- Requires port **9418**
- Often blocked behind corporate firewalls
## Getting Git on a Server (self-host setup)
- Scope note (environment assumptions)
- Commands shown are simplified for a Linux-based server
- Possible on macOS or Windows servers too
- Production deployments will differ:
- security measures
- OS tooling
- Step 1: create/export a bare repository
- Requirement
- Export an existing repo into a **new bare repository** (no working dir)
- Create a bare clone
- `git clone --bare my_project my_project.git`
- Naming convention
- bare repos typically end with `.git`
- Rough equivalent (not identical)
- `cp -Rf my_project/.git my_project.git`
- Result
- Git data only (no checked-out snapshot)
- Directory dedicated to the repository's internal data
- Step 2: put the bare repository on a server
- Example environment
- Server: `git.example.com`
- Repos stored under: `/srv/git`
- SSH access available
- Copy bare repo to server
- `scp -r my_project.git user@git.example.com:/srv/git`
- Cloning for other users (with SSH read access to `/srv/git`)
- `git clone user@git.example.com:/srv/git/my_project.git`
- Push access rule of thumb
- SSH + filesystem write permissions to `/srv/git/my_project.git` ⇒ push access
- Group write permissions (recommended)
- Run inside the repo:
- `git init --bare --shared`
- Effects
- Adds group write permissions appropriately
- Does not remove commits/refs/etc.
- Minimal “useful Git server” takeaway
- Add SSH-capable accounts for collaborators
- Place a bare repository where they have read/write permissions
- That's enough for private collaboration
- What later sections add (optional sophistication)
- avoid per-user accounts
- add public read access
- add web UIs
- etc.
## Small Setups (few developers / trying Git)
- Common pain point
- User management + permissions
- Some repos read-only for some users
- Read/write for others
- SSH Access approach
- If everyone already has SSH access to a server
- Easiest initial setup (almost no additional work)
- For more complex access control
- Use OS filesystem permissions
- If server has no accounts for all writers
- Set up SSH access for those users
- Assumption stated
- If you have a server for this, you likely already have SSH installed and use it to access the server
- Ways to grant SSH write access
- Option 1: create an account for each person
- Straightforward
- Can be cumbersome (adduser/useradd + temporary passwords)
- Option 2: single shared `git` account using `authorized_keys`
- Create one `git` user on server
- Collect users' SSH public keys
- Append to `~git/.ssh/authorized_keys`
- Everyone connects as `git`
- Commit data unaffected by SSH username used to connect
- Option 3: centralized auth
- LDAP or other central auth source
- Any SSH authentication method works if user can get shell access
### Generating an SSH public key (client side)
- Purpose
- Many Git servers authenticate using SSH public keys
- Each user must generate a key pair if they don't have one
- Check for an existing key
- SSH keys usually stored in `~/.ssh`
- Example checks
- `cd ~/.ssh`
- `ls`
- Look for pairs like
- `id_dsa` + `id_dsa.pub`
- `id_rsa` + `id_rsa.pub`
- Meaning
- `.pub` file = public key
- non-`.pub` file = private key
- Generate a key if missing
- Tool
- `ssh-keygen` (Linux/macOS SSH package; also included with Git for Windows)
- Recommended command shown
- `ssh-keygen -o`
- Prompts and outputs
- Choose file path (default `~/.ssh/id_rsa`)
- Enter passphrase twice (optional)
- Key is saved as:
- private key: `~/.ssh/id_rsa`
- public key: `~/.ssh/id_rsa.pub`
- Passphrase guidance
- Can be empty (no password prompts when using the key)
- If you do set a password
- use `-o` (saves the private key in a format more resistant to brute-force password cracking than the default)
- `ssh-agent` can help avoid typing passphrase repeatedly
- Sharing the public key
- User sends the contents of the `.pub` file to the admin
- Example command
- `cat ~/.ssh/id_rsa.pub`
- Reference link mentioned
- GitHub SSH key guide:
- `https://docs.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent`
### Setting up the server (authorized_keys method)
- Note about automation
- Much can be automated with `ssh-copy-id` (instead of manual key install)
- Create the `git` user and SSH directory
- `sudo adduser git`
- `su git`
- `cd`
- `mkdir .ssh && chmod 700 .ssh`
- `touch .ssh/authorized_keys && chmod 600 .ssh/authorized_keys`
- Add developer public keys
- Assumption
- Trusted public keys saved to temporary files (e.g., `/tmp/id_rsa.john.pub`)
- Append to `authorized_keys`
- `cat /tmp/id_rsa.john.pub >> ~/.ssh/authorized_keys`
- `cat /tmp/id_rsa.josie.pub >> ~/.ssh/authorized_keys`
- `cat /tmp/id_rsa.jessica.pub >> ~/.ssh/authorized_keys`
- Create a bare repository on the server
- `cd /srv/git`
- `mkdir project.git`
- `cd project.git`
- `git init --bare`
- First push into the empty bare repository (example workflow)
- Note
- Someone must create a bare repo on the server for each new project
- Example (on John's computer)
- `cd myproject`
- `git init`
- `git add .`
- `git commit -m 'Initial commit'`
- `git remote add origin git@gitserver:/srv/git/project.git`
- `git push origin master`
- Typical collaboration thereafter
- Clone
- `git clone git@gitserver:/srv/git/project.git`
- Edit/commit/push example
- `cd project`
- `vim README`
- `git commit -am 'Fix for README file'`
- `git push origin master`
- Restricting interactive shell access (optional)
- Default situation
- Users can log in and get a shell as `git`
- Approach
- Change shell in `/etc/passwd`
- Use `git-shell` (limited shell bundled with Git)
- Allows Git push/pull via SSH
- Denies normal interactive shell access
- Ensure `git-shell` is listed as a valid shell
- Check `/etc/shells`
- `cat /etc/shells`
- Find `git-shell`
- `which git-shell`
- Add its path to `/etc/shells` if missing
- `sudo -e /etc/shells`
- Set login shell for user
- `sudo chsh git -s $(which git-shell)`
- Result of interactive SSH attempt
- `ssh git@gitserver`
- Message includes
- `fatal: Interactive git shell is not enabled.`
- hint about `~/git-shell-commands` existing and being executable/readable
- Customize git-shell behavior
- Create `~/git-shell-commands`
- Possible customizations
- restrict accepted Git commands
- customize the SSH login rejection message
- Reference
- `git help shell`
- Preventing SSH forwarding features (optional hardening)
- Even with `git-shell`, users may still use SSH port forwarding
- To prevent, prepend options to each key line in `authorized_keys`
- `no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty`
- Effect
- Git network commands still work
- Users can't get a shell / can't use those forwarding mechanisms
## Git Daemon (Git protocol: git://)
- Purpose
- Fast, unauthenticated access to Git data
- Security reminder
- Not authenticated
- Anything served is public within its network
- Recommended use
- Outside firewall: only for publicly visible projects
- Inside firewall: useful for many read-only consumers (e.g., CI/build servers) without managing many SSH keys
- Run the daemon (base command)
- `git daemon --reuseaddr --base-path=/srv/git/ /srv/git/`
- Options
- `--reuseaddr`
- Restart without waiting for old connections to time out
- `--base-path=/srv/git/`
- Allows cloning without specifying full paths
- Trailing `/srv/git/`
- Where to look for repos to export
- Firewall
- Open port `9418`
- Daemonizing & supervising the process
- Depends on OS/init system
- systemd example (common on modern Linux)
- Create `/etc/systemd/system/git-daemon.service`:
- `[Unit]`
- `Description=Start Git Daemon`
- `[Service]`
- `ExecStart=/usr/bin/git daemon --reuseaddr --base-path=/srv/git/ /srv/git/`
- `Restart=always`
- `RestartSec=500ms`
- `StandardOutput=syslog`
- `StandardError=syslog`
- `SyslogIdentifier=git-daemon`
- `User=git`
- `Group=git`
- `[Install]`
- `WantedBy=multi-user.target`
- Adjust as needed
- Ensure the `git` user exists or change user/group
- Verify Git binary path is `/usr/bin/git` (change if needed)
- Service control
- Enable on boot: `systemctl enable git-daemon`
- Start: `systemctl start git-daemon`
- Stop: `systemctl stop git-daemon`
- Other alternatives mentioned
- xinetd
- sysvinit scripts
- anything that daemonizes + monitors the process
- Exporting repositories over git://
- Per-repository opt-in via file:
- `cd /path/to/project.git`
- `touch git-daemon-export-ok`
## Smart HTTP (Git over HTTP with smart negotiation)
- Goal
- One protocol that can provide both:
- authenticated push/pull
- unauthenticated read-only access
- Mechanism
- Enable Git's CGI script: `git-http-backend`
- How it behaves
- CGI reads URL path + headers from `git fetch` / `git push`
- Determines if client supports Smart HTTP (true for clients since Git 1.6.6)
- If client is smart
- speaks Smart HTTP
- Else
- falls back to Dumb HTTP behavior (backward-compatible reads)
- Basic Apache setup example
- Install Apache + utilities
- `sudo apt-get install apache2 apache2-utils`
- Enable needed modules
- `a2enmod cgi alias env`
- Enables: `mod_cgi`, `mod_alias`, `mod_env`
- Ensure Apache can read/write repositories
- Set group of `/srv/git` to `www-data`:
- `chgrp -R www-data /srv/git`
- Rationale
- Apache CGI runs as `www-data` by default
- Apache configuration for `/git` path
- Environment variables
- `SetEnv GIT_PROJECT_ROOT /srv/git`
- `SetEnv GIT_HTTP_EXPORT_ALL`
- Route `/git/` to the backend
- `ScriptAlias /git/ /usr/lib/git-core/git-http-backend/`
- Note about `GIT_HTTP_EXPORT_ALL`
- If omitted:
- unauthenticated clients can only access repos containing `git-daemon-export-ok`
- Authenticate writes (example Auth block)
- `<Files "git-http-backend">`
- `AuthType Basic`
- `AuthName "Git Access"`
- `AuthUserFile /srv/git/.htpasswd`
- `Require expr !(%{QUERY_STRING} -strmatch '*service=git-receive-pack*' || %{REQUEST_URI} =~ m#/git-receive-pack$#)`
- `Require valid-user`
- Create `.htpasswd` (example user `schacon`)
- `htpasswd -c /srv/git/.htpasswd schacon`
- Notes
- Many authentication methods exist in Apache; this is just a simple example
- Strongly recommended to use SSL so data is encrypted
- Key architectural point
- `git-http-backend` handles Git protocol negotiation and data transfer
- Authentication is handled by the *web server layer* (Apache or other CGI-capable server)
- Web server flexibility
- Works with nearly any CGI-capable web server
- Reference (Apache auth docs)
- `https://httpd.apache.org/docs/current/howto/auth.html`
## GitWeb (simple web UI)
- Motivation
- After enabling read/write and/or read-only access, you may want a basic web visualizer
- What it is
- Git includes a CGI script called **GitWeb**
- (Figure 49 referenced as the GitWeb UI screenshot)
- Quick temporary instance: `git instaweb`
- Uses a lightweight web server (e.g., `lighttpd` or `webrick`)
- Linux note
- `lighttpd` often installed → `git instaweb` may “just work”
- macOS note (example)
- Ruby (and thus `webrick`) may be a convenient choice
- Start with a specified handler
- `git instaweb --httpd=webrick`
- Starts HTTP server on port `1234` and opens a browser automatically
- Example log lines shown include WEBrick and Ruby version info
- Stop the server
- `git instaweb --httpd=webrick --stop`
- Running GitWeb continuously (server deployment)
- Option 1: install a distro package
- Some distros offer a `gitweb` package (via `apt` or `dnf`)
- Option 2: install manually (quick walkthrough)
- Clone Git source (GitWeb included)
- `git clone git://git.kernel.org/pub/scm/git/git.git`
- Build GitWeb with project root configured
- `cd git/`
- `make GITWEB_PROJECTROOT="/srv/git" prefix=/usr gitweb`
- Generates `gitweb.cgi` + static assets (e.g., `static/gitweb.js`)
- Deploy to web directory
- `sudo cp -Rf gitweb /var/www/`
- Apache VirtualHost example (CGI enablement)
- `<VirtualHost *:80>`
- `ServerName gitserver`
- `DocumentRoot /var/www/gitweb`
- `<Directory /var/www/gitweb>`
- `Options +ExecCGI +FollowSymLinks +SymLinksIfOwnerMatch`
- `AllowOverride All`
- `order allow,deny`
- `Allow from all`
- `AddHandler cgi-script cgi`
- `DirectoryIndex gitweb.cgi`
- `</Directory>`
- `</VirtualHost>`
- Result
- Visit `http://gitserver/` to browse repositories
- Server flexibility
- Can be served by any CGI-/Perl-capable web server
## GitLab (modern, fully featured Git server example)
- Why mentioned
- GitWeb is simplistic
- GitLab is a popular open source alternative:
- more capable
- more complex to install/maintain
- database-backed web application
### Installation
- Recommended approach
- Install via official **Omnibus GitLab** package
- Other installation options listed
- GitLab Helm chart (Kubernetes)
- Dockerized GitLab packages (Docker)
- Install from source
- Cloud providers / platforms
- AWS
- Google Cloud Platform
- Azure
- OpenShift
- Digital Ocean
- Reference mentioned
- GitLab Community Edition (CE) README
### Administration (web UI)
- Access method
- Browser to GitLab hostname/IP
- Log in as admin
- Default credentials (must change immediately)
- Username: `admin@local.host`
- Password: `5iveL!fe`
- Entering admin interface
- Click “Admin area” icon (top right menu)
- (Figure 50 referenced)
### Users
- Requirement
- Everyone must have a GitLab user account
- Account contents
- Personal info tied to login data
- Namespaces
- Each user has a namespace grouping their projects
- Example
- user `jane`, project `project`
- URL: `http://server/jane/project`
- Removing accounts (two modes)
- Blocking
- Prevents login
- Preserves namespace data
- Commits signed with that email still link to profile
- Destroying
- Removes user from database and filesystem
- Deletes projects/data in their namespace
- Removes groups they own
- More permanent/destructive; rarely needed
- (Figure 51 referenced as user admin screen)
### Groups
- Definition
- Collection of projects + access control data for those projects
- Group namespace
- Similar to user namespaces
- Example
- group `training`, project `materials`
- URL: `http://server/training/materials`
- Permissions
- Group users have permission levels for group and projects
- Range example
- Guest: issues/chat only
- Owner: full control (group, members, projects)
- Too numerous to list (GitLab links from admin screen)
- (Figure 52 referenced as group admin screen)
### Projects
- Meaning
- Roughly corresponds to a single Git repository
- Namespace association
- Every project belongs to exactly one namespace:
- user, or
- group
- Access control behavior
- User-owned project
- owner directly controls access
- Group-owned project
- group member permissions apply
- Visibility levels (read access control)
- Private
- owner explicitly grants access to specific users
- Internal
- visible to any logged-in user
- Public
- visible to anyone
- Applies to both
- `git fetch` access
- web UI access
### Hooks
- Hook support
- Project-level hooks
- System-level hooks
- Behavior
- GitLab sends HTTP POST with descriptive JSON when events occur
- Purpose
- Integrate with automation and tooling
- CI servers
- chat rooms
- deployment tools
### Basic Usage
- Create a project
- Click “+” icon on toolbar
- Provide
- project name
- namespace
- visibility level
- Most settings can be changed later
- Click “Create Project”
- Connect project to local Git workflow
- Access methods
- HTTPS
- SSH
- URLs shown at top of project home page
- Add remote for an existing local repository (example remote name `gitlab`)
- `git remote add gitlab https://server/namespace/project.git`
- Or clone if you don't have a local copy
- `git clone https://server/namespace/project.git`
- Web UI repository views
- Project home: recent activity
- Navigation links: files view + commit log
### Working Together
- Model 1: direct push access
- Add users via project settings → “Members”
- Assign access level
- “Developer” or above can push commits/branches directly
- Model 2: merge requests (more decoupled)
- Users with push access
- create branch
- push commits
- open merge request back into `master` (or another branch)
- Users without push permission
- fork project
- push to fork
- open merge request from fork into main project
- Benefits
- owner controls what/when changes merge
- supports contributions from untrusted users
- Discussion units
- Merge requests + issues are main long-lived discussion objects
- Merge requests support
- line-by-line discussion (lightweight code review)
- overall discussion thread
- Both can be
- assigned to users
- organized into milestones
- Broader feature note (beyond Git)
- Also provides features like
- project wikis
- system maintenance tools
- Operational benefit
- after initial setup, little need for config-file edits or SSHing to server
- most admin/usage via browser UI
## Third Party Hosted Options
- When to choose
- You don't want to set up/maintain your own Git server
- Advantages
- quick setup
- easy project creation
- no maintenance/monitoring
- even if you self-host internally:
- public hosting for open source can be easier for the community to find/contribute
- Choosing a host
- Many options with different pros/cons
- Up-to-date list referenced
- GitHosting page on the main Git wiki:
- `https://git.wiki.kernel.org/index.php/GitHosting`
- GitHub note
- GitHub covered in detail in the “GitHub” chapter
- It's the largest Git host, and you may need to interact with GitHub-hosted projects
- Many other hosts exist if you prefer alternatives
## Summary (decision guidance)
- You have multiple options for running a remote Git repository to collaborate/share work
- Self-hosting
- Pros
- high control
- can run within your firewall
- Cons
- time/effort to set up
- ongoing maintenance burden
- Hosted services
- Pros
- easy to set up and maintain
- Cons
- your code resides on someone else's servers
- some organizations prohibit this
- Practical takeaway
- Choose the solution (or combination) that fits your organization's needs
```

954
mindmap/GitHub.md Normal file
View File

@@ -0,0 +1,954 @@
# GitHub
## What GitHub is (context)
- Largest host for Git repositories
- Central collaboration hub for millions of developers and projects
- Commonly used for
- Git hosting
- Issue tracking
- Code review
- Other collaboration features
- Not part of the Git open source project itself
- But very likely you'll need to interact with it professionally
## Chapter scope (what you'll learn)
- Use GitHub effectively:
- Sign up for and manage an account
- Create and use Git repositories on GitHub
- Contribute to other projects (and accept contributions to yours)
- Use GitHub's programmatic interface (API)
- Tips and small features that make workflows easier
- If you don't plan to use GitHub for hosting/collaboration
- You can skip ahead to **Git Tools**
## Note: Interfaces change (UI disclaimer)
- GitHub UI and screenshots change over time
- The *concepts* should remain applicable
- Online versions of the book may have newer screenshots
---
## Account Setup and Configuration
### Create a free account (sign-up)
- Go to: `https://github.com`
- Fill in:
- Username (must be unique)
- Email address
- Password
- Click **“Sign up for GitHub”** (green button)
- You may see an upgrades/pricing page next
- Safe to ignore initially
- Verify your email address (GitHub sends a verification email)
- Important for later steps/workflows
- Account capabilities & plans
- Free accounts provide almost all functionality
- Paid plans: advanced tools/features + increased limits
- More info: `https://github.com/pricing`
- Navigation
- Clicking the **Octocat** logo (top-left) takes you to your dashboard
### SSH Access (HTTPS vs SSH)
- HTTPS remotes
- You can connect to Git repos over HTTPS using username/password
- Cloning public projects doesn't require an account
- Account becomes important for:
- forking projects
- pushing to your fork
- SSH remotes
- Requires configuring an SSH public key
- If you don't have a key yet: generate one (referenced elsewhere in the book)
#### Add an SSH key to your GitHub account
- Open **Account settings** (link at the top-right)
- Select **“SSH keys”** in the left sidebar
- Click **“Add an SSH key”**
- Provide:
- A recognizable **title/name** for the key (helps later revocation)
- Example naming patterns: “My Laptop”, “Work Account”
- Paste the contents of your public key file (e.g. `~/.ssh/id_rsa.pub`)
- Click **“Add key”**
### Your Avatar (profile picture)
- Optional customization
- Steps
- Go to **Profile** tab (in account settings)
- Click **“Upload new picture”**
- Select an image
- Crop it
- Effect
- Your avatar appears next to your username everywhere you interact on GitHub
- Gravatar integration
- If you already have a Gravatar avatar, GitHub may use it by default
### Your Email Addresses (commit ↔ account mapping)
- How GitHub associates commits with your account
- By **email address** in commits
- If you commit with multiple emails
- Add all of them in the **Emails** section in settings
- Email address states (as shown in the example)
- Verified + Primary
- Receives notifications/receipts
- Verified (non-primary)
- Can be promoted to primary
- Unverified
- Cannot be primary
- Once added
- Any commit on GitHub using one of these emails will link to your user
### Two-Factor Authentication (2FA)
- Purpose
- Extra security
- Reduces impact if password is stolen/compromised
- Location
- **Security** tab in account settings
- Setup
- Click **“Set up two-factor authentication”**
- Choose a second factor method:
- Phone app generating a **time-based one-time password (TOTP)**
- SMS code sent each login
- After enabling
- GitHub requires password + code on login
---
## Contributing to a Project
### Forking Projects (contribute without push access)
- Problem: you want to contribute but you can't push to the upstream repo
- Solution: **Fork**
- GitHub creates a full copy of the repo under your namespace
- You can push to your fork
- Terminology note (historical vs GitHub meaning)
- Historically “fork” could imply a split/competing direction
- On GitHub: “fork” generally means “same project under your account to propose changes”
- Why this works well
- Upstream maintainers don't need to add you as a collaborator
- You propose changes via a **Pull Request (PR)**
- PR becomes a discussion + review thread
- Maintainer merges when satisfied
- How to fork
- Visit the project page
- Click **“Fork”** (top-right)
- GitHub redirects you to your forked repo page
### The GitHub Flow (PR-centered collaboration model)
- Works for:
- Small teams sharing a repo
- Large distributed groups and many forks
- Built around **Topic Branches** (as covered in Git Branching)
- Typical sequence
1. Fork the project
2. Create a topic branch from `master`
3. Commit improvements
4. Push the topic branch to your GitHub fork
5. Open a Pull Request
6. Discuss; optionally keep committing to the same branch
7. Owner merges or closes the PR
8. Sync upstream changes back into your fork
- Related concept
- Similar to the **Integration Manager** workflow, but discussion/review is web-based rather than email-based
- Alternative tooling tip
- GitHub CLI can do most web-interface tasks
- Runs on Windows, macOS, Linux (installation/manual referenced in chapter)
---
## Creating a Pull Request (walkthrough example)
### Scenario
- Tony wants Arduino code and finds: `https://github.com/schacon/blink`
- Problem
- Blink delay is too fast
- Goal
- Change delay from 1 second to 3 seconds
- Submit improvement via PR
### Local workflow steps (fork → branch → change → push)
- Fork upstream repo (`schacon/blink`) to your namespace
- Example fork URL: `https://github.com/tonychacon/blink`
- Clone your fork locally
- `git clone https://github.com/tonychacon/blink`
- Create a descriptive topic branch
- `git checkout -b slow-blink`
- Make the code change (example uses `sed`)
- macOS:
- `sed -i '' 's/1000/3000/' blink.ino`
- Linux:
- `sed -i 's/1000/3000/' blink.ino`
- Review the change
- `git diff --word-diff`
- Commit
- `git commit -a -m 'Change delay to 3 seconds'`
- Push the topic branch to your fork
- `git push origin slow-blink`
- If using HTTPS remote, you may be prompted for GitHub username/password
#### Meaning of the numbered actions (as presented in the example)
- ① clone fork locally
- ② create topic branch
- ③ edit code
- ④ verify diff
- ⑤ commit to topic branch
- ⑥ push topic branch to GitHub fork
### Open the Pull Request on GitHub
- GitHub notices the new branch on your fork and suggests creating a PR (green button)
- Alternative path
- Use the Branches page:
- `https://github.com/<user>/<project>/branches`
- PR creation page typically shows
- Title + description fields (recommended to write clearly)
- Commits “ahead” of `master`
- Unified diff of all changes that would be merged
- After clicking **Create pull request**
- The upstream maintainer is notified
- They can review and respond
### PRs can be opened early (not only “final” work)
- Common in internal/team settings
- Because you can keep pushing commits to the PR branch after opening the PR
- PR becomes a shared context for iterative development and review
---
## Iterating on a Pull Request (review + updates)
### How review happens on GitHub
- Maintainer can:
- merge
- reject/close
- comment (line-level or general)
- Line-level comments
- Maintainer comments by clicking specific lines in the diff
- General comments
- In the PR discussion thread
- Line comments are also pulled into the overall conversation
### Notifications during PR discussion
- PR author and watchers get notified on comments/activity
- If email notifications are enabled
- Comments may arrive as emails
### Updating an existing PR
- GitHub workflow approach
- Don't “re-roll” patches like mailing lists
- Instead:
- add commits to the same topic branch
- push again
- PR updates automatically
- Old line comments may collapse when code changes (they become “outdated”)
- Note about notifications
- Adding commits to an open PR does **not** necessarily trigger a notification
- Contributors often add a comment saying they pushed updates
### “Files Changed” tab meaning (unified diff)
- Shows total aggregate difference introduced by merging the PR branch
- Equivalent to:
- `git diff master...<branch>` (three-dot diff) for the PR base
### Merging the PR (server-side vs local)
- GitHub checks if PR merges cleanly
- If you have write access and merge is trivial
- GitHub shows a merge button
- Clicking it performs a **non-fast-forward merge**
- Creates a merge commit even if fast-forward was possible
- Alternative
- Pull the PR branch and merge locally
- If you push the merge to GitHub, the PR closes automatically
### Not only forks (internal PRs)
- You can open PRs between branches in the same repository
- Useful when:
- multiple collaborators with write access
- you want structured review/discussion without forking
---
## Advanced Pull Requests
### Pull Requests as iterative conversations (vs “perfect patch queues”)
- Many GitHub projects view PRs as:
- a branch where the change evolves through discussion
- culminating in the final unified diff applied via merge
- Contrast with mailing-list patch series
- Mailing lists often expect a clean sequence of patches
- GitHub supports earlier engagement and incremental improvements
- Practical implication
- Contributors frequently add commits instead of rebasing/resubmitting PRs
- Merge commits created by GitHub can reference the PR for traceability
---
## Keeping up with upstream (fixing out-of-date/conflicting PR branches)
### When you need this
- PR becomes out of date
- GitHub reports: PR “does not merge cleanly”
- Goal
- Make PR mergeable (green) so maintainers don't have extra work
### Two main strategies
- Rebase onto upstream target branch (usually upstream `master`)
- Merge upstream target branch into your topic branch
- Common preference on GitHub:
- preserves history/context
- simpler/less error-prone than rebasing for many teams
### Merge upstream into your topic branch (step-by-step)
- Add upstream repo as remote
- `git remote add upstream https://github.com/schacon/blink`
- Fetch upstream
- `git fetch upstream`
- Merge upstream main branch into your topic branch
- `git merge upstream/master`
- Resolve conflicts (if any)
- edit conflicting files (example: `blink.ino`)
- `git add <file>`
- `git commit` (records the merge)
- Push updated topic branch back to your fork/branch
- `git push origin slow-blink`
- Result
- GitHub updates PR and re-checks mergeability automatically
### Long-running work
- You can repeat “merge from upstream” regularly
- Conflicts are limited to changes since your last merge
### Rebasing caution (especially once PR is open)
- If you rebase and force-push over the branch used by an open PR
- it can disrupt collaborators who fetched the branch
- it can trigger the issues described in “Perils of Rebasing” (referenced)
- Recommended alternative if you want a clean rebased history
- push rebased commits to a **new** branch
- open a **new** PR referencing the old PR
- close the original PR
---
## References (cross-linking Issues, PRs, commits)
### Issue/PR number references (within a repository)
- Every Issue and PR has a unique number within a project
- Quick reference syntax
- `#<num>` in comments/descriptions links automatically
### Cross-repo / cross-fork references
- In a fork of the repo you're in:
- `username#<num>`
- In a different repository:
- `username/repo#<num>`
- Full GitHub URLs can also be pasted
- GitHub renders them as shortened references
### Trackbacks
- Mentioning a PR in another PR can create timeline cross-links
- Helps connect superseding PRs when one is closed
### Commit references
- You can reference a commit by SHA-1
- Requirement in chapter
- must use the **full 40-character SHA-1** for auto-linking
- Same cross-repo patterns apply (similar to issue references)
---
## GitHub Flavored Markdown (GFM)
### Where it works
- PR descriptions
- Issue descriptions
- Comments
- Code comments
- Many GitHub text boxes
### What it is
- Markdown: plain text that renders richly
- GitHub adds extensions beyond base Markdown
### Task Lists
- Purpose
- Checklist of work items (often “before merge/complete” items)
- Syntax
- `- [X]` checked
- `- [ ]` unchecked
- Convenience
- Checkboxes can be clicked directly (no need to edit Markdown)
- Visibility
- GitHub summarizes task progress on PR/Issue list pages
- Common usage pattern
- Open PR early, track progress via tasks
### Code Snippets (fenced code blocks)
- Use cases
- propose code ideas before committing
- share failing examples
- demonstrate intended behavior
- Syntax
- Fence with triple backticks
- Add a language name for syntax highlighting (example: `java`)
### Quoting
- Quote lines by prefixing with `>`
- Shortcut
- Select text in a comment and press `r` to quote it in the reply box
### Emoji
- Emoji helper/autocomplete appears when typing `:`
- Syntax
- `:<name>:` (e.g., `:+1:`)
- Comment tone
- Adds emotion/fun; not essential but commonly used
- Emoji cheat sheet link provided
- `https://www.webfx.com/tools/emoji-cheat-sheet/`
### Images (drag & drop)
- Not strictly a “GFM extension,” but supported
- Easier than manually creating Markdown image links
- Drag and drop images into comment text areas
- GitHub uploads and auto-embeds
- “Parsed as Markdown” hint
- Provides a cheat sheet of supported Markdown features
---
## Keep your GitHub public fork up-to-date (syncing your fork's `master`)
### Key point
- A fork is independent of the original repo
- GitHub may show “X commits behind upstream”
- GitHub will not automatically update your fork
### Simple method (no extra configuration)
- `git checkout master`
- `git pull https://github.com/progit/progit2.git`
- fetch + merge into your `master`
- `git push origin master`
- update your fork on GitHub
- Downside
- Typing the upstream URL repeatedly is tedious
### Configured method (more convenient)
- Add upstream remote
- `git remote add progit https://github.com/progit/progit2.git`
- Fetch upstream
- `git fetch progit`
- Set your local `master` to pull from upstream `master`
- `git branch --set-upstream-to=progit/master master`
- Set default push destination to your fork
- `git config --local remote.pushDefault origin`
- Then the routine becomes
- `git checkout master`
- `git pull`
- `git push`
### Important caution
- With this setup Git won't warn if you:
- commit to `master`
- pull from upstream
- push to origin
- You must treat `master` as “belonging to upstream”
- avoid committing directly to `master`
---
## Maintaining a Project
## Creating a New Repository
- Entry points
- “New repository” button on dashboard
- `+` menu in top toolbar → “New repository”
- New repository form
- Required: repository/project name
- All other fields optional
- After creation
- Repo available as `<user>/<project_name>`
- If empty, GitHub provides instructions to:
- create a new repo locally and push
- connect an existing repo
- (Refresher referenced: Git Basics)
- Repo URLs
- HTTPS: `https://github.com/<user>/<project_name>`
- SSH: `git@github.com:<user>/<project_name>`
- Sharing tip (public projects)
- Prefer sharing HTTPS URL
- cloning doesn't require a GitHub account
- SSH requires account + SSH key
## Adding Collaborators
- Purpose
- Grant push access (read + write)
- Steps
- Repo page → **Settings**
- Left menu → **Collaborators**
- Add by username → “Add collaborator”
- Remove access by clicking “X” next to collaborator
---
## Managing Pull Requests (as a maintainer)
### Where PRs come from
- Fork-based PRs
- contributors branch in their fork
- usually you cannot push to their branch
- Internal PRs (same repo)
- branches inside your repo
- typically both sides can push to the branch
### Email notifications (new PR)
- Maintainer receives email for a new PR
- Email includes
- Diffstat (files changed and how much)
- Link to the PR on GitHub
- Useful command-line URLs/commands
- `git pull <url> patch-1` (merge remote branch without adding remote)
- `.diff` and `.patch` links for PR content
- Example patch application:
- `curl https://github.com/<user>/<repo>/pull/<id>.patch | git am`
### Collaborating on the PR (discussion + review)
- You can comment on
- specific lines
- whole commits
- general PR discussion
- Uses GitHub Flavored Markdown everywhere
- Email replies can be included back into the GitHub thread (metadata supports this)
### Merging or closing
- Merge locally (various ways)
- `git pull <url> <branch>`
- or add remote for the fork, fetch, then merge
- Merge on GitHub site
- If merge is trivial, use **Merge** button
- Creates **non-fast-forward** merge commit even if fast-forward is possible
- If you decide not to merge
- Close the PR
- Author is notified
---
## Pull Request Refs (advanced: fetching PRs like branches)
### Motivation
- Many PRs → avoid:
- adding numerous remotes
- repeating one-off `git pull <url>` operations
### Key idea
- GitHub exposes PRs as “pseudo-branches”
- They exist under:
- `refs/pull/…`
- Not fetched by default because normal fetch refspec targets:
- `refs/heads/*`
### Discover PR refs on the remote
- Use plumbing command:
- `git ls-remote <repo-url>`
- You'll see entries like:
- `refs/heads/master`
- `refs/pull/<id>/head`
- `refs/pull/<id>/merge`
### Two refs per PR (what they mean)
- `refs/pull/<id>/head`
- points to the tip commit of the PR branch
- `refs/pull/<id>/merge`
- points to the merge commit GitHub would create when using the merge button
- useful for testing the would-be merge result
### Fetch a single PR head ref directly
- Example
- `git fetch origin refs/pull/958/head`
- Result
- stored in `.git/FETCH_HEAD`
- Downsides
- repetitive for many PRs
- merging `FETCH_HEAD` can produce awkward merge messages
### Fetch all PR heads automatically via refspec
- Edit `.git/config` and add a second `fetch =` line under the `origin` remote:
- Existing (typical):
- `fetch = +refs/heads/*:refs/remotes/origin/*`
- Add:
- `fetch = +refs/pull/*/head:refs/remotes/origin/pr/*`
- After `git fetch`
- PRs appear locally as:
- `origin/pr/<id>`
- You can test a PR locally
- `git checkout pr/2`
- Creates a local branch tracking the fetched PR ref
- Note in chapter
- The `/head` suffix is intentional; GitHub also provides `/merge` refs
---
## Pull Requests on Pull Requests
- PR target is not limited to `master`
- You can target
- any branch in the network
- even another Pull Request branch
- Why this is useful
- proposed change depends on another PR
- exploratory/uncertain change
- you don't have push access to the target branch
- How to do it (PR creation UI)
- edit the base/compare selection
- can change both:
- branches
- forks (where the branches live)
---
## Mentions and Notifications
### @mentions
- Type `@` in a comment to autocomplete:
- collaborators
- contributors
- Can mention users not in the dropdown too
### Notification effects
- Mentioned user is notified
- Mentioned user becomes subscribed (keeps receiving updates)
- You are also subscribed if you:
- open the Issue/PR
- watch the repo
- comment on the thread
### Unsubscribe
- Use **Unsubscribe** button on Issue/PR page to stop updates
---
## The Notifications Page (config + handling)
### Where to configure
- Settings → **Notification center**
- Two delivery channels
- Email
- Web
- Each can be toggled separately for:
- **Participating** (threads you engage in / @mentioned)
- **Watching** (repos you watch)
### Web notifications
- GitHub-only
- Indicated by a blue dot on the notifications icon
- Notification center features
- grouped by project
- filter by project (left sidebar)
- mark read individually (checkmark)
- mark all in project read
- mute an item (stop future notifications)
### Email notifications
- Emails are threaded in many email clients
- Headers include metadata useful for filtering/rules, such as:
- `Message-ID` encoding: `<user>/<project>/<type>/<id>`
- `<type>` differs for issues vs pull requests
- List-style headers:
- `List-Post`
- `List-Unsubscribe`
- `List-Archive`
- Mail clients can use these headers to:
- reply-post into the thread
- unsubscribe/mute via email
- Read synchronization
- if both web + email are enabled:
- reading the email can mark the web notification as read (when images are allowed)
---
## Special Files GitHub Recognizes
### README
- GitHub renders a README on the project landing page
- Recognized formats include:
- `README`
- `README.md`
- `README.asciidoc`
- etc.
- Common README contents
- project purpose
- install/config instructions
- usage/running example
- license
- contribution guidance
- Because GitHub renders it, you can include
- images
- links
### CONTRIBUTING
- A `CONTRIBUTING` file (any extension) triggers GitHub to show contribution guidelines when opening a PR
- Purpose
- define what you want/don't want in PR submissions
- increase chance contributors read guidelines before submitting
---
## Project Administration
### Changing the default branch
- If you want a default branch other than `master`
- affects what users see by default
- affects PR defaults
- affects default branch checked out on clone
- Location
- Repo settings → **Options** → Default branch dropdown
### Transferring a project (ownership transfer)
- Repo settings → **Options** → “Transfer ownership”
- Transfer to:
- another user
- an organization
- Useful for:
- handing off a project you no longer maintain
- moving a growing project into an organization
- Effects
- moves repo with watchers and stars
- creates redirects for:
- web URLs
- Git clone/fetch URLs (not only web)
---
## Managing an Organization
## Organization basics
- Organizations have:
- shared namespace for projects
- shared ownership and management features
- Common usage
- open source groups
- companies
- Create an organization
- `+` menu (top-right) → “New organization”
- provide:
- organization name
- main contact email
- optionally invite other users as co-owners
- Free tier note (as stated)
- organizations are free if everything stored is open source
- Owner capabilities
- fork a repo into:
- personal namespace
- org namespace
- create repos under:
- personal account
- any org you own
- automatically watch new repos created under the org
- Customization
- upload an org avatar
- Org landing page
- lists org repositories and is viewable by others
## Teams
- Teams connect:
- people (user accounts)
- repositories
- access levels
- Example use case
- repos: `frontend`, `backend`, `deployscripts`
- team-based permissions for devs vs ops
- Team management includes
- adding members
- granting repo access
- adjusting access levels
- Access levels
- read-only
- read/write
- administrative
- Team invitations
- invited members get an email
- Team mentions
- `@org/team` mentions notify all team members and subscribe them
- Team strategy tip
- users can be in multiple teams
- create special-interest teams (not only access-control teams)
- `ux`, `css`, `refactoring`
- `legal`, `colorblind`, etc.
## Audit log
- Available to organization owners
- Shows
- org-level events
- who performed them
- where they were performed (geographic context)
- Filtering available by
- event type
- place
- person
---
## Scripting GitHub
## Services and Hooks (integrations)
### Where to find
- Repo **Settings**
- **Webhooks and Services** section/tab
### Services (prebuilt integrations)
- Dozens of integrations available
- Common categories
- CI
- issue trackers
- chat systems
- documentation systems
- Example: Email service
- choose “email” from “Add Service”
- configure email address
- “Add service”
- result: email sent on every push
- Event model
- services may support multiple events
- many focus on push events
- Integration tip
- check for built-in service for your tool (example given: Jenkins)
### Hooks (generic webhooks)
- Use when
- you need a custom integration
- a service isn't available as a built-in “Service”
- How it works
- configure a URL
- GitHub POSTs an HTTP payload to that URL on selected events
- Enabling/configuring
- click “Add webhook”
- provide:
- payload URL
- secret key
- choose events (default: push events on any branch)
#### Example webhook service logic (Sinatra)
- Goal example
- send email if:
- a specific user pushes
- to a specific branch
- touching a specific file
- Steps in the handler
- parse JSON payload from request body
- identify:
- pusher name
- ref/branch
- gather files touched across commits (added/modified/removed)
- if criteria match → send email alert
#### Webhook debugging & redelivery
- GitHub provides a webhook delivery console
- shows recent deliveries
- success/failure
- request/response headers and bodies
- Can “redeliver” old payloads to test your service
- Webhook docs reference
- `https://developer.github.com/webhooks/`
---
## The GitHub API
### What it's for
- Automate actions or retrieve information beyond webhook notifications
- Can do (as stated)
- nearly anything you can do on the website programmatically
### Basic usage (unauthenticated GET)
- Example: user info
- `curl https://api.github.com/users/schacon`
- Many read-only endpoints exist:
- orgs, repos, issues, commits, etc.
- Example: get a `.gitignore` template
- `curl https://api.github.com/gitignore/templates/Java`
- Also mentioned capability
- render arbitrary Markdown via the API
### Authentication (for write actions/private data)
- Options mentioned
- basic auth (username/password)
- personal access token (recommended)
#### Personal access tokens
- Generated in settings → **Applications**
- Configure:
- scopes
- description (helps later cleanup)
- Token visibility
- shown only once → copy it
- Advantages
- scoped + revocable
- higher rate limits
- unauthenticated: 60 requests/hour
- authenticated: 5,000 requests/hour
### Commenting on an Issue (API example)
- Endpoint pattern
- `repos/<user>/<repo>/issues/<num>/comments`
- HTTP method
- `POST`
- Headers
- `Content-Type: application/json`
- `Authorization: token TOKEN`
- Example payload
- JSON with `"body"` (can include emoji codes like `:+1:`)
- Result
- API returns comment data (id, URLs, timestamps, author info)
- Comment appears on the GitHub issue
### Changing the status of a Pull Request (commit status API)
- Concept
- Each commit can have one or more statuses
- API supports adding/querying statuses
- Typical users
- CI and testing services
- Other validation examples (as stated)
- commit message formatting checks
- contribution guideline validation
- signed/valid commit checks
- Example webhook-based validator (Signed-off-by)
- On push:
- inspect each commit message
- set status `state` to one of:
- `success`
- `failure`
- `error`
- include:
- description
- target URL for more info
- context (e.g., `validate/signoff`) to distinguish multiple status providers
- Status POST endpoint pattern
- `/repos/<user>/<repo>/statuses/<commit_sha>`
- UI effect on PR
- green check vs red X per commit
- PR overall status reflects last commit's status
- warns you if the last commit is failing (useful to avoid merging broken state)
### Octokit (API client libraries)
- Purpose
- Provide idiomatic wrappers around GitHub API (avoid manual HTTP)
- Languages supported (at time of writing in chapter)
- Go
- Objective-C
- Ruby
- .NET
- Info link
- `https://github.com/octokit`
### Documentation link (API)
- Full API docs + guides
- `https://developer.github.com`
---
## Summary (end of chapter)
- After this chapter you can:
- create/manage a GitHub account
- create repos and push code
- contribute via forks + pull requests
- review/merge/close PRs as a maintainer
- manage orgs, teams, audit logs
- integrate via services/webhooks
- automate via the GitHub API (tokens, comments, statuses)
- Next step (as stated)
- learn more powerful Git tools/tips for complex situations to become a “Git master”