Recent changes to this wiki:
Added a comment
diff --git a/doc/design/external_special_remote_protocol/comment_61_f214c6f610a2be1beec00a973e3ed994._comment b/doc/design/external_special_remote_protocol/comment_61_f214c6f610a2be1beec00a973e3ed994._comment new file mode 100644 index 0000000000..da81bf72a6 --- /dev/null +++ b/doc/design/external_special_remote_protocol/comment_61_f214c6f610a2be1beec00a973e3ed994._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Katie" + avatar="http://cdn.libravatar.org/avatar/38e04123b913160b66d8117cada14532" + subject="comment 61" + date="2026-01-11T06:18:07Z" + content=""" +Thanks a lot for the quick fix, Joey! +"""]]
external: Respond to GETGITREMOTENAME during INITREMOTE with the remote name
diff --git a/CHANGELOG b/CHANGELOG
index d7f8ec5d7b..92a10ca152 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,8 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
* Pass www-authenticate headers in to git credential, to support
eg, git-credential-oauth.
* import: Fix display of some import errors.
+ * external: Respond to GETGITREMOTENAME during INITREMOTE with the remote
+ name.
* When displaying sqlite error messages, include the path to the database.
* webapp: Remove support for local pairing; use wormhole pairing instead.
* git-annex.cabal: Removed pairing build flag, and no longer depends
diff --git a/Remote/External.hs b/Remote/External.hs
index d9871eaf41..87a23a2b9e 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -1,6 +1,6 @@
{- External special remote interface.
-
- - Copyright 2013-2025 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -193,7 +193,7 @@ externalSetup externalprogram setgitconfig ss mu remotename _ c gc = do
else do
pc' <- either giveup return $ parseRemoteConfig c' (lenientRemoteConfigParser externalprogram)
let p = fromMaybe (ExternalType externaltype) externalprogram
- external <- newExternal p (Just u) pc' (Just gc) Nothing Nothing
+ external <- newExternal p (Just u) pc' (Just gc) (Just remotename) Nothing
-- Now that we have an external, ask it to LISTCONFIGS,
-- and re-parse the RemoteConfig strictly, so we can
-- error out if the user provided an unexpected config.
@@ -953,3 +953,4 @@ remoteConfigParser externalprogram c
where
isproposed (Accepted _) = False
isproposed (Proposed _) = True
+
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn
index 5a1f9fa969..f79b8230ae 100644
--- a/doc/design/external_special_remote_protocol.mdwn
+++ b/doc/design/external_special_remote_protocol.mdwn
@@ -379,6 +379,9 @@ handling a request.
passed to `git-annex initremote` and `enableremote`, but it is possible
for git remotes to be renamed, and this will provide the remote's current
name.
+ If this is used during INITREMOTE, the git remote may not be
+ configured yet. (Older versions of git-annex responded with an ERROR
+ when this was used during INITREMOTE.)
(git-annex replies with VALUE followed by the name.)
This message is a protocol extension; it's only safe to send it to
git-annex after it sent an `EXTENSIONS` that included `GETGITREMOTENAME`.
diff --git a/doc/design/external_special_remote_protocol/comment_60_92ddb8c0da5260c619467ba8a5bf753c._comment b/doc/design/external_special_remote_protocol/comment_60_92ddb8c0da5260c619467ba8a5bf753c._comment
new file mode 100644
index 0000000000..12aa7212e7
--- /dev/null
+++ b/doc/design/external_special_remote_protocol/comment_60_92ddb8c0da5260c619467ba8a5bf753c._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""Re: How do I get GETGITREMOTENAME to work in INITREMOTE?"""
+ date="2026-01-09T17:26:59Z"
+ content="""
+@Katie, thanks for pointing out that doesn't work. I was able to fix that,
+so check out a daily build.
+"""]]
comment
diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_4_f6d3abcc128796acc7ccfa50a3d0f907._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_4_f6d3abcc128796acc7ccfa50a3d0f907._comment new file mode 100644 index 0000000000..0d8cdfb882 --- /dev/null +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_4_f6d3abcc128796acc7ccfa50a3d0f907._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2026-01-08T19:46:02Z" + content=""" +Unfortunately, that design doesn't optimize the preferred content +expression that you were wanting to use: + +`include=docs/* or (include=*.md and exclude=*/*)` + +In this case, the exclude limits the include to md files in the top directory, +not subdirectories, but with the current design it will recurse and find +all files to handle the `include=*.md`. + +To optimise that, it needs to look at when includes are ANDed with +excludes. With `"exclude=*/*"`, only files in the root directory can match, +and those are always listed. So, that include can be filtered out before +step #3 above. + +The other cases of excludes that can be ANDed with an include are: + +* `exclude=bar/*` -- This needs to do a full listing, same reasons I + discussed in comment 2. +* `exclude=*/foo.*` -- Also needs a full listing. +* `exclude=foo` -- Also needs a full listing. +* `exclude=foo.*` -- Also needs a full listing. +* `exclude=*[/]*` -- Same as "exclude=*/*" +* `exclude=*[//]*` -- Same (and so on for other numbers of slashes). +* `exclude=*/**` -- Same (and so on for more asterisks in the front or back) +* `exclude=*[/]**` -- Same (and so on for more slashes and asterisks in the + front or back) +* `exclude=*` -- Pointless to AND with an include since the combination + can never match. May as well optimise it anyway by avoiding a full listing. +* `exclude=**` -- Same as above (and so on) +"""]]
correction
diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment index a7cca3fb31..fbdcd966f4 100644 --- a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment @@ -3,11 +3,14 @@ subject="""comment 1""" date="2026-01-08T13:49:52Z" content=""" -Paths in preferred content expressions match relative to the top, so -this preferred content expression will match only md files in the top, +This preferred content expression will match only md files in the top, and files in the docs subdirectory: -`include=docs/* or include=*.md` +`include=docs/* or (include=*.md and exclude=*/*)` + +I got this wrong at first; this version will work! The `"include=*.md"` +matches files with that extension anywhere in the tree, so the `"exclude=*/*` +is needed to limit to ones not in a subdirectory. Only preferred content is downloaded, but S3 is still queried for the entire list of files in the bucket.
markdown
diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment index b1a0c2585c..4d32e682d6 100644 --- a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment @@ -12,11 +12,11 @@ subdirectories. Eg, if the bucket contains "foo", "bar/...", and "baz/...", the response will list only the file "foo", and CommonPrefixes contains "bar" and "baz". -So, git-annex could make that request, and then if "include=bar/*" is not -in preferred content, but "include=foo/*" is, it could make a request to +So, git-annex could make that request, and then if `"include=bar/*"` is not +in preferred content, but `"include=foo/*"` is, it could make a request to list files prefixed by "foo/". And so avoid listing all the files in "bar". -If preferred content contained "include=foo/x/*" and "include=foo/y/*", +If preferred content contained `"include=foo/x/*"` and `"include=foo/y/*"`, when CommonPrefixes includes "foo", git-annex could follow up with 2 requests to list those subdirectories. @@ -24,7 +24,7 @@ So this ends up making at most 1 additional request per subdirectory included in preferred content. When preferred content excludes a subdirectory though, more requests would -be needed. For "exclude=bar/*", if the response lists 100 other +be needed. For `"exclude=bar/*"`, if the response lists 100 other subdirectories in CommonPrefixes, it would need to make 100 separate requests to list those while avoiding listing bar. That could easily be more expensive than the current behavior. So it does not seem to make sense
markdown
diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment
index e18502378a..361471327b 100644
--- a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment
+++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment
@@ -5,12 +5,12 @@
content="""
There are some complications in possible preferred content expressions:
-"include=foo*/*" -- we want "foo/*" but also "foooooom/*"... but what if
+`"include=foo*/*"` -- we want `"foo/*"` but also `"foooooom/*"`... but what if
there are 100 such subdirectories? It would be an unexpected cost to need
to make so many requests. Like exclude=, the optimisation should not be
used in this case.
-"include=foo/bar" -- we want only this file.. so would prefer to avoid
+`"include=foo/bar"` -- we want only this file.. so would prefer to avoid
recursing through the rest of foo. If there are multiple ones like this
that are all in the same subdirectory, it might be nice to make
one single request to find them all. But this seems like an edge case,
@@ -22,16 +22,16 @@ Here's a design:
2. Filter for "include=" that contain a "/" in the value. If none are
found, do the usual full listing of the bucket.
3. If any of those includes contain a glob before a "/", do the usual full
- listing of the bucket. (This handles the "include=foo*/* case)
+ listing of the bucket. (This handles the `"include=foo*/*"` case)
4. Otherwise, list the top level of the bucket with delimiter set to "/".
5. Include all the top-level files in the list.
6. Filter the includes to ones that start with a subdirectory in the
CommonPrefixes.
7. For each remaining include, make a request to list the bucket, with
the prefix set to the non-glob directory from the include. For example,
- for "include=foo/bar/*", set prefix to "foo/bar/", but for
- "include=foo/*bar", set prefix to "foo/". And for "include=foo/bar",
- set prefix to "foo/".
+ for `"include=foo/bar/*"`, set prefix to `"foo/bar/"`, but for
+ `"include=foo/*bar"`, set prefix to `"foo/"`. And for
+ `"include=foo/bar"`, set prefix to `"foo/"`.
8. Add back the prefixes to each file in the responses.
Note that, step #1 hides some complexity, because currently preferred
design
diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment new file mode 100644 index 0000000000..a7cca3fb31 --- /dev/null +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_1_842a1243cd6f15004a178f607912ca33._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2026-01-08T13:49:52Z" + content=""" +Paths in preferred content expressions match relative to the top, so +this preferred content expression will match only md files in the top, +and files in the docs subdirectory: + +`include=docs/* or include=*.md` + +Only preferred content is downloaded, but S3 is still queried for the +entire list of files in the bucket. +"""]] diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment new file mode 100644 index 0000000000..b1a0c2585c --- /dev/null +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_2_f5a391a3e62284e0c503139eade4fdda._comment @@ -0,0 +1,32 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2026-01-08T14:16:26Z" + content=""" +I do think it would be possible to avoid the overhead of listing the +contents of subdirectories that are not preferred content. At +least sometimes. + +When a bucket is listed with a "/" delimiter, S3 does not recurse into +subdirectories. Eg, if the bucket contains "foo", "bar/...", and "baz/...", +the response will list only the file "foo", and CommonPrefixes contains +"bar" and "baz". 
+ +So, git-annex could make that request, and then if "include=bar/*" is not +in preferred content, but "include=foo/*" is, it could make a request to +list files prefixed by "foo/". And so avoid listing all the files in "bar". + +If preferred content contained "include=foo/x/*" and "include=foo/y/*", +when CommonPrefixes includes "foo", git-annex could follow up with 2 requests +to list those subdirectories. + +So this ends up making at most 1 additional request per subdirectory included +in preferred content. + +When preferred content excludes a subdirectory though, more requests would +be needed. For "exclude=bar/*", if the response lists 100 other +subdirectories in CommonPrefixes, it would need to make 100 separate +requests to list those while avoiding listing bar. That could easily be +more expensive than the current behavior. So it does not seem to make sense +to try to optimise handling of excludes. +"""]] diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment new file mode 100644 index 0000000000..e18502378a --- /dev/null +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree/comment_3_0914c14c2b2b97bd0c79f3d9c990719f._comment @@ -0,0 +1,42 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2026-01-08T14:46:44Z" + content=""" +There are some complications in possible preferred content expressions: + +"include=foo*/*" -- we want "foo/*" but also "foooooom/*"... but what if +there are 100 such subdirectories? It would be an unexpected cost to need +to make so many requests. Like exclude=, the optimisation should not be +used in this case. + +"include=foo/bar" -- we want only this file.. so would prefer to avoid +recursing through the rest of foo. 
If there are multiple ones like this +that are all in the same subdirectory, it might be nice to make +one single request to find them all. But this seems like an edge case, +and one request per include is probably acceptable. + +Here's a design: + +1. Get preferred content expression of the remote. +2. Filter for "include=" that contain a "/" in the value. If none are + found, do the usual full listing of the bucket. +3. If any of those includes contain a glob before a "/", do the usual full + listing of the bucket. (This handles the "include=foo*/* case) +4. Otherwise, list the top level of the bucket with delimiter set to "/". +5. Include all the top-level files in the list. +6. Filter the includes to ones that start with a subdirectory in the + CommonPrefixes. +7. For each remaining include, make a request to list the bucket, with + the prefix set to the non-glob directory from the include. For example, + for "include=foo/bar/*", set prefix to "foo/bar/", but for + "include=foo/*bar", set prefix to "foo/". And for "include=foo/bar", + set prefix to "foo/". +8. Add back the prefixes to each file in the responses. + +Note that, step #1 hides some complexity, because currently preferred +content is loaded and parsed to a MatchFiles, which does not allow +introspecting to get the expression. Since we only care about include +expressions, it would suffice to add to MatchFiles a +`matchInclude :: Maybe String` which gets set for includes. +"""]]
Added a comment: How do I get GETGITREMOTENAME to work in INITREMOTE?
diff --git a/doc/design/external_special_remote_protocol/comment_59_ca91c66cf172e0e859dfe6c6e8d62dd3._comment b/doc/design/external_special_remote_protocol/comment_59_ca91c66cf172e0e859dfe6c6e8d62dd3._comment new file mode 100644 index 0000000000..657eca57b2 --- /dev/null +++ b/doc/design/external_special_remote_protocol/comment_59_ca91c66cf172e0e859dfe6c6e8d62dd3._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="Katie" + avatar="http://cdn.libravatar.org/avatar/38e04123b913160b66d8117cada14532" + subject="How do I get GETGITREMOTENAME to work in INITREMOTE?" + date="2026-01-07T23:37:01Z" + content=""" +I am writing a external special remote using this protocol. This is little similar to the directory remote and there's a path on the local system where content is stored. + +I don't want this location to be saved in the git-annex branch and I thought I'll be able to use GETGITREMOTENAME to persist it myself. However, I'm running into an issue where GETGITREMOTENAME fails during INITREMOTE (presumably since the remote has not yet been created). It does work during Prepare, but that feels a bit late to ask for a required piece of configuration. + +What are my options? My ideal behavior would be if it behaves very similar to `directory=` field in directory remote, but I can hand-manage it too if that's the recommendation as long as I get some identifier for this remote (there can be multiple of these in the same repo) +"""]]
desire for a limited import/export.
diff --git a/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree.mdwn b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree.mdwn new file mode 100644 index 0000000000..3eacf23742 --- /dev/null +++ b/doc/todo/way_to_limit_recursion_for_import__47__export_S3_tree.mdwn @@ -0,0 +1,6 @@ +I wanted to implement management and synchronization of descriptive files (README.md etc) on the top of the large S3 bucket via git-annex so I could keep files in a git repo and rely on importree/exporttree functionality to keep bucket and repo in sync. + +Looking at [special_remotes/S3/](https://git-annex.branchable.com/special_remotes/S3/) I didn't spot any option to achieve that. + +I am not sure what would be the best option for this, given that greedy me might want to also eventually `sync` some `docs/` prefix there: may be could be a white list of some keys/paths to include and/or exclude? May be some [preferred content](https://git-annex.branchable.com/preferred_content/) `include` expression could be specific enough to not demand full bucket traversal (unrealistic in feasible time) but rather limit to top level, e.g. `include=^docs/ and include=^*.md` or smth smarter? +
Pass www-authenticate headers in to git credential
To support eg, git-credential-oauth.
To support eg, git-credential-oauth.
diff --git a/Annex/Url.hs b/Annex/Url.hs
index 1cc742f522..6d0cb43767 100644
--- a/Annex/Url.hs
+++ b/Annex/Url.hs
@@ -157,7 +157,7 @@ withUrlOptions :: Maybe RemoteGitConfig -> (U.UrlOptions -> Annex a) -> Annex a
withUrlOptions mgc a = a =<< getUrlOptions mgc
-- When downloading an url, if authentication is needed, uses
--- git-credential to prompt for username and password.
+-- git-credential for the prompting.
--
-- Note that, when the downloader is curl, it will not use git-credential.
-- If the user wants to, they can configure curl to use a netrc file that
@@ -169,8 +169,8 @@ withUrlOptionsPromptingCreds mgc a = do
prompter <- mkPrompter
cc <- Annex.getRead Annex.gitcredentialcache
a $ uo
- { U.getBasicAuth = \u -> prompter $
- getBasicAuthFromCredential g cc u
+ { U.getBasicAuth = \u respheaders -> prompter $
+ getBasicAuthFromCredential g cc u respheaders
}
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
diff --git a/CHANGELOG b/CHANGELOG
index 8d8605dca5..39e6e628a2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -13,6 +13,8 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
* import: Fix display of some import errors.
* Fix bug that could result in a tree imported from a remote containing
missing git blobs.
+ * Pass www-authenticate headers in to git credential, to support
+ eg, git-credential-oauth.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/Git/Credential.hs b/Git/Credential.hs
index 379fe585b0..1b69381996 100644
--- a/Git/Credential.hs
+++ b/Git/Credential.hs
@@ -1,6 +1,6 @@
{- git credential interface
-
- - Copyright 2019-2022 Joey Hess <id@joeyh.name>
+ - Copyright 2019-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -19,6 +19,8 @@ import Utility.Url.Parse
import qualified Data.Map as M
import Network.URI
+import Network.HTTP.Types
+import Network.HTTP.Types.Header
import Control.Concurrent.STM
data Credential = Credential { fromCredential :: M.Map String String }
@@ -35,7 +37,7 @@ credentialBasicAuth cred = BasicAuth
<*> credentialPassword cred
getBasicAuthFromCredential :: Repo -> TMVar CredentialCache -> GetBasicAuth
-getBasicAuthFromCredential r ccv u = do
+getBasicAuthFromCredential r ccv u respheaders = do
(CredentialCache cc) <- atomically $ readTMVar ccv
case mkCredentialBaseURL r u of
Just bu -> case M.lookup bu cc of
@@ -44,8 +46,8 @@ getBasicAuthFromCredential r ccv u = do
let storeincache = \c -> atomically $ do
CredentialCache cc' <- takeTMVar ccv
putTMVar ccv (CredentialCache (M.insert bu c cc'))
- go storeincache =<< getUrlCredential u r
- Nothing -> go (const noop) =<< getUrlCredential u r
+ go storeincache =<< getUrlCredential u respheaders r
+ Nothing -> go (const noop) =<< getUrlCredential u respheaders r
where
go storeincache c =
case credentialBasicAuth c of
@@ -61,8 +63,9 @@ getBasicAuthFromCredential r ccv u = do
-- | This may prompt the user for the credential, or get a cached
-- credential from git.
-getUrlCredential :: URLString -> Repo -> IO Credential
-getUrlCredential = runCredential "fill" . urlCredential
+getUrlCredential :: URLString -> ResponseHeaders -> Repo -> IO Credential
+getUrlCredential url respheaders = runCredential "fill" $
+ urlCredential url respheaders
-- | Call if the credential the user entered works, and can be cached for
-- later use if git is configured to do so.
@@ -73,8 +76,12 @@ approveUrlCredential c = void . runCredential "approve" c
rejectUrlCredential :: Credential -> Repo -> IO ()
rejectUrlCredential c = void . runCredential "reject" c
-urlCredential :: URLString -> Credential
-urlCredential = Credential . M.singleton "url"
+urlCredential :: URLString -> ResponseHeaders -> Credential
+urlCredential url respheaders = Credential $ M.fromList $
+ ("url", url) : map wwwauth (filter iswwwauth respheaders)
+ where
+ iswwwauth (h, _) = h == hWWWAuthenticate
+ wwwauth (_, v) = ("wwwauth[]", decodeBS v)
runCredential :: String -> Credential -> Repo -> IO Credential
runCredential action input r =
diff --git a/P2P/Http/Client.hs b/P2P/Http/Client.hs
index 024fce2242..1588728850 100644
--- a/P2P/Http/Client.hs
+++ b/P2P/Http/Client.hs
@@ -2,7 +2,7 @@
-
- https://git-annex.branchable.com/design/p2p_protocol_over_http/
-
- - Copyright 2024 Joey Hess <id@joeyh.name>
+ - Copyright 2024-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -42,7 +42,7 @@ import Servant hiding (BasicAuthData(..))
import Servant.Client.Streaming
import qualified Servant.Types.SourceT as S
import Network.HTTP.Types.Status
-import Network.HTTP.Client
+import Network.HTTP.Client hiding (responseHeaders)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy.Internal as LI
import qualified Data.Map as M
@@ -52,6 +52,7 @@ import Control.Concurrent
import System.IO.Unsafe
import Data.Time.Clock.POSIX
import qualified Data.ByteString.Lazy as L
+import Data.Foldable (toList)
type ClientAction a
= ClientEnv
@@ -119,7 +120,7 @@ p2pHttpClientVersions' allowedversion rmt rmtrepo fallback clientaction =
go clientenv mcred credcached mauth vs
| statusCode (responseStatusCode resp) == 401 ->
case mcred of
- Nothing -> authrequired clientenv (v:vs)
+ Nothing -> authrequired clientenv resp (v:vs)
Just cred -> do
inRepo $ Git.rejectUrlCredential cred
Just <$> fallback (showstatuscode resp)
@@ -134,9 +135,10 @@ p2pHttpClientVersions' allowedversion rmt rmtrepo fallback clientaction =
catchclienterror a = a `catch` \(ex :: ClientError) -> pure (Left ex)
- authrequired clientenv vs = do
+ authrequired clientenv resp vs = do
+ let respheaders = toList $ responseHeaders resp
cred <- prompt $
- inRepo $ Git.getUrlCredential credentialbaseurl
+ inRepo $ Git.getUrlCredential credentialbaseurl respheaders
go clientenv (Just cred) False (credauth cred) vs
showstatuscode resp =
diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs
index 2ec2f429d7..89d70b6e91 100644
--- a/Remote/GitLFS.hs
+++ b/Remote/GitLFS.hs
@@ -316,7 +316,10 @@ discoverLFSEndpoint tro h =
resp <- makeSmallAPIRequest testreq
if needauth (responseStatus resp)
then do
- cred <- prompt $ inRepo $ Git.getUrlCredential (show lfsrepouri)
+ cred <- prompt $ inRepo $
+ Git.getUrlCredential
+ (show lfsrepouri)
+ (responseHeaders resp)
let endpoint' = addbasicauth (Git.credentialBasicAuth cred) endpoint
let testreq' = LFS.startTransferRequest endpoint' transfernothing
flip catchNonAsync (const (returnendpoint endpoint')) $ do
diff --git a/Utility/Url.hs b/Utility/Url.hs
index d98ade2738..c40a3ee748 100644
--- a/Utility/Url.hs
+++ b/Utility/Url.hs
@@ -281,7 +281,7 @@ getUrlInfo url uo = case parseURIRelaxed url of
fn <- extractFromResourceT (extractfilename resp)
return $ found len fn
else if responseStatus resp == unauthorized401
- then return $ getBasicAuth uo' (show (getUri req)) >>= \case
+ then return $ getBasicAuth uo' (show (getUri req)) (responseHeaders resp) >>= \case
Nothing -> return dne
Just (ba, signalsuccess) -> do
ui <- existsconduit'
@@ -476,7 +476,7 @@ downloadConduit meterupdate iv req file uo =
else do
rf <- extractFromResourceT (respfailure resp)
if responseStatus resp == unauthorized401
- then return $ getBasicAuth uo (show (getUri req')) >>= \case
+ then return $ getBasicAuth uo (show (getUri req')) (responseHeaders resp) >>= \case
Nothing -> giveup rf
Just ba -> retryauthed ba
else return $ giveup rf
@@ -516,7 +516,7 @@ downloadConduit meterupdate iv req file uo =
else do
rf <- extractFromResourceT (respfailure resp)
if responseStatus resp == unauthorized401
- then return $ getBasicAuth uo (show (getUri req'')) >>= \case
(Diff truncated)
sig
diff --git a/doc/todo/support_push_to_create.mdwn b/doc/todo/support_push_to_create.mdwn index 2bda5e2520..50d9cd0a65 100644 --- a/doc/todo/support_push_to_create.mdwn +++ b/doc/todo/support_push_to_create.mdwn @@ -30,4 +30,4 @@ remotes that don't have a UUID. This would slow down pushes to eg github slightl since it would ignore annex-ignore being set, and re-probe the git config to see if a UUID has appeared. That seems a small enough price to pay. -The assistant would also need to be made to handle this. jjjj +The assistant would also need to be made to handle this. --[[Joey]]
break todo out of bug report
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment index e6fed5674d..cba034b3da 100644 --- a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment @@ -10,4 +10,6 @@ than "push to create". I do think my idea in comment #2 would be better than how you implemented that. But it's also not directly relevant to this bug report. + +I did open [[todo/support_push_to_create]]. """]] diff --git a/doc/todo/support_push_to_create.mdwn b/doc/todo/support_push_to_create.mdwn new file mode 100644 index 0000000000..2bda5e2520 --- /dev/null +++ b/doc/todo/support_push_to_create.mdwn @@ -0,0 +1,33 @@ +"push to create" as supported by eg Forgejo makes a `git push` to a new +git repository create the repository. + +Since the repository does not exist when git-annex probes the UUID, +which happens before any push, annex-ignore is set to true. +So a command like `git-annex push` will do the git push and create the +repository, but fail to discover the uuid of that repository, and so +not send annexed files to it. + +forgejo-aneksajo has worked around this by making git-annex's request for +"$url/config" create the repository. See: + +* <https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/commit/3c53e9803de9c59e9e78ac19f0bb107651bb48f8> +* <https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/issues/85> +* <https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/issues/83#issuecomment-5093679> and following comments + +But that means that `git-annex pull` will also auto-create the repository. +Or even a command like `git-annex info` that does UUID discovery of a newly +added remote. 
+ +git-annex could support push to create better by having `git-annex push`, +after pushing the git branches, regenerate the remote list, while +ignoring the annex-ignore configuration of remotes. +So if the branch push created the git repo, any annex uuid that the +new repo has would be discovered at that point. (And at that point annex-ignore +would need to be cleared.) + +The remote list regeneration would only need to be done when there are git +remotes that don't have a UUID. This would slow down pushes to eg github slightly, +since it would ignore annex-ignore being set, and re-probe the git config +to see if a UUID has appeared. That seems a small enough price to pay. + +The assistant would also need to be made to handle this. jjjj
followup
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment new file mode 100644 index 0000000000..e6fed5674d --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_6_1855b50e8aa0124b9f526c40b6498133._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2026-01-07T16:45:48Z" + content=""" +> Forgejo-aneksajo also creates the repository for requests to /config, and will git-annex-init it if the request comes from a git-annex user agent and the user has write permissions. + +Hmm, then `git-annex pull` will create a repository. Which is going further +than "push to create". + +I do think my idea in comment #2 would be better than how you implemented +that. But it's also not directly relevant to this bug report. +"""]] diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_7_13b7c0b807f6b19be1d2b097fe597f5c._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_7_13b7c0b807f6b19be1d2b097fe597f5c._comment new file mode 100644 index 0000000000..a7b472c66a --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_7_13b7c0b807f6b19be1d2b097fe597f5c._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2026-01-07T16:47:38Z" + content=""" +The www-authenticate header is also sent when the request for `/config` is +a 401. So git-annex can use that to set the wwwauth field. + +The capability fields are indicating capabilities of git. +I checked and git-credential-oauth does not rely on those capabilities. 
+ +(Wildly, git-credential-oauth is looking for "GitLab", "GitHub", and +"Gitea" in order to sniff what backend it's authenticating to, and that's +all it uses the wwwauth for.) +"""]]
Added a comment
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_5_53cc071de11aa604e6eecb68ce15baba._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_5_53cc071de11aa604e6eecb68ce15baba._comment new file mode 100644 index 0000000000..ebb2a6c868 --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_5_53cc071de11aa604e6eecb68ce15baba._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="matrss" + avatar="http://cdn.libravatar.org/avatar/cd1c0b3be1af288012e49197918395f0" + subject="comment 5" + date="2026-01-06T17:47:53Z" + content=""" +`git push` seems to first make a GET request for something like `/m.risse/test-push-oauth2.git/info/refs?service=git-receive-pack`, which responds with a 401 and `www-authenticate: Basic realm=\"Gitea\"` among the headers. Git then seems to pass this information on to the git-credential-helper. + +`git annex push` likewise receives a 401 response from the `/config` endpoint with the same www-authenticate header, so it could pass it on to the credential helper too. + +I am not sure where the `capability`s are coming from... +"""]]
Added a comment
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_4_094fe78aaf919e54d5457fb3274a023e._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_4_094fe78aaf919e54d5457fb3274a023e._comment new file mode 100644 index 0000000000..d44c824587 --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_4_094fe78aaf919e54d5457fb3274a023e._comment @@ -0,0 +1,52 @@ +[[!comment format=mdwn + username="matrss" + avatar="http://cdn.libravatar.org/avatar/cd1c0b3be1af288012e49197918395f0" + subject="comment 4" + date="2026-01-06T17:36:19Z" + content=""" +The chicken-and-egg problem you are describing is actually something msz has already encountered and reported, but that issue is fixed: Forgejo-aneksajo also creates the repository for requests to /config, and will git-annex-init it if the request comes from a git-annex user agent and the user has write permissions. More about that here: + +- <https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/commit/3c53e9803de9c59e9e78ac19f0bb107651bb48f8> +- <https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/issues/85> +- <https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/issues/83#issuecomment-5093679> and following comments + +So that's not it... I've investigated a bit and I think I led you astray with the comment about a \"non-existing repository\". I am also seeing the issue with a pre-created repository, and even with a pre-created and git-annex-init'ialized repository. 
+ +The issue is actually that for ATRIS I rely on git-credential-oauth's \"Gitea-like-Server\" discovery here: <https://github.com/hickford/git-credential-oauth/blob/f01271d94c70b9280c19f489f90c05e9aba0d757/main.go#L206> + +When doing a `git push origin main` the git-credential-oauth helper actually receives this request: + +``` +$ git push origin main +capability[]=authtype +capability[]=state +protocol=https +host=atris.fz-juelich.de +wwwauth[]=Basic realm=\"Gitea\" +``` + +while with `git annex push` it is just this: + +``` +$ git annex push +protocol=https +host=atris.fz-juelich.de +``` + +Git-credential-oauth recognizes that it is talking to a Gitea/Forgejo server based on this `wwwauth[]=Basic realm=\"Gitea\"` data. Without it and in the absence of a more specific configuration for the server it doesn't try to handle it and falls back to the standard http credential handling of git. I am not sure where these capability and wwwauth fields are coming from, but I think git-annex should somehow do the same as git here... + +--- + +I've gotten at the data git sends to the credential helper with this trivial script: + +``` +$ cat ~/bin/git-credential-echo +#!/usr/bin/env bash + +exec cat >&2 +``` + +and configuring it as my credential helper. + +I have to say, I like this pattern of processes communicating over simple line-based protocols :) +"""]]
comment
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_3_570e6b61adef7c2f8ee0dcdcff225f76._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_3_570e6b61adef7c2f8ee0dcdcff225f76._comment new file mode 100644 index 0000000000..43fc603dae --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_3_570e6b61adef7c2f8ee0dcdcff225f76._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2026-01-06T17:28:34Z" + content=""" +Looks like the 401 Unauthorized happens for all non-existent repos when accessing `/config`. + +Eg: + + joey@darkstar:~>curl https://atris.fz-juelich.de/m.risse/joeytestmadeup.git + Not found. + joey@darkstar:~>curl https://atris.fz-juelich.de/m.risse/joeytestmadeup.git/config + Unauthorized + +A bug in Forgejo? +"""]]
corrections
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment index 17c86a0550..dd6cd3e520 100644 --- a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment @@ -6,24 +6,10 @@ git-annex is actually using git credential here. That's where the "Username for" prompt comes from. -I think that this is a chicken and egg problem. git-annex is doing UUID -discovery, which is the first thing it does when run with a new remote that -does not have a UUID. But the repository does not exist, so has no UUID, -and it won't be created until git push happens. - -Deferring git-annex UUID discovery would avoid the problem, but I think -that would be very complicated if possible at all. - -I wonder if there is some way that git-annex could tell, at the http level, -that this URL does not exist yet? If so, it could avoid doing UUID -discovery. Then `git-annex push` would at least be able to push the git -repo. And then on the next run git-annex would discover the UUID and would -be able to fully use the repository. Not an ideal solution perhaps, since -you would need to `git-annex push` twice in a row to fully populate the -repisitory. - -Looks like the url you gave just 404's, but I'm not sure if I'm seeing -now the same as what you would have seen. +Looks like the url you gave 404's. But git-annex is hitting +`https://atris.fz-juelich.de/m.risse/test1.git/config` and getting a 401 +Unauthorized for that. Which is why it is using git credential. +But I'm not sure if I'm seeing now the same now as what you would have seen. 
@matrs Any chance you could give me access to reproduce this using your server so I could look into that? diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment index eb8396320a..213f93e4a2 100644 --- a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment @@ -3,16 +3,16 @@ subject="""comment 2""" date="2026-01-06T16:39:40Z" content=""" -The chicken and egg problem could be solved by making `git-annex push`, -after pushing the git branches, regenerate the remote list. So if the -branch push created the git repo, any annex uuid that the new repo has -would be discovered at that point. +If the server sent back 404 for the /config hit, then the early UUID +discovery would not prompt with git credential. + +Then, to make "push to create" work smoothly, `git-annex push`, +after pushing the git branches, could regenerate the remote list. So if +the branch push created the git repo, any annex uuid that the new repo +has would be discovered at that point. The remote list regeneration would only need to be done when there are git remotes that don't have a UUID yet. The assistant would also need to be made to do that. - -This, combined with avoiding prompting on 404 in -UUID discovery would make "push to create" work smoothly. """]]
update
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment index 654dc7a04c..eb8396320a 100644 --- a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment @@ -13,7 +13,6 @@ git remotes that don't have a UUID yet. The assistant would also need to be made to do that. -This, combined with avoiding the early -UUID discovery that led to the git-credential prompt, would make -"push to create" work smoothly. +This, combined with avoiding prompting on 404 in +UUID discovery would make "push to create" work smoothly. """]]
comment
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment new file mode 100644 index 0000000000..654dc7a04c --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_2_f4eaa6c45cc7cb20aa613617b78f5f56._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2026-01-06T16:39:40Z" + content=""" +The chicken and egg problem could be solved by making `git-annex push`, +after pushing the git branches, regenerate the remote list. So if the +branch push created the git repo, any annex uuid that the new repo has +would be discovered at that point. + +The remote list regeneration would only need to be done when there are +git remotes that don't have a UUID yet. + +The assistant would also need to be made to do that. + +This, combined with avoiding the early +UUID discovery that led to the git-credential prompt, would make +"push to create" work smoothly. +"""]]
verify git sha from ciddb is in git repository
Fix bug that could result in a tree imported from a remote containing
missing git blobs.
When there was a previous import that failed, the cid log gets committed to
the git-annex branch, but no tree is generated.
And so there are GIT keys that point to blobs that are not attached to any
tree, so never get pushed anywhere. So running the same import in another
clone of the repository will result in a tree that references blobs that
are missing.
In the unlikely situation where the ciddb contains a git sha that
is not in the git repository, this makes it just re-download the file from
the remote. Which should be no problem, since these are small files.
This does add a small performance penalty when importing. Existing
GIT keys have to be verified every time. If there are a lot of non-annexed
files in the imported tree, this could be a significant performance
penalty.
But I don't see any good way to prevent the cid log from getting
committed to the git-annex branch in a failing import? If that could be
done, the check could be avoided.
But since this bug has already affected real world repositories, this check
seems to be needed in any case, to make import do the right thing in those
repositories.
Sponsored-by: Dartmouth College's DANDI project
Fix bug that could result in a tree imported from a remote containing
missing git blobs.
When there was a previous import that failed, the cid log gets committed to
the git-annex branch, but no tree is generated.
And so there are GIT keys that point to blobs that are not attached to any
tree, so never get pushed anywhere. So running the same import in another
clone of the repository will result in a tree that references blobs that
are missing.
In the unlikely situation where the ciddb contains a git sha that
is not in the git repository, this makes it just re-download the file from
the remote. Which should be no problem, since these are small files.
This does add a small performance penalty when importing. Existing
GIT keys have to be verified every time. If there are a lot of non-annexed
files in the imported tree, this could be a significant performance
penalty.
But I don't see any good way to prevent the cid log from getting
committed to the git-annex branch in a failing import? If that could be
done, the check could be avoided.
But since this bug has already affected real world repositories, this check
seems to be needed in any case, to make import do the right thing in those
repositories.
Sponsored-by: Dartmouth College's DANDI project
diff --git a/Annex/Import.hs b/Annex/Import.hs
index cda82022a9..67b845ddd5 100644
--- a/Annex/Import.hs
+++ b/Annex/Import.hs
@@ -982,14 +982,23 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
ImportSubTree subdir _ ->
getTopFilePath subdir </> fromImportLocation loc
- getcidkey cidmap db cid = liftIO $
+ getcidkey cidmap db cid = do
-- Avoiding querying the database when it's empty speeds up
-- the initial import.
- if CIDDb.databaseIsEmpty db
+ l <- liftIO $ if CIDDb.databaseIsEmpty db
then getcidkeymap cidmap cid
else CIDDb.getContentIdentifierKeys db rs cid >>= \case
[] -> getcidkeymap cidmap cid
l -> return l
+ filterM validcidkey l
+
+ -- Guard against a content identifier containing a git sha that is
+ -- not present in the repository. This can happen when a previous
+ -- import failed and the tree was not recorded, and this import is
+ -- being run in another clone of the repository.
+ validcidkey k = case keyGitSha k of
+ Just sha -> isJust <$> catObjectMetaData sha
+ Nothing -> return True
getcidkeymap cidmap cid =
atomically $ maybeToList . M.lookup cid <$> readTVar cidmap
diff --git a/CHANGELOG b/CHANGELOG
index cd6231b075..8d8605dca5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,8 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
on network-multicast or network-info.
* stack.yaml: Update to lts-24.26.
* import: Fix display of some import errors.
+ * Fix bug that could result in a tree imported from a remote containing
+ missing git blobs.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn
index 938d68ba57..deec27e116 100644
--- a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn
+++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn
@@ -114,4 +114,4 @@ Originally all keys in the bucket
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
-
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment
index 93b97e6dd6..41a8d49fec 100644
--- a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment
+++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment
@@ -21,4 +21,10 @@ result:
error: unable to read sha1 file of 1 (d00491fd7e5bb6fa28c517a0bb32b8b506539d4d)
error: unable to read sha1 file of 2 (5716ca5987cbf97d6bb54920bea6adde242d87e6)
error: unable to read sha1 file of 3 (aab959616afa9408f5efc385eb98f63fdb990ba5)
+
+Verified that [[!commit 69e6c4d024dcff7c2f8ea1a2ed3b483a86b2cc7d]] does in
+fact avoid this problem. Running steps 9 and 10 with that commit results in
+a non-broken repository.
+
+Yay, solved!
"""]]
reproduced
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment new file mode 100644 index 0000000000..93b97e6dd6 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_11_b582afc1f538b76cd7605e80fcd43adb._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 11""" + date="2026-01-06T16:16:56Z" + content=""" +Replicated this problem as follows: + +1. modified `importKeys` to fail at the end +2. set up a directory special remote with importtree=yes +3. git config annex.largefiles nothing +4. run, git-annex import, which fails +5. that left git-annex branch changes in the journal, for `GIT` keys +6. git-annex sync back to origin +7. return `importKeys` to usual behavior +8. make new clone from origin +9. run git-annex import in the new clone +10. merge the imported branch into master + +result: + + error: unable to read sha1 file of 1 (d00491fd7e5bb6fa28c517a0bb32b8b506539d4d) + error: unable to read sha1 file of 2 (5716ca5987cbf97d6bb54920bea6adde242d87e6) + error: unable to read sha1 file of 3 (aab959616afa9408f5efc385eb98f63fdb990ba5) +"""]]
comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_10_4f08f7a0665bfd30d5c32eb326b04e66._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_10_4f08f7a0665bfd30d5c32eb326b04e66._comment new file mode 100644 index 0000000000..c844e68a3a --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_10_4f08f7a0665bfd30d5c32eb326b04e66._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 10""" + date="2026-01-06T15:57:43Z" + content=""" +I think that a previous, failed import from the remote, run in a +different clone of the repository than the import that later fails, +could have caused the problem. + +My thinking is, while import is downloading files, the content identifiers +get recorded in the git-annex branch. Only once the import is complete does +the imported tree get grafted into the git-annex branch. So, if the import +fails (or is interrupted), this can leave content identifiers in the log. +The git blobs for small files have already been stored in git, but no tree +references them. If that git-annex branch gets pushed, then in a separate +clone of the repository, running the import again would see those content +identifiers. But the git blobs referenced by them would not have been pushed, +and so would not be available. + +We already know that the import was failing due to the S3 permissions, +so the only other thing that would have been needed is for the git-annex +branch to be pushed to origin, and then this same import tried later in a +different clone. + +@yarikoptic does this seem plausibly what could have happened? +"""]]
comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_9_672d9ee1ac2db009702b3f307cf93517._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_9_672d9ee1ac2db009702b3f307cf93517._comment new file mode 100644 index 0000000000..f939169eca --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_9_672d9ee1ac2db009702b3f307cf93517._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 9""" + date="2026-01-06T15:46:38Z" + content=""" +I was wrong, `git-annex forget` cannot cause this, since +[[!commit 8e7dc958d20861a91562918e24e071f70d34cf5b]] in 8.20210428 +made exported tree grafts be preserved through a forget. + +This leaves me with no scenario that might cause this problem. Unless +a git-annex version older than that were used. + +I've reverted [[!commit 69e6c4d024dcff7c2f8ea1a2ed3b483a86b2cc7d]] which I +had made to guard against the `git-annex forget` scenario, since it would +slow down imports of trees that contain a lot of small files. + +It still seems possible that commit would have avoided the problem, but +until I understand what actually caused the problem, I don't want to +unnecessarily slow git-annex down with an unverified fix for it. +"""]]
Revert "verify git sha from ciddb is in git repository"
This reverts commit 69e6c4d024dcff7c2f8ea1a2ed3b483a86b2cc7d.
git-annex forget cannot cause this problem with any recent version of
git-annex, see commit 8e7dc958d20861a91562918e24e071f70d34cf5b
This reverts commit 69e6c4d024dcff7c2f8ea1a2ed3b483a86b2cc7d.
git-annex forget cannot cause this problem with any recent version of
git-annex, see commit 8e7dc958d20861a91562918e24e071f70d34cf5b
diff --git a/Annex/Import.hs b/Annex/Import.hs
index 6a71538563..cda82022a9 100644
--- a/Annex/Import.hs
+++ b/Annex/Import.hs
@@ -982,22 +982,14 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
ImportSubTree subdir _ ->
getTopFilePath subdir </> fromImportLocation loc
- getcidkey cidmap db cid = do
+ getcidkey cidmap db cid = liftIO $
-- Avoiding querying the database when it's empty speeds up
-- the initial import.
- l <- liftIO $ if CIDDb.databaseIsEmpty db
+ if CIDDb.databaseIsEmpty db
then getcidkeymap cidmap cid
else CIDDb.getContentIdentifierKeys db rs cid >>= \case
[] -> getcidkeymap cidmap cid
l -> return l
- filterM validcidkey l
-
- -- Guard against a content identifier containing a git sha that is
- -- not present in the repository. It's possible that it's not,
- -- when git-annex forget is used.
- validcidkey k = case keyGitSha k of
- Just sha -> isJust <$> catObjectMetaData sha
- Nothing -> return True
getcidkeymap cidmap cid =
atomically $ maybeToList . M.lookup cid <$> readTVar cidmap
diff --git a/CHANGELOG b/CHANGELOG
index a8ada4f875..cd6231b075 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,8 +11,6 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
on network-multicast or network-info.
* stack.yaml: Update to lts-24.26.
* import: Fix display of some import errors.
- * Fix bug importing a tree from a remote after git-annex forget has been
- used, that could result in the imported tree mising git blobs.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_8_4d86764ebd02d547cad7eebbcd116759._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_8_4d86764ebd02d547cad7eebbcd116759._comment
deleted file mode 100644
index 52156dd365..0000000000
--- a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_8_4d86764ebd02d547cad7eebbcd116759._comment
+++ /dev/null
@@ -1,7 +0,0 @@
-[[!comment format=mdwn
- username="joey"
- subject="""comment 8"""
- date="2026-01-02T15:59:56Z"
- content="""
-I've made it deal with the `git-annex forget` scenario now.
-"""]]
response
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_5_ba6b286216609fe250010b549828f4e4._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_5_ba6b286216609fe250010b549828f4e4._comment new file mode 100644 index 0000000000..06e1ed1c8c --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_5_ba6b286216609fe250010b549828f4e4._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2026-01-06T15:43:39Z" + content=""" +Currently: `git-annex smudge --update` + +In next release, optionally: `git-annex fix` (can be run on the specific +file if that makes it faster in a large repo) +"""]]
Added a comment
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_4_42d3bc3283fbe69a8444ca62622b4932._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_4_42d3bc3283fbe69a8444ca62622b4932._comment new file mode 100644 index 0000000000..d021437301 --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_4_42d3bc3283fbe69a8444ca62622b4932._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 4" + date="2026-01-02T17:36:34Z" + content=""" +Thank you Joey for looking into it. Since there was a bit of exploration above, in the nutshell, what should the tandem of git-annex command(s) for users to do after `git reset --hard COMMITISH` to \"time travel\" most efficiently (assuming heavy repos)? +"""]]
followup
diff --git a/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment new file mode 100644 index 0000000000..17c86a0550 --- /dev/null +++ b/doc/bugs/__96__git_annex_push__96___does_not_use_git-credential-oauth/comment_1_a3bb87a5cfd010f7f453f5adf1110fd9._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2026-01-02T16:42:10Z" + content=""" +git-annex is actually using git credential here. That's +where the "Username for" prompt comes from. + +I think that this is a chicken and egg problem. git-annex is doing UUID +discovery, which is the first thing it does when run with a new remote that +does not have a UUID. But the repository does not exist, so has no UUID, +and it won't be created until git push happens. + +Deferring git-annex UUID discovery would avoid the problem, but I think +that would be very complicated if possible at all. + +I wonder if there is some way that git-annex could tell, at the http level, +that this URL does not exist yet? If so, it could avoid doing UUID +discovery. Then `git-annex push` would at least be able to push the git +repo. And then on the next run git-annex would discover the UUID and would +be able to fully use the repository. Not an ideal solution perhaps, since +you would need to `git-annex push` twice in a row to fully populate the +repisitory. + +Looks like the url you gave just 404's, but I'm not sure if I'm seeing +now the same as what you would have seen. + +@matrs Any chance you could give me access to reproduce this using your +server so I could look into that? +"""]]
comment
diff --git a/doc/bugs/some_conflict_resolution_tests_fail_some_time/comment_4_f9f79c336e6887c718c04608300b6040._comment b/doc/bugs/some_conflict_resolution_tests_fail_some_time/comment_4_f9f79c336e6887c718c04608300b6040._comment new file mode 100644 index 0000000000..de162d3aa3 --- /dev/null +++ b/doc/bugs/some_conflict_resolution_tests_fail_some_time/comment_4_f9f79c336e6887c718c04608300b6040._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2026-01-02T16:35:22Z" + content=""" +Nothing changed on the git-annex side I'm pretty sure that would have fixed +this. + +I am inclined to chalk this up to something having crashed in some way on +that machine, and the problem later clearing up. Ugh. +"""]]
verify git sha from ciddb is in git repository
Fix bug importing a tree from a remote after git-annex forget has been
used, that could result in the imported tree missing git blobs.
In the unlikely situation where the ciddb contains a git sha that
is not in the git repository, this makes it just re-download the file from
the remote. Which should be no problem, since these are small files.
Sponsored-by: Dartmouth College's DANDI project
Fix bug importing a tree from a remote after git-annex forget has been
used, that could result in the imported tree missing git blobs.
In the unlikely situation where the ciddb contains a git sha that
is not in the git repository, this makes it just re-download the file from
the remote. Which should be no problem, since these are small files.
Sponsored-by: Dartmouth College's DANDI project
diff --git a/Annex/Import.hs b/Annex/Import.hs
index cda82022a9..6a71538563 100644
--- a/Annex/Import.hs
+++ b/Annex/Import.hs
@@ -982,14 +982,22 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
ImportSubTree subdir _ ->
getTopFilePath subdir </> fromImportLocation loc
- getcidkey cidmap db cid = liftIO $
+ getcidkey cidmap db cid = do
-- Avoiding querying the database when it's empty speeds up
-- the initial import.
- if CIDDb.databaseIsEmpty db
+ l <- liftIO $ if CIDDb.databaseIsEmpty db
then getcidkeymap cidmap cid
else CIDDb.getContentIdentifierKeys db rs cid >>= \case
[] -> getcidkeymap cidmap cid
l -> return l
+ filterM validcidkey l
+
+ -- Guard against a content identifier containing a git sha that is
+ -- not present in the repository. It's possible that it's not,
+ -- when git-annex forget is used.
+ validcidkey k = case keyGitSha k of
+ Just sha -> isJust <$> catObjectMetaData sha
+ Nothing -> return True
getcidkeymap cidmap cid =
atomically $ maybeToList . M.lookup cid <$> readTVar cidmap
diff --git a/CHANGELOG b/CHANGELOG
index cd6231b075..a8ada4f875 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,8 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
on network-multicast or network-info.
* stack.yaml: Update to lts-24.26.
* import: Fix display of some import errors.
+ * Fix bug importing a tree from a remote after git-annex forget has been
+ used, that could result in the imported tree mising git blobs.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_8_4d86764ebd02d547cad7eebbcd116759._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_8_4d86764ebd02d547cad7eebbcd116759._comment
new file mode 100644
index 0000000000..52156dd365
--- /dev/null
+++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_8_4d86764ebd02d547cad7eebbcd116759._comment
@@ -0,0 +1,7 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 8"""
+ date="2026-01-02T15:59:56Z"
+ content="""
+I've made it deal with the `git-annex forget` scenario now.
+"""]]
comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_7_e144dd5bab56646d07043de394b5f44b._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_7_e144dd5bab56646d07043de394b5f44b._comment new file mode 100644 index 0000000000..95b5345ca6 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_7_e144dd5bab56646d07043de394b5f44b._comment @@ -0,0 +1,29 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2026-01-02T15:27:57Z" + content=""" +One way I can see that this might happen is if `git-annex forget` +has been used, after a previous export/import. + +In that case, the content identifier database would be populated with a +GIT key, which would be used instead of downloading the file to be +imported. Resulting in a git sha being used, which could not be present in +the git repository. Because while the git-annex branch usually gets +imported/exported trees linked into it, `git-annex forget` erases that. + +So a possible scenario: + + git-annex export or import + git-annex forget + pushing git-annex branch to somewhere + in a separate git clone, pulling that git-annex branch + git-annex import + +That is worth trying to replicate. But it seems pretty unlikely to me that +is what you actually did ...? + +Leaving aside the possibility that `git hash-object` might be buggy and not +record the object in the git repository, that's the only way I can find for +this to possibly happen, after staring at the code for far too long. +"""]]
comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_6_7d0415ce72f0c9a609dc9ebb87dc69eb._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_6_7d0415ce72f0c9a609dc9ebb87dc69eb._comment
new file mode 100644
index 0000000000..76d73e52e1
--- /dev/null
+++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_6_7d0415ce72f0c9a609dc9ebb87dc69eb._comment
@@ -0,0 +1,29 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 6"""
+ date="2026-01-02T15:04:03Z"
+ content="""
+I was able to set up this same special remote myself (manually populating remote.log)
+and use with my own S3 creds (which of course have no special access rights to this bucket
+so it was all public access only), importing into a fresh repository.
+
+Part of that import included:
+
+ import s3-dandiarchive 000345/draft/dandiset.yaml
+ HttpExceptionRequest Request {
+ [...]
+ (StatusCodeException (Response {responseStatus = Status {statusCode = 403, statusMessage = "Forbidden"}, responseVersion = HTTP/1.1, responseHeaders = [("x-amz-request-id","T0PNM10TN8STRTK4"),("x-amz-id-2","pqZXYNtU9T0mQxmHvtBjr2weztjwWwP3GleV7Jy5P3DcZbCi7Mt4Kzqo1wpPj9Zy85cZ3CUPHro="),("Content-Type","application/xml"),("Transfer-Encoding","chunked"),("Date","Fri, 02 Jan 2026 15:01:16 GMT"),("Server","AmazonS3")], responseBody = (), responseCookieJar = CJ {expose = []}, responseClose' = ResponseClose, responseOriginalRequest = Request {
+ [...]
+ , responseEarlyHints = []}) "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Error><Code>AccessDenied</Code><Message>Access Denied</Message><RequestId>T0PNM10TN8STRTK4</RequestId><HostId>pqZXYNtU9T0mQxmHvtBjr2weztjwWwP3GleV7Jy5P3DcZbCi7Mt4Kzqo1wpPj9Zy85cZ3CUPHro=</HostId></Error>")
+ ok
+
+But, the import ended with:
+
+ Failed to import some files from s3-dandiarchive. Re-run command to resume import.
+
+And did not create a branch, so I have not been able to reproduce the
+problem.
+
+Digging into why it says "ok" there, that was unfortunately only a display
+problem. Corrected that.
+"""]]
comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_5_d60e3214f167ef42f76938738acf135b._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_5_d60e3214f167ef42f76938738acf135b._comment new file mode 100644 index 0000000000..968081e5bf --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_5_d60e3214f167ef42f76938738acf135b._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2026-01-02T14:43:14Z" + content=""" +All being small files does make me think this bug is somehow specfic to +adding the files to git. So it would be very useful to re-run the +reproducer again, with annex.largefiles this time configured so everything +is annexed. + +> > And when you replicated the problem from the backup, were you using it in the configuration where it cannot access those? +> +> if I got the question right and since I do not recall now -- judging from me using `( source .git/secrets.env; git-annex import master...` I think I was with credentials allowing to access them (hence no errors while importing) + +Well that's why I asked. It's not clear to me if it ever did show a failure, +when used in the configuration where it couldn't access the files. + +It seems equally likely that it somehow incorrectly thought it succeeded. +"""]]
response
diff --git a/doc/tips/offline_archive_drives/comment_9_ed4ebae6bb903dcb6447dd3efe6c1617._comment b/doc/tips/offline_archive_drives/comment_9_ed4ebae6bb903dcb6447dd3efe6c1617._comment new file mode 100644 index 0000000000..574703d9ad --- /dev/null +++ b/doc/tips/offline_archive_drives/comment_9_ed4ebae6bb903dcb6447dd3efe6c1617._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: Directory remotes in offline drives for archiving?""" + date="2026-01-02T14:29:49Z" + content=""" +The only time git-annex will complain about being unable to lock down a file on +a remote is when you are dropping a file from a special remote, and the only +copy is in another special remote. + + drop foo (from dirremote...) (unsafe) + Unable to lock down 1 copy of file necessary to safely drop it. + + These remotes do not support locking: otherdirremote + + (Use --force to override this check, or adjust numcopies.) + +In that situation, you can either use `--force` or `git-annex get` the file, +then drop from the remote, and then drop the file from the local repository. +The latter avoids any possible concurrency problems, but `--force` is of +course faster, and would be fine in your situation. + +Dropping a file from a local repository that is present in a special remote +does not have this problem. +"""]]
dumbpipe version
diff --git a/doc/special_remotes/p2p/git-annex-p2p-iroh b/doc/special_remotes/p2p/git-annex-p2p-iroh index 83be015c6f..f03f5ae0b6 100755 --- a/doc/special_remotes/p2p/git-annex-p2p-iroh +++ b/doc/special_remotes/p2p/git-annex-p2p-iroh @@ -1,8 +1,8 @@ #!/bin/sh # Allows git-annex to use iroh for P2P connections. # -# This uses iroh's dumbpipe program. It needs a version with the -# generate-ticket command, which was added in this pull request: +# This uses iroh's dumbpipe program. It needs version 0.33 or newer, +# with the generate-ticket command, which was added in this pull request: # https://github.com/n0-computer/dumbpipe/pull/86 # # Copyright 2025 Joey Hess; licenced under the GNU GPL version 3 or higher.
comment
diff --git a/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_9e31745d4e890b2d0fe8d997c9bf169a._comment b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_9e31745d4e890b2d0fe8d997c9bf169a._comment new file mode 100644 index 0000000000..e90c172b60 --- /dev/null +++ b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_9e31745d4e890b2d0fe8d997c9bf169a._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2026-01-01T18:45:26Z" + content=""" +Looks to me like arch is no longer stuck on the old 9.4.8 ghc but has a +slightly newer 9.6.6. Which is the same as Debian stable. + +So, I am probably going to make git-annex only support back to that +version, to simplify things. + +Please let me know if I have misunderstood the situation in arch land.. +"""]]
correct link to arch linux package
It seems to have moved sections? Don't understand arch
Also, remove the non-official packages, which all seem very old or gone.
It seems to have moved sections? Don't understand arch
Also, remove the non-official packages, which all seem very old or gone.
diff --git a/doc/install/ArchLinux.mdwn b/doc/install/ArchLinux.mdwn index 6919c4cbec..303904a9a8 100644 --- a/doc/install/ArchLinux.mdwn +++ b/doc/install/ArchLinux.mdwn @@ -1,21 +1,3 @@ -There is now an [official git-annex package for Arch](https://www.archlinux.org/packages/community/x86_64/git-annex/), so to install it: +There is now an [official git-annex package for Arch](https://www.archlinux.org/packages/extra/x86_64/git-annex/), so to install it: pacman -S git-annex - -There are at least three non non-official packages for git-annex in the Arch Linux User Repository. Any of these may be installed manually per [AUR guidelines](https://wiki.archlinux.org/index.php/AUR_User_Guidelines#Installing_packages) or using a wrapper such as [`yaourt`](https://wiki.archlinux.org/index.php/yaourt) shown below. - -1. A git-annex package is available in the haskell-core AUR <https://wiki.archlinux.org/index.php/ArchHaskell> - -2. A development package is available at [git-annex-git](https://aur.archlinux.org/packages/git-annex-git/) that functions similarly to the source package but builds directly from the HEAD of the git repository rather that the last official release. - - $ yaourt -Sy git-annex-git - -3. A Cabal sandbox build is also available - - $ yaourt -Sy git-annex-cabal - -Finally you may choose to forgo the Arch Linux package system entirely and install git-annex directly through cabal. - - $ pacman -S git rsync curl wget gnupg openssh cabal-install - $ cabal update - $ cabal install git-annex --bindir=$HOME/bin
found it
diff --git a/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment deleted file mode 100644 index 8ba801115d..0000000000 --- a/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment +++ /dev/null @@ -1,9 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 3""" - date="2026-01-01T18:33:42Z" - content=""" -@caleb from what I can see there is no current version of git-annex -packaged in Arch, at least <https://aur.archlinux.org/packages?O=0&SeB=nd&K=git-annex&outdated=&SB=p&SO=d&PP=50&submit=Go> -only has old stuff. Where did your package go? -"""]]
comment
diff --git a/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment new file mode 100644 index 0000000000..8ba801115d --- /dev/null +++ b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2026-01-01T18:33:42Z" + content=""" +@caleb from what I can see there is no current version of git-annex +packaged in Arch, at least <https://aur.archlinux.org/packages?O=0&SeB=nd&K=git-annex&outdated=&SB=p&SO=d&PP=50&submit=Go> +only has old stuff. Where did your package go? +"""]]
Remove support for building with old versions of persistent-sqlite
Old versions of persistent-sqlite don't properly support non-ascii
paths when run in a non-unicode locale. So this both simplifies the code
and avoids buggy behavior.
Old versions of persistent-sqlite don't properly support non-ascii
paths when run in a non-unicode locale. So this both simplifies the code
and avoids buggy behavior.
diff --git a/BuildFlags.hs b/BuildFlags.hs
index d4a3a4f73e..60f240c368 100644
--- a/BuildFlags.hs
+++ b/BuildFlags.hs
@@ -80,7 +80,6 @@ dependencyVersions = map fmt $ sortBy (comparing (CI.mk . fst))
, ("uuid", VERSION_uuid)
, ("bloomfilter", VERSION_bloomfilter)
, ("http-client", VERSION_http_client)
- , ("persistent-sqlite", VERSION_persistent_sqlite)
, ("crypton", VERSION_crypton)
, ("aws", VERSION_aws)
, ("DAV", VERSION_DAV)
diff --git a/CHANGELOG b/CHANGELOG
index 85aa0528a3..96d3df4f89 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,7 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
* fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
* When displaying sqlite error messages, include the path to the database.
+ * Remove support for building with old versions of persistent-sqlite.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs
index 4fdfd5b292..e5a701ba3f 100644
--- a/Database/ContentIdentifier.hs
+++ b/Database/ContentIdentifier.hs
@@ -5,7 +5,6 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
-{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TypeOperators, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
@@ -50,13 +49,7 @@ import qualified Logs.ContentIdentifier as Log
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
-
-#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.RawFilePath
-#else
-import Database.Persist.Sqlite (runSqlite)
-import qualified Data.Text as T
-#endif
data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue Bool
@@ -103,13 +96,8 @@ openDb = do
runMigrationSilent migrateContentIdentifier
-- Migrate from old versions of database, which had buggy
-- and suboptimal uniqueness constraints.
-#if MIN_VERSION_persistent_sqlite(2,13,3)
else liftIO $ runSqlite' (fromOsPath db) $ void $
runMigrationSilent migrateContentIdentifier
-#else
- else liftIO $ runSqlite (T.pack (fromRawFilePath db)) $ void $
- runMigrationSilent migrateContentIdentifier
-#endif
h <- liftIO $ H.openDbQueue db "content_identifiers"
return $ ContentIdentifierHandle h isnew
diff --git a/Database/Handle.hs b/Database/Handle.hs
index f859467b8e..135811ca86 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -195,11 +195,7 @@ runSqliteRobustly tablename db a = do
| otherwise -> rethrow $ errmsg ("after successful sqlite database " ++ fromOsPath (safeOutput db) ++ " open") ex
opensettle retries ic = do
-#if MIN_VERSION_persistent_sqlite(2,13,3)
conn <- Sqlite.open' (fromOsPath db)
-#else
- conn <- Sqlite.open (T.pack (fromOsPath db))
-#endif
settle conn retries ic
settle conn retries ic = do
diff --git a/Database/Init.hs b/Database/Init.hs
index eab3a6f32d..c516c89c76 100644
--- a/Database/Init.hs
+++ b/Database/Init.hs
@@ -5,7 +5,7 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
-{-# LANGUAGE OverloadedStrings, CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
module Database.Init where
@@ -13,9 +13,7 @@ import Annex.Common
import Annex.Perms
import Utility.FileMode
import qualified Utility.RawFilePath as R
-#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.RawFilePath
-#endif
import Database.Persist.Sqlite
import Lens.Micro
@@ -36,11 +34,7 @@ initDb db migration = do
let tmpdb = tmpdbdir </> literalOsPath "db"
let tmpdb' = fromOsPath tmpdb
createAnnexDirectory tmpdbdir
-#if MIN_VERSION_persistent_sqlite(2,13,3)
liftIO $ runSqliteInfo' tmpdb' (enableWAL tmpdb) migration
-#else
- liftIO $ runSqliteInfo (enableWAL tmpdb) migration
-#endif
setAnnexDirPerm tmpdbdir
-- Work around sqlite bug that prevents it from honoring
-- less restrictive umasks.
diff --git a/Database/RawFilePath.hs b/Database/RawFilePath.hs
index e154b74a3a..fdedf65762 100644
--- a/Database/RawFilePath.hs
+++ b/Database/RawFilePath.hs
@@ -31,11 +31,10 @@
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-}
-{-# LANGUAGE OverloadedStrings, CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
module Database.RawFilePath where
-#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
import Utility.RawFilePath (RawFilePath)
@@ -92,4 +91,3 @@ withSqliteConnInfo'
-> (SqlBackend -> m a)
-> m a
withSqliteConnInfo' db = withSqlConn . openWith' db const
-#endif
diff --git a/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn b/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn
index fe6098f754..3610ac03f3 100644
--- a/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn
+++ b/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn
@@ -1,6 +1,7 @@
Hi,
### Please describe the problem.
+
I'm trying to set up a git-annex repo for my books/technical papers to have easy access to them on my desktop and laptop. I'm using a centralized server (following [this guide](https://git-annex.branchable.com/tips/centralized_git_repository_tutorial/on_your_own_server/)) to make it easy to sync between my machines.
The issue is however that sqlite crashes when I'm trying to get a file from my server. See the log further down for the error message. I'm suspecting it is due to the repo on my server is named `Böcker` (swedish name for books). It does work if I'm cloning it locally on my server. E.g.
@@ -105,3 +106,11 @@ I'm not giving up on this that easily. Worst case I'll just rename my repo on my
Thank you for all the hours developing this software!
+> This seems to be the same bug that was fixed in [[!commit 8a3beabf350899e369dcd57a72432930581fbc25]]
+> and released in version 10.20231227. While this bug actually has a fixed
+> version of git-annex, the version output shows it was built with too
+> old a version of persistent-sqlite to get the fix.
+>
+> I've now updated git-annex's build deps, so all future versions will
+> be with a sufficiently new persistent-sqlite to not have this problem.
+> [[done]] --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index f0fdb7c031..1006e7a59a 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -245,7 +245,7 @@ Executable git-annex
conduit,
time (>= 1.9.1),
old-locale,
- persistent-sqlite (>= 2.8.1),
+ persistent-sqlite (>= 2.13.3),
persistent (>= 2.8.1),
persistent-template (>= 2.8.0),
unliftio-core,
When displaying sqlite error messages, include the path to the database
diff --git a/CHANGELOG b/CHANGELOG
index 476c305d8f..85aa0528a3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
* fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
+ * When displaying sqlite error messages, include the path to the database.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/Database/Handle.hs b/Database/Handle.hs
index ff358f7588..f859467b8e 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -1,6 +1,6 @@
{- Persistent sqlite database handles.
-
- - Copyright 2015-2023 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -24,6 +24,7 @@ import Utility.Debug
import Utility.DebugLocks
import Utility.InodeCache
import Utility.OsPath
+import Utility.SafeOutput
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
@@ -78,7 +79,7 @@ closeDb (DbHandle _db worker jobs _) = do
- it is able to run.
-}
queryDb :: DbHandle -> SqlPersistM a -> IO a
-queryDb (DbHandle _db _ jobs errvar) a = do
+queryDb (DbHandle db _ jobs errvar) a = do
res <- newEmptyMVar
putMVar jobs $ QueryJob $
debugLocks $ liftIO . putMVar res =<< tryNonAsync a
@@ -86,7 +87,7 @@ queryDb (DbHandle _db _ jobs errvar) a = do
Right r -> either throwIO return r
Left BlockedIndefinitelyOnMVar -> do
err <- takeMVar errvar
- giveup $ "sqlite worker thread crashed: " ++ err
+ giveup $ "sqlite worker thread for " ++ fromOsPath (safeOutput db) ++ " crashed: " ++ err
{- Writes a change to the database.
-
@@ -111,7 +112,7 @@ commitDb h@(DbHandle db _ _ errvar) wa =
robustly a
Left BlockedIndefinitelyOnMVar -> do
err <- takeMVar errvar
- giveup $ "sqlite worker thread crashed: " ++ err
+ giveup $ "sqlite worker thread for " ++ fromOsPath (safeOutput db) ++ " crashed: " ++ err
briefdelay = 100000 -- 1/10th second
@@ -191,7 +192,7 @@ runSqliteRobustly tablename db a = do
briefdelay
retryHelper "access" ex maxretries db retries ic $
go conn
- | otherwise -> rethrow $ errmsg "after successful open" ex
+ | otherwise -> rethrow $ errmsg ("after successful sqlite database " ++ fromOsPath (safeOutput db) ++ " open") ex
opensettle retries ic = do
#if MIN_VERSION_persistent_sqlite(2,13,3)
@@ -217,7 +218,7 @@ runSqliteRobustly tablename db a = do
if e == Sqlite.ErrorIO
then opensettle
else settle conn
- | otherwise -> rethrow $ errmsg "while opening database connection" ex
+ | otherwise -> rethrow $ errmsg ("while opening sqlite database " ++ fromOsPath (safeOutput db) ++ " connection") ex
-- This should succeed for any table.
nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1"
@@ -274,7 +275,7 @@ closeRobustly db conn = go maxretries emptyDatabaseInodeCache
| e == Sqlite.ErrorBusy -> do
threadDelay briefdelay
retryHelper "close" ex maxretries db retries ic go
- | otherwise -> rethrow $ errmsg "while closing database connection" ex
+ | otherwise -> rethrow $ errmsg ("while closing sqlite database " ++ fromOsPath (safeOutput db) ++ " connection") ex
briefdelay = 1000 -- 1/1000th second
@@ -312,7 +313,7 @@ retryHelper action err maxretries db retries ic a = do
databaseAccessStalledMsg :: Show err => String -> OsPath -> err -> String
databaseAccessStalledMsg action db err =
- "Repeatedly unable to " ++ action ++ " sqlite database " ++ fromOsPath db
+ "Repeatedly unable to " ++ action ++ " sqlite database " ++ fromOsPath (safeOutput db)
++ ": " ++ show err ++ ". "
++ "Perhaps another git-annex process is suspended and is "
++ "keeping this database locked?"
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed.mdwn b/doc/bugs/SQLite3_database_disk_image_malformed.mdwn
index ca25c815f4..6e38637d05 100644
--- a/doc/bugs/SQLite3_database_disk_image_malformed.mdwn
+++ b/doc/bugs/SQLite3_database_disk_image_malformed.mdwn
@@ -41,3 +41,5 @@ The only SQLite3 database I can find is in .git/annex/keysdb . I can open that u
I've been happily using git-annex for many many years, first time I've encountered an issue like this.
+> Calling this [[done]] since the sqlite error messages have been improved.
+> --[[Joey]]
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_6_e16f300193b36db6793d9d6e2808e56a._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_6_e16f300193b36db6793d9d6e2808e56a._comment
new file mode 100644
index 0000000000..c610260d53
--- /dev/null
+++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_6_e16f300193b36db6793d9d6e2808e56a._comment
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 6"""
+ date="2026-01-01T17:58:00Z"
+ content="""
+> A useful thing to display might be the path to the corrupted database file and advice to remove it?
+
+Good idea to display the path. I've made that change.
+
+I don't think I want to make git-annex suggest deleting sqlite databases
+anytime sqlite crashes for any reason. While they are safe to delete,
+that encourages users to shrug and move on and tends to normalize any
+problem with sqlite. In reality, problems with sqlite are very rare,
+and I'd like to hear about them and understand them.
+"""]]
response
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_5_2f6a291a2bb37000f6e3b757a00a0713._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_5_2f6a291a2bb37000f6e3b757a00a0713._comment
new file mode 100644
index 0000000000..4c90a0235e
--- /dev/null
+++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_5_2f6a291a2bb37000f6e3b757a00a0713._comment
@@ -0,0 +1,21 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 5"""
+ date="2026-01-01T17:29:54Z"
+ content="""
+Your previous problem with the sqlite database cannot have caused fsck to
+detect a checksum problem with your annexed file.
+
+It looks like you have somehow modified annex object files, eg files in
+`.git/annex/objects`. git-annex sets permissions that usually prevent such
+a thing from happening.
+
+There is no way to make git-annex accept a version of a file with a different
+checksum than the one recorded in git. Instead you need to `git-annex add` the
+new version of the files to the repository in place of the old version.
+
+Here is a bash script that will pull the files out of `.git/annex/bad/`
+and update the annexed files:
+
+ IFS=$'\n'; for x in $(git-annex find --format='${key}\n${file}\n'); do if [ "$l" ]; then f="$x"; l=; if [ -e ".git/annex/bad/$k" ]; then mv ".git/annex/bad/$k" "$f"; git-annex add "$f" ; fi; else k="$x"; l=1; fi; done
+"""]]
improve synopsis for fix
It operates on pointers, whether those are symlinks or unlocked pointer
files.
It operates on pointers, whether those are symlinks or unlocked pointer
files.
diff --git a/Command/Fix.hs b/Command/Fix.hs index 05292059e5..2852fac9a3 100644 --- a/Command/Fix.hs +++ b/Command/Fix.hs @@ -29,7 +29,7 @@ import Utility.Touch cmd :: Command cmd = noCommit $ withAnnexOptions [annexedMatchingOptions, jsonOptions] $ command "fix" SectionMaintenance - "fix up links to annexed content" + "fix up pointers to annexed content" paramPaths (withParams seek) seek :: CmdParams -> CommandSeek diff --git a/doc/git-annex-fix.mdwn b/doc/git-annex-fix.mdwn index 1ac2165c89..e1ec3fc771 100644 --- a/doc/git-annex-fix.mdwn +++ b/doc/git-annex-fix.mdwn @@ -1,6 +1,6 @@ # NAME -git-annex fix - fix up links to annexed content +git-annex fix - fix up pointers to annexed content # SYNOPSIS @@ -13,8 +13,9 @@ content. This is useful to run manually when you have been moving the symlinks around, but is done automatically when committing a change with git too. -Also, populates unlocked files with annexed content. Usually this happens -automatically, but some git commands can leave them as unpopulated. +Also, populates unlocked pointer files with annexed content. +Usually this happens automatically, but some git commands can leave them +unpopulated. Also, adjusts unlocked files to be copies or hard links as configured by annex.thin.
fix: handle unlocked pointer files
fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
populatePointerFile' is safe to use here because seeking has found the
key, and isPointerFile is checked just before calling it.
fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
populatePointerFile' is safe to use here because seeking has found the
key, and isPointerFile is checked just before calling it.
diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs
index 51c431d5ad..4c0743c2b4 100644
--- a/Annex/Content/PointerFile.hs
+++ b/Annex/Content/PointerFile.hs
@@ -1,6 +1,6 @@
{- git-annex pointer files
-
- - Copyright 2010-2018 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -33,21 +33,29 @@ import System.PosixCompat.Files (fileMode)
populatePointerFile :: Restage -> Key -> OsPath -> OsPath -> Annex (Maybe InodeCache)
populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
where
- go (Just k') | k == k' = do
- destmode <- liftIO $ catchMaybeIO $
- fileMode <$> R.getFileStatus (fromOsPath f)
- (ic, populated) <- replaceWorkTreeFile f $ \tmp -> do
- ok <- linkOrCopy k obj tmp destmode >>= \case
- Just _ -> thawContent tmp >> return True
- Nothing -> liftIO (writePointerFile tmp k destmode) >> return False
- ic <- withTSDelta (liftIO . genInodeCache tmp)
- return (ic, ok)
- maybe noop (restagePointerFile restage f) ic
- if populated
- then return ic
- else return Nothing
+ go (Just k') | k == k' = populatePointerFile' restage k obj f
go _ = return Nothing
+{- Before calling, must verify that the pointer file is a pointer to the key.
+ -
+ - This returns Nothing when populating the pointer file fails due to eg,
+ - not enough disk space.
+ -}
+populatePointerFile' :: Restage -> Key -> OsPath -> OsPath -> Annex (Maybe InodeCache)
+populatePointerFile' restage k obj f = do
+ destmode <- liftIO $ catchMaybeIO $
+ fileMode <$> R.getFileStatus (fromOsPath f)
+ (ic, populated) <- replaceWorkTreeFile f $ \tmp -> do
+ ok <- linkOrCopy k obj tmp destmode >>= \case
+ Just _ -> thawContent tmp >> return True
+ Nothing -> liftIO (writePointerFile tmp k destmode) >> return False
+ ic <- withTSDelta (liftIO . genInodeCache tmp)
+ return (ic, ok)
+ maybe noop (restagePointerFile restage f) ic
+ if populated
+ then return ic
+ else return Nothing
+
{- Removes the content from a pointer file, replacing it with a pointer.
-
- Does not check if the pointer file is modified. -}
diff --git a/CHANGELOG b/CHANGELOG
index a034823796..476c305d8f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,10 @@
+git-annex (10.20251216) UNRELEASED; urgency=medium
+
+ * fix: Populate unlocked pointer files in situations where a git command,
+ like git reset or git stash, leaves them unpopulated.
+
+ -- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
+
git-annex (10.20251215) upstream; urgency=medium
* Added annex.trashbin configuration.
diff --git a/Command/Fix.hs b/Command/Fix.hs
index a12747ee49..05292059e5 100644
--- a/Command/Fix.hs
+++ b/Command/Fix.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -14,6 +14,7 @@ import Config
import qualified Annex
import Annex.ReplaceFile
import Annex.Content
+import Annex.Content.PointerFile
import Annex.Perms
import Annex.Link
import qualified Database.Keys
@@ -54,11 +55,24 @@ start fixwhat si file key = do
fixby $ fixSymlink file wantlink
| otherwise -> stop
Nothing -> case fixwhat of
- FixAll -> fixthin
+ FixAll -> fixpointers
FixSymlinks -> stop
where
file' = fromOsPath file
+
fixby = starting "fix" (mkActionItem (key, file)) si
+
+ fixpointers =
+ ifM (isJust <$> liftIO (isPointerFile file))
+ ( stopUnless (inAnnex key) $ fixby $ do
+ obj <- calcRepo (gitAnnexLocation key)
+ populatePointerFile' QueueRestage key obj file >>= \case
+ Just ic -> Database.Keys.addInodeCaches key [ic]
+ Nothing -> giveup "not enough disk space to populate pointer file"
+ next $ return True
+ , fixthin
+ )
+
fixthin = do
obj <- calcRepo (gitAnnexLocation key)
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
@@ -71,7 +85,6 @@ start fixwhat si file key = do
(Just n, Just n', False) | n > 1 && n == n' ->
fixby $ breakHardLink file key obj
_ -> stop
-
breakHardLink :: OsPath -> Key -> OsPath -> CommandPerform
breakHardLink file key obj = do
replaceWorkTreeFile file $ \tmp -> do
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn
index caed1ef7e6..d4cc46a24a 100644
--- a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn
+++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn
@@ -112,3 +112,5 @@ git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471
```
[[!meta title="after git reset --hard, git-annex get of unlocked unpopulated pointer file does nothing"]]
+
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_3_09e90b656763e3a8452260f0abead168._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_3_09e90b656763e3a8452260f0abead168._comment
new file mode 100644
index 0000000000..d52acaf827
--- /dev/null
+++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_3_09e90b656763e3a8452260f0abead168._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2026-01-01T16:17:58Z"
+ content="""
+I think it makes sense for `git-annex fix` to deal with this situation.
+In both cases the user has run a git command that affects files in the
+working tree, and it has left the annexed content not accessible.
+"""]]
diff --git a/doc/git-annex-fix.mdwn b/doc/git-annex-fix.mdwn
index 5a670cd1a0..1ac2165c89 100644
--- a/doc/git-annex-fix.mdwn
+++ b/doc/git-annex-fix.mdwn
@@ -9,10 +9,12 @@ git annex fix `[path ...]`
# DESCRIPTION
Fixes up symlinks that have become broken to again point to annexed
-content.
+content. This is useful to run manually when you have been moving the
+symlinks around, but is done automatically when committing a change
+with git too.
-This is useful to run manually when you have been moving the symlinks
-around, but is done automatically when committing a change with git too.
+Also, populates unlocked files with annexed content. Usually this happens
+automatically, but some git commands can leave them as unpopulated.
Also, adjusts unlocked files to be copies or hard links as
configured by annex.thin.
diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn
index 6f6eba8140..7c44779641 100644
--- a/doc/git-annex-smudge.mdwn
+++ b/doc/git-annex-smudge.mdwn
@@ -47,8 +47,8 @@ it records which worktree files need to be updated, and
the content. That is run by several git hooks, including post-checkout
and post-merge. However, a few git commands, notably `git stash` and
`git cherry-pick`, do not run any hooks, so after using those commands
-you can manually run `git annex smudge --update` to update the working
-tree.
+you can manually run `git annex smudge --update` (or `git-annex fix`)
+to update the working tree.
# OPTIONS
Added a comment
diff --git a/doc/special_remotes/rclone/comment_10_edef3c4eb5f6d06e496c0e90329d8143._comment b/doc/special_remotes/rclone/comment_10_edef3c4eb5f6d06e496c0e90329d8143._comment new file mode 100644 index 0000000000..4908d5b5fe --- /dev/null +++ b/doc/special_remotes/rclone/comment_10_edef3c4eb5f6d06e496c0e90329d8143._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="nadir" + avatar="http://cdn.libravatar.org/avatar/2af9174cf6c06de802104d632dc40071" + subject="comment 10" + date="2026-01-01T11:27:39Z" + content=""" +That makes a lot of sense. So if I understood things right, the correct place to work on this is rclone. I think I'll try to ask what they think of this kind of use case. + +Thanks for the explanation +"""]]
Added a comment: Fixing a bit of a mess
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_4_8cd94b23828fa865c6f04b021b971b55._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_4_8cd94b23828fa865c6f04b021b971b55._comment new file mode 100644 index 0000000000..7d4e86b3a0 --- /dev/null +++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_4_8cd94b23828fa865c6f04b021b971b55._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="puck" + avatar="http://cdn.libravatar.org/avatar/06d3f4f0a82dd00a84f8f8fabc8e537d" + subject="Fixing a bit of a mess" + date="2026-01-01T09:07:11Z" + content=""" +While the database file was corrupt, I did some work (not realising it was corrupt) to fix up MP3 tags in my music collection. Now when I run git annex fsck I'm getting errors like: + + fsck music/Arlo_Guthrie/The_Best_Of_Arlo_Guthrie/01-Alices_Restaurant_Massacree.mp3 + music/Arlo_Guthrie/The_Best_Of_Arlo_Guthrie/01-Alices_Restaurant_Massacree.mp3: Bad file size (128 B larger); moved to .git/annex/bad/SHA256E-s17800671--1a992cda34a5ab52d42cd7a420114fc122458ff57672e468f8403faa77f209b0.mp3 + + ** No known copies exist of music/Arlo_Guthrie/The_Best_Of_Arlo_Guthrie/01-Alices_Restaurant_Massacree.mp3 + failed + +and + + fsck music/Arrow/misc/Hot_Hot_Hot.mp3 (checksum...) + music/Arrow/misc/Hot_Hot_Hot.mp3: Bad file content; moved to .git/annex/bad/SHA256E-s3444736--3178689ce4a69a0e94fe11afaf077b6471077fd2d5128a5a65a71dcf84272ed5.mp3 + + ** No known copies exist of music/Arrow/misc/Hot_Hot_Hot.mp3 + failed + +I've tried using git annex reinject, but that is refused as the checksum doesn't match. + +Can I tell git-annex to just accept the files that I have in my repository as being correct? +"""]]
Added a comment: More details in error message?
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_3_9ae97b4f4cacefef77542a65455cc1d3._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_3_9ae97b4f4cacefef77542a65455cc1d3._comment new file mode 100644 index 0000000000..6b40bdbb51 --- /dev/null +++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_3_9ae97b4f4cacefef77542a65455cc1d3._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="puck" + avatar="http://cdn.libravatar.org/avatar/06d3f4f0a82dd00a84f8f8fabc8e537d" + subject="More details in error message?" + date="2026-01-01T07:32:23Z" + content=""" +Hey, + +I just came back to this after trying to do something in my repository. Good to hear I can just the SQlite file, done that now, and it is busy running fsck now. + +A useful thing to display might be the path to the corrupted database file and advice to remove it? +"""]]
todo
diff --git a/doc/todo/support_more_backup_software_like_borg.mdwn b/doc/todo/support_more_backup_software_like_borg.mdwn new file mode 100644 index 0000000000..3f097babf4 --- /dev/null +++ b/doc/todo/support_more_backup_software_like_borg.mdwn @@ -0,0 +1,17 @@ +The borg special remote allows git-annex to treat borg backups of a +git-annex repository as just another remote. This could also be done for +other backup software. + +restic seems like a good candidate. What other commonly used backup +software might be good to support? Comments welcome with suggestions.. + +--- + +Currently, support for these has to be in git-annex, it cannot be an +external special remote. Just providing a way in the external special +remote interface to set `thirdPartyPopulated` might be enough to allow +using external special remotes for this. + +The borg implementation does have getImported which looks at the git-annex +branch, and is used in an optimisation. It would be good to factor that out +to a common optimisation for all `thirdPartyPopulated` remotes. --[[Joey]]
response
diff --git a/doc/forum/Find_never__40____33____41___used_files_in_annex__63__/comment_1_470f9ec8a18e2080558af8d5a568bc97._comment b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__/comment_1_470f9ec8a18e2080558af8d5a568bc97._comment new file mode 100644 index 0000000000..e40d60ca0e --- /dev/null +++ b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__/comment_1_470f9ec8a18e2080558af8d5a568bc97._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-31T18:58:44Z" + content=""" +`git-annex p2phttp` does update the git-annex branch itself when receiving +files. And generally, any time git-annex stores an object in a repository, +it updates the git-annex branch accordingly. + +So, you can fetch from the remote and learn about those objects, +and then `git-annex unused --from=$remote` will show you unused objects in +the remote. + +When running `git-annex unused` on the local repository, it does list all +objects in the local repository. So if an object somehow does get into the +repository without a branch update, it will still show as unused. + +There is no way to list all objects present in a remote. Special remotes +are not required to support enumeration at all. So, if an object got sent +to a special remote, and the git-annex branch record of that was lost, +there would be no way to find that unused object. +"""]]
response
diff --git a/doc/special_remotes/rclone/comment_9_d0c23b1d2c2267ef0e1e91e8b33385df._comment b/doc/special_remotes/rclone/comment_9_d0c23b1d2c2267ef0e1e91e8b33385df._comment new file mode 100644 index 0000000000..6effbb7263 --- /dev/null +++ b/doc/special_remotes/rclone/comment_9_d0c23b1d2c2267ef0e1e91e8b33385df._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: passing additional flags to rclone""" + date="2025-12-31T18:38:21Z" + content=""" +Passing arbitrary parameters to rclone is not supported. It would possibly +be a security hole if it were supported, because if there were a parameter +say --deleteeverything, you could `initremote` a special remote with that +parameter, and then wait for someone else to `enableremote` and use that +special remote and have a bad day. + +The "*" in `initremote --whatelse` output is a placeholder. It is not +intended to mean that every possible thing is passed through, but that, +if rclone supports some additional parameters, and explicitly asks for +them (via GETCONFIG), they will be passed through to it. + +I think that currently, `rclone gitannex` does not request any parameters. +It would certainly be possible to make it support something like +"bwlimit=3000". +"""]]
comment
diff --git a/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_2_5fe65196b2f160c63305cc0274cf1530._comment b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_2_5fe65196b2f160c63305cc0274cf1530._comment new file mode 100644 index 0000000000..10f1ccfb42 --- /dev/null +++ b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_2_5fe65196b2f160c63305cc0274cf1530._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-31T18:18:50Z" + content=""" +It might well be possible to implement this for restic too. +The crucial thing needed is for git-annex to +be able to list the backups and find the annexed files. For borg, +it does that by using `borg list`. +"""]]
comment
diff --git a/doc/git-annex-preferred-content/comment_8_284cfcc5f9ea6c534687e4d1afa2420a._comment b/doc/git-annex-preferred-content/comment_8_284cfcc5f9ea6c534687e4d1afa2420a._comment new file mode 100644 index 0000000000..6e613eec1b --- /dev/null +++ b/doc/git-annex-preferred-content/comment_8_284cfcc5f9ea6c534687e4d1afa2420a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: Order the remotes are processed in""" + date="2025-12-31T18:08:50Z" + content=""" +The order depends on the particular command. + +This kind of problem generally means you need to rethink your preferred +content expressions. +"""]]
comments
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn index 8048b070e9..caed1ef7e6 100644 --- a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn @@ -111,3 +111,4 @@ There should be a more seamless way to recover, or should I generally always use git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 ``` +[[!meta title="after git reset --hard, git-annex get of unlocked unpopulated pointer file does nothing"]] diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_1_2b9108af318c95a6459fce0b6bb92abd._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_1_2b9108af318c95a6459fce0b6bb92abd._comment new file mode 100644 index 0000000000..fcb8d284b8 --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_1_2b9108af318c95a6459fce0b6bb92abd._comment @@ -0,0 +1,38 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-31T16:23:36Z" + content=""" +`git-annex get` is not doing anything because the content of the annexed +file is already present in the repository, as shown by the `git-annex +whereis`. All that `get` does is get objects that are not present in the +git-annex repository. It does not fix up after other problems. + +As far as I can tell, this will reproduce your situation: + + git-annex init + git config annex.addunlocked true + echo 1 > foo + git-annex add foo + git commit -m add + echo 2 > foo + git config annex.largefiles nothing + git annex add foo + git commit -m add + git config annex.largefiles anything + git reset --hard HEAD^ + +The command you could have run then, which would have fixed it right up, is +`git status`. That will populate the files with their annexed content. 
And +it will recommend running `git-annex restage` to fix up the index to +reflect those changes. (Running `git-annex restage` on its own is useless +though.) + +This all happens because `git reset --hard` does not run any git hooks. +So `git-annex smudge --update` does not get a chance to automatically +run like it usually would when a checkout or merge is made. +Until that `git status`. + +Running `git-annex smudge --update` after the `git reset` will also fix +things right up. +"""]] diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_2_03b5da4a5018086aa5df396d8c8559ee._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_2_03b5da4a5018086aa5df396d8c8559ee._comment new file mode 100644 index 0000000000..c3d40ead0e --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_2_03b5da4a5018086aa5df396d8c8559ee._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-31T17:29:42Z" + content=""" +I can understand reaching for `git-annex get` in this situation. But I +don't know if it really makes sense to have that and every other command +that gets the content of a file also handle populating annex pointer +files that have been staged. That seems like it might very +well violate least surprise in other situations, or cause problems in other +situations. + +The best fix would be if `git reset`, `git stash`, `git cherry-pick` and +whatever else were made to run some hook. + +One could just as well complain about this: + + git-annex add foo + mkdir subdir + git mv foo subdir/ + cat subdir/foo + cat: subdir/foo: No such file or directory + git-annex get subdir/foo + cat subdir/foo + cat: subdir/foo: No such file or directory + +There `git-annex get` does not fix up the annex symlink for the new +location of the file. 
The situation will get resolved by `git-annex fix` +which gets run on `git commit` and is usually enough that the user doesn't +need to remember to run it. + +Not only is it out of scope for `git-annex get` to deal with that +situation, if it modified the symlink it would then leave a change in the +working tree. Which is surprising behavior. +"""]]
diff --git a/doc/forum/annex_forget_that_operates_on_the_master_branch__63__.mdwn b/doc/forum/annex_forget_that_operates_on_the_master_branch__63__.mdwn new file mode 100644 index 0000000000..1d13e92568 --- /dev/null +++ b/doc/forum/annex_forget_that_operates_on_the_master_branch__63__.mdwn @@ -0,0 +1,14 @@ +Greetings. + +I would like to be able to truncate the commit history of the master branch (or, ideally, arbitrarily rewrite history). + +At the moment I manually prune commit history via [this](https://gist.github.com/JuliaSprenger/05810e0f7fe04062a32b3951c6520904) guide which boils down to: +1. Squash commits of the master branch and ```git annex forget --drop-dead --force```; +2. Force-push changes to the master and git-annex branches for each repository; +3. Delete ```sync/...``` branches in each repository. + +The problem is that rewriting history this way requires all of them to be online at the same time. + +Can this be done in an asynchronous manner? + +Thanks.
removed
diff --git a/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment b/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment deleted file mode 100644 index 636780d2a6..0000000000 --- a/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment +++ /dev/null @@ -1,10 +0,0 @@ -[[!comment format=mdwn - username="MatusGoljer1" - avatar="http://cdn.libravatar.org/avatar/8152eed1d594c570563ed46e7fd8356f" - subject="Order the remotes are processed in" - date="2025-12-26T19:21:46Z" - content=""" -Hi. What is the order the remotes are processed in? I have 10 or so special remotes and often the files are uploaded to one and then dropped from another, where doing nothing would be fine. They all have the same group and preferred content. - -I think it might be some concurrency issue (I have cron jobs running sync and I also commit manually sometimes), but I want to be sure. -"""]]
Added a comment: Order the remotes are processed in
diff --git a/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment b/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment new file mode 100644 index 0000000000..636780d2a6 --- /dev/null +++ b/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="MatusGoljer1" + avatar="http://cdn.libravatar.org/avatar/8152eed1d594c570563ed46e7fd8356f" + subject="Order the remotes are processed in" + date="2025-12-26T19:21:46Z" + content=""" +Hi. What is the order the remotes are processed in? I have 10 or so special remotes and often the files are uploaded to one and then dropped from another, where doing nothing would be fine. They all have the same group and preferred content. + +I think it might be some concurrency issue (I have cron jobs running sync and I also commit manually sometimes), but I want to be sure. +"""]]
Added a comment: Order the remotes are processed in
diff --git a/doc/git-annex-preferred-content/comment_7_65842cfd8a1040fa374563e40203a197._comment b/doc/git-annex-preferred-content/comment_7_65842cfd8a1040fa374563e40203a197._comment new file mode 100644 index 0000000000..2453011448 --- /dev/null +++ b/doc/git-annex-preferred-content/comment_7_65842cfd8a1040fa374563e40203a197._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="MatusGoljer1" + avatar="http://cdn.libravatar.org/avatar/8152eed1d594c570563ed46e7fd8356f" + subject="Order the remotes are processed in" + date="2025-12-26T19:21:35Z" + content=""" +Hi. What is the order the remotes are processed in? I have 10 or so special remotes and often the files are uploaded to one and then dropped from another, where doing nothing would be fine. They all have the same group and preferred content. + +I think it might be some concurrency issue (I have cron jobs running sync and I also commit manually sometimes), but I want to be sure. +"""]]
reporting on difficulty of recovering content for unlocked file
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn new file mode 100644 index 0000000000..8048b070e9 --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn @@ -0,0 +1,113 @@ +### Please describe the problem. + +situation is tricky as I forced it via `git reset --hard HEAD^` after a commit which managed to commit a file directly into git instead of git-annex due to changes in `.gitattributes` , whenever originally it was under annex unlocked. + +So now I am in situation where it is not obvious on how to "instantiate" that file in the tree since `annex get` does nothing although file is just a link: + +``` +❯ cat .strava-backup/config.toml +/annex/objects/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml +❯ git annex whereis .strava-backup/config.toml +whereis .strava-backup/config.toml (1 copy) + abe1a028-2aec-4c31-b48d-0db92e338292 -- yoh@bilena:~/proj/strava-backup-mine [here] +ok +❯ git annex get .strava-backup/config.toml +❯ git annex get --force .strava-backup/config.toml +❯ git annex restage .strava-backup/config.toml +git-annex: This command takes no parameters. +❯ git annex restage +restage ok +❯ cat .strava-backup/config.toml +/annex/objects/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml +``` + +I thought I could just "instantiate" its content from the key -- but now `git-annex` insists (well - via `git status`) that it is modified, even after I do `restage` and commit. + + +<details> +<summary></summary> + +```shell +❯ cat .git/annex/objects/F2/m8/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml >| .strava-backup/config.toml +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." 
to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git annex restage +restage ok +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git diff +❯ git diff --cached +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git commit -m 'reinstantiated file' +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git commit -m 'reinstantiated file' -a +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +nothing to commit, working tree clean +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." 
to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") + +``` +</details> + +only if I do `git reset --hard` it becomes unmodified . I had to do `annex fsck` which made it instantiated but modified, then "commit" which committed nothing but everything became kosher. + +There should be a more seamless way to recover, or should I generally always use `annex fsck` after `git reset --hard` when working with unlocked files? + + +### What version of git-annex are you using? On what operating system? + + +``` +❯ git annex version | head -n 1 +git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 +``` +
Added a comment: This is amazing
diff --git a/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_1_1e586618eadbbb70f4e274aee201c67a._comment b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_1_1e586618eadbbb70f4e274aee201c67a._comment new file mode 100644 index 0000000000..9b57812e60 --- /dev/null +++ b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_1_1e586618eadbbb70f4e274aee201c67a._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="nadir" + avatar="http://cdn.libravatar.org/avatar/2af9174cf6c06de802104d632dc40071" + subject="This is amazing" + date="2025-12-25T12:10:18Z" + content=""" +It (near) perfectly solves a problem I had. + +If something similar existed for restic, I could use my existing backup repositories with this. There's also the issue that borg does not support most rclone supported remotes, like restic does, so it reduces usable cloud providers pretty much to hetzner in my situation. + +Still great. Thanks! +"""]]
Added a comment: Directory remotes in offline drives for archiving?
diff --git a/doc/tips/offline_archive_drives/comment_8_21ad80b8ddb9ca0105fae302eb74e94f._comment b/doc/tips/offline_archive_drives/comment_8_21ad80b8ddb9ca0105fae302eb74e94f._comment new file mode 100644 index 0000000000..0954eaeadb --- /dev/null +++ b/doc/tips/offline_archive_drives/comment_8_21ad80b8ddb9ca0105fae302eb74e94f._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="wzhd" + avatar="http://cdn.libravatar.org/avatar/1795a91af84f4243a3bf0974bc8d79fe" + subject="Directory remotes in offline drives for archiving?" + date="2025-12-20T10:28:35Z" + content=""" +Using offline drives as remotes makes it easy to enable encryption. I can rely on git-annex to encrypt the annexed files instead of setting up block device or file system encryption. The git repo does not need to be cloned to the drive. + +Can I move annexed files out of my laptop and into archival drives only? +I have multiple drives plugged in via USB to store multiple copies. +But it seems git-annex doesn't consider it safe because copies can't be locked down with directory remotes? +I'm only moving files into these drives by invoking one instance of git-annex, I'm pretty sure files won't be concurrently removed from the drives. +Can I move the files without entirely disabling all safety checks? +"""]]
diff --git a/doc/forum/Find_never__40____33____41___used_files_in_annex__63__.mdwn b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__.mdwn new file mode 100644 index 0000000000..83bc7634ba --- /dev/null +++ b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__.mdwn @@ -0,0 +1,5 @@ +It is possible to deposit files in a remotes annex, for example via a p2phttp request. In this case, the deposited file was never known to the git-annex branch metadata. It is my understanding that in this case all the "unused" tooling is not applicable. + +Does git-annex provide means to scan an annex for unexpected annex keys, and maybe for ingesting them such that the appear as unused? + +Thx!
comment
diff --git a/doc/todo/recover_from_export_of_corrupted_object/comment_2_7ce55f8dbe9372085508cebc977587bd._comment b/doc/todo/recover_from_export_of_corrupted_object/comment_2_7ce55f8dbe9372085508cebc977587bd._comment new file mode 100644 index 0000000000..b740f91970 --- /dev/null +++ b/doc/todo/recover_from_export_of_corrupted_object/comment_2_7ce55f8dbe9372085508cebc977587bd._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-17T18:30:06Z" + content=""" +In a non-export S3 bucket with versioning, fsck also cannot recover from a +corrupted object, due to the same problem with the versionId. The same +method should work to handle this case. +"""]]
comment
diff --git a/doc/todo/recover_from_export_of_corrupted_object/comment_1_33113d748bc7c35ef669c90f6b82d36a._comment b/doc/todo/recover_from_export_of_corrupted_object/comment_1_33113d748bc7c35ef669c90f6b82d36a._comment new file mode 100644 index 0000000000..28adbcfb41 --- /dev/null +++ b/doc/todo/recover_from_export_of_corrupted_object/comment_1_33113d748bc7c35ef669c90f6b82d36a._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-17T17:22:32Z" + content=""" +Note that it would also be possible for a valid object to be sent, but then +get corrupted in the remote storage. I don't think that's what happened here. + +If that did happen, a similar recovery process is also needed. + +Which I think says that focusing on a recovery process, rather than on +prevention, is more useful. +"""]]
update
diff --git a/doc/todo/recover_from_export_of_corrupted_object.mdwn b/doc/todo/recover_from_export_of_corrupted_object.mdwn index 706f88cc4d..9311547310 100644 --- a/doc/todo/recover_from_export_of_corrupted_object.mdwn +++ b/doc/todo/recover_from_export_of_corrupted_object.mdwn @@ -2,10 +2,16 @@ This is a case where a truncated file was exported as part of a tree to S3. In particular a bucket with `versioning=yes`. +Note that `git-annex export` does not verify checksums before sending, and +so it's possible for this to happen if a corrupted object has somehow +gotten into the local repository. It might be possible to improve this to +deal better with object corruption, including object corruption that occurs +while exporting. + Currently there is no good way for a user to recover from this. Exporting a tree that deletes the corrupted file, followed by a tree that adds back the -right version of the file will generally work. It will not work for a -versioned S3 bucket though, because removing an export from a versioned S3 +right version of the file will generally work. But it will not work for a +versioned S3 bucket, because removing an export from a versioned S3 bucket does not remove the recorded S3 versionId. While re-exporting the file will record the new versionId, the old one remains recorded, and when multiple versionIds are recorded for the same key, either may be used when
add
diff --git a/doc/todo/recover_from_export_of_corrupted_object.mdwn b/doc/todo/recover_from_export_of_corrupted_object.mdwn new file mode 100644 index 0000000000..706f88cc4d --- /dev/null +++ b/doc/todo/recover_from_export_of_corrupted_object.mdwn @@ -0,0 +1,33 @@ +<https://github.com/OpenNeuroOrg/openneuro/issues/3446#issuecomment-2892398583> +This is a case where a truncated file was exported as part of a tree to S3. +In particular a bucket with `versioning=yes`. + +Currently there is no good way for a user to recover from this. Exporting a +tree that deletes the corrupted file, followed by a tree that adds back the +right version of the file will generally work. It will not work for a +versioned S3 bucket though, because removing an export from a versioned S3 +bucket does not remove the recorded S3 versionId. While re-exporting the +file will record the new versionId, the old one remains recorded, and when +multiple versionIds are recorded for the same key, either may be used when +retrieving it. + +What needs to be done is to remove the old versionId. But it does not seem +right to generally do this when removing an exported file from a S3 bucket, +because usually, when it's not corrupted, that versionId is still valid, +and can still be used to retrieve that object. + +`git-annex fsck --from=s3` will detect the problem, but it is unable to do +anything to resolve it, since it can only try to drop the corrupted key, +and dropping by key is not supported with an exporttree=yes remote. + +Could fsck be extended to handle this? It should be possible for fsck to: + +1. removeExport the corrupted file, and update the export log to say that + the export of the tree to the special remote is incomplete. +2. Handle the special case of the versioned S3 bucket with eg, a new Remote + method that is used when a key on the remote is corrupted. In the case + of a versioned S3 bucket, that new method would remove the versionId. + +--[[Joey]] + +[[!tag projects/openneuro]]
Added a comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_4_cd1cc39065715a35924f6bdfb11cbbc5._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_4_cd1cc39065715a35924f6bdfb11cbbc5._comment new file mode 100644 index 0000000000..d144cc1818 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_4_cd1cc39065715a35924f6bdfb11cbbc5._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 4" + date="2025-12-16T20:33:56Z" + content=""" +yes -- that one is embargoed (can be seen by going to https://dandiarchive.org/dandiset/000675) + +> And when you replicated the problem from the backup, were you using it in the configuration where it cannot access those? + +if I got the question right and since I do not recall now -- judging from me using `( source .git/secrets.env; git-annex import master...` I think I was with credentials allowing to access them (hence no errors while importing) + +> Do you have annex.largefiles configured in this repository, and are all of the affected files non-annexed files? + +yes + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ grep largefiles .gitattributes +**/.git* annex.largefiles=nothing +* annex.largefiles=((mimeencoding=binary)and(largerthan=0)) +``` + +and it seems all go into git + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git annex list +here +|s3-dandiarchive (untrusted) +||web +|||bittorrent +|||| +``` +is empty + +"""]]
Added a comment
diff --git a/doc/bugs/assistant_does_not_add_some_of_the___40__renamed__41___files/comment_3_123b3b033b230140739a5e60ce2a8974._comment b/doc/bugs/assistant_does_not_add_some_of_the___40__renamed__41___files/comment_3_123b3b033b230140739a5e60ce2a8974._comment new file mode 100644 index 0000000000..80b11489fb --- /dev/null +++ b/doc/bugs/assistant_does_not_add_some_of_the___40__renamed__41___files/comment_3_123b3b033b230140739a5e60ce2a8974._comment @@ -0,0 +1,61 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2025-12-16T18:06:34Z" + content=""" +yes -- small files, go to git + +no, it is a small number of files created/renamed. In this case it is a set of 4 files [pre-created empty and closed](https://github.com/con/duct/blob/36944dafd4b555a59355ac56f93f550b375bb733/src/con_duct/duct_main.py#L831), and then 3 out of 4 opened for writing by duct and at the end of the process closed, and that original 1 (`_info.json`) is [reopened for writing to dump the record and closed](https://github.com/con/duct/blob/v0.8.0/src/con_duct/__main__.py#L1072-L1074). Then outside tool which ran it takes all of them and renames into the filename with end timestamp. git-annex manages to detect that original 0-sized `_info.json` one gets removed but does not pick up the new one which gets rapidly renamed into a longer name. 
+ +In git log looks like: + +``` +commit 65e9f13a882ef78d743fbe634c8e05f9dcb32c45 +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:44:30 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.30.29.570--.mkv.duct_info.json | 0 + Videos/2025/12/2025.12.16-09.30.29.570--2025.12.16-09.44.28.225.mkv | 1 + + Videos/2025/12/2025.12.16-09.30.29.570--2025.12.16-09.44.28.225.mkv.duct_usage.json | 1 + + Videos/2025/12/2025.12.16-09.30.29.570--2025.12.16-09.44.28.225.mkv.log | 1 + + 4 files changed, 3 insertions(+) + +commit 3fe4710fc058e7d1433637c9af538b3bb9e5ebed +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:30:31 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.30.29.570--.mkv.duct_info.json | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + +commit f6bb6137c81ef36387ded229a4d8592964530bc8 +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:30:23 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.29.32.681--.mkv.duct_info.json | 0 + Videos/2025/12/2025.12.16-09.29.32.681--2025.12.16-09.30.21.889.mkv | 1 + + Videos/2025/12/2025.12.16-09.29.32.681--2025.12.16-09.30.21.889.mkv.duct_usage.json | 1 + + Videos/2025/12/2025.12.16-09.29.32.681--2025.12.16-09.30.21.889.mkv.log | 1 + + 4 files changed, 3 insertions(+) + +commit 00444920167e17b429d10fa29df8f1947930152c +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:29:34 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.29.32.681--.mkv.duct_info.json | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + +``` + + +Here is a copy of current process: https://www.oneukrainian.com/tmp/daemon-20251216.log + +"""]]
Added a comment: passing additional flags to rclone
diff --git a/doc/special_remotes/rclone/comment_8_f97d711efbb38e122150557832a8aa2e._comment b/doc/special_remotes/rclone/comment_8_f97d711efbb38e122150557832a8aa2e._comment new file mode 100644 index 0000000000..964e5ea98e --- /dev/null +++ b/doc/special_remotes/rclone/comment_8_f97d711efbb38e122150557832a8aa2e._comment @@ -0,0 +1,31 @@ +[[!comment format=mdwn + username="nadir" + avatar="http://cdn.libravatar.org/avatar/2af9174cf6c06de802104d632dc40071" + subject="passing additional flags to rclone" + date="2025-12-15T20:42:38Z" + content=""" +I'm trying to pass additional flags to rclone, like `--bwlimit` for example. Not sure how to do that, though. The `--whatelse` flag tells me they should just be passed by default: + +``` +> git annex initremote hetzner type=rclone rcloneremotename=hetzner rcloneprefix=someprefix encryption=shared chunk=500MiB --whatelse +embedcreds + embed credentials into git repository + (yes or no) +onlyencryptcreds + only encrypt embedded credentials, not annexed files + (yes or no) +mac + how to encrypt filenames used on the remote + (HMACSHA1 or HMACSHA224 or HMACSHA256 or HMACSHA384 or HMACSHA512) +keyid + gpg key id +keyid+ + add additional gpg key +keyid- + remove gpg key +* + all other parameters are passed to rclone +``` + +I tried `--bwlimit 3000` and `bwlimit=3000`, but that gives me `invalid option` plus help text or `git-annex: Unexpected parameters: bwlimit` respectively. +"""]]
diff --git a/doc/forum/special_remote_to___34__batch_archive__34_____40__to_tapes__41____63__.mdwn b/doc/forum/special_remote_to___34__batch_archive__34_____40__to_tapes__41____63__.mdwn new file mode 100644 index 0000000000..f04be05401 --- /dev/null +++ b/doc/forum/special_remote_to___34__batch_archive__34_____40__to_tapes__41____63__.mdwn @@ -0,0 +1,8 @@ +Ultimate goal is to backup [dandiarchive](https://dandiarchive.org/) (currently about 800TB but grows) to [NESE tapes](https://nese.readthedocs.io/en/latest/user-docs.html#nese-tape). NESE tapes service expects transfer via globus and + +> Files stored on NESE Tape should ideally be between 1 GiB and 1 TiB. Please consider creating tarballs of these target sizes before sending data via Globus to NESE Tape. + +All our dandisets range in sizes of their files from KBs to GBs, and already present in git/git-annex'es at e.g. https://github.com/dandisets . Previously, we abused Dropbox via an rclone shared special remote, but that one is gone now. So I wonder what could be our setup here to most seamlessly and "automatically" batch archive across a range of git/git-annex repos into the same "shared" globus space tarballs. + +Any ideas on the setup would be appreciated. +
add news item for git-annex 10.20251215
diff --git a/doc/news/version_10.20250828.mdwn b/doc/news/version_10.20250828.mdwn deleted file mode 100644 index 9dead20771..0000000000 --- a/doc/news/version_10.20250828.mdwn +++ /dev/null @@ -1,38 +0,0 @@ -git-annex 10.20250828 released with [[!toggle text="these changes"]] -[[!toggleable text=""" * p2p: Added --enable option, which can be used to enable P2P networks - provided by external commands git-annex-p2p-<netname> - * Added git-remote-p2p-annex, which allows git pull and push to - P2P networks provided by commands git-annex-p2p-<netname> - * S3: Default to signature=v4 when using an AWS endpoint, since some - AWS regions need v4 and all support it. When host= is used to specify - a different S3 host, the default remains signature=v2. - * webapp: Support setting up S3 buckets in regions that need v4 - signatures. - * S3: When initremote is given the name of a bucket that already exists, - automatically set datacenter to the right value, rather than needing it - to be explicitly set. - * info: Added --show option to pick which parts of the info to calculate - and display. - * Improve behavior when there are special remotes configured with - autoenable=yes with names that conflict with other remotes. - * adjust: When another branch has been manually merged into the adjusted - branch, re-adjusting errors out, rather than losing that merge commit. - * sync: When another branch has been manually merged into an adjusted - branch, error out rather than only displaying a warning. - * initremote: New onlyencryptcreds=yes which can be used along with - embedcreds=yes, to only encrypt the embedded creds, without encrypting - the content of the special remote. Useful for exporttree/importtree - remotes. - * Don't allow the type of encryption of an existing special remote to be - changed. Fixes reversion introduced in version 7.20191230. - * tahoe: Support tahoe-lafs command versions newer than 1.16. 
- * tahoe: Fix bug that made initremote require an encryption= parameter, - despite git-annex encryption not being used with this special remote. - Fixes reversion introduced in version 7.20191230. - * Improved error message when yt-dlp is not installed and is needed to - get a file from the web. - * The annex.youtube-dl-command git config is no longer used, git-annex - always runs the yt-dlp command, rather than the old youtube-dl command. - * Removed support for git versions older than 2.22. - * Bump aws build dependency to 0.24.1. - * stack.yaml: Update to lts-24.2."""]] \ No newline at end of file diff --git a/doc/news/version_10.20251215.mdwn b/doc/news/version_10.20251215.mdwn new file mode 100644 index 0000000000..e6aa18e260 --- /dev/null +++ b/doc/news/version_10.20251215.mdwn @@ -0,0 +1,16 @@ +git-annex 10.20251215 released with [[!toggle text="these changes"]] +[[!toggleable text=""" * Added annex.trashbin configuration. + * Added --presentsince, --lackingsince, and --changedsince file + matching options. + * Added TRANSFER-RETRIEVE-URL extension to the external special remote + protocol. + * S3: Remote can be configured with an x-amz-tagging header. + (Needs aws-0.25) + * S3: Support Google Cloud Storage + (Needs aws-0.25.1) + * S3: Support restore=yes, when used with storageclass=DEEP\_ARCHIVE and + similar. This is equivilant to the now deprecated Amazon Glacier. + (Needs aws-0.25.2) + * Add a build warning when the version of aws being built against is + too old to support all features. + * stack.yaml: Use aws-0.25.2."""]] \ No newline at end of file
close non-bug
diff --git a/doc/bugs/Walrus_storage_backend.mdwn b/doc/bugs/Walrus_storage_backend.mdwn index c5b1168c15..f9ca1ebe60 100644 --- a/doc/bugs/Walrus_storage_backend.mdwn +++ b/doc/bugs/Walrus_storage_backend.mdwn @@ -18,3 +18,5 @@ The whole infrastructure, allow to build a decentralized annex cloud storage, wh ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) I love git annex, works like a charm. Using it for 5+ years + +> [[notabug|done]] --[[Joey]] diff --git a/doc/bugs/Walrus_storage_backend/comment_1_9f32df0a59959cc8fe570b65093d8e41._comment b/doc/bugs/Walrus_storage_backend/comment_1_9f32df0a59959cc8fe570b65093d8e41._comment new file mode 100644 index 0000000000..719d8c73cc --- /dev/null +++ b/doc/bugs/Walrus_storage_backend/comment_1_9f32df0a59959cc8fe570b65093d8e41._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-15T17:33:31Z" + content=""" +This is not a bug. While it could be moved to [[todo]], anyone can write an +external special remote to use this or any other storage system. + +So I am closing this bug report. +"""]]
respond and close
diff --git a/doc/bugs/S3_fails_with_v4_signing.mdwn b/doc/bugs/S3_fails_with_v4_signing.mdwn index 4713d454fe..37aecd8822 100644 --- a/doc/bugs/S3_fails_with_v4_signing.mdwn +++ b/doc/bugs/S3_fails_with_v4_signing.mdwn @@ -43,3 +43,9 @@ initremote: 1 failed ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) Yep. It's a great little tool; up till now always local network + rsync. + +> Pass signature=v4 to `git-annex initremotr` to use v4 signing. +> This has been supported for years. +> +> That actually became the default for S3 in version +> 10.20250828. [[done]] --[[Joey]]
formatting
diff --git a/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment b/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment index 1ef22c6293..294e7178f4 100644 --- a/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment +++ b/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment @@ -6,7 +6,7 @@ Actually I have gone ahead an implemented some [[git-annex-matching-options]] that will be useful in finding content to drop from the trashbin: -`--presentsince --lackingsince `--changedsince` +`--presentsince --lackingsince --changedsince` You might use, for example:
--presentsince, --lackingsince, and --changedsince
There are some complications around vector clocks, which basically come
down to, when the clock of a repository has been wrong, these won't
reflect actual real times. Which I think is too obvious to document to the
user.
The possibility of a distributed system updating the location log
unnecessarily does seem like something a user might get confused by. It
would be a fairly rare situation that caused it though. For example
`git-annex fsck --from remote` will update the location logs for changes
it finds, and when the git-annex branch is out of sync with the remote,
will make unnecessary updates. Once those get synced to the remote,
in that repository eg --presentsince=here:interval will only see the
most recent log entry, which is at a later point in time than when
the content was actually present in the repository. It would be good to
document this perhaps, but it's a rather complicated and unusual situation.
There are some complications around vector clocks, which basically come
down to, when the clock of a repository has been wrong, these won't
reflect actual real times. Which I think is too obvious to document to the
user.
The possibility of a distributed system updating the location log
unnecessarily does seem like something a user might get confused by. It
would be a fairly rare situation that caused it though. For example
`git-annex fsck --from remote` will update the location logs for changes
it finds, and when the git-annex branch is out of sync with the remote,
will make unnecessary updates. Once those get synced to the remote,
in that repository eg --presentsince=here:interval will only see the
most recent log entry, which is at a later point in time than when
the content was actually present in the repository. It would be good to
document this perhaps, but it's a rather complicated and unusual situation.
diff --git a/CHANGELOG b/CHANGELOG
index b78fc604bb..4fb5f7d5cc 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,8 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
(Needs aws-0.25.2)
* stack.yaml: Use aws-0.25.2.
* Added annex.trashbin configuration.
+ * Added --presentsince, --lackingsince, and --changedsince file
+ matching options.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/CmdLine/GitAnnex/Options.hs b/CmdLine/GitAnnex/Options.hs
index 4b44edda56..529935f237 100644
--- a/CmdLine/GitAnnex/Options.hs
+++ b/CmdLine/GitAnnex/Options.hs
@@ -381,6 +381,21 @@ keyMatchingOptions' =
<> help "match files accessed within a time interval"
<> hidden
)
+ , annexOption (setAnnexState . Limit.addPresentSince) $ strOption
+ ( long "presentsince" <> metavar paramValue
+ <> help "matches files present in a repository throughout a time interval"
+ <> hidden
+ )
+ , annexOption (setAnnexState . Limit.addLackingSince) $ strOption
+ ( long "lackingsince" <> metavar paramValue
+ <> help "matches files not present in a repository throughout a time interval"
+ <> hidden
+ )
+ , annexOption (setAnnexState . Limit.addChangedSince) $ strOption
+ ( long "changedsince" <> metavar paramValue
+ <> help "matches files whose presence changed during a time interval"
+ <> hidden
+ )
, annexOption (setAnnexState . Limit.addMimeType) $ strOption
( long "mimetype" <> metavar paramGlob
<> help "match files by mime type"
diff --git a/Limit.hs b/Limit.hs
index 1916a606d5..28ab1be653 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -910,6 +910,43 @@ addAccessedWithin duration = do
return $ delta <= secs
secs = fromIntegral (durationSeconds duration)
+addPresentSince :: String -> Annex ()
+addPresentSince = limitLocationDuration "presentsince"
+ (\k t -> loggedLocationsUnchangedSince k t (== InfoPresent))
+
+addLackingSince :: String -> Annex ()
+addLackingSince = limitLocationDuration "lackingsince"
+ (\k t -> loggedLocationsUnchangedSince k t (/= InfoPresent))
+
+addChangedSince :: String -> Annex ()
+addChangedSince = limitLocationDuration "changedsince"
+ (\k t -> loggedLocationsChangedAfter k t (const True))
+
+limitLocationDuration :: String -> (Key -> POSIXTime -> Annex [UUID]) -> String-> Annex ()
+limitLocationDuration desc getter s = do
+ u <- Remote.nameToUUID name
+ case parseDuration interval of
+ Left parseerr -> addLimit $ Left parseerr
+ Right duration ->
+ let check _notpresent key = do
+ now <- liftIO getPOSIXTime
+ let t = now - fromIntegral (durationSeconds duration)
+ us <- getter key t
+ return $ u `elem` us
+ in addLimit $ Right $ mkmatcher check
+ where
+ (name, interval) = separate (== ':') s
+ mkmatcher check = MatchFiles
+ { matchAction = const $ checkKey . check
+ , matchNeedsFileName = False
+ , matchNeedsFileContent = False
+ , matchNeedsKey = True
+ , matchNeedsLocationLog = True
+ , matchNeedsLiveRepoSize = False
+ , matchNegationUnstable = False
+ , matchDesc = desc =? s
+ }
+
lookupFileKey :: FileInfo -> Annex (Maybe Key)
lookupFileKey fi = case matchKey fi of
Just k -> return (Just k)
diff --git a/Logs/Location.hs b/Logs/Location.hs
index 2adcddd2e3..0f06cd738f 100644
--- a/Logs/Location.hs
+++ b/Logs/Location.hs
@@ -8,7 +8,7 @@
- Repositories record their UUID and the date when they --get or --drop
- a value.
-
- - Copyright 2010-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -23,6 +23,8 @@ module Logs.Location (
loggedLocations,
loggedPreviousLocations,
loggedLocationsHistorical,
+ loggedLocationsUnchangedSince,
+ loggedLocationsChangedAfter,
loggedLocationsRef,
isKnownKey,
checkDead,
@@ -53,6 +55,7 @@ import Git.Types (RefDate, Ref, Sha)
import qualified Annex
import Data.Time.Clock
+import Data.Time.Clock.POSIX
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
@@ -74,6 +77,9 @@ logStatusAfter lu key a = ifM a
)
{- Log a change in the presence of a key's value in a repository.
+ -
+ - If the provided LogStatus is the same as what is currently in the log,
+ - the log is not updated.
-
- Cluster UUIDs are not logged. Instead, when a node of a cluster is
- logged to contain a key, loading the log will include the cluster's
@@ -98,7 +104,7 @@ loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations presentLogInfo
{- Returns a list of repository UUIDs that the location log indicates
- - used to have the vale of a key, but no longer do.
+ - used to have the value of a key, but no longer do.
-}
loggedPreviousLocations :: Key -> Annex [UUID]
loggedPreviousLocations = getLoggedLocations notPresentLogInfo
@@ -107,6 +113,44 @@ loggedPreviousLocations = getLoggedLocations notPresentLogInfo
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
+{- Returns a list of repository UUIDs that the location log indicates
+ - have had a matching LogStatus for a key that has not changed
+ - since the given time.
+ -
+ - This assumes that logs were written with a properly set clock.
+ -
+ - Note that, while logChange avoids updating the log with the same
+ - LogStatus that is already in it, there are distributed situations
+ - where the log for a repository does get updated redundantly,
+ - setting the same LogStatus that was already logged. When that has
+ - happened, this will treat it as the LogStatus having changed at the
+ - last time it was written.
+ -}
+loggedLocationsUnchangedSince :: Key -> POSIXTime -> (LogStatus -> Bool) -> Annex [UUID]
+loggedLocationsUnchangedSince key time matchstatus =
+ loggedLocationsMatchingTime key (<= time) matchstatus
+
+{- Similar to loggedLocationsSince, but lists repository UUIDs that
+ - have had a matching LogStatus recorded after the given time.
+ -}
+loggedLocationsChangedAfter :: Key -> POSIXTime -> (LogStatus -> Bool) -> Annex [UUID]
+loggedLocationsChangedAfter key time matchstatus =
+ loggedLocationsMatchingTime key (> time) matchstatus
+
+loggedLocationsMatchingTime :: Key -> (POSIXTime -> Bool) -> (LogStatus -> Bool) -> Annex [UUID]
+loggedLocationsMatchingTime key matchtime matchstatus = do
+ config <- Annex.getGitConfig
+ locs <- map (toUUID . fromLogInfo . info)
+ . filter (matchtime' . date)
+ . filter (matchstatus . status)
+ . compactLog
+ <$> readLog (locationLogFile config key)
+ clusters <- getClusters
+ return $ addClusterUUIDs clusters locs
+ where
+ matchtime' (VectorClock t) = matchtime t
+ matchtime' Unknown = False
+
{- Gets the locations contained in a git ref. -}
loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
diff --git a/doc/git-annex-matching-options.mdwn b/doc/git-annex-matching-options.mdwn
index cf964cc71d..5bfeb73f05 100644
--- a/doc/git-annex-matching-options.mdwn
+++ b/doc/git-annex-matching-options.mdwn
@@ -64,7 +64,7 @@ in either of two repositories.
The repository should be specified using the name of a configured remote,
or the UUID or description of a repository. For the current repository,
- use `--in=here`
+ use "here".
Note that this does not check remote repositories to verify that content
is still present on them. However, when checking the current repository,
@@ -224,8 +224,8 @@ in either of two repositories.
* `--accessedwithin=interval`
- Matches when the content was accessed recently, within the specified time
- interval.
(Diff truncated)
Added a comment
diff --git a/doc/install/FreeBSD/comment_5_6372a545cb3c4a5b20f713ef80a0b6d3._comment b/doc/install/FreeBSD/comment_5_6372a545cb3c4a5b20f713ef80a0b6d3._comment new file mode 100644 index 0000000000..709208745d --- /dev/null +++ b/doc/install/FreeBSD/comment_5_6372a545cb3c4a5b20f713ef80a0b6d3._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 5" + date="2025-12-12T10:46:12Z" + content=""" +FWIW, dynamically linked binary is no good either: + +``` +[yoh@dbic-mrinbox ~]$ wget https://downloads.kitenet.net/git-annex/linux/current/git-annex-standalone-amd64.tar.gz +[yoh@dbic-mrinbox ~]$ tar -xzvf git-annex-standalone-amd64.tar.gz +[yoh@dbic-mrinbox ~]$ cd git-annex.linux/ +[yoh@dbic-mrinbox ~/git-annex.linux]$ ls +LICENSE exe git-annex git-core git-remote-tor-annex lib logo_16x16.png templates +README extra git-annex-shell git-receive-pack git-shell lib64 magic trustedkeys.gpg +bin gconvdir git-annex-webapp git-remote-annex git-upload-pack libdirs runshell usr +buildid git git-annex.MANIFEST git-remote-p2p-annex i18n logo.svg shimmed +[yoh@dbic-mrinbox ~/git-annex.linux]$ ./git-annex +ELF binary type \"3\" not known. +exec: /usr/home/yoh/git-annex.linux/exe/git-annex: Exec format error + +``` + +I will try to assemble build commands later... +"""]]
note
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5027412b50..c325adc9c1 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1327,6 +1327,9 @@ repository, using [[git-annex-config]]. See its man page for a list.) will not see a progress display for their drop action. So this is best used with a fast remote. + And, if the remote is not accessible, or a file fails to be moved to it, + the file will not be dropped from the repository. + * `annex.url` When a remote has a http url, the first time git-annex uses the remote
annex.trashbin
Note that, in the unlikely event that the reasoning in commit
5a081fc246664e7b5c17023dddfb8d123eef64e5 is wrong and there is some
situation where Annex.remotelist is not filled at a time when this is used,
the user will get back the "annex.trashbin is set to the name of an unknown
remote" error for a remote that does exist.
Note that, in the unlikely event that the reasoning in commit
5a081fc246664e7b5c17023dddfb8d123eef64e5 is wrong and there is some
situation where Annex.remotelist is not filled at a time when this is used,
the user will get back the "annex.trashbin is set to the name of an unknown
remote" error for a remote that does exist.
diff --git a/Annex/Content.hs b/Annex/Content.hs
index c113620cc9..876f526785 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -96,7 +96,7 @@ import Annex.ReplaceFile
import Annex.AdjustedBranch (adjustedBranchRefresh)
import Annex.DirHashes
import Messages.Progress
-import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..))
+import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..), name, storeKey, uuid)
import Types.NumCopies
import Types.Key
import Types.Transfer
@@ -779,7 +779,8 @@ unlinkAnnex key = do
{- Removes a key's file from .git/annex/objects/ -}
removeAnnex :: Annex [Remote] -> ContentRemovalLock -> Annex ()
-removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file ->
+removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file -> do
+ putouttrash
cleanObjectLoc key $ do
secureErase file
liftIO $ removeWhenExistsWith removeFile file
@@ -800,6 +801,20 @@ removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file ->
-- removal process, so thaw it.
, void $ tryIO $ thawContent file
)
+
+ putouttrash = annexTrashbin <$> Annex.getGitConfig >>= \case
+ Nothing -> return ()
+ Just trashbin -> do
+ rs <- remotelist
+ putouttrash' trashbin rs
+
+ putouttrash' _ [] = giveup "annex.trashbin is set to the name of an unknown remote"
+ putouttrash' trashbin (r:rs)
+ | name r == trashbin = do
+ catchNonAsync (storeKey r key (AssociatedFile Nothing) Nothing nullMeterUpdate)
+ (\ex -> giveup $ "Failed to move to annex.trashbin remote; unable to drop " ++ show ex)
+ logChange NoLiveUpdate key (uuid r) InfoPresent
+ | otherwise = putouttrash' trashbin rs
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the file it was moved to. -}
diff --git a/CHANGELOG b/CHANGELOG
index 3de0bf454d..b78fc604bb 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
similar. This is equivilant to the now deprecated Amazon Glacier.
(Needs aws-0.25.2)
* stack.yaml: Use aws-0.25.2.
+ * Added annex.trashbin configuration.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 81d8201ed3..4303c09961 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -158,6 +158,7 @@ data GitConfig = GitConfig
, annexAdjustedBranchRefresh :: Integer
, annexSupportUnlocked :: Bool
, annexAssistantAllowUnlocked :: Bool
+ , annexTrashbin :: Maybe RemoteName
, coreSymlinks :: Bool
, coreSharedRepository :: SharedRepository
, coreQuotePath :: QuotePath
@@ -283,6 +284,7 @@ extractGitConfig configsource r = GitConfig
(getmayberead (annexConfig "adjustedbranchrefresh"))
, annexSupportUnlocked = getbool (annexConfig "supportunlocked") True
, annexAssistantAllowUnlocked = getbool (annexConfig "assistant.allowunlocked") False
+ , annexTrashbin = getmaybe "annex.trashbin"
, coreSymlinks = getbool "core.symlinks" True
, coreSharedRepository = getSharedRepository r
, coreQuotePath = QuotePath (getbool "core.quotepath" True)
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 747bb1eb7f..5027412b50 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1318,6 +1318,15 @@ repository, using [[git-annex-config]]. See its man page for a list.)
After changing this config, you need to re-run `git-annex init` for it
to take effect.
+* `annex.trashbin`
+
+ When this is set to the name of a remote, files that are dropped from the
+ repository will be moved to that remote.
+
+ Note that, if it takes a long time to move a file to the remote, the user
+ will not see a progress display for their drop action. So this is best
+ used with a fast remote.
+
* `annex.url`
When a remote has a http url, the first time git-annex uses the remote
diff --git a/doc/todo/Delayed_drop_from_remote.mdwn b/doc/todo/Delayed_drop_from_remote.mdwn
index dd2d26bd4e..9c9e6b0ff3 100644
--- a/doc/todo/Delayed_drop_from_remote.mdwn
+++ b/doc/todo/Delayed_drop_from_remote.mdwn
@@ -9,3 +9,5 @@ The point is to have a fast path to recovery from over-eager dropping that might
Or maybe something like this exists already...
[[!tag projects/ICE4]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment b/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment
new file mode 100644
index 0000000000..cd55a59f8b
--- /dev/null
+++ b/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment
@@ -0,0 +1,20 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 5"""
+ date="2025-12-11T19:25:23Z"
+ content="""
+annex.trashbin is implemented.
+
+I am going to close this todo; if it turns out there is some preferred
+content improvement that would help with cleaning out the trash, let's talk
+about that on a new todo. But I'm guessing you'll make do with `find`.
+
+> I think I would deliberately want this to be invisible to the user, since I wouldn't want anyone to actively start relying on it.
+
+With a private remote it's reasonably invisible. The very observant user
+might notice a drop time that scales with the size of the file being
+dropped and be able to guess this feature is being used. And, if there is
+some error when it tries to move the object to the remote, the drop will
+fail. The error message in that case cannot really obscure the fact that
+annex.trashbin is configured.
+"""]]
comments
diff --git a/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment b/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment new file mode 100644 index 0000000000..5f2a1c94ea --- /dev/null +++ b/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-12-11T14:56:30Z" + content=""" +I don't know much about the static-annex builds, but you may have better +luck with the [[Linux_standalone]] builds due to their using a more +conventional libc. + +Building git-annex from source is not hard if you can get the stack tool +installed. It looks like the only currently supported way to do that as a +freebsd user is to install <https://www.haskell.org/ghcup/> which includes +stack. Then follow the [[fromsource]] section on "building from source with +stack". +"""]] diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment new file mode 100644 index 0000000000..6ee3bfde1a --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-12-11T14:42:10Z" + content=""" +> IIRC user can just push `git-annex` branch directly after `git-annex` merging remote version locally, right? + +Sure, but my point was that they would have to change their workflow due to +a change on the server that might not be visible to them. Violating least +surprise. +"""]]
remove accidentially added file
diff --git a/doc/.git-annex.mdwn.swp b/doc/.git-annex.mdwn.swp deleted file mode 100644 index 704713d9ce..0000000000 Binary files a/doc/.git-annex.mdwn.swp and /dev/null differ
Added a comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment new file mode 100644 index 0000000000..3cfbce29d5 --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2025-12-11T13:33:27Z" + content=""" +> In that example, the git-annex branch is not pushed to origin after annexed files are sent to it. So how does git-annex on otherhost know that origin has those files? Well, git-annex-shell, when receiving the files, updates the git-annex branch in origin. + + +IIRC user can just push `git-annex` branch directly after `git-annex` merging remote version locally, right? + +> Making it read-only would somewhat limit the exposure to all these problems, but if it's read-only, how would any annex objects get into the remote repository in the first place? + +my use-case at hands: I manipulate git-annex repo on a linux box on an NFS mount and the original one is freebsd box with bare minimal installation. I have about 50 datasets in a hierarchy. I wanted to backup to another location and it would be more performant to talk to the original freebsd server directly instead of going through NFS mount. I [can't install git-annex on that freebsd box ATM](https://git-annex.branchable.com/install/FreeBSD/#comment-38d4cc2a1e1deb696447cc0a9e149e77). + +FWIW, on a second thought, given that I do have a workaround with `rsync` (verified that it works) and unless another more prominent usecase arrives, might be indeed not worth the hassle. + +"""]]
Added a comment
diff --git a/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment b/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment new file mode 100644 index 0000000000..a1d5697c9b --- /dev/null +++ b/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2025-12-11T12:20:27Z" + content=""" +don't know much about freebsd but static builds from https://git.kyleam.com/static-annex do not work: + +```shell +[yoh@dbic-mrinbox ~/git-annex-10.20250828]$ bin/git-annex +ELF binary type \"0\" not known. +bash: bin/git-annex: cannot execute binary file: Exec format error +[yoh@dbic-mrinbox ~/git-annex-10.20250828]$ file bin/git-annex +bin/git-annex: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=a6f7f36778ade374ef6572c787cacf6ffa2ec78d, with debug_info, not stripped + +``` +"""]]
comment
diff --git a/doc/.git-annex.mdwn.swp b/doc/.git-annex.mdwn.swp new file mode 100644 index 0000000000..704713d9ce Binary files /dev/null and b/doc/.git-annex.mdwn.swp differ diff --git a/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment b/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment new file mode 100644 index 0000000000..cfbe1dde1e --- /dev/null +++ b/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-10T18:15:59Z" + content=""" +Doesn't FreeBSD support emulating linux syscalls? I suspect that the linux +standalone tarball could be used to install git-annex on user-space on +FreeBSD and work that way. Have not tried it maybe there is a better way, +to install a FreeBSD port as a regular user. +"""]]
comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment new file mode 100644 index 0000000000..8191356794 --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment @@ -0,0 +1,47 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-10T17:54:01Z" + content=""" +As for the idea that git-annex could access a remote without +git-annex-shell, I think that any efforts in this area are bound to end up +with some partial implementation of a quarter of git-annex-shell in shell +script, which is bound to not work as well as the real thing. + +Consider that this is a supported workflow: + + git push origin master + git-annex copy --to origin + + ssh otherhost + cd repo + git pull origin + git-annex get + +In that example, the git-annex branch is not pushed to origin after annexed +files are sent to it. So how does git-annex on otherhost know that origin +has those files? Well, git-annex-shell, when receiving the files, updates +the git-annex branch in origin. + +So, to support this workflow, the git-annex-shell reimplementation in shell +would need to update the git-annex branch. That's about 3000 lines of code +in git-annex, with complecations including concurrency, making it fast, +etc. + +Other complications include supporting different repository versions, +populating unlocked files, supporting configs like +annex.secure-erase-command, etc. And while any of these could be left out +an be documented as limitations of not having git-annex installed, I think +the real kicker is that this is behavior what would occur even if git-annex +is only *temporarily* not installed. So there's the risk that any user who +is having a bad PATH day suddenly gets a weird behavior. 
+ +Making it read-only would somewhat limit the exposure to all these +problems, but if it's read-only, how would any annex objects get into the +remote repository in the first place? + +Using a separate special remote seems much cleaner. Then it's only used if +you choose to use it. And it works like any other special remote. +The rsync special remote is close enough to work, but a more special-purpose +one could support things a bit better. +"""]]
comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment new file mode 100644 index 0000000000..8536bfecd0 --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-10T17:39:01Z" + content=""" +It's actually possible to use a rsync special remote to fetch objects right +out of `.git/annex/objects/`. For example: + + git-annex initremote foo-rsync type=rsync encryption=none rsyncurl=example.com:/path/to/repo/.git/annex/objects/ --sameas=foo + +Since the default hash directory paths are different for rsync than for a +git-annex repository, getting an object will first try the wrong hash path, +which does lead to rsync complaining to stderr. But then it will fall back +to a hash path that works. + +Sending an object to the rsync special remote will store it in a hash path +different from the one that git-annex usually uses. So later switching to using +git-annex in that repository will result in some unusual behavior, since +it won't see some files that were put there. `git-annex fsck` will actually +recover from this too, eg: + + fsck newfile (normalizing object location) (checksum...) ok + +There are enough problems that I can't really recommend this, +it just seemed worth pointing out that it can be done. +"""]]
fix example output
diff --git a/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn b/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
index cac81d06b4..1a080c0a4b 100644
--- a/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
+++ b/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
@@ -38,7 +38,7 @@ Now the remote can be used like any other remote.
But, when you try to get a file out of S3, it'll start a restore:
# git annex get my_cool_big_file
- get my_cool_big_file (from s3...) (gpg)
+ get my_cool_big_file (from mys3...) (gpg)
Restore initiated, try again later.
failed
S3: support restore=yes
When used with GLACIER, this is similar to Amazon Glacier, which is
now deprecated by Amazon. It can also be used with other storage classes
like DEEP_ARCHIVE and lifecycle rules. Which is why it's a separate config.
Also added some associated git configs.
This needs aws-0.25.2.
Sponsored-by: Brock Spratlen on Patreon
When used with GLACIER, this is similar to Amazon Glacier, which is
now deprecated by Amazon. It can also be used with other storage classes
like DEEP_ARCHIVE and lifecycle rules. Which is why it's a separate config.
Also added some associated git configs.
This needs aws-0.25.2.
Sponsored-by: Brock Spratlen on Patreon
diff --git a/CHANGELOG b/CHANGELOG
index 97d64583f1..3de0bf454d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,9 +4,12 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
(Needs aws-0.25)
* Add a build warning when the version of aws being built against is
too old. 0.25.1 is needed to support Google Cloud Storage.
- * stack.yaml: Use aws-0.25.1.
* Added TRANSFER-RETRIEVE-URL extension to the external special remote
protocol.
+ * S3: Support restore=yes, when used with storageclass=DEEP_ARCHIVE and
+ similar. This is equivilant to the now deprecated Amazon Glacier.
+ (Needs aws-0.25.2)
+ * stack.yaml: Use aws-0.25.2.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Remote/S3.hs b/Remote/S3.hs
index e8401d80ef..002cdc1958 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -12,8 +12,8 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE CPP #-}
-#if ! MIN_VERSION_aws(0,25,1)
-#warning Building with an old version of the aws library. Recommend updating to 0.25.1, which fixes bugs and is needed for some features.
+#if ! MIN_VERSION_aws(0,25,2)
+#warning Building with an old version of the aws library. Recommend updating to 0.25.2, which fixes bugs and is needed for some features.
#endif
module Remote.S3 (remote, iaHost, configIA, iaItemUrl) where
@@ -96,6 +96,8 @@ remote = specialRemoteType $ RemoteType
(FieldDesc "part size for multipart upload (eg 1GiB)")
, optionalStringParser storageclassField
(FieldDesc "storage class, eg STANDARD or STANDARD_IA or ONEZONE_IA")
+ , yesNoParser restoreField (Just False)
+ (FieldDesc "enable restore of files not currently accessible in the bucket")
, optionalStringParser fileprefixField
(FieldDesc "prefix to add to filenames in the bucket")
, yesNoParser versioningField (Just False)
@@ -151,7 +153,10 @@ storageclassField = Accepted "storageclass"
fileprefixField :: RemoteConfigField
fileprefixField = Accepted "fileprefix"
-
+
+restoreField :: RemoteConfigField
+restoreField = Accepted "restore"
+
publicField :: RemoteConfigField
publicField = Accepted "public"
@@ -208,7 +213,7 @@ gen r u rc gc rs = do
where
new c cst info hdl magic = Just $ specialRemote c
(store hdl this info magic)
- (retrieve hdl rs c info)
+ (retrieve gc hdl rs c info)
(remove hdl this info)
(checkKey hdl rs c info)
this
@@ -432,14 +437,14 @@ storeHelper info h magic f object p = liftIO $ case partSize info of
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
-retrieve :: S3HandleVar -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
-retrieve hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
+retrieve :: RemoteGitConfig -> S3HandleVar -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
+retrieve gc hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Right h ->
eitherS3VersionID info rs c k (T.pack $ bucketObject info k) >>= \case
Left failreason -> do
warning (UnquotedString failreason)
giveup "cannot download content"
- Right loc -> retrieveHelper info h loc f p iv
+ Right loc -> retrieveHelper gc info h loc f p iv
Left S3HandleNeedCreds ->
getPublicWebUrls' rs info c k >>= \case
Left failreason -> do
@@ -448,17 +453,44 @@ retrieve hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Right us -> unlessM (withUrlOptions Nothing $ downloadUrl False k p iv us f) $
giveup "failed to download content"
-retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
-retrieveHelper info h loc f p iv = retrieveHelper' h f p iv $
+retrieveHelper :: RemoteGitConfig -> S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
+retrieveHelper gc info h loc f p iv = retrieveHelper' gc info h f p iv $
case loc of
Left o -> S3.getObject (bucket info) o
Right (S3VersionID o vid) -> (S3.getObject (bucket info) o)
{ S3.goVersionId = Just vid }
-retrieveHelper' :: S3Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
-retrieveHelper' h f p iv req = liftIO $ runResourceT $ do
- S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
+retrieveHelper' :: RemoteGitConfig -> S3Info -> S3Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
+retrieveHelper' gc info h f p iv req = liftIO $ runResourceT $ do
+ S3.GetObjectResponse { S3.gorResponse = rsp } <- handlerestore $
+ sendS3Handle h req
Url.sinkResponseFile p iv zeroBytesProcessed f WriteMode rsp
+ where
+ needrestore st = restore info && statusCode st == 403
+ handlerestore a = catchJust (Url.matchStatusCodeException needrestore) a $ \_ -> do
+#if MIN_VERSION_aws(0,25,2)
+ let tier = case remoteAnnexS3RestoreTier gc of
+ Just "bulk" -> S3.RestoreObjectTierBulk
+ Just "expedited" -> S3.RestoreObjectTierExpedited
+ _ -> S3.RestoreObjectTierStandard
+ let days = case remoteAnnexS3RestoreDays gc of
+ Just n -> S3.RestoreObjectLifetimeDays n
+ Nothing -> S3.RestoreObjectLifetimeDays 1
+ let restorereq = S3.restoreObject
+ (S3.goBucket req)
+ (S3.goObjectName req)
+ tier
+ days
+ restoreresp <- sendS3Handle h $ restorereq
+ { S3.roVersionId = S3.goVersionId req
+ }
+ case restoreresp of
+ S3.RestoreObjectAccepted -> giveup "Restore initiated, try again later."
+ S3.RestoreObjectAlreadyInProgress -> giveup "Restore in progress, try again later."
+ S3.RestoreObjectAlreadyRestored -> a
+#else
+ giveup "git-annex is built with too old a version of the aws library to support restore=yes"
+#endif
remove :: S3HandleVar -> Remote -> S3Info -> Remover
remove hv r info _proof k = withS3HandleOrFail (uuid r) hv $ \h -> do
@@ -529,7 +561,7 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
retrieveExportS3 hv r info k loc f p = verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
withS3Handle hv $ \case
- Right h -> retrieveHelper info h (Left (T.pack exportloc)) f p iv
+ Right h -> retrieveHelper (gitconfig r) info h (Left (T.pack exportloc)) f p iv
Left S3HandleNeedCreds -> case getPublicUrlMaker info of
Just geturl -> either giveup return =<<
withUrlOptions Nothing
@@ -728,7 +760,7 @@ retrieveExportWithContentIdentifierS3 hv r rs info loc (cid:_) dest gk p =
where
go iv = withS3Handle hv $ \case
Right h -> do
- rewritePreconditionException $ retrieveHelper' h dest p iv $
+ rewritePreconditionException $ retrieveHelper' (gitconfig r) info h dest p iv $
limitGetToContentIdentifier cid $
S3.getObject (bucket info) o
k <- either return id gk
@@ -1036,6 +1068,7 @@ data S3Info = S3Info
, partSize :: Maybe Integer
, isIA :: Bool
, versioning :: Bool
+ , restore :: Bool
, publicACL :: Bool
, publicurl :: Maybe URLString
, host :: Maybe String
@@ -1060,6 +1093,8 @@ extractS3Info c = do
, isIA = configIA c
, versioning = fromMaybe False $
getRemoteConfigValue versioningField c
+ , restore = fromMaybe False $
+ getRemoteConfigValue restoreField c
, publicACL = fromMaybe False $
getRemoteConfigValue publicField c
, publicurl = getRemoteConfigValue publicurlField c
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 156b88c32c..81d8201ed3 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -439,6 +439,8 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexTahoe :: Maybe FilePath
, remoteAnnexBupSplitOptions :: [String]
, remoteAnnexDirectory :: Maybe FilePath
+ , remoteAnnexS3RestoreTier :: Maybe String
+ , remoteAnnexS3RestoreDays :: Maybe Integer
, remoteAnnexAndroidDirectory :: Maybe FilePath
, remoteAnnexAndroidSerial :: Maybe String
, remoteAnnexGCrypt :: Maybe String
@@ -541,6 +543,8 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexTahoe = getmaybe TahoeField
, remoteAnnexBupSplitOptions = getoptions BupSplitOptionsField
, remoteAnnexDirectory = notempty $ getmaybe DirectoryField
+ , remoteAnnexS3RestoreTier = notempty $ getmaybe S3RestoreTierField
+ , remoteAnnexS3RestoreDays = getmayberead S3RestoreDaysField
, remoteAnnexAndroidDirectory = notempty $ getmaybe AndroidDirectoryField
, remoteAnnexAndroidSerial = notempty $ getmaybe AndroidSerialField
, remoteAnnexGCrypt = notempty $ getmaybe GCryptField
@@ -625,6 +629,8 @@ data RemoteGitConfigField
| TahoeField
| BupSplitOptionsField
| DirectoryField
+ | S3RestoreTierField
+ | S3RestoreDaysField
| AndroidDirectoryField
| AndroidSerialField
| GCryptField
@@ -697,6 +703,8 @@ remoteGitConfigField = \case
TahoeField -> uninherited True "tahoe"
BupSplitOptionsField -> uninherited True "bup-split-options"
(Diff truncated)
initial
request on making git-annex work without git-annex-shell to get files from remote ssh
request on making git-annex work without git-annex-shell to get files from remote sshdiff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn new file mode 100644 index 0000000000..23f4fac3fd --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn @@ -0,0 +1,26 @@ +I thought I had an issue on this but failed to find :-/ + +ATM git-annex does not even bother to suggest or do anything about a remote git/git-annex repository if there is no git-annex (`git-annex-shell`) available there: + +``` +yoh@typhon:/mnt/DATA/data/dbic/QA$ git annex list + + Unable to parse git config from origin + + Remote origin does not have git-annex installed; setting annex-ignore + + This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git annex enableremote origin +here +|datasets.datalad.org +||origin +|||web +||||bittorrent +||||| +_X___ .datalad/metadata/objects/06/cn-2c3eade47bd2d9052658c6a9d10a57.xz + +... +``` + +a workaround, it seems as [it was posted over a decade ago](https://superuser.com/questions/526705/hosting-a-git-annex-on-a-server-without-git-annex-installed) (and now even google ai suggests that) is to setup an additional `rsync` remote and use it to fetch. upon a quick try didn't work for me but could have been an operator error... + +As files are available over regular ssh/scp and even rsync over ssh - I really do not see a technical problem for git-annex to establish interoperability with such a remote, at least for reading from, without having remote git-annex-shell. That should make it possible to access git-annex'es on servers which might be running some odd setups where installation of git-annex in user-space would be tricky if not impossible.
Added a comment: Q: any way to "install" without having root/admin privileges
diff --git a/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment b/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment new file mode 100644 index 0000000000..44e4abbc83 --- /dev/null +++ b/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="Q: any way to "install" without having root/admin privileges" + date="2025-12-07T19:38:46Z" + content=""" +need to install on a box where I am not an admin, just to copy the files from it (odd that git-annex can't just get anything it needs since SSH is there and working fine!). +"""]]
Revert "update"
This reverts commit 550c6b482845ec978aa796191c9931fe19dbc369.
This reverts commit 550c6b482845ec978aa796191c9931fe19dbc369.
diff --git a/doc/thanks/list b/doc/thanks/list index 563a0b6b21..dfeda7a813 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -126,99 +126,3 @@ Lilia.Nanne, Dusty Mabe, mpol, Andrew Poelstra, -AlexS, -Amitai Schleier, -Andrew, -anon, -Anthony DeRobertis, -Anton Grensjö, -Art S, -Arthur Lutz, -Ben, -Boyd Stephen Smith, -Bruno BEAUFILS, -Caleb Allen, -Calvin Beck, -Chris Lamb, -Christian Diller, -Christopher Baines, -Christopher Goes, -Dave Pifke, -don haraway, -DuncanConstruction, -encryptio, -Eric Drechsel, -ers35, -Evgeni Ku, -Fernando Jimenez, -fiatjaf, -Francois Marier, -Gabriel Lee, -Greg Grossmeier, -HeartBreak KB Official, -Ignacio, -Ilya Baryshnikov, -James (purpleidea), -James Valleroy, -Jan, -Jason Woofenden, -Jeff Goeke-Smith, -Jim, -Jo, -Johannes Schlatow, -John Peloquin, -Jon D, -jose_d, -Josh Taylor, -Josh Tilles, -Lacnic, -Land Reaver, -Lee Hinman, -Lee-kai Wang, -Lukas Platz, -Lukas Waymann, -Madison McGaffin, -Maggie Hess, -Matthew Willcockson, -Matthias Urlichs, -Matthieu, -Mattias J, -Mica, -Michal Politowski, -Mika Pflüger, -mo, -Mohit Munjani, -Nahum Shalman, -NinjaTrappeur, -Ole-Morten Duesund, -Paul Tötterman, -Pedro Luz, -Peter, -Renaud Casenave-Péré, -rjbl, -Ryan Newton, -Rémi Vanicat, -Sergey Karpukhin, -Shane-o, -Shawn Butts, -Stan Yamane, -Stephan Burkhardt, -Stephan Meister, -SvenDowideit, -sww, -Teremu HAMBLIN, -Thom May, -Thomas Ferris Nicolaisen, -Thomas Hochstein, -Thomas Schwinge, -Tim Howes, -tj, -Trent Lloyd, -Tyler Cipriani, -Valeria_, -Walltime, -wawatcz, -Will Hughes, -Willard Korfhage, -wzhd, -Zoé Cassiopée Gauthier,
update
diff --git a/doc/thanks/list b/doc/thanks/list index dfeda7a813..563a0b6b21 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -126,3 +126,99 @@ Lilia.Nanne, Dusty Mabe, mpol, Andrew Poelstra, +AlexS, +Amitai Schleier, +Andrew, +anon, +Anthony DeRobertis, +Anton Grensjö, +Art S, +Arthur Lutz, +Ben, +Boyd Stephen Smith, +Bruno BEAUFILS, +Caleb Allen, +Calvin Beck, +Chris Lamb, +Christian Diller, +Christopher Baines, +Christopher Goes, +Dave Pifke, +don haraway, +DuncanConstruction, +encryptio, +Eric Drechsel, +ers35, +Evgeni Ku, +Fernando Jimenez, +fiatjaf, +Francois Marier, +Gabriel Lee, +Greg Grossmeier, +HeartBreak KB Official, +Ignacio, +Ilya Baryshnikov, +James (purpleidea), +James Valleroy, +Jan, +Jason Woofenden, +Jeff Goeke-Smith, +Jim, +Jo, +Johannes Schlatow, +John Peloquin, +Jon D, +jose_d, +Josh Taylor, +Josh Tilles, +Lacnic, +Land Reaver, +Lee Hinman, +Lee-kai Wang, +Lukas Platz, +Lukas Waymann, +Madison McGaffin, +Maggie Hess, +Matthew Willcockson, +Matthias Urlichs, +Matthieu, +Mattias J, +Mica, +Michal Politowski, +Mika Pflüger, +mo, +Mohit Munjani, +Nahum Shalman, +NinjaTrappeur, +Ole-Morten Duesund, +Paul Tötterman, +Pedro Luz, +Peter, +Renaud Casenave-Péré, +rjbl, +Ryan Newton, +Rémi Vanicat, +Sergey Karpukhin, +Shane-o, +Shawn Butts, +Stan Yamane, +Stephan Burkhardt, +Stephan Meister, +SvenDowideit, +sww, +Teremu HAMBLIN, +Thom May, +Thomas Ferris Nicolaisen, +Thomas Hochstein, +Thomas Schwinge, +Tim Howes, +tj, +Trent Lloyd, +Tyler Cipriani, +Valeria_, +Walltime, +wawatcz, +Will Hughes, +Willard Korfhage, +wzhd, +Zoé Cassiopée Gauthier,
typo
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index b8fd29522c..5a1f9fa969 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -463,7 +463,7 @@ The two protocol versions are actually identical. Old versions of git-annex that supported only `VERSION 1` had a bug in their implementation of the part of the protocol documented in the [[export_and_import_appendix]]. -The bug could result in ontent being exported to the wrong file. +The bug could result in content being exported to the wrong file. External special remotes that implement that should use `VERSION 2` to avoid talking to the buggy old version of git-annex.
Added TRANSFER-RETRIEVE-URL extension to the external special remote protocol
Since retrieveKeyFileM has to use fileRetriever before it sees this
response, which uses tailVerify, it's unfortunately not possible to
stream the url download to do incremental verification. That would be
more efficient.
Similarly, watchFileSize does some extra work, but the progress meter is
updated as the content streams in.
The downloadFailed case is never reached I think, since the url list
provided to downloadUrl' is not empty.
Sponsored-by: Dartmouth College's OpenNeuro project
Since retrieveKeyFileM has to use fileRetriever before it sees this
response, which uses tailVerify, it's unfortunately not possible to
stream the url download to do incremental verification. That would be
more efficient.
Similarly, watchFileSize does some extra work, but the progress meter is
updated as the content streams in.
The downloadFailed case is never reached I think, since the url list
provided to downloadUrl' is not empty.
Sponsored-by: Dartmouth College's OpenNeuro project
diff --git a/Annex/Content.hs b/Annex/Content.hs
index e10329d8c2..edb1052d52 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1,6 +1,6 @@
{- git-annex file content managing
-
- - Copyright 2010-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -47,6 +47,7 @@ module Annex.Content (
listKeys',
saveState,
downloadUrl,
+ downloadUrl',
preseedTmp,
dirKeys,
withObjectLoc,
@@ -881,13 +882,21 @@ saveState nocommit = doSideAction $ do
- that failed.
-}
downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> OsPath -> Url.UrlOptions -> Annex Bool
-downloadUrl listfailedurls k p iv urls file uo =
+downloadUrl listfailedurls k p iv urls file uo =
+ downloadUrl' listfailedurls k p iv urls file uo >>= \case
+ Right r -> return r
+ Left e -> do
+ warning $ UnquotedString e
+ return False
+
+downloadUrl' :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> OsPath -> Url.UrlOptions -> Annex (Either String Bool)
+downloadUrl' listfailedurls k p iv urls file uo =
-- Poll the file to handle configurations where an external
-- download command is used.
meteredFile file (Just p) k (go urls [])
where
go (u:us) errs p' = Url.download' p' iv u file uo >>= \case
- Right () -> return True
+ Right () -> return (Right True)
Left err -> do
-- If the incremental verifier was fed anything
-- while the download that failed ran, it's unable
@@ -899,14 +908,12 @@ downloadUrl listfailedurls k p iv urls file uo =
_ -> noop
Nothing -> noop
go us ((u, err) : errs) p'
- go [] [] _ = return False
- go [] errs@((_, err):_) _ = do
+ go [] [] _ = return (Right False)
+ go [] errs@((_, err):_) _ = return $ Left $
if listfailedurls
- then warning $ UnquotedString $
- unlines $ flip map errs $ \(u, err') ->
- u ++ " " ++ err'
- else warning $ UnquotedString err
- return False
+ then unlines $ flip map errs $ \(u, err') ->
+ u ++ " " ++ err'
+ else err
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}
diff --git a/CHANGELOG b/CHANGELOG
index 018efd7a3d..ac29f65722 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,8 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
* Add a build warning when the version of aws being built against is
too old. 0.25.1 is needed to support Google Cloud Storage.
* stack.yaml: Use aws-0.25.1.
+ * Added TRANSFER-RETRIEVE-URL extension to the external special remote
+ protocol.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Remote/External.hs b/Remote/External.hs
index c392b3f31e..07d1272f24 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -1,6 +1,6 @@
{- External special remote interface.
-
- - Copyright 2013-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -93,7 +93,7 @@ gen rt externalprogram r u rc gc rs
let exportactions = if exportsupported
then ExportActions
{ storeExport = storeExportM external
- , retrieveExport = retrieveExportM external
+ , retrieveExport = retrieveExportM external gc
, removeExport = removeExportM external
, checkPresentExport = checkPresentExportM external
, removeExportDirectory = Just $ removeExportDirectoryM external
@@ -116,7 +116,7 @@ gen rt externalprogram r u rc gc rs
cheapexportsupported
return $ Just $ specialRemote c
(storeKeyM external)
- (retrieveKeyFileM external)
+ (retrieveKeyFileM external gc)
(removeKeyM external)
(checkPresentM external)
rmt
@@ -248,17 +248,19 @@ storeKeyM external = fileStorer $ \k f p ->
result (Left (respErrorMessage "TRANSFER" errmsg))
_ -> Nothing
-retrieveKeyFileM :: External -> Retriever
-retrieveKeyFileM external = fileRetriever $ \d k p ->
- either giveup return =<< watchFileSize d p (go d k)
+retrieveKeyFileM :: External -> RemoteGitConfig -> Retriever
+retrieveKeyFileM external gc = fileRetriever $ \dest k p ->
+ either giveup return =<< watchFileSize dest p (go dest k)
where
- go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromOsPath d)) k (Just p) $ \resp ->
+ go dest k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromOsPath dest)) k (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Download k'
| k == k' -> result $ Right ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> result $ Left $
respErrorMessage "TRANSFER" errmsg
+ TRANSFER_RETRIEVE_URL k' url
+ | k == k' -> retrieveUrl' gc url dest k p
_ -> Nothing
removeKeyM :: External -> Remover
@@ -306,8 +308,8 @@ storeExportM external f k loc p = either giveup return =<< go
_ -> Nothing
req sk = TRANSFEREXPORT Upload sk (fromOsPath f)
-retrieveExportM :: External -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
-retrieveExportM external k loc dest p = do
+retrieveExportM :: External -> RemoteGitConfig -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
+retrieveExportM external gc k loc dest p = do
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv dest $
either giveup return =<< go
@@ -317,6 +319,8 @@ retrieveExportM external k loc dest p = do
| k == k' -> result $ Right ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> result $ Left $ respErrorMessage "TRANSFER" errmsg
+ TRANSFER_RETRIEVE_URL k' url
+ | k == k' -> retrieveUrl' gc url dest k p
UNSUPPORTED_REQUEST ->
result $ Left "TRANSFEREXPORT not implemented by external special remote"
_ -> Nothing
@@ -838,7 +842,18 @@ retrieveUrl :: RemoteGitConfig -> Retriever
retrieveUrl gc = fileRetriever' $ \f k p iv -> do
us <- getWebUrls k
unlessM (withUrlOptions (Just gc) $ downloadUrl True k p iv us f) $
- giveup "failed to download content"
+ giveup downloadFailed
+
+retrieveUrl' :: RemoteGitConfig -> URLString -> OsPath -> Key -> MeterUpdate -> Maybe (Annex (ResponseHandlerResult (Either String ())))
+retrieveUrl' gc url dest k p =
+ Just $ withUrlOptions (Just gc) $ \uo ->
+ downloadUrl' False k p Nothing [url] dest uo >>= return . \case
+ Left msg -> Result (Left msg)
+ Right True -> Result (Right ())
+ Right False -> Result (Left downloadFailed)
+
+downloadFailed :: String
+downloadFailed = "failed to download content"
checkKeyUrl :: RemoteGitConfig -> CheckPresent
checkKeyUrl gc k = do
diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs
index 58bbc9f656..f265d4a1bd 100644
--- a/Remote/External/Types.hs
+++ b/Remote/External/Types.hs
@@ -1,6 +1,6 @@
{- External special remote data types.
-
- - Copyright 2013-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -116,6 +116,7 @@ supportedExtensionList = ExtensionList
[ "INFO"
, "GETGITREMOTENAME"
, "UNAVAILABLERESPONSE"
+ , "TRANSFER-RETRIEVE-URL"
, asyncExtension
]
@@ -243,6 +244,7 @@ data Response
| PREPARE_FAILURE ErrorMsg
| TRANSFER_SUCCESS Direction Key
| TRANSFER_FAILURE Direction Key ErrorMsg
+ | TRANSFER_RETRIEVE_URL Key URLString
| CHECKPRESENT_SUCCESS Key
| CHECKPRESENT_FAILURE Key
| CHECKPRESENT_UNKNOWN Key ErrorMsg
@@ -281,6 +283,7 @@ instance Proto.Receivable Response where
(Diff truncated)
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment new file mode 100644 index 0000000000..89c69c47e5 --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-05T17:23:24Z" + content=""" +TRANSFEREXPORT, in the "simple export interface" also +uses TRANSFER-SUCCESS/TRANSFER-FAILURE, and should +also support this extension. +"""]]
remove incorrect comment
tailVerify is already used so it does not re-read at end.
I don't think it will be possible to avoid using tailVerify with this
extension since it's already started by the time the response comes back.
tailVerify is already used so it does not re-read at end.
I don't think it will be possible to avoid using tailVerify with this
extension since it's already started by the time the response comes back.
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment deleted file mode 100644 index b3a27d4c7e..0000000000 --- a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment +++ /dev/null @@ -1,9 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 3""" - date="2025-12-05T16:58:21Z" - content=""" -An added benefit of this will be that git-annex can stream hash -while downloading, so it will avoid re-reading the file at the end to -verifiy it. -"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment new file mode 100644 index 0000000000..b3a27d4c7e --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-05T16:58:21Z" + content=""" +An added benefit of this will be that git-annex can stream hash +while downloading, so it will avoid re-reading the file at the end to +verifiy it. +"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment b/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment new file mode 100644 index 0000000000..f0258a67d2 --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-05T16:35:05Z" + content=""" +One problem with this design is that there may be HTTP headers that are +used for authorization, rather than putting authentication in the url. + +I think we may have talked about this at the hackfest, and came down on the +side of simplicity, supporting only an url. Can't quite remember. + +It might also be possible to redirect to an url when storing an object. + +I think that protocol design should leave these possibilities open to be +implemented later. +"""]]
comment
diff --git a/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment new file mode 100644 index 0000000000..291ca824cd --- /dev/null +++ b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-05T16:31:47Z" + content=""" +Is the assistant running in the local repository, or on the remote? +"""]]
webapp: Remove support for local pairing
As a feature only supported by the webapp, and not by git-annex at the
command line, this is by now a very obscure corner of git-annex, and not
one I want to keep maintaining.
It's worth removing it to avoid the security exposure alone. People using
the assistant w/o the webapp probably don't expect it to be listening on
a UDP port for a handrolled protocol, but it was.
The webapp has supported pairing via magic-wormhole since 2016, which
makes a link, including between local computers, albeit with the overhead
of tor. That sort of covers the same use case. Of course advanced users
can easily enough add a ssh remote to their repository themselves, using
a hostname on the local network.
git-annex-p2p-iroh would be a great alternative, since it should
communicate over LAN when both computers are on the same one. Before
supporting that in the webapp, dumbpipe would need to be reasonably
likely to be installed.
Sponsored-by: unqueued
As a feature only supported by the webapp, and not by git-annex at the
command line, this is by now a very obscure corner of git-annex, and not
one I want to keep maintaining.
It's worth removing it to avoid the security exposure alone. People using
the assistant w/o the webapp probably don't expect it to be listening on
a UDP port for a handrolled protocol, but it was.
The webapp has supported pairing via magic-wormhole since 2016, which
makes a link, including between local computers, albeit with the overhead
of tor. That sort of covers the same use case. Of course advanced users
can easily enough add a ssh remote to their repository themselves, using
a hostname on the local network.
git-annex-p2p-iroh would be a great alternative, since it should
communicate over LAN when both computers are on the same one. Before
supporting that in the webapp, dumbpipe would need to be reasonably
likely to be installed.
Sponsored-by: unqueued
diff --git a/Assistant.hs b/Assistant.hs
index 64d2f3b6c5..9616895761 100644
--- a/Assistant.hs
+++ b/Assistant.hs
@@ -40,9 +40,6 @@ import Assistant.Threads.Glacier
#ifdef WITH_WEBAPP
import Assistant.WebApp
import Assistant.Threads.WebApp
-#ifdef WITH_PAIRING
-import Assistant.Threads.PairListener
-#endif
#else
import Assistant.Types.UrlRenderer
#endif
@@ -155,11 +152,6 @@ startDaemon assistant foreground startdelay cannotrun listenhost listenport star
then webappthread
else webappthread ++
[ watch commitThread
-#ifdef WITH_WEBAPP
-#ifdef WITH_PAIRING
- , assist $ pairListenerThread urlrenderer
-#endif
-#endif
, assist pushThread
, assist pushRetryThread
, assist exportThread
diff --git a/Assistant/Pairing/MakeRemote.hs b/Assistant/Pairing/MakeRemote.hs
deleted file mode 100644
index f4468bc07c..0000000000
--- a/Assistant/Pairing/MakeRemote.hs
+++ /dev/null
@@ -1,98 +0,0 @@
-{- git-annex assistant pairing remote creation
- -
- - Copyright 2012 Joey Hess <id@joeyh.name>
- -
- - Licensed under the GNU AGPL version 3 or higher.
- -}
-
-module Assistant.Pairing.MakeRemote where
-
-import Assistant.Common
-import Assistant.Ssh
-import Assistant.Pairing
-import Assistant.Pairing.Network
-import Assistant.MakeRemote
-import Assistant.Sync
-import Config.Cost
-import Config
-import qualified Types.Remote as Remote
-
-import Network.Socket
-import qualified Data.Text as T
-
-{- Authorized keys are set up before pairing is complete, so that the other
- - side can immediately begin syncing. -}
-setupAuthorizedKeys :: PairMsg -> OsPath -> IO ()
-setupAuthorizedKeys msg repodir = case validateSshPubKey $ remoteSshPubKey $ pairMsgData msg of
- Left err -> giveup err
- Right pubkey -> do
- absdir <- absPath repodir
- unlessM (liftIO $ addAuthorizedKeys True absdir pubkey) $
- giveup "failed setting up ssh authorized keys"
-
-{- When local pairing is complete, this is used to set up the remote for
- - the host we paired with. -}
-finishedLocalPairing :: PairMsg -> SshKeyPair -> Assistant ()
-finishedLocalPairing msg keypair = do
- sshdata <- liftIO $ installSshKeyPair keypair =<< pairMsgToSshData msg
- {- Ensure that we know the ssh host key for the host we paired with.
- - If we don't, ssh over to get it. -}
- liftIO $ unlessM (knownHost $ sshHostName sshdata) $
- void $ sshTranscript
- [ sshOpt "StrictHostKeyChecking" "no"
- , sshOpt "NumberOfPasswordPrompts" "0"
- , "-n"
- ]
- (genSshHost (sshHostName sshdata) (sshUserName sshdata))
- ("git-annex-shell -c configlist " ++ T.unpack (sshDirectory sshdata))
- Nothing
- r <- liftAnnex $ addRemote $ makeSshRemote sshdata
- repo <- liftAnnex $ Remote.getRepo r
- liftAnnex $ setRemoteCost repo semiExpensiveRemoteCost
- syncRemote r
-
-{- Mostly a straightforward conversion. Except:
- - * Determine the best hostname to use to contact the host.
- - * Strip leading ~/ from the directory name.
- -}
-pairMsgToSshData :: PairMsg -> IO SshData
-pairMsgToSshData msg = do
- let d = pairMsgData msg
- hostname <- liftIO $ bestHostName msg
- let dir = case remoteDirectory d of
- ('~':'/':v) -> v
- v -> v
- return SshData
- { sshHostName = T.pack hostname
- , sshUserName = Just (T.pack $ remoteUserName d)
- , sshDirectory = T.pack dir
- , sshRepoName = genSshRepoName hostname (toOsPath dir)
- , sshPort = 22
- , needsPubKey = True
- , sshCapabilities = [GitAnnexShellCapable, GitCapable, RsyncCapable]
- , sshRepoUrl = Nothing
- }
-
-{- Finds the best hostname to use for the host that sent the PairMsg.
- -
- - If remoteHostName is set, tries to use a .local address based on it.
- - That's the most robust, if this system supports .local.
- - Otherwise, looks up the hostname in the DNS for the remoteAddress,
- - if any. May fall back to remoteAddress if there's no DNS. Ugh. -}
-bestHostName :: PairMsg -> IO HostName
-bestHostName msg = case remoteHostName $ pairMsgData msg of
- Just h -> do
- let localname = h ++ ".local"
- addrs <- catchDefaultIO [] $
- getAddrInfo Nothing (Just localname) Nothing
- maybe fallback (const $ return localname) (headMaybe addrs)
- Nothing -> fallback
- where
- fallback = do
- let a = pairMsgAddr msg
- let sockaddr = case a of
- IPv4Addr addr -> SockAddrInet (fromInteger 0) addr
- IPv6Addr addr -> SockAddrInet6 (fromInteger 0) 0 addr 0
- fromMaybe (showAddr a)
- <$> catchDefaultIO Nothing
- (fst <$> getNameInfo [] True False sockaddr)
diff --git a/Assistant/Pairing/Network.hs b/Assistant/Pairing/Network.hs
deleted file mode 100644
index 62a4ea02e8..0000000000
--- a/Assistant/Pairing/Network.hs
+++ /dev/null
@@ -1,132 +0,0 @@
-{- git-annex assistant pairing network code
- -
- - All network traffic is sent over multicast UDP. For reliability,
- - each message is repeated until acknowledged. This is done using a
- - thread, that gets stopped before the next message is sent.
- -
- - Copyright 2012 Joey Hess <id@joeyh.name>
- -
- - Licensed under the GNU AGPL version 3 or higher.
- -}
-
-module Assistant.Pairing.Network where
-
-import Assistant.Common
-import Assistant.Pairing
-import Assistant.DaemonStatus
-import Utility.ThreadScheduler
-import Utility.Verifiable
-
-import Network.Multicast
-import Network.Info
-import Network.Socket
-import qualified Network.Socket.ByteString as B
-import qualified Data.ByteString.UTF8 as BU8
-import qualified Data.Map as M
-import Control.Concurrent
-
-{- This is an arbitrary port in the dynamic port range, that could
- - conceivably be used for some other broadcast messages.
- - If so, hope they ignore the garbage from us; we'll certainly
- - ignore garbage from them. Wild wild west. -}
-pairingPort :: PortNumber
-pairingPort = 55556
-
-{- Goal: Reach all hosts on the same network segment.
- - Method: Use same address that avahi uses. Other broadcast addresses seem
- - to not be let through some routers. -}
-multicastAddress :: AddrClass -> HostName
-multicastAddress IPv4AddrClass = "224.0.0.251"
-multicastAddress IPv6AddrClass = "ff02::fb"
-
-{- Multicasts a message repeatedly on all interfaces, with a 2 second
- - delay between each transmission. The message is repeated forever
- - unless a number of repeats is specified.
- -
- - The remoteHostAddress is set to the interface's IP address.
- -
- - Note that new sockets are opened each time. This is hardly efficient,
- - but it allows new network interfaces to be used as they come up.
- - On the other hand, the expensive DNS lookups are cached.
- -}
-multicastPairMsg :: Maybe Int -> Secret -> PairData -> PairStage -> IO ()
-multicastPairMsg repeats secret pairdata stage = go M.empty repeats
- where
- go _ (Just 0) = noop
- go cache n = do
- addrs <- activeNetworkAddresses
- let cache' = updatecache cache addrs
- mapM_ (sendinterface cache') addrs
- threadDelaySeconds (Seconds 2)
- go cache' $ pred <$> n
- {- The multicast library currently chokes on ipv6 addresses. -}
- sendinterface _ (IPv6Addr _) = noop
- sendinterface cache i = void $ tryIO $
(Diff truncated)