Recent changes to this wiki:
note
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 5027412b50..c325adc9c1 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1327,6 +1327,9 @@ repository, using [[git-annex-config]]. See its man page for a list.)
will not see a progress display for their drop action. So this is best
used with a fast remote.

+ And, if the remote is not accessible, or a file fails to be moved to it,
+ the file will not be dropped from the repository.
+
* `annex.url`

When a remote has a http url, the first time git-annex uses the remote
annex.trashbin
Note that, in the unlikely event that the reasoning in commit
5a081fc246664e7b5c17023dddfb8d123eef64e5 is wrong and there is some
situation where Annex.remotelist is not filled at a time when this is used,
the user will get back the "annex.trashbin is set to the name of an unknown
remote" error for a remote that does exist.
diff --git a/Annex/Content.hs b/Annex/Content.hs
index c113620cc9..876f526785 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -96,7 +96,7 @@ import Annex.ReplaceFile
import Annex.AdjustedBranch (adjustedBranchRefresh)
import Annex.DirHashes
import Messages.Progress
-import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..))
+import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..), name, storeKey, uuid)
import Types.NumCopies
import Types.Key
import Types.Transfer
@@ -779,7 +779,8 @@ unlinkAnnex key = do
{- Removes a key's file from .git/annex/objects/ -}
removeAnnex :: Annex [Remote] -> ContentRemovalLock -> Annex ()
-removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file ->
+removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file -> do
+ putouttrash
cleanObjectLoc key $ do
secureErase file
liftIO $ removeWhenExistsWith removeFile file
@@ -800,6 +801,20 @@ removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file ->
-- removal process, so thaw it.
, void $ tryIO $ thawContent file
)
+
+ putouttrash = annexTrashbin <$> Annex.getGitConfig >>= \case
+ Nothing -> return ()
+ Just trashbin -> do
+ rs <- remotelist
+ putouttrash' trashbin rs
+
+ putouttrash' _ [] = giveup "annex.trashbin is set to the name of an unknown remote"
+ putouttrash' trashbin (r:rs)
+ | name r == trashbin = do
+ catchNonAsync (storeKey r key (AssociatedFile Nothing) Nothing nullMeterUpdate)
+ (\ex -> giveup $ "Failed to move to annex.trashbin remote; unable to drop " ++ show ex)
+ logChange NoLiveUpdate key (uuid r) InfoPresent
+ | otherwise = putouttrash' trashbin rs
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the file it was moved to. -}
diff --git a/CHANGELOG b/CHANGELOG
index 3de0bf454d..b78fc604bb 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
similar. This is equivalent to the now deprecated Amazon Glacier.
(Needs aws-0.25.2)
* stack.yaml: Use aws-0.25.2.
+ * Added annex.trashbin configuration.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 81d8201ed3..4303c09961 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -158,6 +158,7 @@ data GitConfig = GitConfig
, annexAdjustedBranchRefresh :: Integer
, annexSupportUnlocked :: Bool
, annexAssistantAllowUnlocked :: Bool
+ , annexTrashbin :: Maybe RemoteName
, coreSymlinks :: Bool
, coreSharedRepository :: SharedRepository
, coreQuotePath :: QuotePath
@@ -283,6 +284,7 @@ extractGitConfig configsource r = GitConfig
(getmayberead (annexConfig "adjustedbranchrefresh"))
, annexSupportUnlocked = getbool (annexConfig "supportunlocked") True
, annexAssistantAllowUnlocked = getbool (annexConfig "assistant.allowunlocked") False
+ , annexTrashbin = getmaybe "annex.trashbin"
, coreSymlinks = getbool "core.symlinks" True
, coreSharedRepository = getSharedRepository r
, coreQuotePath = QuotePath (getbool "core.quotepath" True)
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 747bb1eb7f..5027412b50 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1318,6 +1318,15 @@ repository, using [[git-annex-config]]. See its man page for a list.)
After changing this config, you need to re-run `git-annex init` for it
to take effect.
+* `annex.trashbin`
+
+ When this is set to the name of a remote, files that are dropped from the
+ repository will be moved to that remote.
+
+ Note that, if it takes a long time to move a file to the remote, the user
+ will not see a progress display for their drop action. So this is best
+ used with a fast remote.
+
* `annex.url`
When a remote has a http url, the first time git-annex uses the remote
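A minimal way to try out the new config, assuming an already set up remote
named "trash" (the name is arbitrary; any existing remote works):

    # route dropped content to the trash remote
    git config annex.trashbin trash

    # drop now moves the content to the trash remote before removing it
    git annex drop myfile

    # later, to discard the content for real, drop it from that remote too
    git annex drop --from trash myfile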
diff --git a/doc/todo/Delayed_drop_from_remote.mdwn b/doc/todo/Delayed_drop_from_remote.mdwn
index dd2d26bd4e..9c9e6b0ff3 100644
--- a/doc/todo/Delayed_drop_from_remote.mdwn
+++ b/doc/todo/Delayed_drop_from_remote.mdwn
@@ -9,3 +9,5 @@ The point is to have a fast path to recovery from over-eager dropping that might
Or maybe something like this exists already...
[[!tag projects/ICE4]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment b/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment
new file mode 100644
index 0000000000..cd55a59f8b
--- /dev/null
+++ b/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment
@@ -0,0 +1,20 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 5"""
+ date="2025-12-11T19:25:23Z"
+ content="""
+annex.trashbin is implemented.
+
+I am going to close this todo; if it turns out there is some preferred
+content improvement that would help with cleaning out the trash, let's talk
+about that on a new todo. But I'm guessing you'll make do with `find`.
+
+> I think I would deliberately want this to be invisible to the user, since I wouldn't want anyone to actively start relying on it.
+
+With a private remote it's reasonably invisible. The very observant user
+might notice a drop time that scales with the size of the file being
+dropped and be able to guess this feature is being used. And, if there is
+some error when it tries to move the object to the remote, the drop will
+fail. The error message in that case cannot really obscure the fact that
+annex.trashbin is configured.
+"""]]
comments
diff --git a/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment b/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment
new file mode 100644
index 0000000000..5f2a1c94ea
--- /dev/null
+++ b/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2025-12-11T14:56:30Z"
+ content="""
+I don't know much about the static-annex builds, but you may have better
+luck with the [[Linux_standalone]] builds due to their using a more
+conventional libc.
+
+Building git-annex from source is not hard if you can get the stack tool
+installed. It looks like the only currently supported way to do that as a
+freebsd user is to install <https://www.haskell.org/ghcup/> which includes
+stack. Then follow the [[fromsource]] section on "building from source with
+stack".
+"""]]
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment
new file mode 100644
index 0000000000..6ee3bfde1a
--- /dev/null
+++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment
@@ -0,0 +1,11 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2025-12-11T14:42:10Z"
+ content="""
+> IIRC user can just push `git-annex` branch directly after `git-annex` merging remote version locally, right?
+
+Sure, but my point was that they would have to change their workflow due to
+a change on the server that might not be visible to them. Violating least
+surprise.
+"""]]
remove accidentally added file
diff --git a/doc/.git-annex.mdwn.swp b/doc/.git-annex.mdwn.swp
deleted file mode 100644
index 704713d9ce..0000000000
Binary files a/doc/.git-annex.mdwn.swp and /dev/null differ
Added a comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment
new file mode 100644
index 0000000000..3cfbce29d5
--- /dev/null
+++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment
@@ -0,0 +1,18 @@
+[[!comment format=mdwn
+ username="yarikoptic"
+ avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
+ subject="comment 3"
+ date="2025-12-11T13:33:27Z"
+ content="""
+> In that example, the git-annex branch is not pushed to origin after annexed files are sent to it. So how does git-annex on otherhost know that origin has those files? Well, git-annex-shell, when receiving the files, updates the git-annex branch in origin.
+
+
+IIRC user can just push `git-annex` branch directly after `git-annex` merging remote version locally, right?
+
+> Making it read-only would somewhat limit the exposure to all these problems, but if it's read-only, how would any annex objects get into the remote repository in the first place?
+
+my use-case at hands: I manipulate git-annex repo on a linux box on an NFS mount and the original one is freebsd box with bare minimal installation. I have about 50 datasets in a hierarchy. I wanted to backup to another location and it would be more performant to talk to the original freebsd server directly instead of going through NFS mount. I [can't install git-annex on that freebsd box ATM](https://git-annex.branchable.com/install/FreeBSD/#comment-38d4cc2a1e1deb696447cc0a9e149e77).
+
+FWIW, on a second thought, given that I do have a workaround with `rsync` (verified that it works) and unless another more prominent usecase arrives, might be indeed not worth the hassle.
+
+"""]]
Added a comment
diff --git a/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment b/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment
new file mode 100644
index 0000000000..a1d5697c9b
--- /dev/null
+++ b/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment
@@ -0,0 +1,17 @@
+[[!comment format=mdwn
+ username="yarikoptic"
+ avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
+ subject="comment 3"
+ date="2025-12-11T12:20:27Z"
+ content="""
+don't know much about freebsd but static builds from https://git.kyleam.com/static-annex do not work:
+
+```shell
+[yoh@dbic-mrinbox ~/git-annex-10.20250828]$ bin/git-annex
+ELF binary type \"0\" not known.
+bash: bin/git-annex: cannot execute binary file: Exec format error
+[yoh@dbic-mrinbox ~/git-annex-10.20250828]$ file bin/git-annex
+bin/git-annex: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=a6f7f36778ade374ef6572c787cacf6ffa2ec78d, with debug_info, not stripped
+
+```
+"""]]
comment
diff --git a/doc/.git-annex.mdwn.swp b/doc/.git-annex.mdwn.swp
new file mode 100644
index 0000000000..704713d9ce
Binary files /dev/null and b/doc/.git-annex.mdwn.swp differ
diff --git a/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment b/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment
new file mode 100644
index 0000000000..cfbe1dde1e
--- /dev/null
+++ b/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment
@@ -0,0 +1,10 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2025-12-10T18:15:59Z"
+ content="""
+Doesn't FreeBSD support emulating linux syscalls? I suspect that the linux
+standalone tarball could be used to install git-annex in user-space on
+FreeBSD and work that way. Have not tried it; maybe there is a better way
+to install a FreeBSD port as a regular user.
+"""]]
comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment
new file mode 100644
index 0000000000..8191356794
--- /dev/null
+++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment
@@ -0,0 +1,47 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2025-12-10T17:54:01Z"
+ content="""
+As for the idea that git-annex could access a remote without
+git-annex-shell, I think that any efforts in this area are bound to end up
+with some partial implementation of a quarter of git-annex-shell in shell
+script, which is bound to not work as well as the real thing.
+
+Consider that this is a supported workflow:
+
+    git push origin master
+    git-annex copy --to origin
+
+    ssh otherhost
+    cd repo
+    git pull origin
+    git-annex get
+
+In that example, the git-annex branch is not pushed to origin after annexed
+files are sent to it. So how does git-annex on otherhost know that origin
+has those files? Well, git-annex-shell, when receiving the files, updates
+the git-annex branch in origin.
+
+So, to support this workflow, the git-annex-shell reimplementation in shell
+would need to update the git-annex branch. That's about 3000 lines of code
+in git-annex, with complications including concurrency, making it fast,
+etc.
+
+Other complications include supporting different repository versions,
+populating unlocked files, supporting configs like
+annex.secure-erase-command, etc. And while any of these could be left out
+and be documented as limitations of not having git-annex installed, I think
+the real kicker is that this is behavior that would occur even if git-annex
+is only *temporarily* not installed. So there's the risk that any user who
+is having a bad PATH day suddenly gets a weird behavior.
+
+Making it read-only would somewhat limit the exposure to all these
+problems, but if it's read-only, how would any annex objects get into the
+remote repository in the first place?
+
+Using a separate special remote seems much cleaner. Then it's only used if
+you choose to use it. And it works like any other special remote.
+The rsync special remote is close enough to work, but a more special-purpose
+one could support things a bit better.
+"""]]
comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment
new file mode 100644
index 0000000000..8536bfecd0
--- /dev/null
+++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment
@@ -0,0 +1,26 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2025-12-10T17:39:01Z"
+ content="""
+It's actually possible to use a rsync special remote to fetch objects right
+out of `.git/annex/objects/`. For example:
+
+    git-annex initremote foo-rsync type=rsync encryption=none rsyncurl=example.com:/path/to/repo/.git/annex/objects/ --sameas=foo
+
+Since the default hash directory paths are different for rsync than for a
+git-annex repository, getting an object will first try the wrong hash path,
+which does lead to rsync complaining to stderr. But then it will fall back
+to a hash path that works.
+
+Sending an object to the rsync special remote will store it in a hash path
+different from the one that git-annex usually uses. So later switching to using
+git-annex in that repository will result in some unusual behavior, since
+it won't see some files that were put there. `git-annex fsck` will actually
+recover from this too, eg:
+
+    fsck newfile (normalizing object location) (checksum...) ok
+
+There are enough problems that I can't really recommend this,
+it just seemed worth pointing out that it can be done.
+"""]]
fix example output
diff --git a/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn b/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
index cac81d06b4..1a080c0a4b 100644
--- a/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
+++ b/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
@@ -38,7 +38,7 @@ Now the remote can be used like any other remote.
But, when you try to get a file out of S3, it'll start a restore:
# git annex get my_cool_big_file
- get my_cool_big_file (from s3...) (gpg)
+ get my_cool_big_file (from mys3...) (gpg)
Restore initiated, try again later.
failed
S3: support restore=yes
When used with GLACIER, this is similar to Amazon Glacier, which is
now deprecated by Amazon. It can also be used with other storage classes
like DEEP_ARCHIVE and lifecycle rules. Which is why it's a separate config.
Also added some associated git configs.
This needs aws-0.25.2.
Sponsored-by: Brock Spratlen on Patreon
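A sketch of how this is used, following the tips page edited above; the
remote name "mys3" and the encryption choice are illustrative:

    # an S3 remote storing in a cold storage class, with restore enabled
    git annex initremote mys3 type=S3 encryption=shared \
        storageclass=DEEP_ARCHIVE restore=yes

    # the first get starts a restore and fails with "Restore initiated,
    # try again later."; re-running it after the restore completes
    # downloads the file
    git annex get my_cool_big_file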
diff --git a/CHANGELOG b/CHANGELOG
index 97d64583f1..3de0bf454d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,9 +4,12 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
(Needs aws-0.25)
* Add a build warning when the version of aws being built against is
too old. 0.25.1 is needed to support Google Cloud Storage.
- * stack.yaml: Use aws-0.25.1.
* Added TRANSFER-RETRIEVE-URL extension to the external special remote
protocol.
+ * S3: Support restore=yes, when used with storageclass=DEEP_ARCHIVE and
similar. This is equivalent to the now deprecated Amazon Glacier.
+ (Needs aws-0.25.2)
+ * stack.yaml: Use aws-0.25.2.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Remote/S3.hs b/Remote/S3.hs
index e8401d80ef..002cdc1958 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -12,8 +12,8 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE CPP #-}
-#if ! MIN_VERSION_aws(0,25,1)
-#warning Building with an old version of the aws library. Recommend updating to 0.25.1, which fixes bugs and is needed for some features.
+#if ! MIN_VERSION_aws(0,25,2)
+#warning Building with an old version of the aws library. Recommend updating to 0.25.2, which fixes bugs and is needed for some features.
#endif
module Remote.S3 (remote, iaHost, configIA, iaItemUrl) where
@@ -96,6 +96,8 @@ remote = specialRemoteType $ RemoteType
(FieldDesc "part size for multipart upload (eg 1GiB)")
, optionalStringParser storageclassField
(FieldDesc "storage class, eg STANDARD or STANDARD_IA or ONEZONE_IA")
+ , yesNoParser restoreField (Just False)
+ (FieldDesc "enable restore of files not currently accessible in the bucket")
, optionalStringParser fileprefixField
(FieldDesc "prefix to add to filenames in the bucket")
, yesNoParser versioningField (Just False)
@@ -151,7 +153,10 @@ storageclassField = Accepted "storageclass"
fileprefixField :: RemoteConfigField
fileprefixField = Accepted "fileprefix"
-
+
+restoreField :: RemoteConfigField
+restoreField = Accepted "restore"
+
publicField :: RemoteConfigField
publicField = Accepted "public"
@@ -208,7 +213,7 @@ gen r u rc gc rs = do
where
new c cst info hdl magic = Just $ specialRemote c
(store hdl this info magic)
- (retrieve hdl rs c info)
+ (retrieve gc hdl rs c info)
(remove hdl this info)
(checkKey hdl rs c info)
this
@@ -432,14 +437,14 @@ storeHelper info h magic f object p = liftIO $ case partSize info of
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
-retrieve :: S3HandleVar -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
-retrieve hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
+retrieve :: RemoteGitConfig -> S3HandleVar -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
+retrieve gc hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Right h ->
eitherS3VersionID info rs c k (T.pack $ bucketObject info k) >>= \case
Left failreason -> do
warning (UnquotedString failreason)
giveup "cannot download content"
- Right loc -> retrieveHelper info h loc f p iv
+ Right loc -> retrieveHelper gc info h loc f p iv
Left S3HandleNeedCreds ->
getPublicWebUrls' rs info c k >>= \case
Left failreason -> do
@@ -448,17 +453,44 @@ retrieve hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Right us -> unlessM (withUrlOptions Nothing $ downloadUrl False k p iv us f) $
giveup "failed to download content"
-retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
-retrieveHelper info h loc f p iv = retrieveHelper' h f p iv $
+retrieveHelper :: RemoteGitConfig -> S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
+retrieveHelper gc info h loc f p iv = retrieveHelper' gc info h f p iv $
case loc of
Left o -> S3.getObject (bucket info) o
Right (S3VersionID o vid) -> (S3.getObject (bucket info) o)
{ S3.goVersionId = Just vid }
-retrieveHelper' :: S3Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
-retrieveHelper' h f p iv req = liftIO $ runResourceT $ do
- S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
+retrieveHelper' :: RemoteGitConfig -> S3Info -> S3Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
+retrieveHelper' gc info h f p iv req = liftIO $ runResourceT $ do
+ S3.GetObjectResponse { S3.gorResponse = rsp } <- handlerestore $
+ sendS3Handle h req
Url.sinkResponseFile p iv zeroBytesProcessed f WriteMode rsp
+ where
+ needrestore st = restore info && statusCode st == 403
+ handlerestore a = catchJust (Url.matchStatusCodeException needrestore) a $ \_ -> do
+#if MIN_VERSION_aws(0,25,2)
+ let tier = case remoteAnnexS3RestoreTier gc of
+ Just "bulk" -> S3.RestoreObjectTierBulk
+ Just "expedited" -> S3.RestoreObjectTierExpedited
+ _ -> S3.RestoreObjectTierStandard
+ let days = case remoteAnnexS3RestoreDays gc of
+ Just n -> S3.RestoreObjectLifetimeDays n
+ Nothing -> S3.RestoreObjectLifetimeDays 1
+ let restorereq = S3.restoreObject
+ (S3.goBucket req)
+ (S3.goObjectName req)
+ tier
+ days
+ restoreresp <- sendS3Handle h $ restorereq
+ { S3.roVersionId = S3.goVersionId req
+ }
+ case restoreresp of
+ S3.RestoreObjectAccepted -> giveup "Restore initiated, try again later."
+ S3.RestoreObjectAlreadyInProgress -> giveup "Restore in progress, try again later."
+ S3.RestoreObjectAlreadyRestored -> a
+#else
+ giveup "git-annex is built with too old a version of the aws library to support restore=yes"
+#endif
remove :: S3HandleVar -> Remote -> S3Info -> Remover
remove hv r info _proof k = withS3HandleOrFail (uuid r) hv $ \h -> do
@@ -529,7 +561,7 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
retrieveExportS3 hv r info k loc f p = verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
withS3Handle hv $ \case
- Right h -> retrieveHelper info h (Left (T.pack exportloc)) f p iv
+ Right h -> retrieveHelper (gitconfig r) info h (Left (T.pack exportloc)) f p iv
Left S3HandleNeedCreds -> case getPublicUrlMaker info of
Just geturl -> either giveup return =<<
withUrlOptions Nothing
@@ -728,7 +760,7 @@ retrieveExportWithContentIdentifierS3 hv r rs info loc (cid:_) dest gk p =
where
go iv = withS3Handle hv $ \case
Right h -> do
- rewritePreconditionException $ retrieveHelper' h dest p iv $
+ rewritePreconditionException $ retrieveHelper' (gitconfig r) info h dest p iv $
limitGetToContentIdentifier cid $
S3.getObject (bucket info) o
k <- either return id gk
@@ -1036,6 +1068,7 @@ data S3Info = S3Info
, partSize :: Maybe Integer
, isIA :: Bool
, versioning :: Bool
+ , restore :: Bool
, publicACL :: Bool
, publicurl :: Maybe URLString
, host :: Maybe String
@@ -1060,6 +1093,8 @@ extractS3Info c = do
, isIA = configIA c
, versioning = fromMaybe False $
getRemoteConfigValue versioningField c
+ , restore = fromMaybe False $
+ getRemoteConfigValue restoreField c
, publicACL = fromMaybe False $
getRemoteConfigValue publicField c
, publicurl = getRemoteConfigValue publicurlField c
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 156b88c32c..81d8201ed3 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -439,6 +439,8 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexTahoe :: Maybe FilePath
, remoteAnnexBupSplitOptions :: [String]
, remoteAnnexDirectory :: Maybe FilePath
+ , remoteAnnexS3RestoreTier :: Maybe String
+ , remoteAnnexS3RestoreDays :: Maybe Integer
, remoteAnnexAndroidDirectory :: Maybe FilePath
, remoteAnnexAndroidSerial :: Maybe String
, remoteAnnexGCrypt :: Maybe String
@@ -541,6 +543,8 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexTahoe = getmaybe TahoeField
, remoteAnnexBupSplitOptions = getoptions BupSplitOptionsField
, remoteAnnexDirectory = notempty $ getmaybe DirectoryField
+ , remoteAnnexS3RestoreTier = notempty $ getmaybe S3RestoreTierField
+ , remoteAnnexS3RestoreDays = getmayberead S3RestoreDaysField
, remoteAnnexAndroidDirectory = notempty $ getmaybe AndroidDirectoryField
, remoteAnnexAndroidSerial = notempty $ getmaybe AndroidSerialField
, remoteAnnexGCrypt = notempty $ getmaybe GCryptField
@@ -625,6 +629,8 @@ data RemoteGitConfigField
| TahoeField
| BupSplitOptionsField
| DirectoryField
+ | S3RestoreTierField
+ | S3RestoreDaysField
| AndroidDirectoryField
| AndroidSerialField
| GCryptField
@@ -697,6 +703,8 @@ remoteGitConfigField = \case
TahoeField -> uninherited True "tahoe"
BupSplitOptionsField -> uninherited True "bup-split-options"
(Diff truncated)
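The new RemoteGitConfig fields would be set as per-remote git configs. The
exact key names fall in the truncated part of this diff, so the spellings
below are assumptions based on the usual `remote.<name>.annex-*` pattern:

    # assumed key names; the tier defaults to standard, and the code
    # above also accepts "bulk" and "expedited"
    git config remote.mys3.annex-s3-restore-tier bulk

    # how long restored objects remain available; defaults to 1 day
    git config remote.mys3.annex-s3-restore-days 7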
initial
request on making git-annex work without git-annex-shell to get files from remote ssh
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn
new file mode 100644
index 0000000000..23f4fac3fd
--- /dev/null
+++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn
@@ -0,0 +1,26 @@
+I thought I had an issue on this but failed to find :-/
+
+ATM git-annex does not even bother to suggest or do anything about a remote git/git-annex repository if there is no git-annex (`git-annex-shell`) available there:
+
+```
+yoh@typhon:/mnt/DATA/data/dbic/QA$ git annex list
+
+  Unable to parse git config from origin
+
+  Remote origin does not have git-annex installed; setting annex-ignore
+
+  This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git annex enableremote origin
+here
+|datasets.datalad.org
+||origin
+|||web
+||||bittorrent
+|||||
+_X___ .datalad/metadata/objects/06/cn-2c3eade47bd2d9052658c6a9d10a57.xz
+
+...
+```
+
+a workaround, it seems as [it was posted over a decade ago](https://superuser.com/questions/526705/hosting-a-git-annex-on-a-server-without-git-annex-installed) (and now even google ai suggests that) is to setup an additional `rsync` remote and use it to fetch. upon a quick try didn't work for me but could have been an operator error...
+
+As files are available over regular ssh/scp and even rsync over ssh - I really do not see a technical problem for git-annex to establish interoperability with such a remote, at least for reading from, without having remote git-annex-shell. That should make it possible to access git-annex'es on servers which might be running some odd setups where installation of git-annex in user-space would be tricky if not impossible.
Added a comment: Q: any way to "install" without having root/admin privileges
diff --git a/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment b/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment
new file mode 100644
index 0000000000..44e4abbc83
--- /dev/null
+++ b/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="yarikoptic"
+ avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
+ subject="Q: any way to "install" without having root/admin privileges"
+ date="2025-12-07T19:38:46Z"
+ content="""
+need to install on a box where I am not an admin, just to copy the files from it (odd that git-annex can't just get anything it needs since SSH is there and working fine!).
+"""]]
Revert "update"
This reverts commit 550c6b482845ec978aa796191c9931fe19dbc369.
diff --git a/doc/thanks/list b/doc/thanks/list
index 563a0b6b21..dfeda7a813 100644
--- a/doc/thanks/list
+++ b/doc/thanks/list
@@ -126,99 +126,3 @@ Lilia.Nanne,
Dusty Mabe,
mpol,
Andrew Poelstra,
-AlexS,
-Amitai Schleier,
-Andrew,
-anon,
-Anthony DeRobertis,
-Anton Grensjö,
-Art S,
-Arthur Lutz,
-Ben,
-Boyd Stephen Smith,
-Bruno BEAUFILS,
-Caleb Allen,
-Calvin Beck,
-Chris Lamb,
-Christian Diller,
-Christopher Baines,
-Christopher Goes,
-Dave Pifke,
-don haraway,
-DuncanConstruction,
-encryptio,
-Eric Drechsel,
-ers35,
-Evgeni Ku,
-Fernando Jimenez,
-fiatjaf,
-Francois Marier,
-Gabriel Lee,
-Greg Grossmeier,
-HeartBreak KB Official,
-Ignacio,
-Ilya Baryshnikov,
-James (purpleidea),
-James Valleroy,
-Jan,
-Jason Woofenden,
-Jeff Goeke-Smith,
-Jim,
-Jo,
-Johannes Schlatow,
-John Peloquin,
-Jon D,
-jose_d,
-Josh Taylor,
-Josh Tilles,
-Lacnic,
-Land Reaver,
-Lee Hinman,
-Lee-kai Wang,
-Lukas Platz,
-Lukas Waymann,
-Madison McGaffin,
-Maggie Hess,
-Matthew Willcockson,
-Matthias Urlichs,
-Matthieu,
-Mattias J,
-Mica,
-Michal Politowski,
-Mika Pflüger,
-mo,
-Mohit Munjani,
-Nahum Shalman,
-NinjaTrappeur,
-Ole-Morten Duesund,
-Paul Tötterman,
-Pedro Luz,
-Peter,
-Renaud Casenave-Péré,
-rjbl,
-Ryan Newton,
-Rémi Vanicat,
-Sergey Karpukhin,
-Shane-o,
-Shawn Butts,
-Stan Yamane,
-Stephan Burkhardt,
-Stephan Meister,
-SvenDowideit,
-sww,
-Teremu HAMBLIN,
-Thom May,
-Thomas Ferris Nicolaisen,
-Thomas Hochstein,
-Thomas Schwinge,
-Tim Howes,
-tj,
-Trent Lloyd,
-Tyler Cipriani,
-Valeria_,
-Walltime,
-wawatcz,
-Will Hughes,
-Willard Korfhage,
-wzhd,
-Zoé Cassiopée Gauthier,
update
diff --git a/doc/thanks/list b/doc/thanks/list
index dfeda7a813..563a0b6b21 100644
--- a/doc/thanks/list
+++ b/doc/thanks/list
@@ -126,3 +126,99 @@ Lilia.Nanne,
Dusty Mabe,
mpol,
Andrew Poelstra,
+AlexS,
+Amitai Schleier,
+Andrew,
+anon,
+Anthony DeRobertis,
+Anton Grensjö,
+Art S,
+Arthur Lutz,
+Ben,
+Boyd Stephen Smith,
+Bruno BEAUFILS,
+Caleb Allen,
+Calvin Beck,
+Chris Lamb,
+Christian Diller,
+Christopher Baines,
+Christopher Goes,
+Dave Pifke,
+don haraway,
+DuncanConstruction,
+encryptio,
+Eric Drechsel,
+ers35,
+Evgeni Ku,
+Fernando Jimenez,
+fiatjaf,
+Francois Marier,
+Gabriel Lee,
+Greg Grossmeier,
+HeartBreak KB Official,
+Ignacio,
+Ilya Baryshnikov,
+James (purpleidea),
+James Valleroy,
+Jan,
+Jason Woofenden,
+Jeff Goeke-Smith,
+Jim,
+Jo,
+Johannes Schlatow,
+John Peloquin,
+Jon D,
+jose_d,
+Josh Taylor,
+Josh Tilles,
+Lacnic,
+Land Reaver,
+Lee Hinman,
+Lee-kai Wang,
+Lukas Platz,
+Lukas Waymann,
+Madison McGaffin,
+Maggie Hess,
+Matthew Willcockson,
+Matthias Urlichs,
+Matthieu,
+Mattias J,
+Mica,
+Michal Politowski,
+Mika Pflüger,
+mo,
+Mohit Munjani,
+Nahum Shalman,
+NinjaTrappeur,
+Ole-Morten Duesund,
+Paul Tötterman,
+Pedro Luz,
+Peter,
+Renaud Casenave-Péré,
+rjbl,
+Ryan Newton,
+Rémi Vanicat,
+Sergey Karpukhin,
+Shane-o,
+Shawn Butts,
+Stan Yamane,
+Stephan Burkhardt,
+Stephan Meister,
+SvenDowideit,
+sww,
+Teremu HAMBLIN,
+Thom May,
+Thomas Ferris Nicolaisen,
+Thomas Hochstein,
+Thomas Schwinge,
+Tim Howes,
+tj,
+Trent Lloyd,
+Tyler Cipriani,
+Valeria_,
+Walltime,
+wawatcz,
+Will Hughes,
+Willard Korfhage,
+wzhd,
+Zoé Cassiopée Gauthier,
typo
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn
index b8fd29522c..5a1f9fa969 100644
--- a/doc/design/external_special_remote_protocol.mdwn
+++ b/doc/design/external_special_remote_protocol.mdwn
@@ -463,7 +463,7 @@ The two protocol versions are actually identical.
Old versions of git-annex that supported only `VERSION 1`
had a bug in their implementation of the part of the protocol
documented in the [[export_and_import_appendix]].
-The bug could result in ontent being exported to the wrong file.
+The bug could result in content being exported to the wrong file.
External special remotes that implement that should use `VERSION 2`
to avoid talking to the buggy old version of git-annex.
Added TRANSFER-RETRIEVE-URL extension to the external special remote protocol
Since retrieveKeyFileM has to use fileRetriever before it sees this
response, which uses tailVerify, it's unfortunately not possible to
stream the url download to do incremental verification. That would be
more efficient.
Similarly, watchFileSize does some extra work, but the progress meter is
updated as the content streams in.
The downloadFailed case is never reached I think, since the url list
provided to downloadUrl' is not empty.
Sponsored-by: Dartmouth College's OpenNeuro project
diff --git a/Annex/Content.hs b/Annex/Content.hs
index e10329d8c2..edb1052d52 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1,6 +1,6 @@
{- git-annex file content managing
-
- - Copyright 2010-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -47,6 +47,7 @@ module Annex.Content (
listKeys',
saveState,
downloadUrl,
+ downloadUrl',
preseedTmp,
dirKeys,
withObjectLoc,
@@ -881,13 +882,21 @@ saveState nocommit = doSideAction $ do
- that failed.
-}
downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> OsPath -> Url.UrlOptions -> Annex Bool
-downloadUrl listfailedurls k p iv urls file uo =
+downloadUrl listfailedurls k p iv urls file uo =
+ downloadUrl' listfailedurls k p iv urls file uo >>= \case
+ Right r -> return r
+ Left e -> do
+ warning $ UnquotedString e
+ return False
+
+downloadUrl' :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> OsPath -> Url.UrlOptions -> Annex (Either String Bool)
+downloadUrl' listfailedurls k p iv urls file uo =
-- Poll the file to handle configurations where an external
-- download command is used.
meteredFile file (Just p) k (go urls [])
where
go (u:us) errs p' = Url.download' p' iv u file uo >>= \case
- Right () -> return True
+ Right () -> return (Right True)
Left err -> do
-- If the incremental verifier was fed anything
-- while the download that failed ran, it's unable
@@ -899,14 +908,12 @@ downloadUrl listfailedurls k p iv urls file uo =
_ -> noop
Nothing -> noop
go us ((u, err) : errs) p'
- go [] [] _ = return False
- go [] errs@((_, err):_) _ = do
+ go [] [] _ = return (Right False)
+ go [] errs@((_, err):_) _ = return $ Left $
if listfailedurls
- then warning $ UnquotedString $
- unlines $ flip map errs $ \(u, err') ->
- u ++ " " ++ err'
- else warning $ UnquotedString err
- return False
+ then unlines $ flip map errs $ \(u, err') ->
+ u ++ " " ++ err'
+ else err
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}
diff --git a/CHANGELOG b/CHANGELOG
index 018efd7a3d..ac29f65722 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,8 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
* Add a build warning when the version of aws being built against is
too old. 0.25.1 is needed to support Google Cloud Storage.
* stack.yaml: Use aws-0.25.1.
+ * Added TRANSFER-RETRIEVE-URL extension to the external special remote
+ protocol.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Remote/External.hs b/Remote/External.hs
index c392b3f31e..07d1272f24 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -1,6 +1,6 @@
{- External special remote interface.
-
- - Copyright 2013-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -93,7 +93,7 @@ gen rt externalprogram r u rc gc rs
let exportactions = if exportsupported
then ExportActions
{ storeExport = storeExportM external
- , retrieveExport = retrieveExportM external
+ , retrieveExport = retrieveExportM external gc
, removeExport = removeExportM external
, checkPresentExport = checkPresentExportM external
, removeExportDirectory = Just $ removeExportDirectoryM external
@@ -116,7 +116,7 @@ gen rt externalprogram r u rc gc rs
cheapexportsupported
return $ Just $ specialRemote c
(storeKeyM external)
- (retrieveKeyFileM external)
+ (retrieveKeyFileM external gc)
(removeKeyM external)
(checkPresentM external)
rmt
@@ -248,17 +248,19 @@ storeKeyM external = fileStorer $ \k f p ->
result (Left (respErrorMessage "TRANSFER" errmsg))
_ -> Nothing
-retrieveKeyFileM :: External -> Retriever
-retrieveKeyFileM external = fileRetriever $ \d k p ->
- either giveup return =<< watchFileSize d p (go d k)
+retrieveKeyFileM :: External -> RemoteGitConfig -> Retriever
+retrieveKeyFileM external gc = fileRetriever $ \dest k p ->
+ either giveup return =<< watchFileSize dest p (go dest k)
where
- go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromOsPath d)) k (Just p) $ \resp ->
+ go dest k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromOsPath dest)) k (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Download k'
| k == k' -> result $ Right ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> result $ Left $
respErrorMessage "TRANSFER" errmsg
+ TRANSFER_RETRIEVE_URL k' url
+ | k == k' -> retrieveUrl' gc url dest k p
_ -> Nothing
removeKeyM :: External -> Remover
@@ -306,8 +308,8 @@ storeExportM external f k loc p = either giveup return =<< go
_ -> Nothing
req sk = TRANSFEREXPORT Upload sk (fromOsPath f)
-retrieveExportM :: External -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
-retrieveExportM external k loc dest p = do
+retrieveExportM :: External -> RemoteGitConfig -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
+retrieveExportM external gc k loc dest p = do
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv dest $
either giveup return =<< go
@@ -317,6 +319,8 @@ retrieveExportM external k loc dest p = do
| k == k' -> result $ Right ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> result $ Left $ respErrorMessage "TRANSFER" errmsg
+ TRANSFER_RETRIEVE_URL k' url
+ | k == k' -> retrieveUrl' gc url dest k p
UNSUPPORTED_REQUEST ->
result $ Left "TRANSFEREXPORT not implemented by external special remote"
_ -> Nothing
@@ -838,7 +842,18 @@ retrieveUrl :: RemoteGitConfig -> Retriever
retrieveUrl gc = fileRetriever' $ \f k p iv -> do
us <- getWebUrls k
unlessM (withUrlOptions (Just gc) $ downloadUrl True k p iv us f) $
- giveup "failed to download content"
+ giveup downloadFailed
+
+retrieveUrl' :: RemoteGitConfig -> URLString -> OsPath -> Key -> MeterUpdate -> Maybe (Annex (ResponseHandlerResult (Either String ())))
+retrieveUrl' gc url dest k p =
+ Just $ withUrlOptions (Just gc) $ \uo ->
+ downloadUrl' False k p Nothing [url] dest uo >>= return . \case
+ Left msg -> Result (Left msg)
+ Right True -> Result (Right ())
+ Right False -> Result (Left downloadFailed)
+
+downloadFailed :: String
+downloadFailed = "failed to download content"
checkKeyUrl :: RemoteGitConfig -> CheckPresent
checkKeyUrl gc k = do
diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs
index 58bbc9f656..f265d4a1bd 100644
--- a/Remote/External/Types.hs
+++ b/Remote/External/Types.hs
@@ -1,6 +1,6 @@
{- External special remote data types.
-
- - Copyright 2013-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -116,6 +116,7 @@ supportedExtensionList = ExtensionList
[ "INFO"
, "GETGITREMOTENAME"
, "UNAVAILABLERESPONSE"
+ , "TRANSFER-RETRIEVE-URL"
, asyncExtension
]
@@ -243,6 +244,7 @@ data Response
| PREPARE_FAILURE ErrorMsg
| TRANSFER_SUCCESS Direction Key
| TRANSFER_FAILURE Direction Key ErrorMsg
+ | TRANSFER_RETRIEVE_URL Key URLString
| CHECKPRESENT_SUCCESS Key
| CHECKPRESENT_FAILURE Key
| CHECKPRESENT_UNKNOWN Key ErrorMsg
@@ -281,6 +283,7 @@ instance Proto.Receivable Response where
(Diff truncated)
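To make the new response concrete, the wire exchange might look like the
following, assuming TRANSFER_RETRIEVE_URL serializes in the same style as
the other TRANSFER-* messages; the key and url are placeholders:

    # git-annex asks the external remote to retrieve a key into a temp file
    TRANSFER RETRIEVE SHA256E-s1048576--abc123 /path/to/tmpfile
    # instead of downloading it itself, the remote hands back an url,
    # and git-annex downloads that url to the temp file, with progress
    # metering via watchFileSize as described in the commit message
    TRANSFER-RETRIEVE-URL SHA256E-s1048576--abc123 https://example.com/abc123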
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment
new file mode 100644
index 0000000000..89c69c47e5
--- /dev/null
+++ b/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2025-12-05T17:23:24Z"
+ content="""
+TRANSFEREXPORT, in the "simple export interface" also
+uses TRANSFER-SUCCESS/TRANSFER-FAILURE, and should
+also support this extension.
+"""]]
remove incorrect comment
tailVerify is already used so it does not re-read at end.
I don't think it will be possible to avoid using tailVerify with this
extension since it's already started by the time the response comes back.
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment
deleted file mode 100644
index b3a27d4c7e..0000000000
--- a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment
+++ /dev/null
@@ -1,9 +0,0 @@
-[[!comment format=mdwn
- username="joey"
- subject="""comment 3"""
- date="2025-12-05T16:58:21Z"
- content="""
-An added benefit of this will be that git-annex can stream hash
-while downloading, so it will avoid re-reading the file at the end to
-verify it.
-"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment
new file mode 100644
index 0000000000..b3a27d4c7e
--- /dev/null
+++ b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2025-12-05T16:58:21Z"
+ content="""
+An added benefit of this will be that git-annex can stream hash
+while downloading, so it will avoid re-reading the file at the end to
+verify it.
+"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment b/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment
new file mode 100644
index 0000000000..f0258a67d2
--- /dev/null
+++ b/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment
@@ -0,0 +1,16 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2025-12-05T16:35:05Z"
+ content="""
+One problem with this design is that there may be HTTP headers that are
+used for authorization, rather than putting authentication in the url.
+
+I think we may have talked about this at the hackfest, and came down on the
+side of simplicity, supporting only an url. Can't quite remember.
+
+It might also be possible to redirect to an url when storing an object.
+
+I think that protocol design should leave these possibilities open to be
+implemented later.
+"""]]
comment
diff --git a/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment
new file mode 100644
index 0000000000..291ca824cd
--- /dev/null
+++ b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment
@@ -0,0 +1,7 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2025-12-05T16:31:47Z"
+ content="""
+Is the assistant running in the local repository, or on the remote?
+"""]]
comment
diff --git a/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_2_9ad59f18725cd45d3ad2261f1c92f694._comment b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_2_9ad59f18725cd45d3ad2261f1c92f694._comment
new file mode 100644
index 0000000000..149bf4feb2
--- /dev/null
+++ b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_2_9ad59f18725cd45d3ad2261f1c92f694._comment
@@ -0,0 +1,12 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2025-12-05T16:29:39Z"
+ content="""
+Probably this. In any case, it's better to upgrade before filing a bug on something like this.
+
+    git-annex (8.20211123) upstream; urgency=medium
+
+    * Bugfix: When -J was enabled, getting files could leak an
+      ever-growing number of git cat-file processes.
+"""]]
issue about unstaged files on push
diff --git a/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_.mdwn b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_.mdwn
new file mode 100644
index 0000000000..c09c42564f
--- /dev/null
+++ b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_.mdwn
@@ -0,0 +1,42 @@
+### Please describe the problem.
+
+This is a repo under `git-annex assistant` control and was committing and pushing fine but then stopped... there were many other oddities (some files were `git rm`ed and never committed back etc), but now it is at the level of git on remote I guess, so likely going through `git-annex filter-process` -- claims to have unstaged changes when `git status` says none:
+
+
+```shell
+reprostim@reproiner:/data/reprostim$ git push typhon
+Enumerating objects: 15079, done.
+Counting objects: 100% (10929/10929), done.
+Delta compression using up to 4 threads
+Compressing objects: 100% (6763/6763), done.
+Writing objects: 100% (6764/6764), 816.02 KiB | 16.00 MiB/s, done.
+Total 6764 (delta 64), reused 6388 (delta 1), pack-reused 0
+remote: Resolving deltas: 100% (64/64), completed with 30 local objects.
+To typhon:/data/repronim/dbic-reproflow/sourcedata/reprostim-reproiner
+ ! [remote rejected] master -> master (Working directory has unstaged changes)
+error: failed to push some refs to 'typhon:/data/repronim/dbic-reproflow/sourcedata/reprostim-reproiner'
+reprostim@reproiner:/data/reprostim$ ssh typhon git -C /data/repronim/dbic-reproflow/sourcedata/reprostim-reproiner status
+On branch master
+Your branch is ahead of 'reproiner/master' by 2906 commits.
+  (use "git push" to publish your local commits)
+
+nothing to commit, working tree clean
+
+```
+
+how to troubleshoot in the future?
+
+workaround done: pushed as another branch which I then just "merged" (was fast-forward) locally (on typhon).
+
+
+
+### What version of git-annex are you using? On what operating system?
+
+
+```
+reprostim@reproiner:/data/reprostim$ git annex version | head -n 1
+git-annex version: 10.20251114-1~ndall+1
+reprostim@reproiner:/data/reprostim$ ssh typhon git annex version | head -n 1
+git-annex version: 10.20250416
+
+```
Added a comment
diff --git a/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_1_014ab514d290dfaa729cce64138d7be0._comment b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_1_014ab514d290dfaa729cce64138d7be0._comment
new file mode 100644
index 0000000000..0bc19cfebf
--- /dev/null
+++ b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_1_014ab514d290dfaa729cce64138d7be0._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="octvs"
+ avatar="http://cdn.libravatar.org/avatar/af90c1c7c98f0b5ed446a272685f29ab"
+ subject="comment 1"
+ date="2025-12-04T17:17:09Z"
+ content="""
+I'm currently testing it with version `10.20251114-....`. I will update/close issue according to the result.
+"""]]
diff --git a/doc/bugs/does_not_clean_sub_processes_until_freeze.mdwn b/doc/bugs/does_not_clean_sub_processes_until_freeze.mdwn
new file mode 100644
index 0000000000..3291e35910
--- /dev/null
+++ b/doc/bugs/does_not_clean_sub_processes_until_freeze.mdwn
@@ -0,0 +1,65 @@
+### Please describe the problem.
+
+Whenever I leave git-annex to do a long operation on a remote server, I came
+back to its eventual death. I leave a `git-annex get ... -J 4` on a tmux at a
+server it eventually freezes, so I tend to rely on my machine and
+short/repetitive operations to be able to use git-annex. Before I thought the
+parallelization might be the issue so I reduced it to 4 from 8/16/32, but it
+didn't resolve the problem.
+
+This last time I wanted to investigate and I found out a lot of subprocess
+hanging around.
+
+```sh
+$ ps -u <me> | grep git
+2460230 pts31    00:00:00 git <defunct>
+2460231 pts31    00:00:00 git <defunct>
+2460232 pts31    00:00:00 git <defunct>
+2460233 pts31    00:00:00 git <defunct>
+2460234 pts31    00:00:00 git <defunct>
+2460235 pts31    00:00:00 git <defunct>
+2460237 pts31    00:00:00 git <defunct>
+...
+$ ps -u <me> | grep git | wc -l
+13193
+```
+
+Which explains why lowering parallel jobs didn't resolve but delay the eventual
+freeze.
+
+On this setup the version is old which might be the culprit. Although I can try
+to get an updated version (via binary or conda) on my path, I would still like
+to identify the problem. It also happened on another server setup which had a
+relatively newer version but since I have no access to there anymore, I can't
+provide more information on that (yet).
+
+### What steps will reproduce the problem?
+
+Run `$ git annex get * -J 4` and wait for eventual freeze
+
+### What version of git-annex are you using? On what operating system?
+
+git-annex version: 8.20210223
+build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV
+dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0
+key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X*
+remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external
+operating system: linux x86_64
+supported repository versions: 8
+upgrade supported from repository versions: 0 1 2 3 4 5 6 7
+
+### Please provide any additional information below.
+
+[[!format sh """
+# If you can, paste a complete transcript of the problem occurring here.
+# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
+
+
+# End of transcript or log.
+"""]]
+
+### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
+
+I use it for all kind of data I have both in private and at work. Amazing
+piece of software. I'm sure my colleagues/IT are annoyed of me plugging it to
+every possible discussion.
comment
diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_3_261106ade035ed69fc6c9ee05db1eb48._comment b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_3_261106ade035ed69fc6c9ee05db1eb48._comment
new file mode 100644
index 0000000000..6b2ff15ca4
--- /dev/null
+++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_3_261106ade035ed69fc6c9ee05db1eb48._comment
@@ -0,0 +1,25 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2025-12-03T19:08:42Z"
+ content="""
+With the separate autoenabled remote for PRs, the UX could look like this:
+
+    > git-annex add myfile
+    add myfile ok
+    > git commit -m foo
+    > git push origin HEAD:refs/for/main -o topic="add myfile"
+    > git-annex push origin-PRs
+    copy myfile (to origin-PRs) ... ok
+
+Or with a small git-annex improvement, even:
+
+    > git-annex assist -o topic="add myfile"
+    add myfile ok
+    copy myfile (to origin-PRs) ... ok
+
+For this, origin-PRs would want all files not in origin,
+and origin would want all files not in origin-PRs. And origin-PRs would
+need to have a lower cost than origin so that it doesn't first try, and
+fail, to copy the file to origin.
+"""]]
fix
diff --git a/doc/projects/FZJ.mdwn b/doc/projects/FZJ.mdwn
index d076a07189..1ef8eb3c27 100644
--- a/doc/projects/FZJ.mdwn
+++ b/doc/projects/FZJ.mdwn
@@ -35,5 +35,5 @@ Bugs
Forum posts
===========

-[[!inline pages="forum/* (tagged(projects/INM7) or tagged(projects/ICE4))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist template=buglist]]
+[[!inline pages="forum/* and (tagged(projects/INM7) or tagged(projects/ICE4))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist template=buglist]]
add tagged forum posts since we have one
diff --git a/doc/projects/FZJ.mdwn b/doc/projects/FZJ.mdwn
index c2953d4600..d076a07189 100644
--- a/doc/projects/FZJ.mdwn
+++ b/doc/projects/FZJ.mdwn
@@ -31,3 +31,9 @@ Bugs
(tagged(projects/INM7) or tagged(projects/ICE4))"
feeds=no actions=yes archive=yes show=0 template=buglist]]
</details>
+
+Forum posts
+===========
+
+[[!inline pages="forum/* (tagged(projects/INM7) or tagged(projects/ICE4))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist template=buglist]]
+
comments
diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_1_6740265d7182747ca1a9e6abbc0bd62b._comment b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_1_6740265d7182747ca1a9e6abbc0bd62b._comment new file mode 100644 index 0000000000..53f2f69329 --- /dev/null +++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_1_6740265d7182747ca1a9e6abbc0bd62b._comment @@ -0,0 +1,56 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-03T17:54:21Z" + content=""" +> Obviously annexed objects copied to the Forgejo-aneksajo instance +> via this path should only be available in the context of that PR in some way. +> +> The fundamental issue seems to be that annexed objects always belong to the entire repository, and are not scoped to any branch. + +Hmm.. git objects also don't really belong to any particular branch. +git only fetches objects referenced by the branches you clone. + +Similarly, git-annex can only ever get annex objects that are listed +in the git-annex branch. Even with `--all`, it will not know about objects +not listed there. + +So, seems to me you may only need to keep the PR's git-annex branch separate from +the main git-annex branch, so that the main git-annex branch does not list +objects from the PR. I see two problems that would need to be solved to do +that: + +1. If git-annex is able to see the PR's git-annex branch as eg + (refs/foo/git-annex), it will auto-merge it into the main git-annex branch, and + then --all will operate on objects from the PR as well. So the PR's + git-annex branch would need to be named to avoid that. + + This could be just `git push origin git-annex:refs/for/git-annex/topic-branch` + Maybe `git-annex sync` could be made to support that for its pushes? + +2. When git-annex receives an object into the repository, the receiving side + updates the git-annex branch to indicate it now has a copy of that object. So, + you would need a way to make objects sent to a PR update the PR's git-annex branch, + rather than the main git-annex branch. + + This could be something similar to `git push -o topic` in + git-annex. Which would need to be a P2P protocol extension. Or maybe + some trick with the repository UUID? + +When the PR is merged, you would then also merge its git-annex branch. + +If the PR is instead rejected, and you want to delete the objects +associated with it, you would first delete the PR's other branches, and +then run `git-annex unused`, arranging (how?) for it to see only the PR's +git-annex branch and not any other git-annex branches. That would find any +objects that were sent as part of the PR, that don't also happen to be used +in other branches (including other PRs). + +---- + +I do wonder, if this were implemeted, would the git-annex +workflow for the user be any better than if there were a per-PR +remote for them to use? If every git-annex command that pushes the +git-annex branch or sends objects to forjejo needs `-o topic` +to be given, then it might be a worse user experience. 
+"""]] diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_2_bb6e73189471420640ff563530d79253._comment b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_2_bb6e73189471420640ff563530d79253._comment new file mode 100644 index 0000000000..c4049195e9 --- /dev/null +++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_2_bb6e73189471420640ff563530d79253._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-03T18:44:56Z" + content=""" +> A per-user special remote that is assumed to contain the annexed files for all of the users AGit-PRs. If git recognizes remote +> configs in the users' global git config then it could be possible to get away with configuring things once, but I am not sure of the +> behavior of git in that case. + +I think git will do that (have not checked), but a special remote needs +information to be written to the git-annex branch, not just git config, +so there's no way to globally configure a special remote to be accessible +in every git-annex repository. + +Along similar lines, forgejo could set up an autoenabled remote +that contains annexed files for all AGit-PRs, and that wants any files +not in the main git repository. (This could be a special remote, or a +git-annex repository that just doesn't allow any ref pushes to it. The +latter might be easier to deal with since `git-annex p2phttp` could serve +it as just another git-annex repository.) + +That would solve the second problem I discussed in the comment above, +because when the user copies objects to that separate remote, it will not +cause git-annex in the forgejo repository to update the main git-annex +branch to list those objects. + +When merging a PR, forgejo would move the objects over from that remote +to the main git repository. + +You would be left with a bit of an problem in deleting objects from that +remote when a PR is rejected. Since the user may never have pushed +their git-annex branch after sending an object to it, +and so you would not know what PR that object belongs to. I suppose this +could be handled by finding all objects that are in active PRs and deleting +ones that are not after some amount of time. +"""]]
close
diff --git a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn index ae708bb1b3..e2ab131f7b 100644 --- a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn +++ b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn @@ -51,3 +51,6 @@ rsync exited 23 git-annex solves a lot of problems for both work and privately. I use it on a daily basis since a decade, and it is just great and very reliable. Great software, please keep up developing and maintaining it. + +> [[closing|done]] as this must have been a bug in the older version. +> --[[Joey]]
remove incorrect comment
Re-sending a restore request while a restore is ongoing does not change
the restoration period.
diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment deleted file mode 100644 index a0581ba9e8..0000000000 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment +++ /dev/null @@ -1,12 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 3""" - date="2025-12-03T17:28:28Z" - content=""" -If one `git-annex get` starts a restore, then a while later, -but before the restore is done, `git-annex get` is run again, -sending RestoreObject again will change the lifetime of the restored -object. This seems like something to at least warn users about, -since it could cost money to leave restored objects in the S3 bucket -longer than necessary. -"""]]
update
diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn index 10c1beae5f..8f671c2943 100644 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn @@ -6,4 +6,4 @@ Basically, the files moved by AWS from S3 to Glacier are not available under the I suppose DELETE needs special care as well. -> [[meta title="wishlist: Restore s3 files moved to Glacier or Deep Archive"]] +[[!meta title="wishlist: Restore s3 files moved to Glacier or Deep Archive"]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment index f11f584e75..be2c97df2e 100644 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment @@ -13,10 +13,19 @@ to move objects to deep archive, git-annex won't be able to retrieve objects stored in deep archive. To support that, the S3 special remote would need to send a request to S3 to -RESTORE an object from deep archive. Then later (on a subsequent `git-annex` run) -GET the object from S3. +restore an object from deep archive. Then later (on a subsequent `git-annex` run) +it can download the object from S3. -And then after getting the object, it would be left in the S3 bucket rather -than in deep archive, so something would need to deal with that to move it -back into deep archive. Either git-annex could do it, or a lifecycle rule could. +This is the API: +<https://docs.aws.amazon.com/AmazonS3/latest/API/API_RestoreObject.html> + +It includes a Tier tag which controls whether the restore is +expedited. There would probably need to be a git config for that, since +the user may want to get a file fast or pay less for a slower retrieval. + +And there is a Days tag, which controls how long the object should be left +accessible in S3. This would also make sense to have a git config. + +I have opened this issue, which is a prerequisite to implementing this +<https://github.com/aristidb/aws/issues/297> """]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment new file mode 100644 index 0000000000..a0581ba9e8 --- /dev/null +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-03T17:28:28Z" + content=""" +If one `git-annex get` starts a restore, then a while later, +but before the restore is done, `git-annex get` is run again, +sending RestoreObject again will change the lifetime of the restored +object. This seems like something to at least warn users about, +since it could cost money to leave restored objects in the S3 bucket +longer than necessary. +"""]]
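To make the Tier and Days knobs concrete: the RestoreObject request body is a small XML document. A hypothetical sketch follows; the aws library does not provide this yet (per the issue linked above), and the git config names for these settings are not decided:

```haskell
-- The three restore tiers the S3 RestoreObject API accepts.
data RestoreTier = Expedited | Standard | Bulk
    deriving Show

data RestoreRequest = RestoreRequest
    { restoreDays :: Int          -- how long to keep the restored copy in S3
    , restoreTier :: RestoreTier  -- speed/cost tradeoff
    }

-- Render the XML body documented for RestoreObject.
restoreRequestXml :: RestoreRequest -> String
restoreRequestXml r = concat
    [ "<RestoreRequest>"
    , "<Days>", show (restoreDays r), "</Days>"
    , "<GlacierJobParameters><Tier>"
    , show (restoreTier r)
    , "</Tier></GlacierJobParameters>"
    , "</RestoreRequest>"
    ]
```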
fix link
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment index 66d9ae9427..5c435b0e85 100644 --- a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment @@ -6,6 +6,6 @@ I don't think `storageclass=DEEP_ARCHIVE` will currently work, git-annex is not able to request that the object be restored. -See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier]] +See <https://git-annex.branchable.com/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/> for a todo which would solve this. """]]
link
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment index 766263cb59..66d9ae9427 100644 --- a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment @@ -6,6 +6,6 @@ I don't think `storageclass=DEEP_ARCHIVE` will currently work, git-annex is not able to request that the object be restored. -See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier/]] +See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier]] for a todo which would solve this. """]]
comments
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment new file mode 100644 index 0000000000..766263cb59 --- /dev/null +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-03T16:58:52Z" + content=""" +I don't think `storageclass=DEEP_ARCHIVE` will currently work, +git-annex is not able to request that the object be restored. + +See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier/]] +for a todo which would solve this. +"""]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn index 85fc2785c4..10c1beae5f 100644 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn @@ -5,3 +5,5 @@ This is different from the adding a Glacier remote to git annex because of the r Basically, the files moved by AWS from S3 to Glacier are not available under the normal Glacier API. In fact, the moved S3 files are listed as available but under the `GLACIER` storage class and need a RESTORE request before they can be GET like other S3 files. Trying to GET an S3 file that has been moved to Glacier will not restore it from Glacier and will result in an 403 error. I suppose DELETE needs special care as well. + +> [[meta title="wishlist: Restore s3 files moved to Glacier or Deep Archive"]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment new file mode 100644 index 0000000000..f11f584e75 --- /dev/null +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-03T17:01:50Z" + content=""" +Glacier is in the process of being deprecated, instead there is +the Deep Archive S3 storage class. +<https://aws.amazon.com/blogs/aws/new-amazon-s3-storage-class-glacier-deep-archive/> + +While it is possible to configure a S3 special remote +with `storageclass=DEEP_ARCHIVE`, or configure a bucket with lifecycle rules +to move objects to deep archive, git-annex won't be able to retrieve objects +stored in deep archive. + +To support that, the S3 special remote would need to send a request to S3 to +RESTORE an object from deep archive. Then later (on a subsequent `git-annex` run) +GET the object from S3. + +And then after getting the object, it would be left in the S3 bucket rather +than in deep archive, so something would need to deal with that to move it +back into deep archive. Either git-annex could do it, or a lifecycle rule could. +"""]]
comments
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_2_c89990dc457eaa0f09c68482f17f77eb._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_2_c89990dc457eaa0f09c68482f17f77eb._comment new file mode 100644 index 0000000000..3afae49b58 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_2_c89990dc457eaa0f09c68482f17f77eb._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-03T16:16:14Z" + content=""" +Are the `000675/draft/` files you show it importing the ones that are +access restricted? + +And when you replicated the problem from the backup, were you using it in +the configuration where it cannot access those? + +I notice that all the files affected seem to be probably smallish text +files (yaml, jsonld). Do you have annex.largefiles configured in this +repository, and are all of the affected files non-annexed files? +If so, it would be worth retrying from the backup with the config changed +so those files get annexed and see if that avoids the problem. +"""]] diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_3_b36a8529cafbb0a4020af5723cd8eda9._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_3_b36a8529cafbb0a4020af5723cd8eda9._comment new file mode 100644 index 0000000000..0403c5c9b0 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_3_b36a8529cafbb0a4020af5723cd8eda9._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-03T16:37:11Z" + content=""" +> please advise on how to mitigate (`git reset --hard` the `s3-dandiarchive/master` to prior state before yesterday and reimport with newer git-annex or ... ?) + +Simply resetting the remote tracking branch and re-importing won't cause an +import to necessarily happen again. This is because git-annex tracks +internally what has been imported from the remote. Running an import again +when it's already imported files won't re-download those same files. +And it will regenerate the same remote tracking branch. + +So running in a clone from a backup is a better way to re-run the import. +"""]]
Added a comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_1_3545d2b6408f21a1ad6c5ff3c0255d57._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_1_3545d2b6408f21a1ad6c5ff3c0255d57._comment new file mode 100644 index 0000000000..54695f9577 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_1_3545d2b6408f21a1ad6c5ff3c0255d57._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 1" + date="2025-12-02T19:22:10Z" + content=""" +I have now tried with most recent release 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 while operating on a copy from the backup. + +and looking at the fact that it starts with the latter, likely the \"access restricted ones\" + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ ( source .git/secrets.env; git-annex import master --from s3-dandiarchive && git merge s3-dandiarchive/master ) +list s3-dandiarchive ok +import s3-dandiarchive 000675/draft/assets.jsonld +ok +import s3-dandiarchive 000675/draft/assets.yaml +ok +... +``` + +while still making commits to earlier folders + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git log --stat s3-dandiarchive/master -- 000029/draft || echo $? +fatal: unable to read f7c097994e60c2b58dae464633583b65a6691415 +commit ce60e6d1 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Dec 02 14:16:10 -0500 + + import from s3-dandiarchive + +128 +``` +I suspect it just somehow \"manufactures\" them for public ones without fetching their keys? +"""]]
broken git with import from s3
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn new file mode 100644 index 0000000000..938d68ba57 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn @@ -0,0 +1,117 @@ +### Please describe the problem. + +we have dandiarchive s3 bucket with versioning turned on. Currently, after I changed signature from anonymous and added region it looks like + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git show git-annex:remote.log +09b87154-c650-46d1-a036-6e03c56c0b1a bucket=dandiarchive datacenter=US encryption=none fileprefix=dandisets/ host=s3.amazonaws.com importtree=yes name=s3-dandiarchive port=80 publicurl=https://dandiarchive.s3.amazonaws.com/ region=us-east-2 signature=v4 storageclass=STANDARD type=S3 timestamp=1764626152s +``` + +Bucket has ["trailing delete"](https://github.com/dandi/dandi-archive/blob/master/doc/design/s3-trailing-delete.md) enabled since awhile (years). + +Originally it was all open and we were importing on cron, the last merge was + +``` +Date: 2025 Aug 27 21:23:09 -0400 + + Merge remote-tracking branch 's3-dandiarchive/master' +``` + +Recently-ish (sep/oct) policy got updated so some keys on s3 became protected and require authentication. We had a good number of failing due to 403 runs, including ones where I already specified AWS credentials but still had `signature=anonymous` and no region specified. Then (yesterday) I specified signature to be v4, had a run where it complained about region needing to be us-east-2 instead of us-east-1 (not sure why could not deduce automagically), so I specified it too. And then the `import` run seem to proceeded fine! + +But `git merge` then failed: + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git merge s3-dandiarchive/master +error: unable to read sha1 file of 000029/draft/dandiset.jsonld (f7c097994e60c2b58dae464633583b65a6691415) +error: unable to read sha1 file of 000029/draft/dandiset.yaml (1fa7abf602b540507c1a31e20da3d687e83ebfe6) +error: unable to read sha1 file of 000338/draft/assets.jsonld (4ad13ca757df0b39f2c20af47e5d3c9140ccfc7b) +error: unable to read sha1 file of 000338/draft/assets.yaml (08cca54d889faffc76c7911f5c700eb09c22e628) +error: unable to read sha1 file of 000338/draft/collection.jsonld (cf60b31aca7826a8d4993828e439af1f808cb17e) +... +``` + +and `git fsck` fails loudly with many blobs missing etc + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ head .duct/logs/2025.12.02T08.19.22-3737239_stdout +broken link from tree 8c233f531c125ef0edbba48300d7c2ca914c1dac + to blob 513d0a3ba28460f1c7db74b2f4b4905a9942d903 +broken link from tree 8c233f531c125ef0edbba48300d7c2ca914c1dac + to blob 2d3e42dc7935b136141f81f3113a6eac247aa570 +broken link from tree 8c233f531c125ef0edbba48300d7c2ca914c1dac + to blob e88e9ef106f8c7cdce43378079416ab353593335 +... 
+``` +and also similar errors while trying to git log a sample file there: + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git log s3-dandiarchive/master -- 000029/draft/dandiset.jsonld +commit 2fc1ff12 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Dec 01 16:56:17 -0500 + + import from s3-dandiarchive + +commit 65c4ea5b +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Apr 24 16:23:07 -0400 + + import from s3-dandiarchive + +commit 832893d3 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Apr 24 13:21:10 -0400 + + import from s3-dandiarchive +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git log -p s3-dandiarchive/master -- 000029/draft/dandiset.jsonld +fatal: unable to read f7c097994e60c2b58dae464633583b65a6691415 +commit 2fc1ff12 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Dec 01 16:56:17 -0500 + + import from s3-dandiarchive +``` + +as the fail on the recently imported version, suggests that it is git-annex not importing correctly somehow? + +I believe this was done with this version: + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ source ~/git-annexes/static-10.20250416.sh +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git annex version | head +git-annex version: 10.20250416-static1 +build flags: Pairing DBus DesktopNotify TorrentParser MagicMime Servant Benchmark Feeds Testsuite S3 WebDAV +dependency versions: aws-0.24.4 bloomfilter-2.0.1.2 crypton-1.0.4 DAV-1.3.4 feed-1.3.2.1 ghc-9.8.4 http-client-0.7.19 persistent-sqlite-2.13.3.0 torrent-10000.1.3 uuid-1.3.16 +... +``` + +please advise on how to mitigate (`git reset --hard` the `s3-dandiarchive/master` to prior state before yesterday and reimport with newer git-annex or ... ?) + +[[!meta author=yoh]] +[[!tag projects/dandi]] + + + + +Originally all keys in the bucket + +### What steps will reproduce the problem? + + +### What version of git-annex are you using? On what operating system? + + +### Please provide any additional information below. + +[[!format sh """ +# If you can, paste a complete transcript of the problem occurring here. +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log + + +# End of transcript or log. +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +
Added a comment: git annex and starship 2
diff --git a/doc/bugs/git-annex__58_____60__stdout__62____58___hPutBuf__58___resource_vanished/comment_4_b1e20a50159d3dceb1397e6ae57cd241._comment b/doc/bugs/git-annex__58_____60__stdout__62____58___hPutBuf__58___resource_vanished/comment_4_b1e20a50159d3dceb1397e6ae57cd241._comment new file mode 100644 index 0000000000..84761e8c3e --- /dev/null +++ b/doc/bugs/git-annex__58_____60__stdout__62____58___hPutBuf__58___resource_vanished/comment_4_b1e20a50159d3dceb1397e6ae57cd241._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="caribou" + avatar="http://cdn.libravatar.org/avatar/914e725d4ec32ad668501d14ade2e52f" + subject="git annex and starship 2" + date="2025-12-02T10:16:41Z" + content=""" +Hello, +Thank you for the pointers from the previous comments it seems to be indeed connected to starship in my case also. +However for me, increasing the timeout up to 2000ms wasn't enough. The workaround that worked for me was to ignore_submodules... + +[git_status] +ignore_submodules = true +"""]]
scaling
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 7982bf19f4..f17dc5e749 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -37,7 +37,7 @@ contributed good bug reports and great ideas. <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> <img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=150> -<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=75> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=70> git-annex development is supported in large part by:
scaling
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 47e4baaea2..7982bf19f4 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -37,7 +37,7 @@ contributed good bug reports and great ideas. <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> <img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=150> -<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=50> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=75> git-annex development is supported in large part by:
scaling
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 2d3d8dd444..47e4baaea2 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -36,8 +36,8 @@ contributed good bug reports and great ideas. <img alt="OpenNeuro logo" src="https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/1c1e0d3b2a2032729727702eb65b1b563eadce1d/packages/openneuro-components/src/assets/on-dark.svg" width=100> <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> -<img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=200> -<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=200> +<img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=150> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=50> git-annex development is supported in large part by:
more logos
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 4226420a06..2d3d8dd444 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -36,6 +36,8 @@ contributed good bug reports and great ideas. <img alt="OpenNeuro logo" src="https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/1c1e0d3b2a2032729727702eb65b1b563eadce1d/packages/openneuro-components/src/assets/on-dark.svg" width=100> <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> +<img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=200> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=200> git-annex development is supported in large part by:
thanksgiving update
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index d99a2b43be..4226420a06 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -34,7 +34,7 @@ contributed good bug reports and great ideas. ## financial support, 2024-2025 <img alt="OpenNeuro logo" src="https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/1c1e0d3b2a2032729727702eb65b1b563eadce1d/packages/openneuro-components/src/assets/on-dark.svg" width=100> -<img alt="Standford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> +<img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> git-annex development is supported in large part by: @@ -50,6 +50,8 @@ git-annex development is supported in large part by: * [ReproNim](https://repronim.org/), funded by [a NIH grant](https://projectreporter.nih.gov/project_info_details.cfm?aid=8999833) awarded to UMass Medical School Worcester, Dartmouth College, MIT, et al. +* Institute of Climate and Energy Systems (Stratosphere; ICE-4) at + [Forschungszentrum Jülich](https://www.fz-juelich.de/en/ice/ice-4). Thanks also to these folks for their support: [[!inline raw=yes pages="thanks/list"]] and anonymous supporters.
Added a comment: Resolved: using newer version
diff --git a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_/comment_1_0ca10bffc619ea3083bde714162c4f25._comment b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_/comment_1_0ca10bffc619ea3083bde714162c4f25._comment new file mode 100644 index 0000000000..98fa4f7b45 --- /dev/null +++ b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_/comment_1_0ca10bffc619ea3083bde714162c4f25._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="scinu@299f3f05b38f16a60b2c1d63240426946bec9ab7" + nickname="scinu" + avatar="http://cdn.libravatar.org/avatar/c5a190c5c0ce61a5be141609dff37fe1" + subject="Resolved: using newer version" + date="2025-11-25T12:39:58Z" + content=""" +After installing git-annex from Archlinux repositories, it works again. + +For some reason, I had installed git-annex-standalone (10.20220121-1) +With git-annex (10.20251114-2) everything works as intended. + +git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 + +Sorry for the noise, should have done this before. + +Best, Scinu +"""]]
diff --git a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn new file mode 100644 index 0000000000..ae708bb1b3 --- /dev/null +++ b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn @@ -0,0 +1,53 @@ +### Please describe the problem. + +In an rsync special remote, "git-annex get a.pdf" gives me the error: + +rsync: [sender] change_dir "/home/myuser/annex/M8/98/'SHA256E-s367497--24d4a5763a5cd718985a471b4d18981b38f9be73206c33ca885d1e7357a8a2f1.pdf" failed: No such file or directory (2) +rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1852) [Receiver=3.4.1] + +This seems due to different storage paths of the file in the local annex and in the remote annex. + +In the local annex, it is under objects/M8/98 + +In the remote annex, paths are of the form objects/1a3/df7 + +There seems to be a different repository layout, but git-annex attempts to obtain the file under the local path. + +### What steps will reproduce the problem? + +create new rsync-over-ssh special remote +copy some files there +git-annex drop a.pdf +git-annex get a.pdf + +### What version of git-annex are you using? On what operating system? + +archlinux, git-annex-standalone +Version : 10.20220121-1 + +git-annex version: 10.20220121-gdf6a8476e +build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV +dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X* +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external +operating system: linux x86_64 +supported repository versions: 8 9 10 +upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10 +local repository version: 8 + + +### Please provide any additional information below. + +[[!format sh """ +get 240.pdf (from rsyncnet...) + +rsync: [sender] change_dir "/home/myuser/annex/M8/98/'SHA256E-s367497--24d4a5763a5cd718985a471b4d18981b38f9be73206c33ca885d1e7357a8a2f1.pdf" failed: No such file or directory (2) +rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1852) [Receiver=3.4.1] +rsync: [Receiver] write error: Broken pipe (32) +rsync exited 23 +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +git-annex solves a lot of problems for both work and privately. I use it on a daily basis since a decade, and it is just great and very reliable. +Great software, please keep up developing and maintaining it.
Fix a WikiLink
diff --git a/doc/copies.mdwn b/doc/copies.mdwn index f79e94d009..b18215cdb9 100644 --- a/doc/copies.mdwn +++ b/doc/copies.mdwn @@ -22,7 +22,7 @@ running `git-annex mincopies N` or can be overridden on a per-file-type basis by the annex.mincopies setting in `.gitattributes` files. The --mincopies switch allows temporarily using a different value. -Note that [trusted repositories|trust]] are assumed to +Note that [[trusted repositories|trust]] are assumed to continue to contain content, so checking them is skipped. So dropping content from trusted repositories does risk numcopies and mincopies later being violated.
comment
diff --git a/doc/todo/p2phttp__58___regularly_re-check_for_annex.url_config/comment_3_667ab1f57a53590c89498423f341ae99._comment b/doc/todo/p2phttp__58___regularly_re-check_for_annex.url_config/comment_3_667ab1f57a53590c89498423f341ae99._comment new file mode 100644 index 0000000000..1573eb1d72 --- /dev/null +++ b/doc/todo/p2phttp__58___regularly_re-check_for_annex.url_config/comment_3_667ab1f57a53590c89498423f341ae99._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-11-20T18:36:31Z" + content=""" +Even if it only re-checks when git-annex is going to use the remote +(and not on every run of git-annex) that seems perhaps too often to check. + +But if it checks less often than that, once per day or whatever, there will +of course be a window where it has not yet noticed the change and uses the +cached remote.name.annexUrl and potentially fails. + +A balance might be that if it fails to connect to the remote.name.annexUrl, +it could re-check it then. +"""]]
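A minimal sketch of the balance described in that comment, using illustrative helpers rather than git-annex's real internals: keep using the cached remote.name.annexUrl, and only re-probe the remote's annex.url after a connection failure.

```haskell
{-# LANGUAGE ScopedTypeVariables #-}

import Control.Exception (IOException, catch)
import Data.IORef

type Url = String

data Remote = Remote
    { cachedAnnexUrl :: IORef Url  -- last known remote.name.annexUrl
    , probeAnnexUrl :: IO Url      -- re-reads the remote's annex.url
    }

-- Connect using the cached url; on failure, re-check it and retry once.
connectWithRecheck :: Remote -> (Url -> IO a) -> IO a
connectWithRecheck r connect = do
    url <- readIORef (cachedAnnexUrl r)
    connect url `catch` \(_ :: IOException) -> do
        url' <- probeAnnexUrl r
        writeIORef (cachedAnnexUrl r) url'
        connect url'
```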
comment
diff --git a/doc/todo/Delayed_drop_from_remote/comment_4_7cc6ca779086402fd32999441a0e5693._comment b/doc/todo/Delayed_drop_from_remote/comment_4_7cc6ca779086402fd32999441a0e5693._comment new file mode 100644 index 0000000000..75bc668fc1 --- /dev/null +++ b/doc/todo/Delayed_drop_from_remote/comment_4_7cc6ca779086402fd32999441a0e5693._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-11-20T18:16:47Z" + content=""" +Yes, the trashbin remote could be private. I think we're in agreement +that's the best way to go. + +--accessedwithin relies on atime, and looks at objects in the local +repository only, so it would not work to find objects in the trashbin +remote. + +I don't think there is anything in preferred content +expressions that would meet your need here exactly. It would probably be +possible to add an expression that matches objects that have been present +in a given repository for a given amount of time. The presence logs do have a +timestamp. + +Of course, if you used a directory special remote you could use +plain old `find`. +"""]]
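A sketch of what such a preferred content expression could check, assuming presence log entries for a key are available newest-first as (timestamp, status) pairs; this is illustrative, not an existing feature:

```haskell
import Data.Time (NominalDiffTime, UTCTime, diffUTCTime)

data Presence = Present | Absent

-- Does the newest log entry show the key present in the repository
-- for at least the given amount of time?
presentFor :: NominalDiffTime -> UTCTime -> [(UTCTime, Presence)] -> Bool
presentFor age now ((t, Present):_) = diffUTCTime now t >= age
presentFor _ _ _ = False
```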
comment
diff --git a/doc/todo/Ephemeral_special_remotes/comment_2_b806f390d7e4f6a1f9e87173e777a5cb._comment b/doc/todo/Ephemeral_special_remotes/comment_2_b806f390d7e4f6a1f9e87173e777a5cb._comment new file mode 100644 index 0000000000..c03b9653e1 --- /dev/null +++ b/doc/todo/Ephemeral_special_remotes/comment_2_b806f390d7e4f6a1f9e87173e777a5cb._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-11-20T14:05:10Z" + content=""" +There are also some common setup stage tasks that pose problems but could +all be fixed in one place: + +* Encryption setup generates encryption keys. Which is both slow, and also + generating and then throwing away an encryption key is the wrong thing to + do. I think this could be dealt with by copying the encryption setup of the + remote that is generating the ephemeral remote into it. +* remote.name.annex-uuid is set in git config by gitConfigSpecialRemote. + Either that could be disabled for ephemerals, or the uuid and name could + also be inherited, which would make that a no-op. +"""]]
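For the first item above, a sketch of what copying the encryption setup might look like, treating the remote configs as simple key/value maps; the exact field names here are an assumption, not checked against remote.log:

```haskell
import qualified Data.Map as M

type RemoteConfig = M.Map String String

-- Seed the ephemeral remote's config with the generating remote's
-- encryption-related fields, so no new keys need to be generated.
-- M.union is left-biased, so inherited fields win.
inheritEncryption :: RemoteConfig -> RemoteConfig -> RemoteConfig
inheritEncryption parent ephemeral = M.union inherited ephemeral
  where
    inherited = M.filterWithKey (\k _ -> k `elem` fields) parent
    fields = ["encryption", "cipher", "cipherkeys", "mac"]  -- assumed names
```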
comment
diff --git a/doc/todo/Ephemeral_special_remotes/comment_1_8e2082918dff4982d1e6dbb8c2fd1f98._comment b/doc/todo/Ephemeral_special_remotes/comment_1_8e2082918dff4982d1e6dbb8c2fd1f98._comment new file mode 100644 index 0000000000..b509d69bbd --- /dev/null +++ b/doc/todo/Ephemeral_special_remotes/comment_1_8e2082918dff4982d1e6dbb8c2fd1f98._comment @@ -0,0 +1,27 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-20T13:34:28Z" + content=""" +The major difficulty in implementing this seems to be the setup stage, +which is the per-special-remote code that runs during +initremote/enableremote. That code can write to disk, or perform +expensive operations. + +A few examples: + +* S3's setup makes 1 http request to verify that the bucket exists + (or about 4 http requests when it needs to create the bucket). + It does additional work when bucket versioning is enabled. +* directory's setup modifies the git config file to set + remote.name.directory. And if that were skipped, generating the directory + special remote would fail, because it reads that git config. + +My gut feeling is that it won't be practical to make it possible to +ephemeralize every type of special remote. But it would not be too +hard to make some subset of special remotes able to be used ephemerally. + +It might be possible to maintain a cache of recently used ephemeral special +remotes across runs of git-annex, and so avoid needing to re-run the setup +stage. +"""]]
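The cache idea at the end of that comment could be as simple as memoizing the setup stage on the remote's configuration. A sketch with illustrative types (not git-annex's actual code):

```haskell
{-# LANGUAGE LambdaCase #-}

import qualified Data.Map as M
import Data.IORef

type RemoteConfig = M.Map String String

-- Reuse the result of a previously run setup stage for an identical
-- ephemeral remote configuration, instead of re-running it.
withEphemeral
    :: IORef (M.Map RemoteConfig r)  -- cache, kept across uses
    -> (RemoteConfig -> IO r)        -- the expensive setup stage
    -> RemoteConfig
    -> IO r
withEphemeral cache setup cfg =
    M.lookup cfg <$> readIORef cache >>= \case
        Just r -> return r
        Nothing -> do
            r <- setup cfg
            modifyIORef' cache (M.insert cfg r)
            return r
```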
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_1_00ec4c643aaff6f9a6a70a27309aab2f._comment b/doc/todo/Special_remote_redirect_to_URL/comment_1_00ec4c643aaff6f9a6a70a27309aab2f._comment new file mode 100644 index 0000000000..7fb1e5e913 --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_1_00ec4c643aaff6f9a6a70a27309aab2f._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-20T13:23:00Z" + content=""" +This seems like a good design to me. It will need a protocol extension +to indicate when a git-annex version supports it. + +It occurred to me that when `git-annex p2phttp` is used and is proxying to a +special remote that uses this feature, it would be possible to forward the +redirect to the http client, so the server would not need to download the +object itself. + +A neat optimisation potential, although implementing it would cut across +several things in a way I'm unsure how to do cleanly. + +That did make me wonder though, if the redirect url would always be safe to +share with the client, without granting the client any abilities beyond a +one-time download. And I think that's too big an assumption to make for +this optimisation. Someone could choose to redirect to an url containing +eg, http basic auth, which would be fine when using it all locally, but not +in this proxy situation. So there would need to be an additional configuration +to enable the proxy optimisation. +"""]]
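One concrete check behind that concern, sketched with the network-uri package: a redirect url that embeds userinfo (http basic auth credentials) is one example of something unsafe to forward. This is purely illustrative; as the comment says, an explicit configuration would still be needed.

```haskell
import Network.URI (parseURI, uriAuthority, uriUserInfo)

-- A redirect url embedding credentials in its authority part should not
-- be forwarded to a p2phttp client. (Only one of several possible
-- hazards; a real check could not rely on this alone.)
safeToForward :: String -> Bool
safeToForward u = case parseURI u of
    Just uri -> maybe True (null . uriUserInfo) (uriAuthority uri)
    Nothing -> False
```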
fixed in aws-0.25.1
diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn index 923209bb04..698fdeaf66 100644 --- a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn +++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn @@ -95,3 +95,5 @@ initremote: 1 failed Thanks for all your great work, Joey! [[!tag projects/openneuro]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_1_f537c54572a418e7d126594947015a64._comment b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_1_f537c54572a418e7d126594947015a64._comment new file mode 100644 index 0000000000..b5bbf96ef4 --- /dev/null +++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_1_f537c54572a418e7d126594947015a64._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-19T16:24:33Z" + content=""" +Root caused to this bug: <https://github.com/aristidb/aws/issues/296> + +Seems likely that `git-annex import` from an importtree=yes S3 +remote on GCP is also broken since it also uses getBucket. + +git-annex uses getBucket to probe if the bucket already exists, +which lets it avoid dealing with the various ways that PUT of a bucket can +fail. GCP also has some incompatibilities in how it responds to that, +eg in the above log, it uses a custom "BucketNameUnavailable", +rather than the S3 standard "BucketAlreadyExists". +"""]] diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_2_74bab9cd3c546533bd8047ce9342d4c7._comment b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_2_74bab9cd3c546533bd8047ce9342d4c7._comment new file mode 100644 index 0000000000..4bf6cf4205 --- /dev/null +++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_2_74bab9cd3c546533bd8047ce9342d4c7._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-11-19T18:27:45Z" + content=""" +This is fixed in aws-0.25.1. I have made the git-annex stack build use that +version. I also added a build warning when built with an older version, +to hopefully encourage other builds to get updated. +"""]]
S3: Remote can be configured with an x-amz-tagging header.
Needs aws-0.25, which was just released.
stack.yaml: Use aws-0.25.
This risks some foot shooting if the user configures a bucket to hide
objects that have a given tag, and sets up the S3 special remote with it,
but then uses a build of git-annex that does not support sending it.
I considered making git-annex error out if it's built with too old a
version of aws. But that leaves the problem of older versions of git-annex,
which will just ignore the x-amz-tagging config. So it didn't seem worth
the bother to have a new failure mode. The user of this will just need to
be careful of their versions if they are using it in a security context.
The use of parseQueryText is kind of silly, because aws just reassembles it
back to a query string. But it made sense for the aws interface to be
key/value pairs. And it seems to make sense for the git-annex config
interface to be a query string. At least, I could not think of a better way
to handle the config.
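As a standalone illustration of that config handling (the real code is in the Remote/S3.hs diff below), assuming http-types' parseQueryText:

```haskell
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Network.HTTP.Types.URI (parseQueryText)

-- Parse a configured query string like "foo=bar&confidential" into the
-- key/value pairs the aws interface wants; aws then reassembles them
-- into the x-amz-tagging header value.
tagPairs :: String -> [(T.Text, T.Text)]
tagPairs s = map go (parseQueryText (TE.encodeUtf8 (T.pack s)))
  where
    go (k, Just v) = (k, v)
    go (k, Nothing) = (k, mempty)
```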
Sponsored-by: Dartmouth College's OpenNeuro project
diff --git a/CHANGELOG b/CHANGELOG
index f04408a114..b75996aedd 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,11 @@
+git-annex (10.20251118) UNRELEASED; urgency=medium
+
+ * S3: Remote can be configured with an x-amz-tagging header.
+ (Needs aws-0.25)
+ * stack.yaml: Use aws-0.25.
+
+ -- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
+
git-annex (10.20251114) upstream; urgency=medium
* p2p --pair: Fix to work with external P2P networks.
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 566c8f5889..e23a281e3a 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -1,6 +1,6 @@
{- S3 remotes
-
- - Copyright 2011-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -108,6 +108,8 @@ remote = specialRemoteType $ RemoteType
(FieldDesc "for path-style requests, set to \"path\"")
, signatureVersionParser signatureField
(FieldDesc "S3 signature version")
+ , optionalStringParser taggingField
+ (FieldDesc "tagging header to add when storing on S3")
, optionalStringParser mungekeysField HiddenField
, optionalStringParser AWS.s3credsField HiddenField
]
@@ -161,6 +163,9 @@ requeststyleField = Accepted "requeststyle"
signatureField :: RemoteConfigField
signatureField = Accepted "signature"
+taggingField :: RemoteConfigField
+taggingField = Accepted "x-amz-tagging"
+
data SignatureVersion
= SignatureVersion Int
| DefaultSignatureVersion
@@ -1017,6 +1022,7 @@ data S3Info = S3Info
, bucketExportLocation :: ExportLocation -> BucketObject
, bucketImportLocation :: BucketObject -> Maybe ImportLocation
, metaHeaders :: [(T.Text, T.Text)]
+ , tagging :: [(T.Text, T.Text)]
, partSize :: Maybe Integer
, isIA :: Bool
, versioning :: Bool
@@ -1039,6 +1045,7 @@ extractS3Info c = do
, bucketExportLocation = getBucketExportLocation c
, bucketImportLocation = getBucketImportLocation c
, metaHeaders = getMetaHeaders c
+ , tagging = getTagging c
, partSize = getPartSize c
, isIA = configIA c
, versioning = fromMaybe False $
@@ -1056,6 +1063,9 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
, S3.poMetadata = metaHeaders info
, S3.poAutoMakeBucket = isIA info
, S3.poAcl = acl info
+#if MIN_VERSION_aws(0,25,0)
+ , S3.poTagging = tagging info
+#endif
}
acl :: S3Info -> Maybe S3.CannedAcl
@@ -1083,6 +1093,14 @@ getMetaHeaders = map munge
metaprefixlen = length metaPrefix
munge (k, v) = (T.pack $ drop metaprefixlen (fromProposedAccepted k), T.pack v)
+getTagging :: ParsedRemoteConfig -> [(T.Text, T.Text)]
+getTagging c = case getRemoteConfigValue taggingField c of
+ Nothing -> []
+ Just s -> map go $ parseQueryText (encodeBS s)
+ where
+ go (k, Just v) = (k, v)
+ go (k, Nothing) = (k, mempty)
+
isMetaHeader :: RemoteConfigField -> Bool
isMetaHeader h = metaPrefix `isPrefixOf` fromProposedAccepted h
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn
index f8f800558b..cdafe7d7c8 100644
--- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn
+++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn
@@ -11,3 +11,5 @@ An example use case is publishing a private dataset where a bucket policy is use
[[!tag projects/openneuro]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment
index 06f8922f7b..97c20cfe48 100644
--- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment
+++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment
@@ -8,5 +8,12 @@ does not allow setting this header.
I opened an issue <https://github.com/aristidb/aws/issues/294>
-Update: This will need aws-0.25.
+Update: This will need git-annex to be built with aws-0.25. If a S3 special
+remote is configured with this header, and an older version of git-annex
+or a git-annex built with an older version of aws is used, it will just not
+send along the header when storing an object.
+
+So if your use case involves making newly uploaded objects private, you'll
+want to make sure you're always using a build of git-annex that supports
+it.
"""]]
diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn
index d36dfa1b36..c1f21183fe 100644
--- a/doc/special_remotes/S3.mdwn
+++ b/doc/special_remotes/S3.mdwn
@@ -162,10 +162,14 @@ the S3 remote.
and to "bar/" in another special remote, and both special remotes could
then use the same bucket.
-* `x-amz-meta-*` are passed through as http headers when storing keys
- in S3.
+* `x-amz-meta-*` are passed through as http headers
+ when storing keys in S3.
* `x-archive-meta-*` are passed through as http headers when storing keys
in the Internet Archive. See
[the Internet Archive S3 interface documentation](https://archive.org/help/abouts3.txt)
for example headers.
+
+* `x-amz-tagging` is passed through as a http header
+ when storing keys in S3. (Needs git-annex 10.20251118 or newer
+ built with aws-0.25. Otherwise, the header will *not* be sent.)
diff --git a/git-annex.cabal b/git-annex.cabal
index 44b89db6bc..9da04c3618 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -1,5 +1,5 @@
Name: git-annex
-Version: 10.20251114
+Version: 10.20251118
Cabal-Version: 1.12
License: AGPL-3
Maintainer: Joey Hess <id@joeyh.name>
diff --git a/stack.yaml b/stack.yaml
index f4c26e49ae..4c8e0a9e29 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -13,3 +13,5 @@ flags:
packages:
- '.'
resolver: lts-24.2
+extra-deps:
+- aws-0.25
update
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment index a4ad7aadde..06f8922f7b 100644 --- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment +++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment @@ -7,4 +7,6 @@ Implementing this will need changes to the haskell aws library, since it does not allow setting this header. I opened an issue <https://github.com/aristidb/aws/issues/294> + +Update: This will need aws-0.25. """]]
comment
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment new file mode 100644 index 0000000000..a4ad7aadde --- /dev/null +++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-18T16:41:31Z" + content=""" +Implementing this will need changes to the haskell aws library, since it +does not allow setting this header. + +I opened an issue <https://github.com/aristidb/aws/issues/294> +"""]]
remove comments for old release news item
diff --git a/doc/news/version_10.20250416/comment_1_910c84a8ef1d6aed657be912dd68ffbd._comment b/doc/news/version_10.20250416/comment_1_910c84a8ef1d6aed657be912dd68ffbd._comment deleted file mode 100644 index db0be8c0b9..0000000000 --- a/doc/news/version_10.20250416/comment_1_910c84a8ef1d6aed657be912dd68ffbd._comment +++ /dev/null @@ -1,86 +0,0 @@ -[[!comment format=mdwn - username="liam" - avatar="http://cdn.libravatar.org/avatar/5cb416d010c1d3f0ca677dd7f6c822ea" - subject="Corruption? invalid object for uuid.log" - date="2025-04-20T02:32:45Z" - content=""" -Hello, - -I just installed this version (10.20250416) this afternoon. -Now I have the following issue: - -``` -$ git annex info -(recording state in git...) -error: invalid object 100644 f83a2e4115f14218f574058d3b3ccf72e9cfe677 for 'uuid.log' -fatal: git-write-tree: error building trees -git-annex: failed to read sha from git write-tree -``` - -and again with --verbose and --debug - -``` -git annex info --verbose --debug -[2025-04-20 04:27:23.998527] (Utility.Process) process [12752] read: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"show-ref\",\"git-annex\"] -[2025-04-20 04:27:24.042056] (Utility.Process) process [12752] done ExitSuccess -[2025-04-20 04:27:24.042331] (Utility.Process) process [12753] read: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"show-ref\",\"--hash\",\"refs/heads/git-annex\"] -[2025-04-20 04:27:24.046759] (Utility.Process) process [12753] done ExitSuccess -[2025-04-20 04:27:24.051021] (Utility.Process) process [12754] chat: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"hash-object\",\"-w\",\"--no-filters\",\"--stdin-paths\"] -[2025-04-20 04:27:24.051495] (Utility.Process) process [12755] feed: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"update-index\",\"-z\",\"--index-info\"] -[2025-04-20 04:27:24.265152] (Utility.Process) process [12755] done ExitSuccess -[2025-04-20 04:27:24.265928] (Annex.Branch) read transitions.log -[2025-04-20 04:27:24.266808] (Utility.Process) process [12756] chat: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"cat-file\",\"--batch\"] -(recording state in git...) -[2025-04-20 04:27:24.278685] (Utility.Process) process [12757] read: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"write-tree\"] -error: invalid object 100644 f83a2e4115f14218f574058d3b3ccf72e9cfe677 for 'uuid.log' -fatal: git-write-tree: error building trees -[2025-04-20 04:27:25.065295] (Utility.Process) process [12757] done ExitFailure 128 -git-annex: failed to read sha from git write-tree -``` - -This first showed up when I tried to do: `git annex sync myremote` -Now this error keeps showing up but only in my computer's repo. - -Note: The object id shown doesn't seem to correspond to anything on my other drives. - -I'm worried it might be something in the latest version. 
- -On the external SSD from which I tried to sync it shows: - -``` -trusted repositories: 0 -semitrusted repositories: 12 -untrusted repositories: 1 -local annex keys: 245972 -local annex size: 2.7 terabytes -annexed files in working tree: 404264 -size of annexed files in working tree: 3.54 terabytes -combined annex size of all repositories: 9.46 terabytes -backend usage: - SHA256E: 404264 -bloom filter size: 32 mebibytes (49.2% full) -``` - -Any idea what this could be? - -Is it safe to fix this by just cloning the repo again and then replacing the annex directory? -I'm not sure how to deal with this since I've never had this error before. - -version information: - -``` -$ git annex version -git-annex version: 10.20250416 -build flags: Pairing TorrentParser MagicMime Servant Benchmark Feeds Testsuite S3 WebDAV -dependency versions: aws-0.24.4 bloomfilter-2.0.1.2 crypton-1.0.4 DAV-1.3.4 feed-1.3.2.1 ghc-9.8.4 http-client-0.7.19 persistent-sqlite-2.13.3.0 torrent-10000.1.3 uuid-1.3.16 -key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL GITBUNDLE GITMANIFEST VURL X* -remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg rclone hook external compute mask -operating system: darwin aarch64 -supported repository versions: 8 9 10 -upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10 -local repository version: 10 -``` - -Thank you, -Liam -"""]] diff --git a/doc/news/version_10.20250416/comment_2_88fc9ef91f9830f67b7658232a727ac4._comment b/doc/news/version_10.20250416/comment_2_88fc9ef91f9830f67b7658232a727ac4._comment deleted file mode 100644 index 54e94ec2bf..0000000000 --- a/doc/news/version_10.20250416/comment_2_88fc9ef91f9830f67b7658232a727ac4._comment +++ /dev/null @@ -1,34 +0,0 @@ -[[!comment format=mdwn - username="liam" - avatar="http://cdn.libravatar.org/avatar/5cb416d010c1d3f0ca677dd7f6c822ea" - subject="Repairing repositories on MacOS" - date="2025-04-20T02:49:48Z" - content=""" -Hi, - -As follow on from the above, I'm trying to repair the repo using the built-in tool from here: [[https://git-annex.branchable.com/git-annex-repair/]] -However, it seems to have problems on MacOS. - -``` -$ git annex repair -repair Running git fsck ... -Fsck found no problems. Checking for broken branches. -fatal: ambiguous argument 'refs/.DS_Store': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git <command> [<revision>...] -- [<file>...]' -fatal: ambiguous argument 'refs/heads/.DS_Store': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git <command> [<revision>...] -- [<file>...]' -^C -``` - -Somethings gone quite wrong if those .DS_Store files are showing up there. - -I'd love to dig into the source for git-annex but I don't know Haskell... -Maybe this is a sign I should learn it? - -The `uuid.log` problem in my previous post seems to have a bit of documentation here: [[https://git-annex.branchable.com/internals/]] -Maybe it has something to do with a previously failed clone attempt? 
I will dig some more tomorrow. - -Liam -"""]] diff --git a/doc/news/version_10.20250416/comment_3_9a9b7aa33311822ab38c00ca7477668b._comment b/doc/news/version_10.20250416/comment_3_9a9b7aa33311822ab38c00ca7477668b._comment deleted file mode 100644 index 6c4d462cee..0000000000 --- a/doc/news/version_10.20250416/comment_3_9a9b7aa33311822ab38c00ca7477668b._comment +++ /dev/null @@ -1,20 +0,0 @@ -[[!comment format=mdwn - username="liam" - avatar="http://cdn.libravatar.org/avatar/5cb416d010c1d3f0ca677dd7f6c822ea" - subject="Fetch from good repo to fix uuid.log issue" - date="2025-04-20T12:32:31Z" - content=""" -Hi, - -So to resolve the .DS_Store issue, I just had to go into the .git repo and remove all the .DS_Store files that MacOS littered around. - -To fix the repo itself, I ended up doing a `git fetch goodremote` and this seems to have fixed the issue. -I can now run `git annex info` again without a problem. - -The weird thing is, it was complaining about `uuid.log`, however, when running `git show git-annex:uuid.log` I was getting the same output on both repos. -It's not clear what went wrong. Maybe corruption but `git fsck` never showed an issue. Fetching from a good repo seems to have fixed it. - -Hopefully this helps if anyone has a similar problem. - -Liam -"""]] diff --git a/doc/news/version_10.20250416/comment_4_b6ac0341afd1c15eed0af4161db9c021._comment b/doc/news/version_10.20250416/comment_4_b6ac0341afd1c15eed0af4161db9c021._comment deleted file mode 100644 index 83d87f4084..0000000000 --- a/doc/news/version_10.20250416/comment_4_b6ac0341afd1c15eed0af4161db9c021._comment +++ /dev/null @@ -1,14 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 4""" - date="2025-05-13T14:55:52Z" - content=""" -First of all, a news item for a release is not the appropriate place to -discuss something like this. Use the [[forum]] in the furture. - -This looks like a `.git/annex/index` that references a git object that for -some reason didn't get written to disk. A common way this can happen is a -disk getting unmounted or system shutdown at a point in time that causes a -recently written git object to get lost. Deleting `.git/annex/index` will -solve this problem. -"""]]
add news item for git-annex 10.20251114
diff --git a/doc/news/version_10.20250721.mdwn b/doc/news/version_10.20250721.mdwn deleted file mode 100644 index 09ca1b73f0..0000000000 --- a/doc/news/version_10.20250721.mdwn +++ /dev/null @@ -1,17 +0,0 @@ -git-annex 10.20250721 released with [[!toggle text="these changes"]] -[[!toggleable text=""" * Improved workaround for git 2.50 bug, avoding an occasional test suite - failure, as well as some situations where an unlocked file did not get - populated when adding another file to the repository with the same - content. - * Add --url option and url= preferred content expression, to match - content that is recorded as present in an url. - * p2phttp: Scan multilevel directories with --directory. - * p2phttp: Added --socket option. - * Fix bug in handling of linked worktrees on filesystems not supporting - symlinks, that caused annexed file content to be stored in the wrong - location inside the git directory, and also caused pointer files to not - get populated. - * fsck: Fix location of annexed files when run in linked worktrees - that have experienced the above bug. - * Fix symlinks generated to annexed content when in adjusted unlocked - branch in a linked worktree on a filesystem not supporting symlinks."""]] \ No newline at end of file diff --git a/doc/news/version_10.20251114.mdwn b/doc/news/version_10.20251114.mdwn new file mode 100644 index 0000000000..63255f2897 --- /dev/null +++ b/doc/news/version_10.20251114.mdwn @@ -0,0 +1,10 @@ +git-annex 10.20251114 released with [[!toggle text="these changes"]] +[[!toggleable text=""" * p2p --pair: Fix to work with external P2P networks. + * p2phttp: Significant robustness fixes for bugs that caused the + server to stall. + * p2phttp: Fix a file descriptor leak. + * p2phttp: Added the --lockedfiles option. + * dropunused: Run the annex.secure-erase-command + (or .git/hooks/secure-erase-annex) when deleting + temp and bad object files. + * remotedaemon: Avoid crashing when run with --debug."""]] \ No newline at end of file
comment
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_9_024a91c7b0eabc888cd717208e2a7d14._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_9_024a91c7b0eabc888cd717208e2a7d14._comment new file mode 100644 index 0000000000..9db1df3036 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_9_024a91c7b0eabc888cd717208e2a7d14._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 9""" + date="2025-11-13T19:31:57Z" + content=""" +After fixing the other bug, I have successfully run the test for several +hours without any problems. +"""]]
p2phttp: fix stalling git-annex get
A race condition caused a small fraction of requests to hang with the
object mostly transferred, or in some cases caused STM deadlock messages
to be displayed without a hang.
See comments for analysis. I don't entirely understand what is going on
with all the filling of endv, but this clearly fixes the race.
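To illustrate the one-line fix (a minimal sketch, not the actual server
code): a blocking `putTMVar` on an already-full TMVar retries forever,
which the GHC runtime may report as an STM deadlock, while `tryPutTMVar`
simply reports failure and moves on.

```haskell
import Control.Concurrent.STM

main :: IO ()
main = do
  endv <- newEmptyTMVarIO
  -- First code path signals completion by filling endv.
  atomically $ putTMVar endv ()
  -- If a second code path also used the blocking putTMVar here, the
  -- transaction would retry until endv is emptied, which never
  -- happens, so that thread would hang, or the runtime would report
  -- "thread blocked indefinitely in an STM transaction".
  -- tryPutTMVar just reports that endv was already full:
  ok <- atomically $ tryPutTMVar endv ()
  print ok  -- False
```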
diff --git a/CHANGELOG b/CHANGELOG
index 3c056e1588..542fdd4658 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,10 +2,10 @@ git-annex (10.20251103) UNRELEASED; urgency=medium
* p2p --pair: Fix to work with external P2P networks.
* remotedaemon: Avoid crashing when run with --debug.
- * p2phttp: Fix server stall when there are too many concurrent clients.
- * p2phttp: Fix a file descriptor leak caused by a race condition.
+ * p2phttp: Significant robustness fixes to bugs that caused the
+ server to stall.
+ * p2phttp: Fix a file descriptor leak.
* p2phttp: Added the --lockedfiles option.
- * p2phttp: Fix server stall when a git-annex drop is interrupted.
* dropunused: Run the annex.secure-erase-command
(or .git/hooks/secure-erase-annex) when deleting
temp and bad object files.
diff --git a/P2P/Http/Server.hs b/P2P/Http/Server.hs
index d56f0e49ec..bcc11c9207 100644
--- a/P2P/Http/Server.hs
+++ b/P2P/Http/Server.hs
@@ -189,7 +189,7 @@ serveGet mst su apiver (B64Key k) cu bypass baf startat sec auth = do
validity <- atomically $ takeTMVar validityv
sz <- takeMVar szv
atomically $ putTMVar finalv ()
- atomically $ putTMVar endv ()
+ void $ atomically $ tryPutTMVar endv ()
return $ case validity of
Nothing -> True
Just Valid -> True
diff --git a/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn b/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn
index 29dd2cf56e..1e20a74a6e 100644
--- a/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn
+++ b/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn
@@ -45,3 +45,5 @@ and running it again does not block waiting for the server.
>
> Using curl as the client and seeing if
> it always receives the whole object would be a good next step. --[[Joey]]
+
+>> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/get_from_p2phttp_sometimes_stalls/comment_1_d3adc9528152fbb041e27482d674a59b._comment b/doc/bugs/get_from_p2phttp_sometimes_stalls/comment_1_d3adc9528152fbb041e27482d674a59b._comment
new file mode 100644
index 0000000000..712706857d
--- /dev/null
+++ b/doc/bugs/get_from_p2phttp_sometimes_stalls/comment_1_d3adc9528152fbb041e27482d674a59b._comment
@@ -0,0 +1,47 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2025-11-13T17:22:33Z"
+ content="""
+I saw this bug with git-annex built using haskell packages from current
+debian unstable.
+
+On a hunch, I tried a `stack build`, and it does not stall. However, I am
+seeing this from the http server at about the same frequency as the stall,
+and occurring during the `git-annex get`:
+
+ thread blocked indefinitely in an STM transaction
+
+And at the same time, this is reported on the client side:
+
+ get 27 (from origin...)
+ HttpExceptionRequest Request {
+ host = "localhost"
+ port = 9417
+ secure = False
+ requestHeaders = [("Accept","application/octet-stream")]
+ path = "/git-annex/a697daef-f8c3-4e64-a3e0-65927e36d06b/v4/k
+ queryString = "?clientuuid=9bc0478c-a0ff-4159-89ab-14c13343beb9&ass
+ method = "GET"
+ proxy = Nothing
+ rawBody = False
+ redirectCount = 10
+ responseTimeout = ResponseTimeoutDefault
+ requestVersion = HTTP/1.1
+ proxySecureMode = ProxySecureWithConnect
+ }
+ IncompleteHeaders
+ ok
+
+(I assume that it succeeded because it did an automatic retry when the
+first download was incomplete.)
+
+I also tried using the
+stack build for the server, and the cabal build for the client, with the same
+result. With the cabal build for the server and stack build for the client, it
+stalls as before.
+
+So it's a bug on the server side, and whatever it is causes one of the threads to
+get killed in a way that causes another STM transaction to deadlock.
+And the runtime happens to detect the deadlock and resolve it when built with stack.
+"""]]
diff --git a/doc/bugs/get_from_p2phttp_sometimes_stalls/comment_2_a5296c101fed02a9ba3f16f94461b7d9._comment b/doc/bugs/get_from_p2phttp_sometimes_stalls/comment_2_a5296c101fed02a9ba3f16f94461b7d9._comment
new file mode 100644
index 0000000000..5dedcf362d
--- /dev/null
+++ b/doc/bugs/get_from_p2phttp_sometimes_stalls/comment_2_a5296c101fed02a9ba3f16f94461b7d9._comment
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2025-11-13T17:53:48Z"
+ content="""
+Using DebugLocks, found that the deadlock is in checkvalidity,
+the second time it calls `putTMVar endv ()`.
+
+That was added in [[!commit 7bd616e169827568c4ca6bc6e4f8ae5bf796d2d8]]
+"a bugfix to serveGet, it hung at the end".
+
+Looks like a race between checkvalidity and waitfinal,
+which both fill endv. waitfinal does not deadlock when endv is already
+full, but checkvalidity does.
+"""]]
Adding ON tag
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn index 5af44ef3de..f8f800558b 100644 --- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn +++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn @@ -10,3 +10,4 @@ An example use case is publishing a private dataset where a bucket policy is use ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) +[[!tag projects/openneuro]]
Feature request for OpenNeuro
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn new file mode 100644 index 0000000000..5af44ef3de --- /dev/null +++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn @@ -0,0 +1,12 @@ +### Please describe the problem. +Similar to the x-amz-meta-* S3 remote configuration, it would be useful to be able to configure an S3 remote with the x-amz-tagging header passed to putObject. Unlike x-amz-meta values, tags can be updated without copying objects to a new version. + +An example use case is publishing a private dataset where a bucket policy is used to limit access by default (tagged private on the initial export) and objects are progressively made public after an embargo period. + +### What version of git-annex are you using? On what operating system? + +10.20250929 on Fedora 43. + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +
close one bug and open a new one I found while testing it
diff --git a/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn b/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn new file mode 100644 index 0000000000..29dd2cf56e --- /dev/null +++ b/doc/bugs/get_from_p2phttp_sometimes_stalls.mdwn @@ -0,0 +1,47 @@ +`git-annex get` from a p2phttp remote sometimes stalls out. + +This has been observed when using loopback. Eg, run in one repo, +which contains about 1000 annexed files of size 1 mb each: + + git-annex p2phttp -J2 --bind 127.0.0.1 --wideopen + +Then in a clone: + + git config remote.origin.annexUrl annex+http://localhost/git-annex/ + while true; do git-annex get --from origin -J20; git-annex drop; done + +The concurrency is probably not strictly needed to reproduce this. +But it makes it more likely to occur sooner, at least. + +The total stall looks like this: + + 1% 7.82 KiB 6 MiB/s 0s + +Here is another one: + + 1% 7.82 KiB 6 MiB/s 0s + +The progress display never updates. Every time +I've seen the total stall, it's been at 7.82 KiB, +which seems odd. + +Looking at the object in `.git/annex/tmp`, it has the correct +content, but is 4368 bytes short of the full 1048576 byte size. +I've verified this is the case every time. So it looks like +the client didn't get the final chunk of the file in the response. + +Note that, despite p2phttp being run with -J2, +so only supporting 2 concurrent get operations, +interrupting the `git-annex get` that stalled out +and running it again does not block waiting for the server. + So p2phttp seems to have finished processing the request. + Or possibly failed in a way that returns a worker to the pool. +--[[Joey]] + +> Initial investigation in serveGet seems to show it successfully +> sending the whole object. At least up to fromActionStep, +> I've not verified servant always does the right thing with that +> or doesn't close the connection early sometimes. +> +> Using curl as the client and seeing if +> it always receives the whole object would be a good next step. --[[Joey]] diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn index 78a552343d..021bbd3481 100644 --- a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn @@ -39,3 +39,5 @@ local repository version: 10 ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) [[!tag projects/ICE4]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_8_f5018216098c02b4770cced15c94a275._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_8_f5018216098c02b4770cced15c94a275._comment new file mode 100644 index 0000000000..36dcf454f4 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_8_f5018216098c02b4770cced15c94a275._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2025-11-12T19:53:41Z" + content=""" +Fixed the problem with interrupted `git-annex drop`. + +Opened a new bug report about sometimes stalling `git-annex +drop`: [[get_from_p2phttp_sometimes_stalls]] + +I think I've fully addressed this bug report now, so will close it. +"""]]
fix
diff --git a/doc/videos/distribits2025.mdwn b/doc/videos/distribits2025.mdwn index a48d57d6f2..663149204e 100644 --- a/doc/videos/distribits2025.mdwn +++ b/doc/videos/distribits2025.mdwn @@ -17,14 +17,14 @@ Matthias Riße's talk covered this increasingly important integration between git-annex and forgejo, and how it is developed and maintained. +The above three speakers were in +a [panel discussion](https://www.distribits.live/talks/2025/discussion-hess-vogel-szczepanik-risse/) as well. + Michał Szczepanik's talk ["Compute on demand"](https://www.distribits.live/talks/2025/szczepanik-compute-on-demand/) compared and contrasted the git-annex compute special remote with the datalad-remake special remote. -The above three speakers were in -a [panel discussion](https://www.distribits.live/talks/2025/discussion-hess-vogel-szczepanik-risse/) as well. - Timothy Sanders's talk ["Using Git-annex to enhance the MediaWiki file repository system"](https://www.distribits.live/talks/2025/sanders-using-git-annex-to-enhance-the/) presented a git-annex mediawiki backend.
add
diff --git a/doc/videos/distribits2025.mdwn b/doc/videos/distribits2025.mdwn new file mode 100644 index 0000000000..a48d57d6f2 --- /dev/null +++ b/doc/videos/distribits2025.mdwn @@ -0,0 +1,40 @@ +At [Distribits 2025](https://distribits.live/), there were several talks on +git-annex and closely related subjects. + +Joey Hess's talk +["git-annex for computer scientists"](https://www.distribits.live/talks/2025/hess-git-annex-for-computer-scientists/) +explained the core data structures that make up git-annex, and then +used that as a basis to understand several recent git-annex features. +([mirror](https://downloads.kitenet.net/talks/distribits_2025_git-annex_for_computer_scientists.webm)) + +Steffen Vogel's talk +["Managing Tape Archives with git-annex: A Special Remote for Sequential Media"](https://www.distribits.live/talks/2025/vogel-managing-tape-archives-with-git-annex/) +presented a soon to be released special remote for LTO tape, breaking new +ground in what git-annex can use. + +Matthias Riße's talk +["Forgejo-aneksajo: a git-annex/DataLad forge"](https://www.distribits.live/talks/2025/risse-forgejo-aneksajo-a-git-annex-datalad-forge/) +covered this increasingly important integration between git-annex and +forgejo, and how it is developed and maintained. + +Michał Szczepanik's talk +["Compute on demand"](https://www.distribits.live/talks/2025/szczepanik-compute-on-demand/) +compared and contrasted the git-annex compute special remote with +the datalad-remake special remote. + +The above three speakers were in +a [panel discussion](https://www.distribits.live/talks/2025/discussion-hess-vogel-szczepanik-risse/) as well. + +Timothy Sanders's talk +["Using Git-annex to enhance the MediaWiki file repository system"](https://www.distribits.live/talks/2025/sanders-using-git-annex-to-enhance-the/) +presented a git-annex mediawiki backend. + +Christopher Markiewicz's talk +["Maintaining large datasets at scale"](https://www.distribits.live/talks/2025/markiewicz-maintaining-large-datasets-at-scale/) +covered finding and fixing defects that arise in automatically managed +git-annex repositories. + +Many of the other talks at Distribits also involved git-annex. +[Playlist](https://www.youtube.com/playlist?list=PLEQHbPfpVqU6_bZ4gUQn_9OX-LvDmKoby) + +[[!meta title="git-annex presentations at Distribits 2025"]]
Added a comment
diff --git a/doc/todo/Delayed_drop_from_remote/comment_3_f6914ae82921124e26c31ae89175d6de._comment b/doc/todo/Delayed_drop_from_remote/comment_3_f6914ae82921124e26c31ae89175d6de._comment new file mode 100644 index 0000000000..f2ffbea4ae --- /dev/null +++ b/doc/todo/Delayed_drop_from_remote/comment_3_f6914ae82921124e26c31ae89175d6de._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="matrss" + avatar="http://cdn.libravatar.org/avatar/cd1c0b3be1af288012e49197918395f0" + subject="comment 3" + date="2025-11-10T21:14:20Z" + content=""" +> The deletion could be handled by a cron job that the user is responsible for setting up, which avoids needing to configure a time limit in git-annex, and also avoids the question of what git-annex command(s) would handle the clean up. + +Agreed, that makes sense. + +> An alternative way to handle this would be to use the \"appendonly\" config of git-annex p2phttp (and git-annex-shell has something similar). Then the repository would refuse to drop. And instead you could have a cron job that uses git-annex unused to drop old objects. + +While realistically most force drops probably would be unused files those two things aren't necessarily the same. + +> I think there are some benefits to that path, it makes explicit to the user that they data they wanted to drop is not immediately going away from the server. + +I think I would deliberately want this to be invisible to the user, since I wouldn't want anyone to actively start relying on it. + +> Which might be important for legal reasons (although the prospect of backups of annexed files makes it hard to be sure if a server has really deleted something anyway). + +That's a tradeoff for sure, but the expectation should already be that a hosted service like a Forgejo-aneksajo instance will retain backups at least for disaster recovery purposes. But that's on the admin(s) to communicate, and within a personal setting it doesn't matter at all. + +> And if the repository had a disk quota, this would make explicit to the user why dropping content from it didn't free up quota. + +Actually for that reason I would not count this soft-deleted data towards quotas for my own purposes. + +> A third approach would be to have a config setting that makes dropped objects be instead moved to a remote. So the drop would succeed, but whereis would indicate that the object was being retained there. Then a cron job on the remote could finish the deletions. + +I like this! Considering that such a \"trash bin\" (special) remote could be initialized with `--private` (right?) it would be possible to make it fully invisible to the user too, while indeed being much more flexible. I suppose the cron job would then be something like `git annex drop --from trash-bin --all --not --accessedwithin=30d`, assuming that moving it there counts as \"accessing\" and no background job on the server accesses it afterwards (maybe an additional matching option for mtime or ctime instead of atime would be useful here?). This feels very much git-annex'y 🙂 +"""]]
comment
diff --git a/doc/todo/Delayed_drop_from_remote/comment_2_b217080d4b983fc9aac4cfe0cd5da0fc._comment b/doc/todo/Delayed_drop_from_remote/comment_2_b217080d4b983fc9aac4cfe0cd5da0fc._comment new file mode 100644 index 0000000000..ddfe8cb305 --- /dev/null +++ b/doc/todo/Delayed_drop_from_remote/comment_2_b217080d4b983fc9aac4cfe0cd5da0fc._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-11-10T15:34:06Z" + content=""" +A third approach would be to have a config setting that makes dropped +objects be instead moved to a remote. So the drop would succeed, but +whereis would indicate that the object was being retained there. Then +a cron job on the remote could finish the deletions. + +This would not be singifinantly more heavyweight than just moving to a +directory, if you used eg a directory special remote. And it's also a lot +more flexible. + +Of course, this would make dropping take longer than usual, depending on +how fast the object could be moved to the remote. If it were slow, there +would be no way to convey progress back to the user without a lot more +complication than this feature warrants. + +Open to your thoughts on these alternatives.. +"""]]
dropunused: Run the annex.secure-erase-command
(or .git/hooks/secure-erase-annex) when deleting
temp and bad object files.
As was already done when deleting unlocked files.
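The shape of the change, as a minimal sketch with hypothetical names (the
real code goes through pruneTmpWorkDirBefore and git-annex's own
secureErase, as the diff below shows): run the configured erase command on
the file, if any, before unlinking it.

```haskell
import Control.Monad (forM_, when)
import System.Directory (doesFileExist, removeFile)
import System.Process (callProcess)

-- Hypothetical helper: securely erase a file before deleting it,
-- when an erase command (eg "shred") is configured.
secureDelete :: Maybe String -> FilePath -> IO ()
secureDelete mcmd f = do
  exists <- doesFileExist f
  when exists $ do
    forM_ mcmd $ \cmd -> callProcess cmd [f]
    removeFile f
```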
diff --git a/CHANGELOG b/CHANGELOG
index b888e00163..568f553a2a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,9 @@ git-annex (10.20251103) UNRELEASED; urgency=medium
* p2phttp: Fix server stall when there are too many concurrent clients.
* p2phttp: Fix a file descriptor leak caused by a race condition.
* p2phttp: Added the --lockedfiles option.
+ * dropunused: Run the annex.secure-erase-command
+ (or .git/hooks/secure-erase-annex) when deleting
+ temp and bad object files.
-- Joey Hess <id@joeyh.name> Mon, 03 Nov 2025 14:02:46 -0400
diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs
index 6733b42235..e8717f8185 100644
--- a/Command/DropUnused.hs
+++ b/Command/DropUnused.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010,2012,2018 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -17,6 +17,7 @@ import qualified Git
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
import Annex.NumCopies
import Annex.Content
+import Annex.Content.LowLevel
cmd :: Command
cmd = withAnnexOptions [jobsOption, jsonOptions] $
@@ -79,5 +80,7 @@ perform from numcopies mincopies key = case from of
performOther :: (Key -> Git.Repo -> OsPath) -> Key -> CommandPerform
performOther filespec key = do
f <- fromRepo $ filespec key
- pruneTmpWorkDirBefore f (liftIO . removeWhenExistsWith removeFile)
+ pruneTmpWorkDirBefore f $ \f' -> do
+ secureErase f'
+ liftIO $ removeWhenExistsWith removeFile f'
next $ return True
diff --git a/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn b/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn
index eb53b9d715..1429ef1f69 100644
--- a/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn
+++ b/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn
@@ -1,3 +1,5 @@
Currently, when `annex.secure-erase-command` is configured,
`git-annex dropunused` does not use it for deleting tmp and bad files.
Since those can contain the content of objects, it should. --[[Joey]]
+
+> [[done]]
comment and related todo
diff --git a/doc/todo/Delayed_drop_from_remote/comment_1_e670391c20cdec5d40b55a06305bdfca._comment b/doc/todo/Delayed_drop_from_remote/comment_1_e670391c20cdec5d40b55a06305bdfca._comment new file mode 100644 index 0000000000..3d9bc5e5de --- /dev/null +++ b/doc/todo/Delayed_drop_from_remote/comment_1_e670391c20cdec5d40b55a06305bdfca._comment @@ -0,0 +1,29 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-10T15:06:35Z" + content=""" +The deletion could be handled by a cron job that the user is +responsible for setting up, which avoids needing to configure a time limit +in git-annex, and also avoids the question of what git-annex command(s) +would handle the clean up. + +An alternative way to handle this would be to use the "appendonly" config +of `git-annex p2phttp` (and `git-annex-shell` has something similar). Then +the repository would refuse to drop. And instead you could have a cron job +that uses `git-annex unused` to drop old objects. This would need some way +to only drop unused objects after some period of time. + +I think there are some benefits to that path, it makes explicit to the user +that they data they wanted to drop is not immediately going away from the +server. Which might be important for legal reasons (although the prospect +of backups of annexed files makes it hard to be sure if a server has really +deleted something anyway). And if the repository had a disk quota, this +would make explicit to the user why dropping content from it didn't free up +quota. + +(I think it would also be possible to (ab)use the `annex.secure-erase-command` +to instead move objects to the directory. Probably not a good idea, +especially because there's no guarantee that command is only run on +complete annex objects.) +"""]] diff --git a/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn b/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn new file mode 100644 index 0000000000..eb53b9d715 --- /dev/null +++ b/doc/todo/dropunused_of_tmp_and_bad_files_should_honor_annex.secure-erase-command_config.mdwn @@ -0,0 +1,3 @@ +Currently, when `annex.secure-erase-command` is configured, +`git-annex dropunused` does not use it for deleting tmp and bad files. +Since those can contain the content of objects, it should. --[[Joey]]
diff --git a/doc/todo/Delayed_drop_from_remote.mdwn b/doc/todo/Delayed_drop_from_remote.mdwn new file mode 100644 index 0000000000..dd2d26bd4e --- /dev/null +++ b/doc/todo/Delayed_drop_from_remote.mdwn @@ -0,0 +1,11 @@ +In the name of protecting people from themselves I'd like to have an option to configure repositories on a Forgejo-aneksajo instance (or rather in general) to _not_ immediately obey a `git annex drop --from ... --force`. + +I am thinking of having an `annex.delayeddrop` config option (names subject to bike-shedding of course) to set in each repo's git config. With it set to e.g. "30d" `git annex drop` on that repository would, from the point of view of the user, do everything like always including recording that the repo no longer has the data, but instead of deleting the files immediately, move them into e.g. .git/annex/deleted-objects. This directory would then be cleaned of files that have been there for more than 30 days at some point in the future, e.g. when an fsck is done, or maybe on other operations too. + +I don't think any tooling around ".git/annex/deleted-objects" would be necessary, rather with the information that the data for some key was lost one could then manually dive into that directory, retrieve the data out of it, and reinject it into the repository. + +The point is to have a fast path to recovery from over-eager dropping that might otherwise lead to data loss, even though `--force` should be totally clear to everyone. + +Or maybe something like this exists already... + +[[!tag projects/ICE4]]
status update
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_7_3008ba0d43374a4dbf87335aaf0a9477._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_7_3008ba0d43374a4dbf87335aaf0a9477._comment new file mode 100644 index 0000000000..9f0e3122a8 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_7_3008ba0d43374a4dbf87335aaf0a9477._comment @@ -0,0 +1,27 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2025-11-07T20:37:44Z" + content=""" +I've landed a complete fix for this. The server no longer locks up +when I run the test case for a long time. + +Additionally, there was a bug that caused p2phttp to leak lock file +descriptors, which gets triggered by the same test case. I've fixed that. + +There are two problems I noticed in testing though. + +`git-annex get` sometimes slows down to just bytes per second, +or entirely stalls. This is most apparent with `-J10`, but I've seen it +happen even when there is no concurrency or other clients. +This should probably be treated as a separate bug, but it does +cause the test case to eventually hang, unless git-annex is configured +to do stall detection. The server keeps responding to +other requests though. + +Running `git-annex drop` and interrupting it at the wrong moment +while it's locking content on the server seems to cause a P2P protocol +worker to not get returned to the worker pool. When it happens enough +times, this can cause the server to stop responding to new requests. +Which seems closely related to this bug. +"""]]
p2phttp: Added the --lockedfiles option
This prevents serveLockContent from starting an unbounded number of
threads.
Note that, when it goes over this limit, git-annex is still able to drop
from the local repository in most situations; it just falls back to
checking content presence and is still able to prove the drop is safe.
But of course there are some cases where an active lock is needed in order to
drop.
The ugly getTimestamp hack works around a bug in the server. I suspect that
bug is also responsible for what happens if git-annex drop is interrupted
at the wrong time when checking the lock on the server -- as well as
leaving the lock fd open, the annex worker is not released to the pool,
so later connections to the server stall out. This needs to be
investigated, and the hack removed.
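The limit is a small non-blocking counting semaphore built on a
`TMVar Integer`; the diff below contains the real mkLockedFilesQSem,
consumeLockedFilesQSem, and releaseLockedFilesQSem, and this
self-contained sketch shows the same pattern:

```haskell
import Control.Concurrent.STM
import Data.Maybe (fromMaybe)

type QSem' = TMVar Integer

-- Default to 100 locked files when no limit is given.
mkQSem' :: Maybe Integer -> IO QSem'
mkQSem' = newTMVarIO . fromMaybe 100

-- Take a unit, or return False when the limit is reached, rather
-- than blocking, so the request can be failed cleanly.
consume :: QSem' -> IO Bool
consume v = atomically $ do
  n <- takeTMVar v
  if n < 1
    then putTMVar v n >> return False
    else putTMVar v (pred n) >> return True

release :: QSem' -> IO ()
release v = atomically $ takeTMVar v >>= putTMVar v . succ
```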
diff --git a/CHANGELOG b/CHANGELOG
index ebdfd81d04..b888e00163 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,7 @@ git-annex (10.20251103) UNRELEASED; urgency=medium
* remotedaemon: Avoid crashing when run with --debug.
* p2phttp: Fix server stall when there are too many concurrent clients.
* p2phttp: Fix a file descriptor leak caused by a race condition.
+ * p2phttp: Added the --lockedfiles option.
-- Joey Hess <id@joeyh.name> Mon, 03 Nov 2025 14:02:46 -0400
diff --git a/Command/P2PHttp.hs b/Command/P2PHttp.hs
index 140b2d4c94..f03509b333 100644
--- a/Command/P2PHttp.hs
+++ b/Command/P2PHttp.hs
@@ -57,6 +57,7 @@ data Options = Options
, proxyConnectionsOption :: Maybe Integer
, jobsOption :: Maybe Concurrency
, clusterJobsOption :: Maybe Int
+ , lockedFilesOption :: Maybe Integer
, directoryOption :: [FilePath]
}
@@ -119,6 +120,10 @@ optParser _ = Options
( long "clusterjobs" <> metavar paramNumber
<> help "number of concurrent node accesses per connection"
))
+ <*> optional (option auto
+ ( long "lockedfiles" <> metavar paramNumber
+ <> help "number of content files that can be locked"
+ ))
<*> many (strOption
( long "directory" <> metavar paramPath
<> help "serve repositories in subdirectories of a directory"
@@ -128,8 +133,10 @@ startAnnex :: Options -> Annex ()
startAnnex o
| null (directoryOption o) = ifM ((/=) NoUUID <$> getUUID)
( do
+ lockedfilesqsem <- liftIO $
+ mkLockedFilesQSem (lockedFilesOption o)
authenv <- liftIO getAuthEnv
- st <- mkServerState o authenv
+ st <- mkServerState o authenv lockedfilesqsem
liftIO $ runServer o st
-- Run in a git repository that is not a git-annex repository.
, liftIO $ startIO o
@@ -146,20 +153,21 @@ startIO o
runServer o st
where
mkst authenv oldst = do
+ lockedfilesqsem <- mkLockedFilesQSem (lockedFilesOption o)
repos <- findRepos o
sts <- forM repos $ \r -> do
strd <- Annex.new r
- Annex.eval strd (mkstannex authenv oldst)
+ Annex.eval strd (mkstannex authenv oldst lockedfilesqsem)
return (mconcat sts)
{ updateRepos = updaterepos authenv
}
- mkstannex authenv oldst = do
+ mkstannex authenv oldst lockedfilesqsem = do
u <- getUUID
if u == NoUUID
then return mempty
else case M.lookup u (servedRepos oldst) of
- Nothing -> mkServerState o authenv
+ Nothing -> mkServerState o authenv lockedfilesqsem
Just old -> return $ P2PHttpServerState
{ servedRepos = M.singleton u old
, serverShutdownCleanup = mempty
@@ -213,14 +221,15 @@ runServer o mst = go `finally` serverShutdownCleanup mst
Socket.listen sock Socket.maxListenQueue
return sock
-mkServerState :: Options -> M.Map Auth P2P.ServerMode -> Annex P2PHttpServerState
-mkServerState o authenv =
+mkServerState :: Options -> M.Map Auth P2P.ServerMode -> LockedFilesQSem -> Annex P2PHttpServerState
+mkServerState o authenv lockedfilesqsem =
withAnnexWorkerPool (jobsOption o) $
mkP2PHttpServerState
(mkGetServerMode authenv o)
return
(fromMaybe 1 $ proxyConnectionsOption o)
(fromMaybe 1 $ clusterJobsOption o)
+ lockedfilesqsem
mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode
mkGetServerMode _ o _ Nothing
diff --git a/P2P/Http/Server.hs b/P2P/Http/Server.hs
index b94b2486ab..3f1917398f 100644
--- a/P2P/Http/Server.hs
+++ b/P2P/Http/Server.hs
@@ -474,7 +474,7 @@ serveLockContent
-> Handler LockResult
serveLockContent mst su apiver (B64Key k) cu bypass sec auth = do
(conn, st) <- getP2PConnection apiver mst cu su bypass sec auth LockAction id
- let lock = do
+ let lock = checklocklimit conn st $ do
lockresv <- newEmptyTMVarIO
unlockv <- newEmptyTMVarIO
-- A thread takes the lock, and keeps running
@@ -490,6 +490,7 @@ serveLockContent mst su apiver (B64Key k) cu bypass sec auth = do
void $ runFullProto (clientRunState conn) (clientP2PConnection conn) $ do
net $ sendMessage UNLOCKCONTENT
_ -> return ()
+ liftIO $ releaseLockedFilesQSem st
atomically (takeTMVar lockresv) >>= \case
Right True -> return (Just (annexworker, unlockv))
_ -> return Nothing
@@ -501,7 +502,19 @@ serveLockContent mst su apiver (B64Key k) cu bypass sec auth = do
Just (locker, lockid) -> do
liftIO $ storeLock lockid locker st
return $ LockResult True (Just lockid)
- Nothing -> return $ LockResult False Nothing
+ Nothing -> do
+ releaseP2PConnection conn
+ return $ LockResult False Nothing
+ where
+ checklocklimit conn st a =
+ ifM (consumeLockedFilesQSem st)
+ ( a
+ , do
+ -- This works around a problem when nothing
+ -- is sent to the P2P connection.
+ _ <- liftIO $ proxyClientNetProto conn getTimestamp
+ return Nothing
+ )
serveKeepLocked
:: APIVersion v
diff --git a/P2P/Http/State.hs b/P2P/Http/State.hs
index 47057dc779..6105a00cc8 100644
--- a/P2P/Http/State.hs
+++ b/P2P/Http/State.hs
@@ -79,6 +79,7 @@ data PerRepoServerState = PerRepoServerState
, annexRead :: Annex.AnnexRead
, getServerMode :: GetServerMode
, openLocks :: TMVar (M.Map LockID Locker)
+ , lockedFilesQSem :: LockedFilesQSem
}
type AnnexWorkerPool = TMVar (WorkerPool (Annex.AnnexState, Annex.AnnexRead))
@@ -93,14 +94,15 @@ data ServerMode
}
| CannotServeRequests
-mkPerRepoServerState :: AcquireP2PConnection -> AnnexWorkerPool -> Annex.AnnexState -> Annex.AnnexRead -> GetServerMode -> IO PerRepoServerState
-mkPerRepoServerState acquireconn annexworkerpool annexstate annexread getservermode = PerRepoServerState
+mkPerRepoServerState :: AcquireP2PConnection -> AnnexWorkerPool -> Annex.AnnexState -> Annex.AnnexRead -> GetServerMode -> LockedFilesQSem -> IO PerRepoServerState
+mkPerRepoServerState acquireconn annexworkerpool annexstate annexread getservermode lockedfilesqsem = PerRepoServerState
<$> pure acquireconn
<*> pure annexworkerpool
<*> newTMVarIO annexstate
<*> pure annexread
<*> pure getservermode
<*> newTMVarIO mempty
+ <*> pure lockedfilesqsem
data ActionClass = ReadAction | WriteAction | RemoveAction | LockAction
deriving (Eq)
@@ -258,14 +260,36 @@ type AcquireP2PConnection
= ConnectionParams
-> IO (Either ConnectionProblem P2PConnectionPair)
+type LockedFilesQSem = TMVar Integer
+
+mkLockedFilesQSem :: Maybe Integer -> IO LockedFilesQSem
+mkLockedFilesQSem = newTMVarIO . fromMaybe 100
+
+consumeLockedFilesQSem :: PerRepoServerState -> IO Bool
+consumeLockedFilesQSem st = atomically $ do
+ n <- takeTMVar (lockedFilesQSem st)
+ if n < 1
+ then do
+ putTMVar (lockedFilesQSem st) n
+ return False
+ else do
+ putTMVar (lockedFilesQSem st) (pred n)
+ return True
+
+releaseLockedFilesQSem :: PerRepoServerState -> IO ()
+releaseLockedFilesQSem st = atomically $ do
+ n <- takeTMVar (lockedFilesQSem st)
+ putTMVar (lockedFilesQSem st) (succ n)
+
mkP2PHttpServerState
:: GetServerMode
-> UpdateRepos
-> ProxyConnectionPoolSize
-> ClusterConcurrency
+ -> LockedFilesQSem
-> AnnexWorkerPool
-> Annex P2PHttpServerState
-mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterconcurrency workerpool = do
+mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterconcurrency lockedfilesqsem workerpool = do
enableInteractiveBranchAccess
myuuid <- getUUID
myproxies <- M.lookup myuuid <$> getProxies
(Diff truncated)
Added a comment
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_6_eaf326343b6c358de25ee9a0448613bc._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_6_eaf326343b6c358de25ee9a0448613bc._comment new file mode 100644 index 0000000000..9c97dd9e73 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_6_eaf326343b6c358de25ee9a0448613bc._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="matrss" + avatar="http://cdn.libravatar.org/avatar/cd1c0b3be1af288012e49197918395f0" + subject="comment 6" + date="2025-11-07T09:12:13Z" + content=""" +> I think that --debug output from the p2phttp server would be helpful in narrowing down if there is particular operation that causes this hang. + +I should have been a bit more clear, I also saw the deadlock sometimes with concurrent get's, sometimes with drop's, and sometimes with a mix of both, so there wasn't one particular operation that seemed to be the issue. + +> -J2 also seems quite low though. + +This is for Forgejo-aneksajo, where there is still one p2phttp process being started per repository. Since there could potentially be 1000's of concurrent processes at any given time I thought it might be wise to start with the bare minimum by default. Due to how p2phttp and proxying is supposed to interact I've also realized that the current integration is not working as it should (<https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/issues/96>) and that I probably won't be able to make use of the single p2phttp process for all repositories (because of ambiguity with authorization when there are multiple different repositories with differing permissions that proxy for the same remote). +"""]]
ask about S3 DEEP_ARCHIVE and the glacier special remote
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__.mdwn b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__.mdwn new file mode 100644 index 0000000000..7b48d8b977 --- /dev/null +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__.mdwn @@ -0,0 +1,10 @@ +In the git-annex docs for [S3](https://git-annex.branchable.com/special_remotes/S3/), under `storageclass`, it says + +> Amazon S3's DEEP_ARCHIVE is similar to Amazon Glacier. For that, use the glacier special remote, rather than this one. + +However, Amazon has [deprecated the standalone Glacier API](https://www.lastweekinaws.com/blog/aws-deprecates-two-dozen-services-most-of-which-youve-never-heard-of/), in favor of the S3 Glacier storage classes like [S3 Glacier Deep Archive](https://aws.amazon.com/blogs/aws/new-amazon-s3-storage-class-glacier-deep-archive/). As I understand it, new AWS accounts cannot sign up for Glacier at all, and existing accounts can only use it if they already had been using it. Instead, Amazon wants you to use the S3 classes, which are the [same price](https://aws.amazon.com/s3/pricing/) but use the S3 API instead of the Glacier API. + + +For new repositories, should we use S3 with `storageclass=DEEP_ARCHIVE`? + +It's not clear to me if this will work correctly, if the git-annex S3 implementation is built to handle S3 Glacier storage classes correctly. If not, what should we do since we can't use the standalone Glacier anymore?
comment
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_5_216561fb495aeb73683305a20a3b66e7._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_5_216561fb495aeb73683305a20a3b66e7._comment new file mode 100644 index 0000000000..f97ff5134f --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_5_216561fb495aeb73683305a20a3b66e7._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2025-11-06T20:27:42Z" + content=""" +Pushed a preliminary fix in the `p2phttp_deadlock` branch. + +That has some known problems, documented in the commit. But it does +avoid p2phttp locking up like this. +"""]]
p2phttp: Fix server stall when there are too many concurrent clients
A deadlock eventually occurred when there were more concurrent clients
than the size of the annex worker pool.
A test case for the deadlock is multiple clients all running
git-annex get; git-annex drop in a loop. With more clients than the
server's -J, this tended to lock up the server fairly quickly.
The problem was that inAnnexWorker is run twice per request, once for
the P2P protocol handling thread, and once for the P2P protocol
generating thread. Those two threads were started concurrently. Which,
when the worker pool is close to full, is equivalent to two locks being
taken, in potentially two different orders, and so could deadlock.
Fixed by making P2P.Http.Server use handleRequestAnnex instead of
inAnnexWorker. That forks off a new Annex state, runs the action in it,
and merges it back in.
Also, made getP2PConnection wait to return until the inAnnexWorker action
has started. When there are more incoming requests than the size
of the worker pool, this prevents request handlers from starting
handleRequestAnnex until after getP2PConnection has started, avoiding
running more annex actions than the -J level.
Before, the server needed 2 jobs per request, so it could only handle
concurrent requests up to 1/2 of the -J level; now it matches
the -J level. Updated docs accordingly.
Note that serveLockContent starts a thread which keeps running after the
request finishes. Before, that still consumed a worker, which was also
probably a way for the worker pool to get full. Now it does not.
So, lots of calls to serveLockContent can result in lots of threads,
though they are lightweight since they only keep a lock held.
Considering this as a new DOS attack, the server would run out of FDs
before it runs out of memory. I'll address this in the next commit.
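A simplified sketch of the deadlock shape (hypothetical names; the real
pool hands out whole Annex worker states, but the lock-ordering problem
is the same): each request takes two pool units in independently
scheduled steps, so with a pool of size 2, two concurrent requests can
each hold one unit while waiting forever for a second.

```haskell
import Control.Concurrent.STM

acquire :: TVar Int -> IO ()
acquire pool = atomically $ do
  n <- readTVar pool
  check (n > 0)          -- retries until a unit is free
  writeTVar pool (n - 1)

release :: TVar Int -> IO ()
release pool = atomically $ modifyTVar' pool (+ 1)

-- One request: the response handler and the P2P protocol thread each
-- acquire a worker on their own schedule.
handleRequest :: TVar Int -> IO () -> IO ()
handleRequest pool respond = do
  acquire pool  -- first worker
  acquire pool  -- second worker: with the pool nearly full, another
                -- request may hold the last unit while blocked on
                -- its own second acquire, and neither can proceed
  respond
  release pool
  release pool
```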
diff --git a/CHANGELOG b/CHANGELOG
index 41e4572f46..ebdfd81d04 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ git-annex (10.20251103) UNRELEASED; urgency=medium
* p2p --pair: Fix to work with external P2P networks.
* remotedaemon: Avoid crashing when run with --debug.
+ * p2phttp: Fix server stall when there are too many concurrent clients.
* p2phttp: Fix a file descriptor leak caused by a race condition.
-- Joey Hess <id@joeyh.name> Mon, 03 Nov 2025 14:02:46 -0400
diff --git a/P2P/Http/Server.hs b/P2P/Http/Server.hs
index 6a21c6e13c..b94b2486ab 100644
--- a/P2P/Http/Server.hs
+++ b/P2P/Http/Server.hs
@@ -126,7 +126,7 @@ serveGet mst su apiver (B64Key k) cu bypass baf startat sec auth = do
endv <- liftIO newEmptyTMVarIO
validityv <- liftIO newEmptyTMVarIO
finalv <- liftIO newEmptyTMVarIO
- annexworker <- liftIO $ async $ inAnnexWorker st $ do
+ annexworker <- liftIO $ async $ handleRequestAnnex st $ do
let storer _offset len = sendContentWith $ \bs -> liftIO $ do
atomically $ putTMVar bsv (len, bs)
atomically $ takeTMVar endv
@@ -401,7 +401,7 @@ servePutAction
-> Maybe B64FilePath
-> (P2P.Protocol.Offset -> Proto (Maybe [UUID]))
-> IO (Either SomeException (Either ProtoFailure (Maybe [UUID])))
-servePutAction (conn, st) (B64Key k) baf a = inAnnexWorker st $
+servePutAction (conn, st) (B64Key k) baf a = handleRequestAnnex st $
enteringStage (TransferStage Download) $
runFullProto (clientRunState conn) (clientP2PConnection conn) $
put' k af a
@@ -477,9 +477,9 @@ serveLockContent mst su apiver (B64Key k) cu bypass sec auth = do
let lock = do
lockresv <- newEmptyTMVarIO
unlockv <- newEmptyTMVarIO
- -- A single worker thread takes the lock, and keeps running
+ -- A thread takes the lock, and keeps running
-- until unlock in order to keep the lock held.
- annexworker <- async $ inAnnexWorker st $ do
+ annexworker <- async $ handleRequestAnnex st $ do
lockres <- runFullProto (clientRunState conn) (clientP2PConnection conn) $ do
net $ sendMessage (LOCKCONTENT k)
checkSuccess
diff --git a/P2P/Http/State.hs b/P2P/Http/State.hs
index 44a2588b57..47057dc779 100644
--- a/P2P/Http/State.hs
+++ b/P2P/Http/State.hs
@@ -75,6 +75,8 @@ instance Semigroup P2PHttpServerState where
data PerRepoServerState = PerRepoServerState
{ acquireP2PConnection :: AcquireP2PConnection
, annexWorkerPool :: AnnexWorkerPool
+ , annexState :: TMVar Annex.AnnexState
+ , annexRead :: Annex.AnnexRead
, getServerMode :: GetServerMode
, openLocks :: TMVar (M.Map LockID Locker)
}
@@ -91,10 +93,12 @@ data ServerMode
}
| CannotServeRequests
-mkPerRepoServerState :: AcquireP2PConnection -> AnnexWorkerPool -> GetServerMode -> IO PerRepoServerState
-mkPerRepoServerState acquireconn annexworkerpool getservermode = PerRepoServerState
+mkPerRepoServerState :: AcquireP2PConnection -> AnnexWorkerPool -> Annex.AnnexState -> Annex.AnnexRead -> GetServerMode -> IO PerRepoServerState
+mkPerRepoServerState acquireconn annexworkerpool annexstate annexread getservermode = PerRepoServerState
<$> pure acquireconn
<*> pure annexworkerpool
+ <*> newTMVarIO annexstate
+ <*> pure annexread
<*> pure getservermode
<*> newTMVarIO mempty
@@ -275,7 +279,9 @@ mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterco
liftIO $ atomically $ putTMVar endv ()
liftIO $ wait asyncservicer
let servinguuids = myuuid : map proxyRemoteUUID (maybe [] S.toList myproxies)
- st <- liftIO $ mkPerRepoServerState (acquireconn reqv) workerpool getservermode
+ annexstate <- dupState
+ annexread <- Annex.getRead id
+ st <- liftIO $ mkPerRepoServerState (acquireconn reqv) workerpool annexstate annexread getservermode
return $ P2PHttpServerState
{ servedRepos = M.fromList $ zip servinguuids (repeat st)
, serverShutdownCleanup = endit
@@ -283,8 +289,10 @@ mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterco
}
where
acquireconn reqv connparams = do
+ ready <- newEmptyTMVarIO
respvar <- newEmptyTMVarIO
- atomically $ putTMVar reqv (connparams, respvar)
+ atomically $ putTMVar reqv (connparams, ready, respvar)
+ () <- atomically $ takeTMVar ready
atomically $ takeTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv = do
@@ -296,8 +304,8 @@ mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterco
`orElse`
(Left . Left <$> takeTMVar endv)
case reqrel of
- Right (connparams, respvar) -> do
- servicereq myuuid myproxies proxypool relv connparams
+ Right (connparams, ready, respvar) -> do
+ servicereq myuuid myproxies proxypool relv connparams ready
>>= atomically . putTMVar respvar
servicer myuuid myproxies proxypool reqv relv endv
Left (Right releaseconn) -> do
@@ -305,16 +313,16 @@ mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterco
servicer myuuid myproxies proxypool reqv relv endv
Left (Left ()) -> return ()
- servicereq myuuid myproxies proxypool relv connparams
+ servicereq myuuid myproxies proxypool relv connparams ready
| connectionServerUUID connparams == myuuid =
- localConnection relv connparams workerpool
+ localConnection relv connparams workerpool ready
| otherwise =
atomically (getProxyConnectionPool proxypool connparams) >>= \case
- Just conn -> proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool conn
- Nothing -> checkcanproxy myproxies proxypool relv connparams
+ Just conn -> proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool conn ready
+ Nothing -> checkcanproxy myproxies proxypool relv connparams ready
- checkcanproxy myproxies proxypool relv connparams =
- inAnnexWorker' workerpool
+ checkcanproxy myproxies proxypool relv connparams ready = do
+ inAnnexWorker workerpool
(checkCanProxy' myproxies (connectionServerUUID connparams))
>>= \case
Right (Left reason) -> return $ Left $
@@ -334,7 +342,7 @@ mkP2PHttpServerState getservermode updaterepos proxyconnectionpoolsize clusterco
bypass = P2P.Bypass $ S.fromList $ connectionBypass connparams
proxyconnection openconn = openconn >>= \case
Right conn -> proxyConnection proxyconnectionpoolsize
- relv connparams workerpool proxypool conn
+ relv connparams workerpool proxypool conn ready
Left ex -> return $ Left $
ConnectionFailed $ show ex
@@ -354,10 +362,12 @@ localConnection
:: TMVar (IO ())
-> ConnectionParams
-> AnnexWorkerPool
+ -> TMVar ()
-> IO (Either ConnectionProblem P2PConnectionPair)
-localConnection relv connparams workerpool =
+localConnection relv connparams workerpool ready =
localP2PConnectionPair connparams relv $ \serverrunst serverconn ->
- inAnnexWorker' workerpool $
+ inAnnexWorker workerpool $ do
+ liftIO $ atomically $ putTMVar ready ()
void $ runFullProto serverrunst serverconn $
P2P.serveOneCommandAuthed
(connectionServerMode connparams)
@@ -431,14 +441,16 @@ proxyConnection
-> AnnexWorkerPool
-> TMVar ProxyConnectionPool
-> ProxyConnection
+ -> TMVar ()
-> IO (Either ConnectionProblem P2PConnectionPair)
-proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool proxyconn = do
+proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool proxyconn ready = do
(clientconn, proxyfromclientconn) <-
mkP2PConnectionPair connparams ("http client", "proxy")
clientrunst <- mkClientRunState connparams
proxyfromclientrunst <- mkClientRunState connparams
asyncworker <- async $
- inAnnexWorker' workerpool $ do
+ inAnnexWorker workerpool $ do
+ liftIO $ atomically $ putTMVar ready ()
proxystate <- liftIO Proxy.mkProxyState
let proxyparams = Proxy.ProxyParams
{ Proxy.proxyMethods = mkProxyMethods
@@ -495,8 +507,8 @@ proxyConnection proxyconnectionpoolsize relv connparams workerpool proxypool pro
requestcomplete () = return ()
- closeproxyconnection =
- void . inAnnexWorker' workerpool . proxyConnectionCloser
+ closeproxyconnection =
+ void . inAnnexWorker workerpool . proxyConnectionCloser
data Locker = Locker
{ lockerThread :: Async ()
@@ -585,11 +597,8 @@ withAnnexWorkerPool mc a = do
Nothing -> giveup "Use -Jn or set annex.jobs to configure the number of worker threads."
Just wp -> a wp
-inAnnexWorker :: PerRepoServerState -> Annex a -> IO (Either SomeException a)
-inAnnexWorker st = inAnnexWorker' (annexWorkerPool st)
-
-inAnnexWorker' :: AnnexWorkerPool -> Annex a -> IO (Either SomeException a)
-inAnnexWorker' poolv annexaction = do
+inAnnexWorker :: AnnexWorkerPool -> Annex a -> IO (Either SomeException a)
+inAnnexWorker poolv annexaction = do
(workerstrd, workerstage) <- atomically $ waitStartWorkerSlot poolv
resv <- newEmptyTMVarIO
aid <- async $ do
@@ -611,6 +620,20 @@ inAnnexWorker' poolv annexaction = do
(Diff truncated)
correct -J documentation
diff --git a/doc/git-annex-p2phttp.mdwn b/doc/git-annex-p2phttp.mdwn index a7ecd581d8..453015e1f3 100644 --- a/doc/git-annex-p2phttp.mdwn +++ b/doc/git-annex-p2phttp.mdwn @@ -60,9 +60,9 @@ convenient way to download the content of any key, by using the path This or annex.jobs must be set to configure the number of worker threads, per repository served, that serve connections to the webserver. - - Since the webserver itself also uses one of these threads, - this needs to be set to 2 or more. + + This must be set to 2 or more, because each request served by the + webserver needs 2 worker threads. A good choice is often one worker per CPU core: `--jobs=cpus`
comment
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_4_44e30c4f2dbebd4c453433d354db8f14._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_4_44e30c4f2dbebd4c453433d354db8f14._comment new file mode 100644 index 0000000000..1be5ae2d05 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_4_44e30c4f2dbebd4c453433d354db8f14._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-11-05T18:40:52Z" + content=""" +Tested a modified p2phttp that uses 2 worker pools, one for the P2P client side +and one for server side. This means that -J2 actually runs up to 4 threads, +although with only 2 capabilities, so the change won't affect CPU load. +So I tried with -J2 and 4 clients running the loop. + +It still stalls much as before, maybe after a bit longer. + +It still seems likely that the two workers used per http request +is the root of the problem. When there are more than annex.jobs concurrent +requests, each http response handler calls inAnnexWorker, and so one will +block. If the corresponding localConnection successfully gets a worker, +that means one of the other localConnections is blocked. Resulting in a +running http response handler whose corresponding localConnection is blocked. +The inverse also seems possible. + +If 2 worker pools is not the solution, it seems it would need to instead be +solved by rearchitecting the http server to not have that separation. Or to +ensure that getP2PConnection doesn't return until the localConnection has +allocated its worker. I'll try that next. +"""]]
analysis
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_1_7c052ef5f57516192cc9e1e03362d719._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_1_7c052ef5f57516192cc9e1e03362d719._comment new file mode 100644 index 0000000000..264c816e41 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_1_7c052ef5f57516192cc9e1e03362d719._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-05T16:52:12Z" + content=""" +I think that --debug output from the p2phttp server would be helpful in +narrowing down if there is particular operation that causes this hang. + +p2phttp has a pool of worker threads, so if a thread stalls out, or +potentially crashes in some way that is not handled, it can result in all +subsequent operations hanging. +[[!commit 91dbcf0b56ba540a33ea5a79ed52f33e82f4f61b]] is one recent example +of that; I remember there were some similar problems when initially +developing it. + +-J2 also seems quite low though. With the http server itself using one of +those threads, all requests get serialized through the second thread. If there is +any situation where request A needs request B to be made and finish before +it can succeed, that would deadlock. +"""]] diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_2_378ce078e1dbd049977910630cfd47ef._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_2_378ce078e1dbd049977910630cfd47ef._comment new file mode 100644 index 0000000000..e5bbecd679 --- /dev/null +++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_2_378ce078e1dbd049977910630cfd47ef._comment @@ -0,0 +1,71 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-11-05T17:14:29Z" + content=""" +I was able to reproduce this fairly quickly with 2 clones, each running +the loop on the same 5 files, which I made each be 1 mb in size. + +Both hung on get, of different files. 
+The tail of the --debug:
+
+    [2025-11-05 13:14:06.255833094] (P2P.IO) [http server] [ThreadId 914] P2P > DATA 1048576
+    [2025-11-05 13:14:06.255872078] (P2P.IO) [http client] [ThreadId 912] P2P < DATA 1048576
+    [2025-11-05 13:14:06.262783513] (P2P.IO) [http server] [ThreadId 914] P2P > VALID
+    [2025-11-05 13:14:06.262897622] (P2P.IO) [http client] [ThreadId 912] P2P < VALID
+    [2025-11-05 13:14:06.262956555] (P2P.IO) [http client] [ThreadId 912] P2P > SUCCESS
+    [2025-11-05 13:14:06.263008765] (P2P.IO) [http server] [ThreadId 914] P2P < SUCCESS
+    [2025-11-05 13:14:06.264030615] (P2P.IO) [http client] [ThreadId 883] P2P > CHECKPRESENT SHA256E-s1048576--06477b9c41f04aaa5c09af0adbd093506435193c868ef56a5510eff0d3c9fc2b
+    [2025-11-05 13:14:06.264088566] (P2P.IO) [http server] [ThreadId 916] P2P < CHECKPRESENT SHA256E-s1048576--06477b9c41f04aaa5c09af0adbd093506435193c868ef56a5510eff0d3c9fc2b
+    [2025-11-05 13:14:06.264183098] (P2P.IO) [http server] [ThreadId 916] P2P > SUCCESS
+    [2025-11-05 13:14:06.264219447] (P2P.IO) [http client] [ThreadId 883] P2P < SUCCESS
+    [2025-11-05 13:14:06.265125295] (P2P.IO) [http client] [ThreadId 920] P2P > GET 0 3 SHA256E-s1048576--06477b9c41f04aaa5c09af0adbd093506435193c868ef56a5510eff0d3c9fc2b
+    [2025-11-05 13:14:06.265177174] (P2P.IO) [http server] [ThreadId 921] P2P < GET 0 3 SHA256E-s1048576--06477b9c41f04aaa5c09af0adbd093506435193c868ef56a5510eff0d3c9fc2b
+    [2025-11-05 13:14:06.265598603] (P2P.IO) [http server] [ThreadId 921] P2P > DATA 1048576
+    [2025-11-05 13:14:06.265639962] (P2P.IO) [http client] [ThreadId 920] P2P < DATA 1048576
+    [2025-11-05 13:14:06.274452543] (P2P.IO) [http server] [ThreadId 921] P2P > VALID
+    [2025-11-05 13:14:06.274505514] (P2P.IO) [http client] [ThreadId 920] P2P < VALID
+    [2025-11-05 13:14:06.274551963] (P2P.IO) [http client] [ThreadId 920] P2P > SUCCESS
+    [2025-11-05 13:14:06.274594385] (P2P.IO) [http server] [ThreadId 921] P2P < SUCCESS
+    [2025-11-05 13:14:06.276689062] (P2P.IO) [http client] [ThreadId 883] P2P > CHECKPRESENT SHA256E-s1048576--81386bfd2b7880ed397001ea5325ee25cfa69cf46d097b7a69b0a31b5e990f8d
+    [2025-11-05 13:14:06.276783864] (P2P.IO) [http server] [ThreadId 924] P2P < CHECKPRESENT SHA256E-s1048576--81386bfd2b7880ed397001ea5325ee25cfa69cf46d097b7a69b0a31b5e990f8d
+    [2025-11-05 13:14:06.276799023] (P2P.IO) [http client] [ThreadId 892] P2P > CHECKPRESENT SHA256E-s1048576--06477b9c41f04aaa5c09af0adbd093506435193c868ef56a5510eff0d3c9fc2b
+    [2025-11-05 13:14:06.276912961] (P2P.IO) [http server] [ThreadId 924] P2P > SUCCESS
+    [2025-11-05 13:14:06.276939743] (P2P.IO) [http client] [ThreadId 883] P2P < SUCCESS
+    [2025-11-05 13:14:06.276944802] (P2P.IO) [http server] [ThreadId 926] P2P < CHECKPRESENT SHA256E-s1048576--06477b9c41f04aaa5c09af0adbd093506435193c868ef56a5510eff0d3c9fc2b
+    [2025-11-05 13:14:06.277069411] (P2P.IO) [http server] [ThreadId 926] P2P > SUCCESS
+    [2025-11-05 13:14:06.277111522] (P2P.IO) [http client] [ThreadId 892] P2P < SUCCESS
+
+A second hang happened with the loops each running on the same 2 files. This time,
+one clone was doing "get 1" and the other clone "drop 1 (locking origin...)" when they hung.
+
+    [2025-11-05 13:28:03.931334099] (P2P.IO) [http server] [ThreadId 8421] P2P > SUCCESS
+    [2025-11-05 13:28:03.931380284] (P2P.IO) [http client] [ThreadId 8424] P2P < SUCCESS
+    [2025-11-05 13:28:03.932204439] (P2P.IO) [http client] [ThreadId 8424] P2P > UNLOCKCONTENT
+    [2025-11-05 13:28:03.932251987] (P2P.IO) [http server] [ThreadId 8421] P2P < UNLOCKCONTENT
+    [2025-11-05 13:28:04.252596865] (P2P.IO) [http client] [ThreadId 8427] P2P > CHECKPRESENT SHA256E-s1048576--4ad843113f3ee799f2ff834a80bb2aaff35d5babd68395339406671c50e99f6a
+    [2025-11-05 13:28:04.252748136] (P2P.IO) [http server] [ThreadId 8429] P2P < CHECKPRESENT SHA256E-s1048576--4ad843113f3ee799f2ff834a80bb2aaff35d5babd68395339406671c50e99f6a
+    [2025-11-05 13:28:04.252918516] (P2P.IO) [http server] [ThreadId 8429] P2P > SUCCESS
+    [2025-11-05 13:28:04.253026869] (P2P.IO) [http client] [ThreadId 8427] P2P < SUCCESS
+
+A third hang, again with 2 files, and both hung on "drop 1 (locking
+origin...)"
+
+    [2025-11-05 13:34:34.413288012] (P2P.IO) [http client] [ThreadId 16147] P2P > CHECKPRESENT SHA256E-s1048576--c644050a65e9e93a43f5b21e1188e4e7a406057d84102c78fce0007ceb875c69
+    [2025-11-05 13:34:34.413341843] (P2P.IO) [http server] [ThreadId 16172] P2P < CHECKPRESENT SHA256E-s1048576--c644050a65e9e93a43f5b21e1188e4e7a406057d84102c78fce0007ceb875c69
+    [2025-11-05 13:34:34.413415351] (P2P.IO) [http server] [ThreadId 16172] P2P > SUCCESS
+    [2025-11-05 13:34:34.413442692] (P2P.IO) [http client] [ThreadId 16147] P2P < SUCCESS
+    [2025-11-05 13:34:34.414251817] (P2P.IO) [http client] [ThreadId 16176] P2P > GET 0 2 SHA256E-s1048576--c644050a65e9e93a43f5b21e1188e4e7a406057d84102c78fce0007ceb875c69
+    [2025-11-05 13:34:34.4142963] (P2P.IO) [http server] [ThreadId 16177] P2P < GET 0 2 SHA256E-s1048576--c644050a65e9e93a43f5b21e1188e4e7a406057d84102c78fce0007ceb875c69
+    [2025-11-05 13:34:34.414731756] (P2P.IO) [http server] [ThreadId 16177] P2P > DATA 1048576
+    [2025-11-05 13:34:34.414777692] (P2P.IO) [http client] [ThreadId 16176] P2P < DATA 1048576
+    [2025-11-05 13:34:34.421258237] (P2P.IO) [http server] [ThreadId 16177] P2P > VALID
+    [2025-11-05 13:34:34.421322858] (P2P.IO) [http client] [ThreadId 16176] P2P < VALID
+    [2025-11-05 13:34:34.421358204] (P2P.IO) [http client] [ThreadId 16176] P2P > SUCCESS
+    [2025-11-05 13:34:34.421390053] (P2P.IO) [http server] [ThreadId 16177] P2P < SUCCESS
+    [2025-11-05 13:34:34.764709623] (P2P.IO) [http client] [ThreadId 16188] P2P > LOCKCONTENT SHA256E-s1048576--4ad843113f3ee799f2ff834a80bb2aaff35d5babd68395339406671c50e99f6a
+
+Here the P2P protocol client inside the http server got a worker thread, but then apparently
+the http response handler stalled out. That's different from the other 2 debug logs where
+the protocol client was able to send a response. I think in the other 2 debug logs,
+the P2P protocol client then stalls getting a worker thread.
+"""]]
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_3_8cfb1ac7cceb3a3d6345810fd0741d7f._comment b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_3_8cfb1ac7cceb3a3d6345810fd0741d7f._comment
new file mode 100644
index 0000000000..a552835ab9
--- /dev/null
+++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients/comment_3_8cfb1ac7cceb3a3d6345810fd0741d7f._comment
@@ -0,0 +1,33 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2025-11-05T17:48:02Z"
+ content="""
+Aha! We have two things both calling inAnnexWorker:
+
+1. localConnection, handling the P2P protocol client
+   side of things inside the http server. (Or in the case of a proxy,
+   other functions that do a similar thing.)
+2. http response handlers, which run the P2P protocol server side.
+
+For each http request, both of these run asynchronously.
+
+So, with -J2, if two http requests happen at the same time, and
+localConnection wins both races, the two worker threads are both stalled
+waiting for a response from the P2P server side, which is blocked waiting
+for a worker thread. Or perhaps both of the http response handlers win,
+resulting in a similar deadlock.
+
+Maybe it could even happen that the localConnection for one request wins,
+as well as the response handler for the other request?
+
+(And higher -J numbers would still have the same problem,
+when there are more clients. The docs for -J are also a bit wrong;
+they say that the http server uses 1 thread itself, but it can really
+use any number of threads since localConnection does run
+inAnnexWorker in an async action.)
+
+Anyway, if this analysis is correct, the fix is surely to have 2 worker
+thread pools, one for the P2P protocol client side, and one for the P2P
+protocol server side.
+"""]]
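The deadlock described in that comment can be reduced to a few lines. The following is a minimal, self-contained sketch, not git-annex's real code: a `TSem` models a single shared pool of 2 workers (as with -J2), and each request holds one worker for the client side while waiting for a second for the server side.

```haskell
import Control.Concurrent.Async (async, wait)
import Control.Concurrent.STM
import Control.Concurrent.STM.TSem
import Control.Exception (bracket_)
import Control.Monad (replicateM)

-- Acquire a worker slot from the pool, run the action, release the slot.
withWorker :: TSem -> IO a -> IO a
withWorker pool = bracket_ (atomically $ waitTSem pool)
                           (atomically $ signalTSem pool)

-- One http request: the localConnection side takes a worker, then waits
-- for the response-handler side to take a second one.
request :: TSem -> IO ()
request pool = withWorker pool $ withWorker pool $ return ()

main :: IO ()
main = do
    pool <- atomically $ newTSem 2  -- a single shared pool, as with -J2
    rs <- replicateM 2 (async (request pool))
    -- If both requests win their first acquisition before either wins
    -- its second, all slots are held; GHC's runtime will typically
    -- abort this with BlockedIndefinitelyOnSTM rather than hang.
    mapM_ wait rs
```

The suggested fix gives each side its own pool, so the two acquisitions a request makes no longer compete for the same slots.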
Add openneuro tag
diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn
index da4e6c9bce..923209bb04 100644
--- a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn
+++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn
@@ -93,3 +93,5 @@ initremote: 1 failed
 ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
 
 Thanks for all your great work, Joey!
+
+[[!tag projects/openneuro]]
fix address example
diff --git a/doc/tips/peer_to_peer_network_with_iroh.mdwn b/doc/tips/peer_to_peer_network_with_iroh.mdwn
index e20d9a6241..2259ba7af1 100644
--- a/doc/tips/peer_to_peer_network_with_iroh.mdwn
+++ b/doc/tips/peer_to_peer_network_with_iroh.mdwn
@@ -80,7 +80,7 @@ Here's how it all looks:
     remote: Compressing objects: 100% (7/7), done.
     remote: Total 8 (delta 0), reused 0 (delta 0)
     Unpacking objects: 100% (8/8), done.
-    From tor-annex::wa3i6wgttmworwli.onion:5162
+    From p2p-annex::iroh:endpointadroxtad5dj5vaweczqnmkhk2sb7dmysazljjul6zeug7bexymejaaa
     452db22..a894c60  git-annex  -> peer1/git-annex
     c0ac431..44ca7f6  master     -> peer1/master
remove now-obsolete warnings
diff --git a/doc/tips/peer_to_peer_network_with_iroh.mdwn b/doc/tips/peer_to_peer_network_with_iroh.mdwn
index 6dfdbf325c..e20d9a6241 100644
--- a/doc/tips/peer_to_peer_network_with_iroh.mdwn
+++ b/doc/tips/peer_to_peer_network_with_iroh.mdwn
@@ -13,13 +13,6 @@ To use this, you need a few things:
   executable.
 * You also need to install [Magic Wormhole](https://github.com/warner/magic-wormhole) -
   here are [the installation instructions](https://magic-wormhole.readthedocs.io/en/latest/welcome.html#installation).
-
-*Important:*
-
-* The installation process must make a `wormhole` executable available
-  somewhere on your `$PATH`. Some distributions may only install executables
-  which reference the Python version, e.g. `wormhole-2.7`, in which case you
-  will need to manually create a symlink (and maybe file a bug with your distribution).
 * You need git-annex version 10.20251103 or newer. Older versions of git-annex
   unfortunately had a bug that prevents this process from working correctly.
 
diff --git a/doc/tips/peer_to_peer_network_with_tor.mdwn b/doc/tips/peer_to_peer_network_with_tor.mdwn
index 90f000c197..9d2d9995ba 100644
--- a/doc/tips/peer_to_peer_network_with_tor.mdwn
+++ b/doc/tips/peer_to_peer_network_with_tor.mdwn
@@ -16,16 +16,6 @@ To use this, you need to get Tor installed and running. See
 You also need to install [Magic Wormhole](https://github.com/warner/magic-wormhole) -
 here are [the installation instructions](https://magic-wormhole.readthedocs.io/en/latest/welcome.html#installation).
 
-*Important:*
-
-* The installation process must make a `wormhole` executable available
-  somewhere on your `$PATH`. Some distributions may only install executables
-  which reference the Python version, e.g. `wormhole-2.7`, in which case you
-  will need to manually create a symlink (and maybe file a bug with your distribution).
-
-* You need git-annex version 6.20180705 or newer. Older versions of git-annex
-  unfortunately had a bug that prevents this process from working correctly.
-
 ## pairing two repositories
 
 You have two git-annex repositories on different computers, and want to
diff --git a/doc/tips/peer_to_peer_network_with_tor/comment_6_5237c2b408dc1841ca01a51084702b90._comment b/doc/tips/peer_to_peer_network_with_tor/comment_6_5237c2b408dc1841ca01a51084702b90._comment
new file mode 100644
index 0000000000..d556240ce9
--- /dev/null
+++ b/doc/tips/peer_to_peer_network_with_tor/comment_6_5237c2b408dc1841ca01a51084702b90._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""Re: Issue on openSUSE with Tor's requirement for Python 2.7 """
+ date="2025-11-03T19:44:09Z"
+ content="""
+Thanks for that. Since that issue got fixed in 2020, it seems unnecessary to
+complicate this tip with the warning about it, so I've removed your
+addition now.
+"""]]
git-annex version for p2p --pair fix for iroh
diff --git a/CHANGELOG b/CHANGELOG
index 527c5d46dd..a580a28dae 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-git-annex (10.20251030) UNRELEASED; urgency=medium
+git-annex (10.20251103) UNRELEASED; urgency=medium
 
   * p2p --pair: Fix to work with external P2P networks.
   * remotedaemon: Avoid crashing when run with --debug.
diff --git a/doc/tips/peer_to_peer_network_with_iroh.mdwn b/doc/tips/peer_to_peer_network_with_iroh.mdwn
index ce162ffcf5..6dfdbf325c 100644
--- a/doc/tips/peer_to_peer_network_with_iroh.mdwn
+++ b/doc/tips/peer_to_peer_network_with_iroh.mdwn
@@ -20,6 +20,8 @@ To use this, you need a few things:
   somewhere on your `$PATH`. Some distributions may only install executables
   which reference the Python version, e.g. `wormhole-2.7`, in which case you
   will need to manually create a symlink (and maybe file a bug with your distribution).
+* You need git-annex version 10.20251103 or newer. Older versions of git-annex
+  unfortunately had a bug that prevents this process from working correctly.
 
 ## pairing two repositories
 
diff --git a/git-annex.cabal b/git-annex.cabal
index 701c4c2530..087182bcbf 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -1,5 +1,5 @@
 Name: git-annex
-Version: 10.20251029
+Version: 10.20251103
 Cabal-Version: 1.12
 License: AGPL-3
 Maintainer: Joey Hess <id@joeyh.name>
dumbpipe versioning
diff --git a/doc/special_remotes/p2p/git-annex-p2p-iroh b/doc/special_remotes/p2p/git-annex-p2p-iroh
index b1969c7380..83be015c6f 100755
--- a/doc/special_remotes/p2p/git-annex-p2p-iroh
+++ b/doc/special_remotes/p2p/git-annex-p2p-iroh
@@ -1,13 +1,10 @@
 #!/bin/sh
 # Allows git-annex to use iroh for P2P connections.
 #
-# This uses a modified version of iroh's dumbpipe program, adding the
-# generate-ticket command. This pull request has the necessary changes:
+# This uses iroh's dumbpipe program. It needs a version with the
+# generate-ticket command, which was added in this pull request:
 # https://github.com/n0-computer/dumbpipe/pull/86
 #
-# Quality: experimental. Has worked at least twice, but there are known and
-# unknown bugs.
-#
 # Copyright 2025 Joey Hess; licenced under the GNU GPL version 3 or higher.
 
 set -e
diff --git a/doc/tips/peer_to_peer_network_with_iroh.mdwn b/doc/tips/peer_to_peer_network_with_iroh.mdwn
index d743d8f46e..ce162ffcf5 100644
--- a/doc/tips/peer_to_peer_network_with_iroh.mdwn
+++ b/doc/tips/peer_to_peer_network_with_iroh.mdwn
@@ -8,7 +8,7 @@ It can be used with git-annex, to connect together two repositories.
 To use this, you need a few things:
 
 * Install [dumbpipe](https://www.dumbpipe.dev/). This will be used to talk
-  over Iroh.
+  over Iroh. Note that this needs version 0.33 or newer of dumbpipe.
 * Download [[special_remotes/p2p/git-annex-p2p-iroh]] and make the script
   executable.
 * You also need to install [Magic Wormhole](https://github.com/warner/magic-wormhole) -
add iroh tip
Adapted from the tor tip.
Also, removed some out of date stuff from the tor tip.
diff --git a/doc/tips/peer_to_peer_network_with_iroh.mdwn b/doc/tips/peer_to_peer_network_with_iroh.mdwn
new file mode 100644
index 0000000000..d743d8f46e
--- /dev/null
+++ b/doc/tips/peer_to_peer_network_with_iroh.mdwn
@@ -0,0 +1,139 @@
+[Iroh](https://www.iroh.computer/) is a peer to peer protocol that can
+connect any two devices on the planet -- fast!
+
+It can be used with git-annex, to connect together two repositories.
+
+## dependencies
+
+To use this, you need a few things:
+
+* Install [dumbpipe](https://www.dumbpipe.dev/). This will be used to talk
+  over Iroh.
+* Download [[special_remotes/p2p/git-annex-p2p-iroh]] and make the script
+  executable.
+* You also need to install [Magic Wormhole](https://github.com/warner/magic-wormhole) -
+  here are [the installation instructions](https://magic-wormhole.readthedocs.io/en/latest/welcome.html#installation).
+
+*Important:*
+
+* The installation process must make a `wormhole` executable available
+  somewhere on your `$PATH`. Some distributions may only install executables
+  which reference the Python version, e.g. `wormhole-2.7`, in which case you
+  will need to manually create a symlink (and maybe file a bug with your distribution).
+
+## pairing two repositories
+
+You have two git-annex repositories on different computers, and want to
+connect them together over Iroh so they share their contents. Or, you and a
+friend want to connect your repositories together. Pairing is an easy way
+to accomplish this.
+
+In each git-annex repository, run these commands:
+
+    git annex p2p --enable iroh
+    git annex remotedaemon
+
+Now git-annex is listening for connections on Iroh, but
+it will only talk to peers after pairing with them.
+
+In both repositories, run this command:
+
+    git annex p2p --pair
+
+This will print out a pairing code, like "11-incredible-tumeric",
+and prompt for you to enter the other repository's pairing code.
+
+So you have to get in contact with your friend to exchange codes.
+See the section below "how to exchange pairing codes" for tips on
+how to do that securely.
+
+Once the pairing codes are exchanged, the two repositories will be
+connected to one another via Iroh. Each will have a git remote, with a name
+like "peer1", which connects to the other repository.
+
+Then, you can run commands like `git annex sync peer1 --content` to sync
+with the paired repository.
+
+Pairing connects just two repositories, but you can repeat the process to
+pair with as many other repositories as you like, in order to build up
+larger networks of repositories.
+
+## example session
+
+Here's how it all looks:
+
+    $ git annex p2p --enable iroh
+    p2p enable iroh ok
+    $ git annex remotedaemon
+    $ git annex p2p --pair
+    p2p pair peer1 (using Magic Wormhole)
+
+    This repository's pairing code is: 11-incredible-tumeric
+
+    Enter the other repository's pairing code: 1-revenue-icecream
+    Exchanging pairing data...
+    Successfully exchanged pairing data. Connecting to peer1...
+    ok
+    $ git annex sync peer1 --content
+    commit
+    On branch master
+    nothing to commit, working tree clean
+    ok
+    pull peer1
+    remote: Enumerating objects: 10, done.
+    remote: Counting objects: 100% (10/10), done.
+    remote: Compressing objects: 100% (7/7), done.
+    remote: Total 8 (delta 0), reused 0 (delta 0)
+    Unpacking objects: 100% (8/8), done.
+    From tor-annex::wa3i6wgttmworwli.onion:5162
+      452db22..a894c60  git-annex  -> peer1/git-annex
+      c0ac431..44ca7f6  master     -> peer1/master
+
+    Updating c0ac431..44ca7f6
+    Fast-forward
+     amazing_file | 1 +
+     1 file changed, 1 insertion(+)
+     create mode 120000 amazing_file
+    ok
+    (merging peer1/git-annex into git-annex...)
+    get amazing_file (from peer1...)
+    (checksum...) ok
+
+## how to exchange pairing codes
+
+When pairing with a friend's repository, you have to exchange
+pairing codes. How to do this securely?
+
+The pairing codes can only be used once, so it's ok to exchange them in
+a way that someone else can access later. However, if someone can overhear
+your exchange of codes in real time, they could trick you into pairing
+with them.
+
+Here are some suggestions for how to exchange the codes,
+with the most secure ways first:
+
+* In person.
+* In an encrypted message (gpg signed email, Off The Record (OTR)
+  conversation, etc).
+* By a voice phone call.
+
+## starting git-annex remotedaemon on boot
+
+Notice the `git annex remotedaemon` being run in the above examples.
+That command listens for incoming Iroh connections so that other peers
+can connect to your repository over Iroh.
+
+So, you may want to arrange for the remotedaemon to be started on boot.
+You can do that with a simple cron job:
+
+    @reboot cd ~/myannexrepo && git annex remotedaemon
+
+If you use the git-annex assistant, and have it auto-starting on boot, it
+will take care of starting the remotedaemon for you.
+
+## speed of large transfers
+
+This should be fast! Iroh often gets peers directly connected to
+one another, handling the necessary punching through firewalls and NAT.
+In some cases, when Iroh is not able to do that, traffic will be sent via a
+relay, which could be slower.
diff --git a/doc/tips/peer_to_peer_network_with_tor.mdwn b/doc/tips/peer_to_peer_network_with_tor.mdwn
index 2a9287a5a8..90f000c197 100644
--- a/doc/tips/peer_to_peer_network_with_tor.mdwn
+++ b/doc/tips/peer_to_peer_network_with_tor.mdwn
@@ -3,6 +3,9 @@ git-annex has recently gotten support for running as a
 and easy to use way to connect repositories in different locations. No
 account on a central server is needed; it's peer-to-peer.
 
+(See also [[peer_to_peer_network_with_iroh]] for something similar but
+faster if you don't need all the layered security of tor.)
+
 ## dependencies
 
 To use this, you need to get Tor installed and running. See
@@ -15,15 +18,12 @@ here are [the installation instructions](https://magic-wormhole.readthedocs.io/e
 
 *Important:*
 
-* At the time of writing, you need to install Magic Wormhole under Python 2,
-  because [Tor support is only available under python2.7](https://magic-wormhole.readthedocs.io/en/latest/tor.html).
-
 * The installation process must make a `wormhole` executable available
   somewhere on your `$PATH`. Some distributions may only install executables
   which reference the Python version, e.g. `wormhole-2.7`, in which case you
   will need to manually create a symlink (and maybe file a bug with your distribution).
 
-* You need git-annex version 6.20180705. Older versions of git-annex
+* You need git-annex version 6.20180705 or newer. Older versions of git-annex
   unfortunately had a bug that prevents this process from working correctly.
 
 ## pairing two repositories
p2p --pair: Fix to work with external P2P networks
When storing a P2P authtoken, it needs to have our local address, not the
address of the peer.
diff --git a/CHANGELOG b/CHANGELOG
index a6490580ce..b87b05e4cd 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,9 @@
+git-annex (10.20251030) UNRELEASED; urgency=medium
+
+  * p2p --pair: Fix to work with external P2P networks.
+
+ -- Joey Hess <id@joeyh.name>  Mon, 03 Nov 2025 14:02:46 -0400
+
 git-annex (10.20251029) upstream; urgency=medium
 
   * Support ssh remotes with '#' and '?' in the path to the repository,
diff --git a/Command/P2P.hs b/Command/P2P.hs
index 491355507c..2aa3f674cf 100644
--- a/Command/P2P.hs
+++ b/Command/P2P.hs
@@ -263,7 +263,7 @@ wormholePairing remotename ouraddrs ui = do
		Left _e -> return ReceiveFailed
		Right ls -> maybe
			(return ReceiveFailed)
-			(finishPairing 100 remotename ourhalf)
+			(finishPairing 100 remotename ourhalf ouraddrs)
			(deserializePairData ls)
 
 -- | Allow the peer we're pairing with to authenticate to us,
@@ -276,8 +276,8 @@ wormholePairing remotename ouraddrs ui = do
 -- Since we're racing the peer as they do the same, the first try is likely
 -- to fail to authenticate. Can retry any number of times, to avoid the
 -- users needing to redo the whole process.
-finishPairing :: Int -> RemoteName -> HalfAuthToken -> PairData -> Annex PairingResult
-finishPairing retries remotename (HalfAuthToken ourhalf) (PairData (HalfAuthToken theirhalf) theiraddrs) = do
+finishPairing :: Int -> RemoteName -> HalfAuthToken -> [P2PAddress] -> PairData -> Annex PairingResult
+finishPairing retries remotename (HalfAuthToken ourhalf) ouraddrs (PairData (HalfAuthToken theirhalf) theiraddrs) = do
	case (toAuthToken (ourhalf <> theirhalf), toAuthToken (theirhalf <> ourhalf)) of
		(Just ourauthtoken, Just theirauthtoken) -> do
			liftIO $ putStrLn $ "Successfully exchanged pairing data. Connecting to " ++ remotename ++ "..."
@@ -289,9 +289,9 @@ finishPairing retries remotename (HalfAuthToken ourhalf) (PairData (HalfAuthToke
		liftIO $ threadDelaySeconds (Seconds 2)
		liftIO $ putStrLn $ "Unable to connect to " ++ remotename ++ ". Retrying..."
		go (n-1) theiraddrs theirauthtoken ourauthtoken
-	go n (addr:rest) theirauthtoken ourauthtoken = do
-		storeP2PAuthToken addr ourauthtoken
-		r <- setupLink remotename (P2PAddressAuth addr theirauthtoken)
+	go n (theiraddr:rest) theirauthtoken ourauthtoken = do
+		forM_ ouraddrs $ \ouraddr -> storeP2PAuthToken ouraddr ourauthtoken
+		r <- setupLink remotename (P2PAddressAuth theiraddr theirauthtoken)
		case r of
			LinkSuccess -> return PairSuccess
			_ -> go n rest theirauthtoken ourauthtoken
diff --git a/doc/bugs/p2p_--pair_seems_broken_for_iroh.mdwn b/doc/bugs/p2p_--pair_seems_broken_for_iroh.mdwn
index 7c4de04a2f..3a2946881c 100644
--- a/doc/bugs/p2p_--pair_seems_broken_for_iroh.mdwn
+++ b/doc/bugs/p2p_--pair_seems_broken_for_iroh.mdwn
@@ -3,3 +3,9 @@ magic wormhole step.
 
 `git-annex p2p --link` does work with the iroh script, so this is probably
 a bug in git-annex. --[[Joey]]
+
+> --debug shows the problem is `AUTH-FAILURE`. And it appears that the
+> remotedaemon's loadP2PAuthTokens is not loading any auth tokens after
+> pairing writes one to `.git/annex/creds/p2pauth`. The written auth token
+> incorrectly has the address of the peer, rather than the local repository.
+> [[fixed|done]] --[[Joey]]
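The shape of this fix is easier to see with the types stripped down. Below is a minimal sketch using simplified stand-in types, not the real Command/P2P.hs code: each side builds one token it expects the peer to present and one it presents to the peer, and the expected token must be stored under the local addresses, since that is where loadP2PAuthTokens looks it up.

```haskell
{-# LANGUAGE OverloadedStrings #-}

import Data.ByteString (ByteString)

newtype AuthToken = AuthToken ByteString deriving Show
newtype P2PAddress = P2PAddress String deriving Show

-- Each side contributes half of the token material; concatenation order
-- determines which side presents which token. On the peer's side the
-- halves swap roles, so our accepted token equals their presented one.
pairTokens :: ByteString -> ByteString -> (AuthToken, AuthToken)
pairTokens ourhalf theirhalf =
    ( AuthToken (ourhalf <> theirhalf)  -- what the peer will present to us
    , AuthToken (theirhalf <> ourhalf)  -- what we present to the peer
    )

-- The bug: the token we accept was stored under the peer's address, so
-- the remotedaemon, which looks tokens up by local address, found none.
-- The fix stores it under every local address instead.
storeTokens :: [P2PAddress] -> AuthToken -> [(P2PAddress, AuthToken)]
storeTokens ouraddrs accepted = [ (a, accepted) | a <- ouraddrs ]

main :: IO ()
main = do
    let (accepted, presented) = pairTokens "ourhalf" "theirhalf"
    print presented
    print (storeTokens [P2PAddress "local-addr-1", P2PAddress "local-addr-2"] accepted)
```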
diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn
new file mode 100644
index 0000000000..da4e6c9bce
--- /dev/null
+++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn
@@ -0,0 +1,95 @@
+### Please describe the problem.
+initremote of an S3 special remote with a GCP object storage bucket and a fileprefix fails if another repo with a different fileprefix has already been configured in the same bucket.
+
+### What steps will reproduce the problem?
+With two git-annex repos and an initially empty bucket configured without versioning or hierarchical namespaces:
+
+For the first repo:
+
+git-annex --debug initremote s3-BACKUP type=S3 partsize=1GiB fileprefix=ds001263/ encryption=none public=no bucket=openneuro-nell-test host=storage.googleapis.com storageclass=ARCHIVE cost=400
+
+For a second repo:
+
+git-annex --debug initremote s3-BACKUP type=S3 partsize=1GiB fileprefix=ds001264/ encryption=none public=no bucket=openneuro-nell-test host=storage.googleapis.com storageclass=ARCHIVE cost=400
+
+The first initremote will succeed and configure the remote. The second attempts to create a bucket and fails because it already exists. Manually populating remote.log and annex-uuid in the bucket allows this remote to function after enableremote.
+
+### What version of git-annex are you using? On what operating system?
+
+10.20250929 on Fedora 43.
+
+### Please provide any additional information below.
+
+[[!format sh """
+# If you can, paste a complete transcript of the problem occurring here.
+# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
+
+initremote s3-BACKUP [2025-11-02 15:07:58.441842173] (Utility.Process) process [3914104] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","-c","annex.debug=true","show-ref","git-annex"]
+[2025-11-02 15:07:58.442407721] (Utility.Process) process [3914104] done ExitSuccess
+[2025-11-02 15:07:58.442547945] (Utility.Process) process [3914105] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","-c","annex.debug=true","show-ref","--hash","refs/heads/git-annex"]
+[2025-11-02 15:07:58.443007509] (Utility.Process) process [3914105] done ExitSuccess
+[2025-11-02 15:07:58.443341839] (Utility.Process) process [3914106] chat: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","-c","annex.debug=true","cat-file","--batch"]
+[2025-11-02 15:07:58.445120803] (Remote.S3) String to sign: "GET\n\n\nSun, 02 Nov 2025 23:07:58 GMT\n/openneuro-nell-test/?location"
+[2025-11-02 15:07:58.445148464] (Remote.S3) Host: "openneuro-nell-test.storage.googleapis.com"
+[2025-11-02 15:07:58.445161875] (Remote.S3) Path: "/"
+[2025-11-02 15:07:58.445173305] (Remote.S3) Query string: "location"
+[2025-11-02 15:07:58.445188565] (Remote.S3) Header: [("Date","Sun, 02 Nov 2025 23:07:58 GMT"),("User-Agent","git-annex/10.20250929")]
+[2025-11-02 15:07:58.635355111] (Remote.S3) Response status: Status {statusCode = 403, statusMessage = "Forbidden"}
+[2025-11-02 15:07:58.635400652] (Remote.S3) Response header 'Content-Type': 'application/xml; charset=UTF-8'
+[2025-11-02 15:07:58.635424923] (Remote.S3) Response header 'X-GUploader-UploadID': 'AOCedOEofSsg_ed3IPSuAQerc3FtHvXPALQhf2W1S26R_51sPNFu-0-ZozTZuBqhr5pV-3fK'
+[2025-11-02 15:07:58.635441664] (Remote.S3) Response header 'Content-Length': '298'
+[2025-11-02 15:07:58.635455574] (Remote.S3) Response header 'Date': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.635469194] (Remote.S3) Response header 'Expires': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.635481435] (Remote.S3) Response header 'Cache-Control': 'private, max-age=0'
+[2025-11-02 15:07:58.635495745] (Remote.S3) Response header 'Server': 'UploadServer'
+(checking bucket...) [2025-11-02 15:07:58.635780314] (Remote.S3) String to sign: "GET\n\n\nSun, 02 Nov 2025 23:07:58 GMT\n/openneuro-nell-test/ds001264/annex-uuid"
+[2025-11-02 15:07:58.635796454] (Remote.S3) Host: "openneuro-nell-test.storage.googleapis.com"
+[2025-11-02 15:07:58.635818565] (Remote.S3) Path: "/ds001264/annex-uuid"
+[2025-11-02 15:07:58.635828655] (Remote.S3) Query string: ""
+[2025-11-02 15:07:58.635840346] (Remote.S3) Header: [("Date","Sun, 02 Nov 2025 23:07:58 GMT"),("Authorization","..."),("User-Agent","git-annex/10.20250929")]
+[2025-11-02 15:07:58.685220703] (Remote.S3) Response status: Status {statusCode = 404, statusMessage = "Not Found"}
+[2025-11-02 15:07:58.685251934] (Remote.S3) Response header 'Content-Type': 'application/xml; charset=UTF-8'
+[2025-11-02 15:07:58.685268695] (Remote.S3) Response header 'X-GUploader-UploadID': 'AOCedOHoPd6zdBzYMMr-ON5aWjlDBbGd7ZIaf_Iit8Gt74l3aRT-Ty4Fayk9Tx9tlBMYuMKH'
+[2025-11-02 15:07:58.685280535] (Remote.S3) Response header 'Content-Length': '201'
+[2025-11-02 15:07:58.685290386] (Remote.S3) Response header 'Date': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.685299996] (Remote.S3) Response header 'Expires': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.685310096] (Remote.S3) Response header 'Cache-Control': 'private, max-age=0'
+[2025-11-02 15:07:58.685319476] (Remote.S3) Response header 'Server': 'UploadServer'
+[2025-11-02 15:07:58.685365338] (Remote.S3) String to sign: "GET\n\n\nSun, 02 Nov 2025 23:07:58 GMT\n/openneuro-nell-test/"
+[2025-11-02 15:07:58.685376888] (Remote.S3) Host: "openneuro-nell-test.storage.googleapis.com"
+[2025-11-02 15:07:58.685386298] (Remote.S3) Path: "/"
+[2025-11-02 15:07:58.685394309] (Remote.S3) Query string: ""
+[2025-11-02 15:07:58.685403479] (Remote.S3) Header: [("Date","Sun, 02 Nov 2025 23:07:58 GMT"),("Authorization","..."),("User-Agent","git-annex/10.20250929")]
+[2025-11-02 15:07:58.725819533] (Remote.S3) Response status: Status {statusCode = 200, statusMessage = "OK"}
+[2025-11-02 15:07:58.725847874] (Remote.S3) Response header 'Content-Type': 'application/xml; charset=UTF-8'
+[2025-11-02 15:07:58.725861764] (Remote.S3) Response header 'X-GUploader-UploadID': 'AOCedOGjVuiFnd4UNsb069xhhamfE7ttizD8j1W9S7fGeUBqVoPxKff00jMdZyvUGFo90z_N'
+[2025-11-02 15:07:58.725873324] (Remote.S3) Response header 'x-goog-metageneration': '3'
+[2025-11-02 15:07:58.725883625] (Remote.S3) Response header 'Content-Length': '784'
+[2025-11-02 15:07:58.725893065] (Remote.S3) Response header 'Date': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.725907215] (Remote.S3) Response header 'Expires': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.725983778] (Remote.S3) Response header 'Cache-Control': 'private, max-age=0'
+[2025-11-02 15:07:58.72604907] (Remote.S3) Response header 'Server': 'UploadServer'
+(creating bucket in US...) [2025-11-02 15:07:58.726309948] (Remote.S3) String to sign: "PUT\n\n\nSun, 02 Nov 2025 23:07:58 GMT\n/openneuro-nell-test/"
+[2025-11-02 15:07:58.726329498] (Remote.S3) Host: "openneuro-nell-test.storage.googleapis.com"
+[2025-11-02 15:07:58.726341689] (Remote.S3) Path: "/"
+[2025-11-02 15:07:58.726350049] (Remote.S3) Query string: ""
+[2025-11-02 15:07:58.726366349] (Remote.S3) Header: [("Date","Sun, 02 Nov 2025 23:07:58 GMT"),("Authorization","..."),("User-Agent","git-annex/10.20250929")]
+[2025-11-02 15:07:58.75553637] (Remote.S3) Response status: Status {statusCode = 409, statusMessage = "Conflict"}
+[2025-11-02 15:07:58.755576871] (Remote.S3) Response header 'Content-Type': 'application/xml; charset=UTF-8'
+[2025-11-02 15:07:58.755590572] (Remote.S3) Response header 'X-GUploader-UploadID': 'AOCedOFn-ViFqzgcWiIW6Pun3lCz6lMnBFrRxyRpyC9LIdnv9j20Yz2Cd7MnuXIcNxZ-j6_J'
+[2025-11-02 15:07:58.755603132] (Remote.S3) Response header 'Content-Length': '421'
+[2025-11-02 15:07:58.755610962] (Remote.S3) Response header 'Vary': 'Origin'
+[2025-11-02 15:07:58.755618952] (Remote.S3) Response header 'Date': 'Sun, 02 Nov 2025 23:07:58 GMT'
+[2025-11-02 15:07:58.755628153] (Remote.S3) Response header 'Server': 'UploadServer'
+
+git-annex: S3Error {s3StatusCode = Status {statusCode = 409, statusMessage = "Conflict"}, s3ErrorCode = "BucketNameUnavailable", s3ErrorMessage = "The requested bucket name is not available. The bucket namespace is shared by all users of the system. Please select a different name and try again.", s3ErrorResource = Nothing, s3ErrorHostId = Nothing, s3ErrorAccessKeyId = Nothing, s3ErrorStringToSign = Nothing, s3ErrorBucket = Nothing, s3ErrorEndpointRaw = Nothing, s3ErrorEndpoint = Nothing}
+failed
+[2025-11-02 15:07:58.755843459] (Utility.Process) process [3914106] done ExitSuccess
+initremote: 1 failed
+
+# End of transcript or log.
+"""]]
+
+### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
+
+Thanks for all your great work, Joey!
removed
diff --git a/doc/special_remotes/directory/comment_25_11ce2a9f48ab9a043cc90d125e796685._comment b/doc/special_remotes/directory/comment_25_11ce2a9f48ab9a043cc90d125e796685._comment
deleted file mode 100644
index 35fa9399e1..0000000000
--- a/doc/special_remotes/directory/comment_25_11ce2a9f48ab9a043cc90d125e796685._comment
+++ /dev/null
@@ -1,14 +0,0 @@
-[[!comment format=mdwn
- username="hatzka"
- avatar="http://cdn.libravatar.org/avatar/446138196d9d09c19f57e739e9786a99"
- subject="a potentially bad idea"
- date="2025-10-31T00:20:54Z"
- content="""
-I have some git-annex repositories that are large enough that the objects don't fit on my SSD. I want to keep the repositories themselves on my SSD, because they also contain small versioned files that benefit from fast access. And I want git-annex to know which files are on which physical drives, so that I don't have to fsck if a drive fails (even with `--fast` it takes a while, and if one drive already failed I would rather avoid using the rest unnecessarily).
-
-I think it should be possible to meet all of these requirements by mounting an overlayfs over the `.git/annex/objects` folder. The writable `upperdir` would be on the same device as the rest of the repository; the read-only lower layers would be the hard drives, which I would also make accessible to git-annex as directory special remotes. This way, I could add objects to the repository normally, then move them to the hard drives without making them inaccessible.
-
-Obviously for this to be safe I would need to untrust the repository itself, as otherwise git-annex would see two real copies where in fact there was only one. (I'm fine with not being able to permanently store anything only on the SSD.) The other obstacle I've run into is that directory remotes don't have the same layout as an objects folder.
-
-Is this a terrible idea? Is there a better way? And, assuming the answers are \"not too terrible\" and \"not really\", how can I set up a directory special remote so that this will work?
-"""]]
Added a comment: a potentially bad idea
diff --git a/doc/special_remotes/directory/comment_25_11ce2a9f48ab9a043cc90d125e796685._comment b/doc/special_remotes/directory/comment_25_11ce2a9f48ab9a043cc90d125e796685._comment
new file mode 100644
index 0000000000..35fa9399e1
--- /dev/null
+++ b/doc/special_remotes/directory/comment_25_11ce2a9f48ab9a043cc90d125e796685._comment
@@ -0,0 +1,14 @@
+[[!comment format=mdwn
+ username="hatzka"
+ avatar="http://cdn.libravatar.org/avatar/446138196d9d09c19f57e739e9786a99"
+ subject="a potentially bad idea"
+ date="2025-10-31T00:20:54Z"
+ content="""
+I have some git-annex repositories that are large enough that the objects don't fit on my SSD. I want to keep the repositories themselves on my SSD, because they also contain small versioned files that benefit from fast access. And I want git-annex to know which files are on which physical drives, so that I don't have to fsck if a drive fails (even with `--fast` it takes a while, and if one drive already failed I would rather avoid using the rest unnecessarily).
+
+I think it should be possible to meet all of these requirements by mounting an overlayfs over the `.git/annex/objects` folder. The writable `upperdir` would be on the same device as the rest of the repository; the read-only lower layers would be the hard drives, which I would also make accessible to git-annex as directory special remotes. This way, I could add objects to the repository normally, then move them to the hard drives without making them inaccessible.
+
+Obviously for this to be safe I would need to untrust the repository itself, as otherwise git-annex would see two real copies where in fact there was only one. (I'm fine with not being able to permanently store anything only on the SSD.) The other obstacle I've run into is that directory remotes don't have the same layout as an objects folder.
+
+Is this a terrible idea? Is there a better way? And, assuming the answers are \"not too terrible\" and \"not really\", how can I set up a directory special remote so that this will work?
+"""]]
diff --git a/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn
new file mode 100644
index 0000000000..78a552343d
--- /dev/null
+++ b/doc/bugs/p2phttp_deadlocks_with_concurrent_clients.mdwn
@@ -0,0 +1,41 @@
+### Please describe the problem.
+
+P2phttp can deadlock with multiple concurrent clients talking to it.
+
+
+### What steps will reproduce the problem?
+
+1. Create a git-annex repository with a bunch of annexed files served via p2phttp like so: `git-annex --debug p2phttp -J2 --bind 127.0.0.1 --wideopen`
+2. Create multiple different clones of that repository connected via p2phttp all doing `while true; do git annex drop .; git annex get --in origin; done`
+3. Observe a deadlock after an indeterminate amount of time
+
+This deadlock seems to occur faster the more repos you use. I've tried increasing -J to 3 and had it deadlock with two client repos once, but that seems to happen much less often.
+
+### What version of git-annex are you using? On what operating system?
+
+```
+$ git annex version
+git-annex version: 10.20250929-g33ab579243742b0b18ffec2ea4ce1e3a827720b4
+build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Benchmark Feeds Testsuite S3 WebDAV Servant OsPath
+dependency versions: aws-0.24.4 bloomfilter-2.0.1.2 crypton-1.0.4 DAV-1.3.4 feed-1.3.2.1 ghc-9.10.2 http-client-0.7.19 persistent-sqlite-2.13.3.1 torrent-10000.1.3 uuid-1.3.16 yesod-1.6.2.1
+key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL GITBUNDLE GITMANIFEST VURL X*
+remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg rclone hook external compute mask
+operating system: linux x86_64
+supported repository versions: 8 9 10
+upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10
+local repository version: 10
+```
+
+### Please provide any additional information below.
+
+[[!format sh """
+# If you can, paste a complete transcript of the problem occurring here.
+# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
+
+
+# End of transcript or log.
+"""]]
+
+### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
+
+[[!tag projects/ICE4]]
diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__.mdwn b/doc/forum/Git-annex_in___34__AGit-Flow__34__.mdwn
index 2a52839629..af9c36998c 100644
--- a/doc/forum/Git-annex_in___34__AGit-Flow__34__.mdwn
+++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__.mdwn
@@ -16,4 +16,4 @@ Worth it to note that AGit-Flow already works for contributors with write access
 
 Do you have any other ideas on how git-annex could be used in this workflow?
 
-[[!tag projects/INM7]]
+[[!tag projects/ICE4]]