Diff
patch 5811a855fa718774bca53bcc3f1f15d8436813d3
Author: fritjof@alokat.org
Date: Mon Apr 20 12:10:54 UTC 2026
* Add read-only HTTP clone endpoint for darcs repositories
hunk ./app/Main.hs 12
-import Network.Wai (pathInfo, modifyResponse, mapResponseHeaders,
- Middleware)
+import Network.Wai (Request, pathInfo, modifyResponse,
+ mapResponseHeaders, requestHeaders, Middleware)
hunk ./app/Main.hs 18
+import qualified Data.Text.Encoding as TE
hunk ./app/Main.hs 41
+import DarcsWeb.Clone (buildCloneUrl)
hunk ./app/Main.hs 45
+-- | Name of the darcs control directory whose contents are exposed for clone.
+darcsDir :: FilePath
+darcsDir = "_darcs"
+
hunk ./app/Main.hs 247
- html $ TL.fromStrict $ renderRepoSummary now name ri (take 10 patches) tags
+ req <- request
+ let clone = cloneUrl req name
+ html $ TL.fromStrict $ renderRepoSummary now name ri clone (take 10 patches) tags
+
+ -- Read-only darcs clone access: serves files strictly from the repo's
+ -- _darcs/ directory. Only regular files are served; sub-paths are
+ -- validated by the formally verified isSafeSubPath (no .., no hidden
+ -- names, no nested _darcs/.git), and the canonical target must stay
+ -- inside the repo's _darcs/ directory.
+ get (regex "^/clone/([^/]+)/_darcs/(.+)$") $ do
+ name <- pathParam "1"
+ subPath <- pathParam "2"
+ serveClone cfg name (T.unpack subPath)
hunk ./app/Main.hs 416
+-- | Serve a single file from the repository's _darcs/ directory for
+-- read-only darcs clones over HTTP. Rejects unsafe sub-paths and
+-- anything that escapes the repo jail after canonicalization.
+serveClone :: DarcsWebConfig -> T.Text -> FilePath -> ActionM ()
+serveClone cfg name subPath
+ | not (isSafeSubPath subPath) = notFound404
+ | otherwise = withRepo cfg name $ \repoPath -> do
+ let jail = repoPath </> darcsDir
+ candidate = jail </> subPath
+ canonical <- liftIO $ canonicalizePath candidate
+ let jailSlash = addTrailingSlash jail
+ if not (jailSlash `isPrefixOf` canonical)
+ then notFound404
+ else do
+ exists <- liftIO $ doesFileExist canonical
+ if not exists
+ then notFound404
+ else do
+ setHeader "Content-Type" "application/octet-stream"
+ setHeader "Cache-Control" "no-cache"
+ file canonical
+ where
+ notFound404 = do
+ status status404
+ html $ TL.fromStrict $ render404 "Not found."
+
+-- | Build the darcs clone URL for a repository from the incoming request.
+-- Delegates header-to-URL logic to 'DarcsWeb.Clone.buildCloneUrl' so it
+-- can be exercised by the test suite without spinning up a server.
+-- Uses non-throwing UTF-8 decoding: invalid byte sequences degrade to
+-- a missing header so the summary page cannot be crashed by a
+-- deliberately malformed 'Host' / 'X-Forwarded-Proto' value.
+cloneUrl :: Request -> T.Text -> T.Text
+cloneUrl req name =
+ let hdrs = requestHeaders req
+ decodeHdr = (>>= eitherToMaybe . TE.decodeUtf8') . flip lookup hdrs
+ in buildCloneUrl (decodeHdr "X-Forwarded-Proto") (decodeHdr "Host") name
+ where
+ eitherToMaybe = either (const Nothing) Just
+
hunk ./darcsweb.cabal 19
- exposed-modules: DarcsWeb.Config
+ exposed-modules: DarcsWeb.Clone
+ , DarcsWeb.Config
hunk ./darcsweb.cabal 63
+ , Properties.Clone
addfile ./src/DarcsWeb/Clone.hs
hunk ./src/DarcsWeb/Clone.hs 1
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | Helpers for the read-only darcs HTTP clone endpoint.
+module DarcsWeb.Clone
+ ( buildCloneUrl
+ , encodePathSegment
+ ) where
+
+import Data.Bits (shiftR, (.&.))
+import Data.Char (isAsciiLower, isAsciiUpper, isDigit, ord)
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import qualified Data.ByteString as BS
+import Data.Word (Word8)
+
+-- | Construct the darcs-clone URL for a repository from the scheme and host
+-- observed on the incoming request. Both arguments accept 'Nothing' (or
+-- malformed / whitespace-only values) and fall back to a safe default so
+-- the summary page cannot be crashed by crafted request headers.
+--
+-- Guarantees:
+--
+-- * the scheme is always either @http@ or @https@;
+-- * the authority is always non-empty (defaults to @localhost@) and
+-- only contains characters legal in a URI authority;
+-- * the repository name is percent-encoded as a single path segment, so
+-- spaces / reserved characters cannot break out of the path.
+--
+-- The trailing @/@ is required: @darcs clone@ treats the URL as a
+-- directory and appends paths like @_darcs/hashed_inventory@.
+buildCloneUrl
+ :: Maybe Text -- ^ value of the X-Forwarded-Proto header, if any
+ -> Maybe Text -- ^ value of the Host header, if any
+ -> Text -- ^ repository name (already validated upstream)
+ -> Text
+buildCloneUrl mProto mHost name =
+ proto <> "://" <> host <> "/clone/" <> encodePathSegment name <> "/"
+ where
+ proto = maybe "http" normalizeProto mProto
+ host = case mHost >>= sanitizeHost of
+ Just h -> h
+ Nothing -> "localhost"
+
+-- | Only allow http/https from the proxy hint; anything else becomes http.
+-- Tolerates the @header, header@ form that some proxies emit
+-- (e.g. @X-Forwarded-Proto: https, http@) by inspecting only the first
+-- comma-separated token.
+normalizeProto :: Text -> Text
+normalizeProto v =
+ let v' = T.toLower (T.strip (firstToken v))
+ in if v' == "https" then "https" else "http"
+
+-- | Validate and return a cleaned @Host@ value, or 'Nothing' if it cannot
+-- be safely rendered into a URL. Accepts one of:
+--
+-- * @reg-name[:port]@ ��� letters / digits / @.-_@, optional numeric port;
+-- * @[IPv6-literal][:port]@ ��� bracketed IPv6 with optional numeric port.
+--
+-- Any other shape (bare @:@, unclosed bracket, multiple ports, extra
+-- commas, etc.) falls through to 'Nothing' so the caller uses the
+-- @localhost@ fallback.
+sanitizeHost :: Text -> Maybe Text
+sanitizeHost raw =
+ let h = T.strip (firstToken raw)
+ in if T.null h || not (validAuthority h) then Nothing else Just h
+
+validAuthority :: Text -> Bool
+validAuthority t
+ | T.any (== ',') t = False
+ | T.isPrefixOf "[" t = validBracketed t
+ | otherwise = validRegName t
+
+-- Bracketed IPv6 literal with optional port: @[...]@ or @[...]:port@
+validBracketed :: Text -> Bool
+validBracketed t =
+ case T.breakOn "]" (T.drop 1 t) of
+ (inner, rest)
+ | T.null inner -> False
+ | not (T.all isIpv6Char inner) -> False
+ | rest == "]" -> True
+ | Just port <- T.stripPrefix "]:" rest
+ -> validPort port
+ | otherwise -> False
+ where
+ isIpv6Char c =
+ isAsciiLower c || isAsciiUpper c || isDigit c
+ || c == ':' || c == '.'
+
+-- Reg-name or IPv4 literal with optional port.
+validRegName :: Text -> Bool
+validRegName t =
+ case T.splitOn ":" t of
+ [h] -> nameLike h
+ [h, p] -> nameLike h && validPort p
+ _ -> False -- multiple colons without IPv6 brackets
+ where
+ nameLike h =
+ not (T.null h) && T.all nameChar h
+ nameChar c =
+ isAsciiLower c || isAsciiUpper c || isDigit c
+ || c == '.' || c == '-' || c == '_'
+
+validPort :: Text -> Bool
+validPort p =
+ not (T.null p) && T.length p <= 5 && T.all isDigit p
+
+-- | First comma-separated token of a header value, e.g. \"https\" from
+-- \"https, http\".
+firstToken :: Text -> Text
+firstToken = fst . T.break (== ',')
+
+-- | A single path segment, percent-encoded so that spaces and reserved
+-- characters (@/@, @?@, @#@, @\@, ...) cannot alter the URL structure.
+-- Unreserved characters per RFC 3986 �� 2.3 are passed through verbatim;
+-- everything else is UTF-8 encoded and hex-escaped.
+encodePathSegment :: Text -> Text
+encodePathSegment t =
+ T.concat [ encodeByte b | b <- BS.unpack (TE.encodeUtf8 t) ]
+ where
+ encodeByte :: Word8 -> Text
+ encodeByte b
+ | isUnreserved b = T.singleton (toEnum (fromIntegral b))
+ | otherwise = T.pack ['%', hex (b `shiftR` 4), hex (b .&. 0x0F)]
+
+ isUnreserved b =
+ let c = toEnum (fromIntegral b) :: Char
+ in isAsciiLower c || isAsciiUpper c || isDigit c
+ || c == '-' || c == '_' || c == '.' || c == '~'
+
+ hex :: Word8 -> Char
+ hex n | n < 10 = toEnum (ord '0' + fromIntegral n)
+ | otherwise = toEnum (ord 'A' + fromIntegral n - 10)
hunk ./src/DarcsWeb/Html.hs 91
-renderRepoSummary :: UTCTime -> Text -> RepoInfo -> [PatchSummary] -> [PatchSummary] -> Text
-renderRepoSummary now repoName ri recentPatches tags =
+renderRepoSummary :: UTCTime -> Text -> RepoInfo -> Text -> [PatchSummary] -> [PatchSummary] -> Text
+renderRepoSummary now repoName ri clone recentPatches tags =
hunk ./src/DarcsWeb/Html.hs 100
+ -- Read-only clone URL
+ , renderCloneBlock clone
hunk ./src/DarcsWeb/Html.hs 123
+-- | Read-only HTTP clone URL box. Empty URL suppresses the block.
+-- Renders the URL alone (not a ready-made shell command) so that hosts
+-- or repo names containing whitespace or shell metacharacters cannot
+-- silently deform a copy-pasted command.
+renderCloneBlock :: Text -> Text
+renderCloneBlock url
+ | T.null url = ""
+ | otherwise = T.concat
+ [ "<div class=\"clone-box\">\n"
+ , "<span class=\"clone-label\">Read-only clone URL</span>\n"
+ , "<code class=\"clone-url\">", esc url, "</code>\n"
+ , "</div>\n"
+ ]
+
hunk ./static/style.css 732
+/* === Clone URL Box === */
+.clone-box {
+ display: flex;
+ align-items: center;
+ gap: var(--space-3);
+ flex-wrap: wrap;
+ margin: var(--space-4) 0 var(--space-6);
+ padding: var(--space-3) var(--space-4);
+ background: var(--accent-subtle);
+ border: 1px solid var(--border-default);
+ border-left: 3px solid var(--accent);
+ border-radius: var(--radius-md);
+}
+
+.clone-label {
+ color: var(--text-secondary);
+ font-size: var(--size-sm);
+ font-weight: 600;
+ flex-shrink: 0;
+}
+
+.clone-url {
+ font-family: var(--font-mono);
+ font-size: var(--size-sm);
+ color: var(--text-primary);
+ background: var(--bg-surface);
+ padding: var(--space-1) var(--space-2);
+ border: 1px solid var(--border-subtle);
+ border-radius: var(--radius-sm);
+ user-select: all;
+ word-break: break-all;
+ flex: 1;
+ min-width: 0;
+}
+
addfile ./test/Properties/Clone.hs
hunk ./test/Properties/Clone.hs 1
+{-# LANGUAGE OverloadedStrings #-}
+
+module Properties.Clone (tests) where
+
+import Test.QuickCheck
+import qualified Data.Text as T
+
+import DarcsWeb.Clone (buildCloneUrl, encodePathSegment)
+import DarcsWeb.Darcs (isSafeSubPath)
+
+-- --------------------------------------------------------------------------
+-- buildCloneUrl: URL construction from request headers
+-- --------------------------------------------------------------------------
+
+prop_clone_default_scheme :: Bool
+prop_clone_default_scheme =
+ buildCloneUrl Nothing (Just "darcs.example.com") "proj"
+ == "http://darcs.example.com/clone/proj/"
+
+prop_clone_https_forwarded :: Bool
+prop_clone_https_forwarded =
+ buildCloneUrl (Just "https") (Just "darcs.example.com") "proj"
+ == "https://darcs.example.com/clone/proj/"
+
+-- Unknown / suspicious schemes (e.g. "javascript") must never be echoed
+-- into the rendered clone URL.
+prop_clone_rejects_weird_scheme :: String -> Property
+prop_clone_rejects_weird_scheme raw =
+ let t = T.pack raw
+ lowered = T.toLower (T.strip t)
+ in lowered /= "https" ==>
+ let url = buildCloneUrl (Just t) (Just "h") "r"
+ in T.isPrefixOf "http://" url
+
+prop_clone_has_trailing_slash :: String -> String -> Property
+prop_clone_has_trailing_slash host name =
+ not (null host) && not (null name)
+ && all (`notElem` ("/ \t\n" :: String)) name ==>
+ let url = buildCloneUrl Nothing (Just (T.pack host)) (T.pack name)
+ in T.isSuffixOf "/" url
+
+prop_clone_missing_host_falls_back :: Bool
+prop_clone_missing_host_falls_back =
+ buildCloneUrl (Just "https") Nothing "r"
+ == "https://localhost/clone/r/"
+
+-- Whitespace-only / empty Host must never produce http:///clone/...
+prop_clone_whitespace_host_falls_back :: Bool
+prop_clone_whitespace_host_falls_back =
+ buildCloneUrl Nothing (Just " ") "r"
+ == "http://localhost/clone/r/"
+ &&
+ buildCloneUrl Nothing (Just "") "r"
+ == "http://localhost/clone/r/"
+
+-- Reject host values containing URL-structural characters that would
+-- otherwise smuggle path / query / fragment into the clone URL.
+prop_clone_rejects_bad_host :: Bool
+prop_clone_rejects_bad_host = and
+ [ buildCloneUrl Nothing (Just "evil.com/../foo") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "evil.com?x=1") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "evil.com#frag") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "attacker@evil.com") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "evil.com evil2") "r" == "http://localhost/clone/r/"
+ -- Malformed authorities
+ , buildCloneUrl Nothing (Just ":") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "[::1") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "host:80:90") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just ":8080") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "host:") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "host:abc") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "host:123456") "r" == "http://localhost/clone/r/"
+ , buildCloneUrl Nothing (Just "[]") "r" == "http://localhost/clone/r/"
+ ]
+
+-- Well-formed authorities should round-trip unchanged.
+prop_clone_accepts_good_host :: Bool
+prop_clone_accepts_good_host = and
+ [ buildCloneUrl Nothing (Just "example.com") "r" == "http://example.com/clone/r/"
+ , buildCloneUrl Nothing (Just "example.com:8080") "r" == "http://example.com:8080/clone/r/"
+ , buildCloneUrl Nothing (Just "[::1]") "r" == "http://[::1]/clone/r/"
+ , buildCloneUrl Nothing (Just "[::1]:3000") "r" == "http://[::1]:3000/clone/r/"
+ , buildCloneUrl Nothing (Just "[2001:db8::1]:443") "r" == "http://[2001:db8::1]:443/clone/r/"
+ , buildCloneUrl Nothing (Just "10.0.0.1:3000") "r" == "http://10.0.0.1:3000/clone/r/"
+ ]
+
+-- Proxies sometimes append: "https, http". Only the first token matters.
+prop_clone_comma_proto :: Bool
+prop_clone_comma_proto =
+ buildCloneUrl (Just "https, http") (Just "h") "r"
+ == "https://h/clone/r/"
+ &&
+ buildCloneUrl (Just "http, https") (Just "h") "r"
+ == "http://h/clone/r/"
+
+-- Repo names with reserved / unsafe characters must be percent-encoded
+-- so they cannot break out of the path segment.
+prop_clone_encodes_repo :: Bool
+prop_clone_encodes_repo = and
+ [ buildCloneUrl Nothing (Just "h") "my repo" == "http://h/clone/my%20repo/"
+ , buildCloneUrl Nothing (Just "h") "a/b" == "http://h/clone/a%2Fb/"
+ , buildCloneUrl Nothing (Just "h") "a?b" == "http://h/clone/a%3Fb/"
+ , buildCloneUrl Nothing (Just "h") "a#b" == "http://h/clone/a%23b/"
+ , buildCloneUrl Nothing (Just "h") "a%2F" == "http://h/clone/a%252F/"
+ ]
+
+prop_clone_encodes_passthrough :: Bool
+prop_clone_encodes_passthrough =
+ encodePathSegment "ABCabc123-_.~" == "ABCabc123-_.~"
+
+-- The percent-encoded output never contains characters that alter URL
+-- structure; pairs with prop_clone_encodes_repo at the byte level.
+prop_clone_encoded_has_no_reserved :: String -> Bool
+prop_clone_encoded_has_no_reserved s =
+ let enc = encodePathSegment (T.pack s)
+ in not (any (`T.isInfixOf` enc) ["/", "?", "#", " ", "\n", "\t"])
+
+-- --------------------------------------------------------------------------
+-- isSafeSubPath: concrete expectations for the clone jail.
+-- The function is formally verified; these are integration-level
+-- assertions that pin its behaviour to the clone use case so that a
+-- future refactor cannot silently drop a critical rejection.
+-- --------------------------------------------------------------------------
+
+accepts :: FilePath -> Bool
+accepts = isSafeSubPath
+
+rejects :: FilePath -> Bool
+rejects = not . isSafeSubPath
+
+prop_clone_accepts_inventory :: Bool
+prop_clone_accepts_inventory = accepts "hashed_inventory"
+
+prop_clone_accepts_nested_hash :: Bool
+prop_clone_accepts_nested_hash =
+ accepts "inventories/0000000123abc" &&
+ accepts "patches/ffbbaa" &&
+ accepts "pristine.hashed/deadbeef"
+
+prop_clone_rejects_parent :: Bool
+prop_clone_rejects_parent =
+ rejects ".." &&
+ rejects "../secret" &&
+ rejects "a/../b"
+
+prop_clone_rejects_hidden :: Bool
+prop_clone_rejects_hidden =
+ rejects ".ssh/id_rsa" &&
+ rejects ".git/config"
+
+prop_clone_rejects_nested_darcs :: Bool
+prop_clone_rejects_nested_darcs =
+ rejects "_darcs/config" &&
+ rejects "inventories/_darcs"
+
+prop_clone_rejects_empty :: Bool
+prop_clone_rejects_empty =
+ rejects "" &&
+ rejects "/"
+
+-- --------------------------------------------------------------------------
+-- Test runner
+-- --------------------------------------------------------------------------
+
+tests :: IO Bool
+tests = fmap and $ sequence
+ [ run "clone default scheme" (property prop_clone_default_scheme)
+ , run "clone https forwarded" (property prop_clone_https_forwarded)
+ , run "clone rejects weird scheme" prop_clone_rejects_weird_scheme
+ , run "clone trailing slash" prop_clone_has_trailing_slash
+ , run "clone missing host default" (property prop_clone_missing_host_falls_back)
+ , run "clone whitespace host" (property prop_clone_whitespace_host_falls_back)
+ , run "clone rejects bad host" (property prop_clone_rejects_bad_host)
+ , run "clone accepts good host" (property prop_clone_accepts_good_host)
+ , run "clone comma-separated proto" (property prop_clone_comma_proto)
+ , run "clone encodes repo segment" (property prop_clone_encodes_repo)
+ , run "clone encode passthrough" (property prop_clone_encodes_passthrough)
+ , run "clone encode no reserved" (property prop_clone_encoded_has_no_reserved)
+ , run "clone accepts inventory" (property prop_clone_accepts_inventory)
+ , run "clone accepts nested hash" (property prop_clone_accepts_nested_hash)
+ , run "clone rejects parent" (property prop_clone_rejects_parent)
+ , run "clone rejects hidden" (property prop_clone_rejects_hidden)
+ , run "clone rejects nested _darcs" (property prop_clone_rejects_nested_darcs)
+ , run "clone rejects empty" (property prop_clone_rejects_empty)
+ ]
+ where
+ run :: Testable p => String -> p -> IO Bool
+ run name prop = do
+ putStr $ " " ++ name ++ ": "
+ r <- quickCheckWithResult stdArgs { maxSuccess = 200 } prop
+ return (isSuccess r)
hunk ./test/Spec.hs 8
+import qualified Properties.Clone as Clone
hunk ./test/Spec.hs 30
- if htmlOk && cspOk
+ putStrLn "\n=== Running QuickCheck properties (Clone) ==="
+ cloneOk <- Clone.tests
+
+ if htmlOk && cspOk && cloneOk