Add read-only HTTP clone endpoint for darcs repositories

Authorfritjof@alokat.org
Date3 weeks ago
Hash5811a855fa718774bca53bcc3f1f15d8436813d3

Summary

M ./app/Main.hs -3 +61
M ./darcsweb.cabal -1 +3
A ./src/DarcsWeb/Clone.hs
M ./src/DarcsWeb/Html.hs -2 +18
M ./static/style.css +35
A ./test/Properties/Clone.hs
M ./test/Spec.hs -1 +5

Diff

patch 5811a855fa718774bca53bcc3f1f15d8436813d3
Author: fritjof@alokat.org
Date:   Mon Apr 20 12:10:54 UTC 2026
  * Add read-only HTTP clone endpoint for darcs repositories
hunk ./app/Main.hs 12
-import           Network.Wai (pathInfo, modifyResponse, mapResponseHeaders,
-                             Middleware)
+import           Network.Wai (Request, pathInfo, modifyResponse,
+                             mapResponseHeaders, requestHeaders, Middleware)
hunk ./app/Main.hs 18
+import qualified Data.Text.Encoding as TE
hunk ./app/Main.hs 41
+import           DarcsWeb.Clone (buildCloneUrl)
hunk ./app/Main.hs 45
+-- | Name of the darcs control directory whose contents are exposed for clone.
+darcsDir :: FilePath
+darcsDir = "_darcs"
+
hunk ./app/Main.hs 247
-                html $ TL.fromStrict $ renderRepoSummary now name ri (take 10 patches) tags
+                req <- request
+                let clone = cloneUrl req name
+                html $ TL.fromStrict $ renderRepoSummary now name ri clone (take 10 patches) tags
+
+    -- Read-only darcs clone access: serves files strictly from the repo's
+    -- _darcs/ directory. Only regular files are served; sub-paths are
+    -- validated by the formally verified isSafeSubPath (no .., no hidden
+    -- names, no nested _darcs/.git), and the canonical target must stay
+    -- inside the repo's _darcs/ directory.
+    get (regex "^/clone/([^/]+)/_darcs/(.+)$") $ do
+        name <- pathParam "1"
+        subPath <- pathParam "2"
+        serveClone cfg name (T.unpack subPath)
hunk ./app/Main.hs 416
+-- | Serve a single file from the repository's _darcs/ directory for
+--   read-only darcs clones over HTTP. Rejects unsafe sub-paths and
+--   anything that escapes the repo jail after canonicalization.
+serveClone :: DarcsWebConfig -> T.Text -> FilePath -> ActionM ()
+serveClone cfg name subPath
+    | not (isSafeSubPath subPath) = notFound404
+    | otherwise = withRepo cfg name $ \repoPath -> do
+        let jail      = repoPath </> darcsDir
+            candidate = jail </> subPath
+        canonical <- liftIO $ canonicalizePath candidate
+        let jailSlash = addTrailingSlash jail
+        if not (jailSlash `isPrefixOf` canonical)
+          then notFound404
+          else do
+            exists <- liftIO $ doesFileExist canonical
+            if not exists
+              then notFound404
+              else do
+                setHeader "Content-Type" "application/octet-stream"
+                setHeader "Cache-Control" "no-cache"
+                file canonical
+  where
+    notFound404 = do
+      status status404
+      html $ TL.fromStrict $ render404 "Not found."
+
+-- | Build the darcs clone URL for a repository from the incoming request.
+--   Delegates header-to-URL logic to 'DarcsWeb.Clone.buildCloneUrl' so it
+--   can be exercised by the test suite without spinning up a server.
+--   Uses non-throwing UTF-8 decoding: invalid byte sequences degrade to
+--   a missing header so the summary page cannot be crashed by a
+--   deliberately malformed 'Host' / 'X-Forwarded-Proto' value.
+cloneUrl :: Request -> T.Text -> T.Text
+cloneUrl req name =
+    let hdrs      = requestHeaders req
+        decodeHdr = (>>= eitherToMaybe . TE.decodeUtf8') . flip lookup hdrs
+    in buildCloneUrl (decodeHdr "X-Forwarded-Proto") (decodeHdr "Host") name
+  where
+    eitherToMaybe = either (const Nothing) Just
+
hunk ./darcsweb.cabal 19
-  exposed-modules:  DarcsWeb.Config
+  exposed-modules:  DarcsWeb.Clone
+                  , DarcsWeb.Config
hunk ./darcsweb.cabal 63
+                  , Properties.Clone
addfile ./src/DarcsWeb/Clone.hs
hunk ./src/DarcsWeb/Clone.hs 1
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | Helpers for the read-only darcs HTTP clone endpoint.
+module DarcsWeb.Clone
+  ( buildCloneUrl
+  , encodePathSegment
+  ) where
+
+import           Data.Bits (shiftR, (.&.))
+import           Data.Char (isAsciiLower, isAsciiUpper, isDigit, ord)
+import           Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import qualified Data.ByteString as BS
+import           Data.Word (Word8)
+
+-- | Construct the darcs-clone URL for a repository from the scheme and host
+--   observed on the incoming request. Both arguments accept 'Nothing' (or
+--   malformed / whitespace-only values) and fall back to a safe default so
+--   the summary page cannot be crashed by crafted request headers.
+--
+--   Guarantees:
+--
+--   * the scheme is always either @http@ or @https@;
+--   * the authority is always non-empty (defaults to @localhost@) and
+--     only contains characters legal in a URI authority;
+--   * the repository name is percent-encoded as a single path segment, so
+--     spaces / reserved characters cannot break out of the path.
+--
+--   The trailing @/@ is required: @darcs clone@ treats the URL as a
+--   directory and appends paths like @_darcs/hashed_inventory@.
+buildCloneUrl
+  :: Maybe Text  -- ^ value of the X-Forwarded-Proto header, if any
+  -> Maybe Text  -- ^ value of the Host header, if any
+  -> Text        -- ^ repository name (already validated upstream)
+  -> Text
+buildCloneUrl mProto mHost name =
+    proto <> "://" <> host <> "/clone/" <> encodePathSegment name <> "/"
+  where
+    proto = maybe "http" normalizeProto mProto
+    host  = case mHost >>= sanitizeHost of
+              Just h  -> h
+              Nothing -> "localhost"
+
+-- | Only allow http/https from the proxy hint; anything else becomes http.
+--   Tolerates the @header, header@ form that some proxies emit
+--   (e.g. @X-Forwarded-Proto: https, http@) by inspecting only the first
+--   comma-separated token.
+normalizeProto :: Text -> Text
+normalizeProto v =
+    let v' = T.toLower (T.strip (firstToken v))
+    in if v' == "https" then "https" else "http"
+
+-- | Validate and return a cleaned @Host@ value, or 'Nothing' if it cannot
+--   be safely rendered into a URL. Accepts one of:
+--
+--   * @reg-name[:port]@ ��� letters / digits / @.-_@, optional numeric port;
+--   * @[IPv6-literal][:port]@ ��� bracketed IPv6 with optional numeric port.
+--
+--   Any other shape (bare @:@, unclosed bracket, multiple ports, extra
+--   commas, etc.) falls through to 'Nothing' so the caller uses the
+--   @localhost@ fallback.
+sanitizeHost :: Text -> Maybe Text
+sanitizeHost raw =
+    let h = T.strip (firstToken raw)
+    in if T.null h || not (validAuthority h) then Nothing else Just h
+
+validAuthority :: Text -> Bool
+validAuthority t
+    | T.any (== ',') t = False
+    | T.isPrefixOf "[" t = validBracketed t
+    | otherwise          = validRegName t
+
+-- Bracketed IPv6 literal with optional port:  @[...]@ or @[...]:port@
+validBracketed :: Text -> Bool
+validBracketed t =
+    case T.breakOn "]" (T.drop 1 t) of
+      (inner, rest)
+        | T.null inner      -> False
+        | not (T.all isIpv6Char inner) -> False
+        | rest == "]"       -> True
+        | Just port <- T.stripPrefix "]:" rest
+                            -> validPort port
+        | otherwise         -> False
+  where
+    isIpv6Char c =
+      isAsciiLower c || isAsciiUpper c || isDigit c
+      || c == ':' || c == '.'
+
+-- Reg-name or IPv4 literal with optional port.
+validRegName :: Text -> Bool
+validRegName t =
+    case T.splitOn ":" t of
+      [h]        -> nameLike h
+      [h, p]     -> nameLike h && validPort p
+      _          -> False  -- multiple colons without IPv6 brackets
+  where
+    nameLike h =
+      not (T.null h) && T.all nameChar h
+    nameChar c =
+      isAsciiLower c || isAsciiUpper c || isDigit c
+      || c == '.' || c == '-' || c == '_'
+
+validPort :: Text -> Bool
+validPort p =
+    not (T.null p) && T.length p <= 5 && T.all isDigit p
+
+-- | First comma-separated token of a header value, e.g. \"https\" from
+--   \"https, http\".
+firstToken :: Text -> Text
+firstToken = fst . T.break (== ',')
+
+-- | A single path segment, percent-encoded so that spaces and reserved
+--   characters (@/@, @?@, @#@, @\@, ...) cannot alter the URL structure.
+--   Unreserved characters per RFC 3986 �� 2.3 are passed through verbatim;
+--   everything else is UTF-8 encoded and hex-escaped.
+encodePathSegment :: Text -> Text
+encodePathSegment t =
+    T.concat [ encodeByte b | b <- BS.unpack (TE.encodeUtf8 t) ]
+  where
+    encodeByte :: Word8 -> Text
+    encodeByte b
+      | isUnreserved b = T.singleton (toEnum (fromIntegral b))
+      | otherwise      = T.pack ['%', hex (b `shiftR` 4), hex (b .&. 0x0F)]
+
+    isUnreserved b =
+      let c = toEnum (fromIntegral b) :: Char
+      in isAsciiLower c || isAsciiUpper c || isDigit c
+         || c == '-' || c == '_' || c == '.' || c == '~'
+
+    hex :: Word8 -> Char
+    hex n | n < 10    = toEnum (ord '0' + fromIntegral n)
+          | otherwise = toEnum (ord 'A' + fromIntegral n - 10)
hunk ./src/DarcsWeb/Html.hs 91
-renderRepoSummary :: UTCTime -> Text -> RepoInfo -> [PatchSummary] -> [PatchSummary] -> Text
-renderRepoSummary now repoName ri recentPatches tags =
+renderRepoSummary :: UTCTime -> Text -> RepoInfo -> Text -> [PatchSummary] -> [PatchSummary] -> Text
+renderRepoSummary now repoName ri clone recentPatches tags =
hunk ./src/DarcsWeb/Html.hs 100
+          -- Read-only clone URL
+          , renderCloneBlock clone
hunk ./src/DarcsWeb/Html.hs 123
+-- | Read-only HTTP clone URL box. Empty URL suppresses the block.
+--   Renders the URL alone (not a ready-made shell command) so that hosts
+--   or repo names containing whitespace or shell metacharacters cannot
+--   silently deform a copy-pasted command.
+renderCloneBlock :: Text -> Text
+renderCloneBlock url
+    | T.null url = ""
+    | otherwise  = T.concat
+        [ "<div class=\"clone-box\">\n"
+        , "<span class=\"clone-label\">Read-only clone URL</span>\n"
+        , "<code class=\"clone-url\">", esc url, "</code>\n"
+        , "</div>\n"
+        ]
+
hunk ./static/style.css 732
+/* === Clone URL Box === */
+.clone-box {
+  display: flex;
+  align-items: center;
+  gap: var(--space-3);
+  flex-wrap: wrap;
+  margin: var(--space-4) 0 var(--space-6);
+  padding: var(--space-3) var(--space-4);
+  background: var(--accent-subtle);
+  border: 1px solid var(--border-default);
+  border-left: 3px solid var(--accent);
+  border-radius: var(--radius-md);
+}
+
+.clone-label {
+  color: var(--text-secondary);
+  font-size: var(--size-sm);
+  font-weight: 600;
+  flex-shrink: 0;
+}
+
+.clone-url {
+  font-family: var(--font-mono);
+  font-size: var(--size-sm);
+  color: var(--text-primary);
+  background: var(--bg-surface);
+  padding: var(--space-1) var(--space-2);
+  border: 1px solid var(--border-subtle);
+  border-radius: var(--radius-sm);
+  user-select: all;
+  word-break: break-all;
+  flex: 1;
+  min-width: 0;
+}
+
addfile ./test/Properties/Clone.hs
hunk ./test/Properties/Clone.hs 1
+{-# LANGUAGE OverloadedStrings #-}
+
+module Properties.Clone (tests) where
+
+import Test.QuickCheck
+import qualified Data.Text as T
+
+import DarcsWeb.Clone (buildCloneUrl, encodePathSegment)
+import DarcsWeb.Darcs (isSafeSubPath)
+
+-- --------------------------------------------------------------------------
+-- buildCloneUrl: URL construction from request headers
+-- --------------------------------------------------------------------------
+
+prop_clone_default_scheme :: Bool
+prop_clone_default_scheme =
+    buildCloneUrl Nothing (Just "darcs.example.com") "proj"
+        == "http://darcs.example.com/clone/proj/"
+
+prop_clone_https_forwarded :: Bool
+prop_clone_https_forwarded =
+    buildCloneUrl (Just "https") (Just "darcs.example.com") "proj"
+        == "https://darcs.example.com/clone/proj/"
+
+-- Unknown / suspicious schemes (e.g. "javascript") must never be echoed
+-- into the rendered clone URL.
+prop_clone_rejects_weird_scheme :: String -> Property
+prop_clone_rejects_weird_scheme raw =
+    let t = T.pack raw
+        lowered = T.toLower (T.strip t)
+    in lowered /= "https" ==>
+       let url = buildCloneUrl (Just t) (Just "h") "r"
+       in T.isPrefixOf "http://" url
+
+prop_clone_has_trailing_slash :: String -> String -> Property
+prop_clone_has_trailing_slash host name =
+    not (null host) && not (null name)
+    && all (`notElem` ("/ \t\n" :: String)) name ==>
+    let url = buildCloneUrl Nothing (Just (T.pack host)) (T.pack name)
+    in T.isSuffixOf "/" url
+
+prop_clone_missing_host_falls_back :: Bool
+prop_clone_missing_host_falls_back =
+    buildCloneUrl (Just "https") Nothing "r"
+        == "https://localhost/clone/r/"
+
+-- Whitespace-only / empty Host must never produce http:///clone/...
+prop_clone_whitespace_host_falls_back :: Bool
+prop_clone_whitespace_host_falls_back =
+    buildCloneUrl Nothing (Just "   ") "r"
+        == "http://localhost/clone/r/"
+    &&
+    buildCloneUrl Nothing (Just "") "r"
+        == "http://localhost/clone/r/"
+
+-- Reject host values containing URL-structural characters that would
+-- otherwise smuggle path / query / fragment into the clone URL.
+prop_clone_rejects_bad_host :: Bool
+prop_clone_rejects_bad_host = and
+    [ buildCloneUrl Nothing (Just "evil.com/../foo")   "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "evil.com?x=1")      "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "evil.com#frag")     "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "attacker@evil.com") "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "evil.com evil2")    "r" == "http://localhost/clone/r/"
+    -- Malformed authorities
+    , buildCloneUrl Nothing (Just ":")             "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "[::1")          "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "host:80:90")    "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just ":8080")         "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "host:")         "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "host:abc")      "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "host:123456")   "r" == "http://localhost/clone/r/"
+    , buildCloneUrl Nothing (Just "[]")            "r" == "http://localhost/clone/r/"
+    ]
+
+-- Well-formed authorities should round-trip unchanged.
+prop_clone_accepts_good_host :: Bool
+prop_clone_accepts_good_host = and
+    [ buildCloneUrl Nothing (Just "example.com")         "r" == "http://example.com/clone/r/"
+    , buildCloneUrl Nothing (Just "example.com:8080")    "r" == "http://example.com:8080/clone/r/"
+    , buildCloneUrl Nothing (Just "[::1]")               "r" == "http://[::1]/clone/r/"
+    , buildCloneUrl Nothing (Just "[::1]:3000")          "r" == "http://[::1]:3000/clone/r/"
+    , buildCloneUrl Nothing (Just "[2001:db8::1]:443")   "r" == "http://[2001:db8::1]:443/clone/r/"
+    , buildCloneUrl Nothing (Just "10.0.0.1:3000")       "r" == "http://10.0.0.1:3000/clone/r/"
+    ]
+
+-- Proxies sometimes append: "https, http". Only the first token matters.
+prop_clone_comma_proto :: Bool
+prop_clone_comma_proto =
+    buildCloneUrl (Just "https, http") (Just "h") "r"
+        == "https://h/clone/r/"
+    &&
+    buildCloneUrl (Just "http, https") (Just "h") "r"
+        == "http://h/clone/r/"
+
+-- Repo names with reserved / unsafe characters must be percent-encoded
+-- so they cannot break out of the path segment.
+prop_clone_encodes_repo :: Bool
+prop_clone_encodes_repo = and
+    [ buildCloneUrl Nothing (Just "h") "my repo"  == "http://h/clone/my%20repo/"
+    , buildCloneUrl Nothing (Just "h") "a/b"      == "http://h/clone/a%2Fb/"
+    , buildCloneUrl Nothing (Just "h") "a?b"      == "http://h/clone/a%3Fb/"
+    , buildCloneUrl Nothing (Just "h") "a#b"      == "http://h/clone/a%23b/"
+    , buildCloneUrl Nothing (Just "h") "a%2F"     == "http://h/clone/a%252F/"
+    ]
+
+prop_clone_encodes_passthrough :: Bool
+prop_clone_encodes_passthrough =
+    encodePathSegment "ABCabc123-_.~" == "ABCabc123-_.~"
+
+-- The percent-encoded output never contains characters that alter URL
+-- structure; pairs with prop_clone_encodes_repo at the byte level.
+prop_clone_encoded_has_no_reserved :: String -> Bool
+prop_clone_encoded_has_no_reserved s =
+    let enc = encodePathSegment (T.pack s)
+    in not (any (`T.isInfixOf` enc) ["/", "?", "#", " ", "\n", "\t"])
+
+-- --------------------------------------------------------------------------
+-- isSafeSubPath: concrete expectations for the clone jail.
+-- The function is formally verified; these are integration-level
+-- assertions that pin its behaviour to the clone use case so that a
+-- future refactor cannot silently drop a critical rejection.
+-- --------------------------------------------------------------------------
+
+accepts :: FilePath -> Bool
+accepts = isSafeSubPath
+
+rejects :: FilePath -> Bool
+rejects = not . isSafeSubPath
+
+prop_clone_accepts_inventory :: Bool
+prop_clone_accepts_inventory = accepts "hashed_inventory"
+
+prop_clone_accepts_nested_hash :: Bool
+prop_clone_accepts_nested_hash =
+    accepts "inventories/0000000123abc" &&
+    accepts "patches/ffbbaa" &&
+    accepts "pristine.hashed/deadbeef"
+
+prop_clone_rejects_parent :: Bool
+prop_clone_rejects_parent =
+    rejects ".." &&
+    rejects "../secret" &&
+    rejects "a/../b"
+
+prop_clone_rejects_hidden :: Bool
+prop_clone_rejects_hidden =
+    rejects ".ssh/id_rsa" &&
+    rejects ".git/config"
+
+prop_clone_rejects_nested_darcs :: Bool
+prop_clone_rejects_nested_darcs =
+    rejects "_darcs/config" &&
+    rejects "inventories/_darcs"
+
+prop_clone_rejects_empty :: Bool
+prop_clone_rejects_empty =
+    rejects "" &&
+    rejects "/"
+
+-- --------------------------------------------------------------------------
+-- Test runner
+-- --------------------------------------------------------------------------
+
+tests :: IO Bool
+tests = fmap and $ sequence
+    [ run "clone default scheme"        (property prop_clone_default_scheme)
+    , run "clone https forwarded"       (property prop_clone_https_forwarded)
+    , run "clone rejects weird scheme"  prop_clone_rejects_weird_scheme
+    , run "clone trailing slash"        prop_clone_has_trailing_slash
+    , run "clone missing host default"  (property prop_clone_missing_host_falls_back)
+    , run "clone whitespace host"       (property prop_clone_whitespace_host_falls_back)
+    , run "clone rejects bad host"      (property prop_clone_rejects_bad_host)
+    , run "clone accepts good host"     (property prop_clone_accepts_good_host)
+    , run "clone comma-separated proto" (property prop_clone_comma_proto)
+    , run "clone encodes repo segment"  (property prop_clone_encodes_repo)
+    , run "clone encode passthrough"    (property prop_clone_encodes_passthrough)
+    , run "clone encode no reserved"    (property prop_clone_encoded_has_no_reserved)
+    , run "clone accepts inventory"     (property prop_clone_accepts_inventory)
+    , run "clone accepts nested hash"   (property prop_clone_accepts_nested_hash)
+    , run "clone rejects parent"        (property prop_clone_rejects_parent)
+    , run "clone rejects hidden"        (property prop_clone_rejects_hidden)
+    , run "clone rejects nested _darcs" (property prop_clone_rejects_nested_darcs)
+    , run "clone rejects empty"         (property prop_clone_rejects_empty)
+    ]
+  where
+    run :: Testable p => String -> p -> IO Bool
+    run name prop = do
+      putStr $ "  " ++ name ++ ": "
+      r <- quickCheckWithResult stdArgs { maxSuccess = 200 } prop
+      return (isSuccess r)
hunk ./test/Spec.hs 8
+import qualified Properties.Clone as Clone
hunk ./test/Spec.hs 30
-    if htmlOk && cspOk
+    putStrLn "\n=== Running QuickCheck properties (Clone) ==="
+    cloneOk <- Clone.tests
+
+    if htmlOk && cspOk && cloneOk