{-# LANGUAGE OverloadedStrings #-}
-- | Helpers for the read-only darcs HTTP clone endpoint.
module DarcsWeb.Clone
( buildCloneUrl
, encodePathSegment
) where
import Data.Bits (shiftR, (.&.))
import Data.Char (isAsciiLower, isAsciiUpper, isDigit, ord)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.ByteString as BS
import Data.Word (Word8)
-- | Construct the darcs-clone URL for a repository from the scheme and host
-- observed on the incoming request. Both arguments accept 'Nothing' (or
-- malformed / whitespace-only values) and fall back to a safe default so
-- the summary page cannot be crashed by crafted request headers.
--
-- Guarantees:
--
-- * the scheme is always either @http@ or @https@;
-- * the authority is always non-empty (defaults to @localhost@) and
-- only contains characters legal in a URI authority;
-- * the repository name is percent-encoded as a single path segment, so
-- spaces / reserved characters cannot break out of the path.
--
-- The trailing @/@ is required: @darcs clone@ treats the URL as a
-- directory and appends paths like @_darcs/hashed_inventory@.
buildCloneUrl
:: Maybe Text -- ^ value of the X-Forwarded-Proto header, if any
-> Maybe Text -- ^ value of the Host header, if any
-> Text -- ^ repository name (already validated upstream)
-> Text
buildCloneUrl mProto mHost name =
proto <> "://" <> host <> "/clone/" <> encodePathSegment name <> "/"
where
proto = maybe "http" normalizeProto mProto
host = case mHost >>= sanitizeHost of
Just h -> h
Nothing -> "localhost"
-- | Only allow http/https from the proxy hint; anything else becomes http.
-- Tolerates the @header, header@ form that some proxies emit
-- (e.g. @X-Forwarded-Proto: https, http@) by inspecting only the first
-- comma-separated token.
normalizeProto :: Text -> Text
normalizeProto v =
let v' = T.toLower (T.strip (firstToken v))
in if v' == "https" then "https" else "http"
-- | Validate and return a cleaned @Host@ value, or 'Nothing' if it cannot
-- be safely rendered into a URL. Accepts one of:
--
-- * @reg-name[:port]@ — letters / digits / @.-_@, optional numeric port;
-- * @[IPv6-literal][:port]@ — bracketed IPv6 with optional numeric port.
--
-- Any other shape (bare @:@, unclosed bracket, multiple ports, extra
-- commas, etc.) falls through to 'Nothing' so the caller uses the
-- @localhost@ fallback.
sanitizeHost :: Text -> Maybe Text
sanitizeHost raw =
let h = T.strip (firstToken raw)
in if T.null h || not (validAuthority h) then Nothing else Just h
validAuthority :: Text -> Bool
validAuthority t
| T.any (== ',') t = False
| T.isPrefixOf "[" t = validBracketed t
| otherwise = validRegName t
-- Bracketed IPv6 literal with optional port: @[...]@ or @[...]:port@
validBracketed :: Text -> Bool
validBracketed t =
case T.breakOn "]" (T.drop 1 t) of
(inner, rest)
| T.null inner -> False
| not (T.all isIpv6Char inner) -> False
| rest == "]" -> True
| Just port <- T.stripPrefix "]:" rest
-> validPort port
| otherwise -> False
where
isIpv6Char c =
isAsciiLower c || isAsciiUpper c || isDigit c
|| c == ':' || c == '.'
-- Reg-name or IPv4 literal with optional port.
validRegName :: Text -> Bool
validRegName t =
case T.splitOn ":" t of
[h] -> nameLike h
[h, p] -> nameLike h && validPort p
_ -> False -- multiple colons without IPv6 brackets
where
nameLike h =
not (T.null h) && T.all nameChar h
nameChar c =
isAsciiLower c || isAsciiUpper c || isDigit c
|| c == '.' || c == '-' || c == '_'
validPort :: Text -> Bool
validPort p =
not (T.null p) && T.length p <= 5 && T.all isDigit p
-- | First comma-separated token of a header value, e.g. \"https\" from
-- \"https, http\".
firstToken :: Text -> Text
firstToken = fst . T.break (== ',')
-- | A single path segment, percent-encoded so that spaces and reserved
-- characters (@/@, @?@, @#@, @\@, ...) cannot alter the URL structure.
-- Unreserved characters per RFC 3986 § 2.3 are passed through verbatim;
-- everything else is UTF-8 encoded and hex-escaped.
encodePathSegment :: Text -> Text
encodePathSegment t =
T.concat [ encodeByte b | b <- BS.unpack (TE.encodeUtf8 t) ]
where
encodeByte :: Word8 -> Text
encodeByte b
| isUnreserved b = T.singleton (toEnum (fromIntegral b))
| otherwise = T.pack ['%', hex (b `shiftR` 4), hex (b .&. 0x0F)]
isUnreserved b =
let c = toEnum (fromIntegral b) :: Char
in isAsciiLower c || isAsciiUpper c || isDigit c
|| c == '-' || c == '_' || c == '.' || c == '~'
hex :: Word8 -> Char
hex n | n < 10 = toEnum (ord '0' + fromIntegral n)
| otherwise = toEnum (ord 'A' + fromIntegral n - 10)