darcsweb - test/Properties/Clone.hs

[root] / test / Properties / Clone.hs
{-# LANGUAGE OverloadedStrings #-}

module Properties.Clone (tests) where

import Test.QuickCheck
import qualified Data.Text as T

import DarcsWeb.Clone (buildCloneUrl, encodePathSegment)
import DarcsWeb.Darcs (isSafeSubPath, isCloneSubPath)

-- --------------------------------------------------------------------------
-- buildCloneUrl: URL construction from request headers
-- --------------------------------------------------------------------------

prop_clone_default_scheme :: Bool
prop_clone_default_scheme =
    buildCloneUrl Nothing (Just "darcs.example.com") "proj"
        == "http://darcs.example.com/clone/proj/"

prop_clone_https_forwarded :: Bool
prop_clone_https_forwarded =
    buildCloneUrl (Just "https") (Just "darcs.example.com") "proj"
        == "https://darcs.example.com/clone/proj/"

-- Unknown / suspicious schemes (e.g. "javascript") must never be echoed
-- into the rendered clone URL.
prop_clone_rejects_weird_scheme :: String -> Property
prop_clone_rejects_weird_scheme raw =
    let t = T.pack raw
        lowered = T.toLower (T.strip t)
    in lowered /= "https" ==>
       let url = buildCloneUrl (Just t) (Just "h") "r"
       in T.isPrefixOf "http://" url

prop_clone_has_trailing_slash :: String -> String -> Property
prop_clone_has_trailing_slash host name =
    not (null host) && not (null name)
    && all (`notElem` ("/ \t\n" :: String)) name ==>
    let url = buildCloneUrl Nothing (Just (T.pack host)) (T.pack name)
    in T.isSuffixOf "/" url

prop_clone_missing_host_falls_back :: Bool
prop_clone_missing_host_falls_back =
    buildCloneUrl (Just "https") Nothing "r"
        == "https://localhost/clone/r/"

-- Whitespace-only / empty Host must never produce http:///clone/...
prop_clone_whitespace_host_falls_back :: Bool
prop_clone_whitespace_host_falls_back =
    buildCloneUrl Nothing (Just "   ") "r"
        == "http://localhost/clone/r/"
    &&
    buildCloneUrl Nothing (Just "") "r"
        == "http://localhost/clone/r/"

-- Reject host values containing URL-structural characters that would
-- otherwise smuggle path / query / fragment into the clone URL.
prop_clone_rejects_bad_host :: Bool
prop_clone_rejects_bad_host = and
    [ buildCloneUrl Nothing (Just "evil.com/../foo")   "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "evil.com?x=1")      "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "evil.com#frag")     "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "attacker@evil.com") "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "evil.com evil2")    "r" == "http://localhost/clone/r/"
    -- Malformed authorities
    , buildCloneUrl Nothing (Just ":")             "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "[::1")          "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "host:80:90")    "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just ":8080")         "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "host:")         "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "host:abc")      "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "host:123456")   "r" == "http://localhost/clone/r/"
    , buildCloneUrl Nothing (Just "[]")            "r" == "http://localhost/clone/r/"
    ]

-- Well-formed authorities should round-trip unchanged.
prop_clone_accepts_good_host :: Bool
prop_clone_accepts_good_host = and
    [ buildCloneUrl Nothing (Just "example.com")         "r" == "http://example.com/clone/r/"
    , buildCloneUrl Nothing (Just "example.com:8080")    "r" == "http://example.com:8080/clone/r/"
    , buildCloneUrl Nothing (Just "[::1]")               "r" == "http://[::1]/clone/r/"
    , buildCloneUrl Nothing (Just "[::1]:3000")          "r" == "http://[::1]:3000/clone/r/"
    , buildCloneUrl Nothing (Just "[2001:db8::1]:443")   "r" == "http://[2001:db8::1]:443/clone/r/"
    , buildCloneUrl Nothing (Just "10.0.0.1:3000")       "r" == "http://10.0.0.1:3000/clone/r/"
    ]

-- Proxies sometimes append: "https, http". Only the first token matters.
prop_clone_comma_proto :: Bool
prop_clone_comma_proto =
    buildCloneUrl (Just "https, http") (Just "h") "r"
        == "https://h/clone/r/"
    &&
    buildCloneUrl (Just "http, https") (Just "h") "r"
        == "http://h/clone/r/"

-- Repo names with reserved / unsafe characters must be percent-encoded
-- so they cannot break out of the path segment.
prop_clone_encodes_repo :: Bool
prop_clone_encodes_repo = and
    [ buildCloneUrl Nothing (Just "h") "my repo"  == "http://h/clone/my%20repo/"
    , buildCloneUrl Nothing (Just "h") "a/b"      == "http://h/clone/a%2Fb/"
    , buildCloneUrl Nothing (Just "h") "a?b"      == "http://h/clone/a%3Fb/"
    , buildCloneUrl Nothing (Just "h") "a#b"      == "http://h/clone/a%23b/"
    , buildCloneUrl Nothing (Just "h") "a%2F"     == "http://h/clone/a%252F/"
    ]

prop_clone_encodes_passthrough :: Bool
prop_clone_encodes_passthrough =
    encodePathSegment "ABCabc123-_.~" == "ABCabc123-_.~"

-- The percent-encoded output never contains characters that alter URL
-- structure; pairs with prop_clone_encodes_repo at the byte level.
prop_clone_encoded_has_no_reserved :: String -> Bool
prop_clone_encoded_has_no_reserved s =
    let enc = encodePathSegment (T.pack s)
    in not (any (`T.isInfixOf` enc) ["/", "?", "#", " ", "\n", "\t"])

-- --------------------------------------------------------------------------
-- isSafeSubPath: concrete expectations for the clone jail.
-- The function is formally verified; these are integration-level
-- assertions that pin its behaviour to the clone use case so that a
-- future refactor cannot silently drop a critical rejection.
-- --------------------------------------------------------------------------

accepts :: FilePath -> Bool
accepts = isSafeSubPath

rejects :: FilePath -> Bool
rejects = not . isSafeSubPath

prop_clone_accepts_inventory :: Bool
prop_clone_accepts_inventory = accepts "hashed_inventory"

prop_clone_accepts_nested_hash :: Bool
prop_clone_accepts_nested_hash =
    accepts "inventories/0000000123abc" &&
    accepts "patches/ffbbaa" &&
    accepts "pristine.hashed/deadbeef"

prop_clone_rejects_parent :: Bool
prop_clone_rejects_parent =
    rejects ".." &&
    rejects "../secret" &&
    rejects "a/../b"

prop_clone_rejects_hidden :: Bool
prop_clone_rejects_hidden =
    rejects ".ssh/id_rsa" &&
    rejects ".git/config"

prop_clone_rejects_nested_darcs :: Bool
prop_clone_rejects_nested_darcs =
    rejects "_darcs/config" &&
    rejects "inventories/_darcs"

prop_clone_rejects_empty :: Bool
prop_clone_rejects_empty =
    rejects "" &&
    rejects "/"

-- --------------------------------------------------------------------------
-- isCloneSubPath: allowlist pinning for the read-only HTTP clone
-- endpoint. The predicate is tighter than isSafeSubPath so that
-- unrelated files under _darcs/ stay private.
-- --------------------------------------------------------------------------

acceptsClone :: FilePath -> Bool
acceptsClone = isCloneSubPath

rejectsClone :: FilePath -> Bool
rejectsClone = not . isCloneSubPath

prop_clone_allowlist_accepts_core :: Bool
prop_clone_allowlist_accepts_core = and
    [ acceptsClone "hashed_inventory"
    , acceptsClone "inventories/0000000123abc"
    , acceptsClone "patches/ffbbaa"
    , acceptsClone "pristine.hashed/deadbeef"
    ]

prop_clone_allowlist_accepts_packs :: Bool
prop_clone_allowlist_accepts_packs =
    acceptsClone "packs/basic.tar.gz" &&
    acceptsClone "packs/patches.tar.gz"

prop_clone_allowlist_rejects_prefs :: Bool
prop_clone_allowlist_rejects_prefs = and
    [ rejectsClone "prefs/defaultrepo"
    , rejectsClone "prefs/author"
    , rejectsClone "prefs/motd"
    , rejectsClone "prefs/repo_description"
    ]

prop_clone_allowlist_rejects_other :: Bool
prop_clone_allowlist_rejects_other = and
    [ rejectsClone "format"
    , rejectsClone "tentative_hashed_pristine"
    , rejectsClone "packs/other.tar.gz"
    , rejectsClone "hooks/pre-apply"
    , rejectsClone "scripts/boom.sh"
    ]

-- Pin the exact two-segment shape for hash directories; deeper nesting
-- must not be accepted. Catches a regression to the previous
-- @("patches" : _ : _)@ wildcard-tail pattern, which admitted any depth.
prop_clone_allowlist_rejects_nested :: Bool
prop_clone_allowlist_rejects_nested = and
    [ rejectsClone "patches/aa/bb"
    , rejectsClone "inventories/aa/bb"
    , rejectsClone "pristine.hashed/aa/bb"
    , rejectsClone "patches/aa/bb/cc"
    , rejectsClone "packs/basic.tar.gz/extra"
    ]

prop_clone_allowlist_rejects_unsafe :: Bool
prop_clone_allowlist_rejects_unsafe = and
    [ rejectsClone ""
    , rejectsClone ".."
    , rejectsClone "../secret"
    , rejectsClone "patches/../../etc/passwd"
    , rejectsClone ".git/config"
    , rejectsClone "patches/.hidden"
    ]

-- isCloneSubPath must never accept anything isSafeSubPath rejects.
prop_clone_allowlist_refines_safe :: String -> Bool
prop_clone_allowlist_refines_safe s =
    not (isCloneSubPath s) || isSafeSubPath s

-- --------------------------------------------------------------------------
-- Test runner
-- --------------------------------------------------------------------------

tests :: IO Bool
tests = fmap and $ sequence
    [ run "clone default scheme"        (property prop_clone_default_scheme)
    , run "clone https forwarded"       (property prop_clone_https_forwarded)
    , run "clone rejects weird scheme"  prop_clone_rejects_weird_scheme
    , run "clone trailing slash"        prop_clone_has_trailing_slash
    , run "clone missing host default"  (property prop_clone_missing_host_falls_back)
    , run "clone whitespace host"       (property prop_clone_whitespace_host_falls_back)
    , run "clone rejects bad host"      (property prop_clone_rejects_bad_host)
    , run "clone accepts good host"     (property prop_clone_accepts_good_host)
    , run "clone comma-separated proto" (property prop_clone_comma_proto)
    , run "clone encodes repo segment"  (property prop_clone_encodes_repo)
    , run "clone encode passthrough"    (property prop_clone_encodes_passthrough)
    , run "clone encode no reserved"    (property prop_clone_encoded_has_no_reserved)
    , run "clone accepts inventory"     (property prop_clone_accepts_inventory)
    , run "clone accepts nested hash"   (property prop_clone_accepts_nested_hash)
    , run "clone rejects parent"        (property prop_clone_rejects_parent)
    , run "clone rejects hidden"        (property prop_clone_rejects_hidden)
    , run "clone rejects nested _darcs" (property prop_clone_rejects_nested_darcs)
    , run "clone rejects empty"         (property prop_clone_rejects_empty)
    , run "clone allowlist core"        (property prop_clone_allowlist_accepts_core)
    , run "clone allowlist packs"       (property prop_clone_allowlist_accepts_packs)
    , run "clone allowlist rejects prefs" (property prop_clone_allowlist_rejects_prefs)
    , run "clone allowlist rejects other" (property prop_clone_allowlist_rejects_other)
    , run "clone allowlist rejects nested" (property prop_clone_allowlist_rejects_nested)
    , run "clone allowlist rejects unsafe" (property prop_clone_allowlist_rejects_unsafe)
    , run "clone allowlist refines safe"   prop_clone_allowlist_refines_safe
    ]
  where
    run :: Testable p => String -> p -> IO Bool
    run name prop = do
      putStr $ "  " ++ name ++ ": "
      r <- quickCheckWithResult stdArgs { maxSuccess = 200 } prop
      return (isSuccess r)