Diff
patch 00d84dfea2646c9e1942c7934f73b435c5c7df92
Author: fritjof@alokat.org
Date: Fri Mar 13 15:55:54 UTC 2026
* Add formal verification with Rocq and property-based testing.
Security-critical pure functions (HTML escaping, path validation,
trailing slash normalization) are reimplemented in Gallina, formally
proven correct, and extracted to Haskell. Remaining pure functions
are covered by QuickCheck properties. Rocq extraction runs
automatically on stack build, proofs and QuickCheck on stack test.
hunk ./.claude/settings.local.json 1
-{
- "permissions": {
- "allow": [
- "WebFetch(domain:hackage.haskell.org)",
- "WebFetch(domain:github.com)",
- "WebFetch(domain:hackage-content.haskell.org)",
- "Bash(which ghc:*)",
- "Read(//home/fritjof/.ghcup/**)",
- "Bash(apt list:*)",
- "Bash(which stack:*)",
- "WebFetch(domain:www.stackage.org)"
- ]
- }
-}
rmfile ./.claude/settings.local.json
rmdir ./.claude
hunk ./README.md 29
+You also need [Rocq](https://rocq-prover.org/) (formerly Coq) installed and
+available as `rocq` on your `PATH`. The project uses Rocq to compile formally
+verified pure functions that are extracted to Haskell during the build. Rocq can
+be installed via [opam](https://opam.ocaml.org/):
+
+```
+opam install rocq-core rocq-stdlib
+```
+
hunk ./README.md 47
+The build automatically compiles the Rocq sources in `verified/` and extracts
+Haskell modules into `gen/` before compiling the Haskell code (via a custom
+`Setup.hs` pre-build hook).
+
+### Test
+
+```
+stack test
+```
+
+This runs two phases:
+
+1. **Rocq proof verification** -- checks all formal proofs in
+ `verified/*_proofs.v`.
+2. **QuickCheck property tests** -- runs property-based tests for the pure
+ Haskell functions that are not formally verified.
+
hunk ./README.md 169
+## Formal Verification
+
+Security-critical pure functions are implemented in Gallina (the Rocq/Coq
+specification language), formally proven correct, and extracted to Haskell for
+use at runtime. This follows the same approach as
+[hopify](https://github.com/...) -- identify pure functions, reimplement them in
+Gallina, and export them to the target language.
+
+### Verified functions
+
+| Function | Source | Replaces |
+|---|---|---|
+| `esc` / `esc_char` | `verified/html_pure.v` | HTML entity escaping in `DarcsWeb.Html` |
+| `is_safe_sub_path` / `split_path` | `verified/path_pure.v` | Path traversal validation in `DarcsWeb.Darcs` |
+| `add_trailing_slash` | `verified/path_pure.v` | Jail-check path normalisation in `Main` |
+
+### Proven properties
+
+The proof files (`verified/*_proofs.v`) establish:
+
+- **Concrete correctness**: each dangerous character (`<`, `>`, `&`, `"`, `'`)
+ maps to the correct HTML entity.
+- **Security theorems**: the output of `esc` contains no raw `<`, `>`, or `"`
+ characters -- proven by exhaustive case analysis over all 256 ASCII values
+ combined with structural induction over the input string.
+- **Path safety**: `is_safe_sub_path` rejects `..`, `_darcs`, `.git`, hidden
+ files (names starting with `.`), and empty paths; accepts legitimate nested
+ paths.
+- **Idempotence**: `add_trailing_slash` applied twice equals applying it once.
+
+### Property-based tests
+
+Functions that remain in Haskell (because they depend on types or libraries not
+easily represented in Gallina) are tested with QuickCheck:
+
+| Function | Properties |
+|---|---|
+| `highlightDiff` | Correct CSS class wrapping for `+`/`-`/`@@` prefixed lines |
+| `shortAuthor` | Email stripping, plain name preservation |
+| `formatSize` | Correct unit suffix (`B`/`KiB`/`MiB`) for each size range |
+| `formatAbsolute` | Short strings unchanged, long date strings gain separators |
+
+### Project layout
+
+```
+verified/ Rocq/Coq sources and Makefile
+ html_pure.v HTML escaping implementation + extraction
+ path_pure.v Path validation implementation + extraction
+ html_pure_proofs.v Proofs for HTML escaping
+ path_pure_proofs.v Proofs for path validation
+ Makefile Build rules for extraction and proof checking
+gen/ Auto-generated Haskell (do not edit)
+ HtmlPure.hs Extracted from html_pure.v
+ PathPure.hs Extracted from path_pure.v
+Setup.hs Custom build hooks (runs Rocq extraction before build)
+test/
+ Spec.hs Test runner (proofs + QuickCheck)
+ Properties/Html.hs QuickCheck property definitions
+```
+
hunk ./darcsweb.cabal 8
-build-type: Simple
-
-executable darcsweb
+build-type: Custom
+
+custom-setup
+ setup-depends:
+ base >= 4.12 && < 5
+ , Cabal >= 3.0
+ , process >= 1.6
+
+library
hunk ./darcsweb.cabal 18
- hs-source-dirs: src
- main-is: Main.hs
- other-modules: DarcsWeb.Config
+ hs-source-dirs: src, gen
+ exposed-modules: DarcsWeb.Config
hunk ./darcsweb.cabal 23
- ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
+ , HtmlPure
+ , PathPure
+ ghc-options: -Wall
hunk ./darcsweb.cabal 28
- , scotty >= 0.20 && < 1
- , warp >= 3.3 && < 4
hunk ./darcsweb.cabal 33
+ , time >= 1.9 && < 2
+
+executable darcsweb
+ default-language: Haskell2010
+ hs-source-dirs: app
+ main-is: Main.hs
+ ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
+ build-depends: base >= 4.12 && < 5
+ , darcsweb
+ , scotty >= 0.20 && < 1
+ , warp >= 3.3 && < 4
+ , text >= 1.2 && < 3
hunk ./darcsweb.cabal 49
+ , directory >= 1.3 && < 2
+ , filepath >= 1.4 && < 2
hunk ./darcsweb.cabal 52
+
+test-suite darcsweb-test
+ type: exitcode-stdio-1.0
+ default-language: Haskell2010
+ hs-source-dirs: test
+ main-is: Spec.hs
+ other-modules: Properties.Html
+ ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
+ build-depends: base >= 4.12 && < 5
+ , darcsweb
+ , QuickCheck >= 2.14 && < 3
+ , text >= 1.2 && < 3
+ , process >= 1.6 && < 2
hunk ./src/DarcsWeb/Darcs.hs 15
+ -- Exported for testing
+ , isSafeSubPath
+ , splitPath
hunk ./src/DarcsWeb/Darcs.hs 54
+import qualified PathPure
hunk ./src/DarcsWeb/Darcs.hs 169
+-- Uses formally verified implementation from Coq/Rocq.
hunk ./src/DarcsWeb/Darcs.hs 171
-isSafeSubPath sub =
- let segments = filter (not . null) $ splitPath sub
- in not (null segments)
- && all safeSegment segments
- where
- safeSegment s =
- let name = filter (/= '/') s
- in not (null name)
- && name /= ".."
- && name /= "."
- && name /= "_darcs"
- && name /= ".git"
- && head name /= '.'
+isSafeSubPath = PathPure.is_safe_sub_path
hunk ./src/DarcsWeb/Darcs.hs 174
+-- Uses formally verified implementation from Coq/Rocq.
hunk ./src/DarcsWeb/Darcs.hs 176
-splitPath [] = []
-splitPath s = let (seg, rest) = break (== '/') s
- in seg : case rest of
- [] -> []
- (_:xs) -> splitPath xs
+splitPath = PathPure.split_path
hunk ./src/DarcsWeb/Html.hs 14
+ -- Exported for testing
+ , esc
+ , highlightDiff
+ , shortAuthor
+ , formatSize
+ , formatAbsolute
hunk ./src/DarcsWeb/Html.hs 27
+import qualified HtmlPure
hunk ./src/DarcsWeb/Html.hs 393
--- | HTML-escape text
+-- | HTML-escape text.
+-- Uses formally verified implementation from Coq/Rocq.
hunk ./src/DarcsWeb/Html.hs 396
-esc = T.concatMap escChar
- where
- escChar '<' = "<"
- escChar '>' = ">"
- escChar '&' = "&"
- escChar '"' = """
- escChar '\'' = "'"
- escChar c = T.singleton c
+esc = T.pack . HtmlPure.esc . T.unpack
hunk ./src/Main.hs 1
-{-# LANGUAGE ForeignFunctionInterface #-}
-{-# LANGUAGE OverloadedStrings #-}
-
-module Main (main) where
-
-import Web.Scotty (scottyOpts, ScottyM, ActionM, get, notFound,
- pathParam, html, status, liftIO,
- Options(..), defaultOptions, file, setHeader,
- request, regex)
-import Network.Wai.Handler.Warp (setPort, setHost, defaultSettings)
-import Network.HTTP.Types.Status (status404, status403)
-import Network.Wai (pathInfo)
-import Data.String (fromString)
-
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-import Data.Time.Clock (getCurrentTime)
-import System.Console.GetOpt
-import System.Environment (getArgs, getProgName)
-import System.Directory (makeAbsolute, canonicalizePath,
- doesFileExist)
-import System.FilePath ((</>), takeExtension)
-import System.Exit (exitFailure, exitSuccess)
-import System.IO (hPutStrLn, stderr, hFlush, stdout)
-import System.Posix.Process (forkProcess, createSession)
-import System.Posix.IO (dupTo, stdInput, stdOutput, stdError)
-import System.Posix.Syslog (withSyslog, syslog, Facility(..),
- Priority(..), Option(..))
-import Foreign.C.String (withCStringLen, withCString, CString)
-import Foreign.C.Types (CInt(..))
-import System.Posix.Types (Fd(..))
-import Data.List (isPrefixOf)
-
-import DarcsWeb.Types
-import DarcsWeb.Config
-import DarcsWeb.Darcs
-import DarcsWeb.Html
-
--- Command-line options
-data Opts = Opts
- { optConfigFile :: Maybe FilePath
- , optDaemon :: Bool
- } deriving (Show)
-
-defaultOpts :: Opts
-defaultOpts = Opts
- { optConfigFile = Nothing
- , optDaemon = False
- }
-
-optDescr :: [OptDescr (Opts -> Opts)]
-optDescr =
- [ Option "c" ["config"]
- (ReqArg (\f o -> o { optConfigFile = Just f }) "FILE")
- "configuration file (required)"
- , Option "d" ["daemon"]
- (NoArg (\o -> o { optDaemon = True }))
- "run as daemon (log to syslog)"
- ]
-
-parseOpts :: [String] -> IO Opts
-parseOpts argv =
- case getOpt Permute optDescr argv of
- (o, _, []) -> return (foldl (flip id) defaultOpts o)
- (_, _, errs) -> do
- hPutStrLn stderr (concat errs)
- usage
-
-usage :: IO a
-usage = do
- prog <- getProgName
- hPutStrLn stderr (usageInfo ("Usage: " ++ prog ++ " -c FILE [-d]") optDescr)
- exitFailure
-
-main :: IO ()
-main = do
- args <- getArgs
- opts <- parseOpts args
-
- configPath <- case optConfigFile opts of
- Nothing -> usage
- Just f -> return f
-
- cfgMap <- parseConfigFile configPath
-
- let port = read (cfgLookupDefault "port" "3000" cfgMap)
- bind = cfgLookupDefault "bind" "127.0.0.1" cfgMap
- repoDir = cfgLookupDefault "repos" "." cfgMap
- title = T.pack (cfgLookupDefault "title" "DarcsWeb" cfgMap)
- staticDir = cfgLookupDefault "static" "static" cfgMap
-
- absRepoDir <- makeAbsolute repoDir >>= canonicalizePath
- absStaticDir <- makeAbsolute staticDir >>= canonicalizePath
-
- if optDaemon opts
- then do
- logStdout $ "Daemonizing (bind " ++ bind ++ ":" ++ show port ++ ")"
- hFlush stdout
- daemonize $ withSyslog "darcsweb" [LogPID] Daemon $ do
- let logf = logSyslog
- logf $ "Starting on " ++ bind ++ ":" ++ show port
- logf $ "Serving repositories from: " ++ absRepoDir
- let cfg = mkConfig port bind absRepoDir title absStaticDir logf
- runServer cfg
- else do
- let logf = logStdout
- logf $ "Starting on " ++ bind ++ ":" ++ show port
- logf $ "Serving repositories from: " ++ absRepoDir
- let cfg = mkConfig port bind absRepoDir title absStaticDir logf
- runServer cfg
-
-mkConfig :: Int -> String -> FilePath -> T.Text -> FilePath -> LogFunc
- -> DarcsWebConfig
-mkConfig port bind repoDir title staticDir logf = DarcsWebConfig
- { cfgPort = port
- , cfgBind = bind
- , cfgRepoDir = repoDir
- , cfgTitle = title
- , cfgStaticDir = staticDir
- , cfgLog = logf
- }
-
-runServer :: DarcsWebConfig -> IO ()
-runServer cfg = do
- let warpSettings = setPort (cfgPort cfg)
- $ setHost (fromString (cfgBind cfg))
- defaultSettings
- scottyConfig = defaultOptions { verbose = 0, settings = warpSettings }
- scottyOpts scottyConfig (app cfg)
-
--- | Log to stdout (foreground mode)
-logStdout :: String -> IO ()
-logStdout msg = putStrLn ("[darcsweb] " ++ msg)
-
--- | Log to syslog (daemon mode). Must be called inside withSyslog.
-logSyslog :: String -> IO ()
-logSyslog msg = withCStringLen msg $ \cstr ->
- syslog (Just Daemon) Info cstr
-
--- | Double-fork daemonize
-daemonize :: IO () -> IO ()
-daemonize action = do
- _ <- forkProcess $ do
- _ <- createSession
- _ <- forkProcess $ do
- redirectToDevNull
- action
- exitSuccess
- exitSuccess
-
-redirectToDevNull :: IO ()
-redirectToDevNull = do
- nullFd <- openDevNull
- _ <- dupTo nullFd stdInput
- _ <- dupTo nullFd stdOutput
- _ <- dupTo nullFd stdError
- return ()
-
--- Open /dev/null using Posix.openFd. Works across unix package versions
--- by using the FFI directly.
-foreign import ccall "open" c_open :: CString -> CInt -> IO CInt
-
-openDevNull :: IO Fd
-openDevNull = withCString "/dev/null" $ \cstr -> do
- fd <- c_open cstr 2 -- O_RDWR = 2
- return (Fd fd)
-
--- | Map file extension to MIME type for static assets
-mimeType :: String -> TL.Text
-mimeType ".css" = "text/css; charset=utf-8"
-mimeType ".js" = "application/javascript"
-mimeType ".png" = "image/png"
-mimeType ".jpg" = "image/jpeg"
-mimeType ".jpeg" = "image/jpeg"
-mimeType ".gif" = "image/gif"
-mimeType ".svg" = "image/svg+xml"
-mimeType ".ico" = "image/x-icon"
-mimeType ".woff" = "font/woff"
-mimeType ".woff2" = "font/woff2"
-mimeType _ = "application/octet-stream"
-
-app :: DarcsWebConfig -> ScottyM ()
-app cfg = do
- -- Serve static files strictly from the configured static directory.
- -- The route only matches /static/*, so no other paths are served.
- -- The resolved file path is canonicalized and verified to be inside
- -- the static directory before serving.
- get (regex "^/static/.*") $ serveStatic cfg
-
- -- Index page: list all repositories
- get "/" $ do
- now <- liftIO getCurrentTime
- repos <- liftIO $ listRepos (cfgRepoDir cfg)
- html $ TL.fromStrict $ renderRepoList now (cfgTitle cfg) repos
-
- -- Repository summary
- get "/repo/:name/summary" $ do
- name <- pathParam "name"
- withRepo cfg name $ \repoPath -> do
- now <- liftIO getCurrentTime
- repos <- liftIO $ listRepos (cfgRepoDir cfg)
- let mRepoInfo = findRepo name repos
- case mRepoInfo of
- Nothing -> do
- status status404
- html $ TL.fromStrict $ render404 "Repository not found."
- Just ri -> do
- patches <- liftIO $ getRepoPatches repoPath
- tags <- liftIO $ getRepoTags repoPath
- html $ TL.fromStrict $ renderRepoSummary now name ri (take 10 patches) tags
-
- -- Shortlog
- get "/repo/:name/shortlog" $ do
- name <- pathParam "name"
- withRepo cfg name $ \repoPath -> do
- now <- liftIO getCurrentTime
- patches <- liftIO $ getRepoPatches repoPath
- html $ TL.fromStrict $ renderShortLog now name patches
-
- -- Full log
- get "/repo/:name/log" $ do
- name <- pathParam "name"
- withRepo cfg name $ \repoPath -> do
- now <- liftIO getCurrentTime
- patches <- liftIO $ getRepoPatches repoPath
- html $ TL.fromStrict $ renderFullLog now name patches
-
- -- Tags
- get "/repo/:name/tags" $ do
- name <- pathParam "name"
- withRepo cfg name $ \repoPath -> do
- now <- liftIO getCurrentTime
- tags <- liftIO $ getRepoTags repoPath
- html $ TL.fromStrict $ renderTags now name tags
-
- -- Tree view (root)
- get "/repo/:name/tree" $ do
- name <- pathParam "name"
- withRepo cfg name $ \repoPath -> do
- mEntries <- liftIO $ getRepoTree repoPath ""
- case mEntries of
- Nothing -> do
- status status404
- html $ TL.fromStrict $ render404 "Tree not found."
- Just entries ->
- html $ TL.fromStrict $ renderTree name "" entries
-
- -- Tree view (subdirectory)
- get (regex "^/repo/([^/]+)/tree/(.+)$") $ do
- name <- pathParam "1"
- subPath <- pathParam "2"
- withRepo cfg name $ \repoPath -> do
- let sub = T.unpack subPath
- mEntries <- liftIO $ getRepoTree repoPath sub
- case mEntries of
- Nothing -> do
- status status404
- html $ TL.fromStrict $ render404 "Not found."
- Just entries ->
- html $ TL.fromStrict $ renderTree name subPath entries
-
- -- Blob view (file contents)
- get (regex "^/repo/([^/]+)/blob/(.+)$") $ do
- name <- pathParam "1"
- subPath <- pathParam "2"
- withRepo cfg name $ \repoPath -> do
- let sub = T.unpack subPath
- mContent <- liftIO $ getRepoBlob repoPath sub
- case mContent of
- Nothing -> do
- status status404
- html $ TL.fromStrict $ render404 "Not found."
- Just content ->
- html $ TL.fromStrict $ renderBlob name subPath content
-
- -- Single patch detail (diff view)
- get "/repo/:name/patch/:hash" $ do
- name <- pathParam "name"
- patchHash <- pathParam "hash"
- withRepo cfg name $ \repoPath -> do
- mPatch <- liftIO $ getRepoPatch repoPath patchHash
- case mPatch of
- Nothing -> do
- status status404
- html $ TL.fromStrict $ render404 "Patch not found."
- Just ps -> do
- now <- liftIO getCurrentTime
- html $ TL.fromStrict $ renderPatchDetail now name ps
-
- -- 404 fallback
- notFound $ do
- status status404
- html $ TL.fromStrict $ render404 "Page not found."
-
--- | Serve a static file, strictly jailed to cfgStaticDir.
--- Rejects any request whose canonicalized path escapes the static directory.
-serveStatic :: DarcsWebConfig -> ActionM ()
-serveStatic cfg = do
- req <- request
- -- Reconstruct the sub-path from the WAI pathInfo segments after "static"
- let segments = pathInfo req
- -- Drop the leading "static" segment
- subSegments = drop 1 segments
- -- Reject empty path, segments with "..", and any segment starting with "."
- if null subSegments
- || any (\s -> s == ".." || s == "." || T.null s) subSegments
- || any (T.isPrefixOf ".") subSegments
- then do
- status status404
- html $ TL.fromStrict $ render404 "Not found."
- else do
- let relPath = T.unpack (T.intercalate "/" subSegments)
- candidate = cfgStaticDir cfg </> relPath
- -- Canonicalize first, then check existence on the canonical path
- -- to avoid TOCTOU race conditions with symlinks.
- canonical <- liftIO $ canonicalizePath candidate
- let jailDir = addTrailingSlash (cfgStaticDir cfg)
- if not (jailDir `isPrefixOf` canonical)
- then do
- status status404
- html $ TL.fromStrict $ render404 "Not found."
- else do
- exists <- liftIO $ doesFileExist canonical
- if not exists
- then do
- status status404
- html $ TL.fromStrict $ render404 "Not found."
- else do
- setHeader "Content-Type" (mimeType (takeExtension canonical))
- file canonical
-
--- | Validate repo name and resolve path, then run an action.
--- Rejects names with path separators or "..".
--- Canonicalizes the resolved path and verifies it is inside the repos directory.
-withRepo :: DarcsWebConfig -> T.Text -> (FilePath -> ActionM ()) -> ActionM ()
-withRepo cfg name action
- | T.any (== '/') name || T.any (== '\\') name || ".." `T.isInfixOf` name
- || T.isPrefixOf "." name = do
- status status404
- html $ TL.fromStrict $ render404 "Invalid repository name."
- | otherwise = do
- let candidate = cfgRepoDir cfg </> T.unpack name
- isRepo <- liftIO $ isDarcsRepo candidate
- if not isRepo
- then do
- status status404
- html $ TL.fromStrict $ render404 "Repository not found."
- else do
- -- Canonicalize to defeat symlink escapes
- canonical <- liftIO $ canonicalizePath candidate
- let jailDir = addTrailingSlash (cfgRepoDir cfg)
- if jailDir `isPrefixOf` canonical
- then action canonical
- else do
- status status403
- html $ TL.fromStrict $ render404 "Access denied."
-
--- | Ensure a directory path ends with exactly one /
-addTrailingSlash :: FilePath -> FilePath
-addTrailingSlash [] = "/"
-addTrailingSlash p
- | last p == '/' = p
- | otherwise = p ++ "/"
-
-findRepo :: T.Text -> [RepoInfo] -> Maybe RepoInfo
-findRepo name = foldr (\r acc -> if riName r == name then Just r else acc) Nothing
-
rmfile ./src/Main.hs