darcsweb - app/Main.hs

summary shortlog log tree tags
[root] / app / Main.hs
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE OverloadedStrings #-}

module Main (main) where

import           Web.Scotty (scottyOpts, ScottyM, ActionM, get, notFound,
                             pathParam, html, status, liftIO,
                             Options(..), defaultOptions, file, setHeader,
                             request, regex)
import           Network.Wai.Handler.Warp (setPort, setHost, defaultSettings)
import           Network.HTTP.Types.Status (status404, status403)
import           Network.Wai (pathInfo)
import           Data.String (fromString)

import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import           Data.Time.Clock (getCurrentTime)
import           System.Console.GetOpt
import           System.Environment (getArgs, getProgName)
import           System.Directory (makeAbsolute, canonicalizePath,
                                   doesFileExist)
import           System.FilePath ((</>), takeExtension)
import           System.Exit (exitFailure, exitSuccess)
import           System.IO (hPutStrLn, stderr, hFlush, stdout)
import           System.Posix.Process (forkProcess, createSession)
import           System.Posix.IO (dupTo, stdInput, stdOutput, stdError)
import           System.Posix.Syslog (withSyslog, syslog, Facility(..),
                                      Priority(..), Option(..))
import           Foreign.C.String (withCStringLen, withCString, CString)
import           Foreign.C.Types (CInt(..))
import           System.Posix.Types (Fd(..))
import           Data.List (isPrefixOf)

import           DarcsWeb.Types
import           DarcsWeb.Config
import           DarcsWeb.Darcs
import           DarcsWeb.Html
import qualified PathPure

-- Command-line options
data Opts = Opts
  { optConfigFile :: Maybe FilePath
  , optDaemon     :: Bool
  } deriving (Show)

defaultOpts :: Opts
defaultOpts = Opts
  { optConfigFile = Nothing
  , optDaemon     = False
  }

optDescr :: [OptDescr (Opts -> Opts)]
optDescr =
  [ Option "c" ["config"]
      (ReqArg (\f o -> o { optConfigFile = Just f }) "FILE")
      "configuration file (required)"
  , Option "d" ["daemon"]
      (NoArg (\o -> o { optDaemon = True }))
      "run as daemon (log to syslog)"
  ]

parseOpts :: [String] -> IO Opts
parseOpts argv =
    case getOpt Permute optDescr argv of
      (o, _, [])   -> return (foldl (flip id) defaultOpts o)
      (_, _, errs) -> do
        hPutStrLn stderr (concat errs)
        usage

usage :: IO a
usage = do
    prog <- getProgName
    hPutStrLn stderr (usageInfo ("Usage: " ++ prog ++ " -c FILE [-d]") optDescr)
    exitFailure

main :: IO ()
main = do
    args <- getArgs
    opts <- parseOpts args

    configPath <- case optConfigFile opts of
      Nothing -> usage
      Just f  -> return f

    cfgMap <- parseConfigFile configPath

    let port      = read (cfgLookupDefault "port" "3000" cfgMap)
        bind      = cfgLookupDefault "bind" "127.0.0.1" cfgMap
        repoDir   = cfgLookupDefault "repos" "." cfgMap
        title     = T.pack (cfgLookupDefault "title" "DarcsWeb" cfgMap)
        staticDir = cfgLookupDefault "static" "static" cfgMap

    absRepoDir   <- makeAbsolute repoDir >>= canonicalizePath
    absStaticDir <- makeAbsolute staticDir >>= canonicalizePath

    if optDaemon opts
      then do
        logStdout $ "Daemonizing (bind " ++ bind ++ ":" ++ show port ++ ")"
        hFlush stdout
        daemonize $ withSyslog "darcsweb" [LogPID] Daemon $ do
          let logf = logSyslog
          logf $ "Starting on " ++ bind ++ ":" ++ show port
          logf $ "Serving repositories from: " ++ absRepoDir
          let cfg = mkConfig port bind absRepoDir title absStaticDir logf
          runServer cfg
      else do
        let logf = logStdout
        logf $ "Starting on " ++ bind ++ ":" ++ show port
        logf $ "Serving repositories from: " ++ absRepoDir
        let cfg = mkConfig port bind absRepoDir title absStaticDir logf
        runServer cfg

mkConfig :: Int -> String -> FilePath -> T.Text -> FilePath -> LogFunc
         -> DarcsWebConfig
mkConfig port bind repoDir title staticDir logf = DarcsWebConfig
    { cfgPort      = port
    , cfgBind      = bind
    , cfgRepoDir   = repoDir
    , cfgTitle     = title
    , cfgStaticDir = staticDir
    , cfgLog       = logf
    }

runServer :: DarcsWebConfig -> IO ()
runServer cfg = do
    let warpSettings = setPort (cfgPort cfg)
                     $ setHost (fromString (cfgBind cfg))
                       defaultSettings
        scottyConfig = defaultOptions { verbose = 0, settings = warpSettings }
    scottyOpts scottyConfig (app cfg)

-- | Log to stdout (foreground mode)
logStdout :: String -> IO ()
logStdout msg = putStrLn ("[darcsweb] " ++ msg)

-- | Log to syslog (daemon mode). Must be called inside withSyslog.
logSyslog :: String -> IO ()
logSyslog msg = withCStringLen msg $ \cstr ->
    syslog (Just Daemon) Info cstr

-- | Double-fork daemonize
daemonize :: IO () -> IO ()
daemonize action = do
    _ <- forkProcess $ do
      _ <- createSession
      _ <- forkProcess $ do
        redirectToDevNull
        action
      exitSuccess
    exitSuccess

redirectToDevNull :: IO ()
redirectToDevNull = do
    nullFd <- openDevNull
    _ <- dupTo nullFd stdInput
    _ <- dupTo nullFd stdOutput
    _ <- dupTo nullFd stdError
    return ()

-- Open /dev/null using Posix.openFd. Works across unix package versions
-- by using the FFI directly.
foreign import ccall "open" c_open :: CString -> CInt -> IO CInt

openDevNull :: IO Fd
openDevNull = withCString "/dev/null" $ \cstr -> do
    fd <- c_open cstr 2 -- O_RDWR = 2
    return (Fd fd)

-- | Map file extension to MIME type for static assets
mimeType :: String -> TL.Text
mimeType ".css"  = "text/css; charset=utf-8"
mimeType ".js"   = "application/javascript"
mimeType ".png"  = "image/png"
mimeType ".jpg"  = "image/jpeg"
mimeType ".jpeg" = "image/jpeg"
mimeType ".gif"  = "image/gif"
mimeType ".svg"  = "image/svg+xml"
mimeType ".ico"  = "image/x-icon"
mimeType ".woff" = "font/woff"
mimeType ".woff2" = "font/woff2"
mimeType _       = "application/octet-stream"

app :: DarcsWebConfig -> ScottyM ()
app cfg = do
    -- Serve static files strictly from the configured static directory.
    -- The route only matches /static/*, so no other paths are served.
    -- The resolved file path is canonicalized and verified to be inside
    -- the static directory before serving.
    get (regex "^/static/.*") $ serveStatic cfg

    -- Index page: list all repositories
    get "/" $ do
        now <- liftIO getCurrentTime
        repos <- liftIO $ listRepos (cfgRepoDir cfg)
        html $ TL.fromStrict $ renderRepoList now (cfgTitle cfg) repos

    -- Repository summary
    get "/repo/:name/summary" $ do
        name <- pathParam "name"
        withRepo cfg name $ \repoPath -> do
            now <- liftIO getCurrentTime
            repos <- liftIO $ listRepos (cfgRepoDir cfg)
            let mRepoInfo = findRepo name repos
            case mRepoInfo of
              Nothing -> do
                status status404
                html $ TL.fromStrict $ render404 "Repository not found."
              Just ri -> do
                patches <- liftIO $ getRepoPatches repoPath
                tags <- liftIO $ getRepoTags repoPath
                html $ TL.fromStrict $ renderRepoSummary now name ri (take 10 patches) tags

    -- Shortlog
    get "/repo/:name/shortlog" $ do
        name <- pathParam "name"
        withRepo cfg name $ \repoPath -> do
            now <- liftIO getCurrentTime
            patches <- liftIO $ getRepoPatches repoPath
            html $ TL.fromStrict $ renderShortLog now name patches

    -- Full log
    get "/repo/:name/log" $ do
        name <- pathParam "name"
        withRepo cfg name $ \repoPath -> do
            now <- liftIO getCurrentTime
            patches <- liftIO $ getRepoPatches repoPath
            html $ TL.fromStrict $ renderFullLog now name patches

    -- Tags
    get "/repo/:name/tags" $ do
        name <- pathParam "name"
        withRepo cfg name $ \repoPath -> do
            now <- liftIO getCurrentTime
            tags <- liftIO $ getRepoTags repoPath
            html $ TL.fromStrict $ renderTags now name tags

    -- Tree view (root)
    get "/repo/:name/tree" $ do
        name <- pathParam "name"
        withRepo cfg name $ \repoPath -> do
            mEntries <- liftIO $ getRepoTree repoPath ""
            case mEntries of
              Nothing -> do
                status status404
                html $ TL.fromStrict $ render404 "Tree not found."
              Just entries ->
                html $ TL.fromStrict $ renderTree name "" entries

    -- Tree view (subdirectory)
    get (regex "^/repo/([^/]+)/tree/(.+)$") $ do
        name <- pathParam "1"
        subPath <- pathParam "2"
        withRepo cfg name $ \repoPath -> do
            let sub = T.unpack subPath
            mEntries <- liftIO $ getRepoTree repoPath sub
            case mEntries of
              Nothing -> do
                status status404
                html $ TL.fromStrict $ render404 "Not found."
              Just entries ->
                html $ TL.fromStrict $ renderTree name subPath entries

    -- Blob view (file contents)
    get (regex "^/repo/([^/]+)/blob/(.+)$") $ do
        name <- pathParam "1"
        subPath <- pathParam "2"
        withRepo cfg name $ \repoPath -> do
            let sub = T.unpack subPath
            mContent <- liftIO $ getRepoBlob repoPath sub
            case mContent of
              Nothing -> do
                status status404
                html $ TL.fromStrict $ render404 "Not found."
              Just content ->
                html $ TL.fromStrict $ renderBlob name subPath content

    -- Single patch detail (diff view)
    get "/repo/:name/patch/:hash" $ do
        name <- pathParam "name"
        patchHash <- pathParam "hash"
        withRepo cfg name $ \repoPath -> do
            mPatch <- liftIO $ getRepoPatch repoPath patchHash
            case mPatch of
              Nothing -> do
                status status404
                html $ TL.fromStrict $ render404 "Patch not found."
              Just ps -> do
                now <- liftIO getCurrentTime
                html $ TL.fromStrict $ renderPatchDetail now name ps

    -- 404 fallback
    notFound $ do
        status status404
        html $ TL.fromStrict $ render404 "Page not found."

-- | Serve a static file, strictly jailed to cfgStaticDir.
--   Rejects any request whose canonicalized path escapes the static directory.
serveStatic :: DarcsWebConfig -> ActionM ()
serveStatic cfg = do
    req <- request
    -- Reconstruct the sub-path from the WAI pathInfo segments after "static"
    let segments = pathInfo req
        -- Drop the leading "static" segment
        subSegments = drop 1 segments
    -- Reject empty path, segments with "..", and any segment starting with "."
    if null subSegments
       || any (\s -> s == ".." || s == "." || T.null s) subSegments
       || any (T.isPrefixOf ".") subSegments
      then do
        status status404
        html $ TL.fromStrict $ render404 "Not found."
      else do
        let relPath = T.unpack (T.intercalate "/" subSegments)
            candidate = cfgStaticDir cfg </> relPath
        -- Canonicalize first, then check existence on the canonical path
        -- to avoid TOCTOU race conditions with symlinks.
        canonical <- liftIO $ canonicalizePath candidate
        let jailDir = addTrailingSlash (cfgStaticDir cfg)
        if not (jailDir `isPrefixOf` canonical)
          then do
            status status404
            html $ TL.fromStrict $ render404 "Not found."
          else do
            exists <- liftIO $ doesFileExist canonical
            if not exists
              then do
                status status404
                html $ TL.fromStrict $ render404 "Not found."
              else do
                setHeader "Content-Type" (mimeType (takeExtension canonical))
                file canonical

-- | Validate repo name and resolve path, then run an action.
--   Rejects names with path separators or "..".
--   Canonicalizes the resolved path and verifies it is inside the repos directory.
withRepo :: DarcsWebConfig -> T.Text -> (FilePath -> ActionM ()) -> ActionM ()
withRepo cfg name action
    | T.any (== '/') name || T.any (== '\\') name || ".." `T.isInfixOf` name
    || T.isPrefixOf "." name = do
        status status404
        html $ TL.fromStrict $ render404 "Invalid repository name."
    | otherwise = do
        let candidate = cfgRepoDir cfg </> T.unpack name
        isRepo <- liftIO $ isDarcsRepo candidate
        if not isRepo
          then do
            status status404
            html $ TL.fromStrict $ render404 "Repository not found."
          else do
            -- Canonicalize to defeat symlink escapes
            canonical <- liftIO $ canonicalizePath candidate
            let jailDir = addTrailingSlash (cfgRepoDir cfg)
            if jailDir `isPrefixOf` canonical
              then action canonical
              else do
                status status403
                html $ TL.fromStrict $ render404 "Access denied."

-- | Ensure a directory path ends with exactly one /
--   Uses formally verified implementation from Coq/Rocq.
addTrailingSlash :: FilePath -> FilePath
addTrailingSlash = PathPure.add_trailing_slash

findRepo :: T.Text -> [RepoInfo] -> Maybe RepoInfo
findRepo name = foldr (\r acc -> if riName r == name then Just r else acc) Nothing