module Index where

import           Data.HashMap.Strict (Map)
import qualified Data.HashMap.Strict as Map
import           Data.IntMap.Strict  (IntMap)
import qualified Data.IntMap.Strict  as IntMap
import           Data.IntSet         (IntSet)
import qualified Data.IntSet         as IntSet
import           Data.List           (foldl')
import           Data.Maybe          (catMaybes, fromMaybe)
import           Data.Text           (Text)

import qualified Analysis            as Analysis
import           Document            (Document)
import qualified Document            as Document

newtype TermDoc = TermDoc { unTermDoc :: Map Text IntSet }
  deriving Show

instance Semigroup TermDoc where
  (TermDoc mA) <> (TermDoc mB) =
    TermDoc (Map.unionWith IntSet.union mA mB)

instance Monoid TermDoc where
  mempty = TermDoc mempty

addTerm :: Int -> Text -> TermDoc -> TermDoc
addTerm doc term td =
  TermDoc
  . Map.insertWith IntSet.union term (IntSet.singleton doc)
  $ unTermDoc td


data Index = Index
  { termdoc :: !TermDoc
  , docs    :: !(IntMap Document)
  }
  deriving Show

instance Semigroup Index where
  (Index tdA dsA) <> (Index tdB dsB) = Index
    { termdoc = tdA <> tdB
    , docs = dsA <> dsB
    }

instance Monoid Index where
  mempty = Index mempty mempty


fromDocument :: Document -> Index
fromDocument doc = Index
  { docs =
      IntMap.singleton (Document.id doc) doc
  , termdoc =
      foldl' (flip (addTerm (Document.id doc)))
        mempty
        (Analysis.analyse (Document.fullText doc))
  }

{-
  Built in main with

  index <- runConduitRes $
    documents
    .| takeC 100000
    .| mapC Index.fromDocument
    .| foldlC (<>) mempty

-}