napkin-runtime
Safe HaskellNone
LanguageGHC2024

Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Synopsis

Documentation

data Mode #

Constructors

ModeAutomatic 
ModeManual 

Instances

Instances details
FromJSON Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

ToJSON Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Bounded Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Enum Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Methods

succ :: Mode -> Mode #

pred :: Mode -> Mode #

toEnum :: Int -> Mode #

fromEnum :: Mode -> Int #

enumFrom :: Mode -> [Mode] #

enumFromThen :: Mode -> Mode -> [Mode] #

enumFromTo :: Mode -> Mode -> [Mode] #

enumFromThenTo :: Mode -> Mode -> Mode -> [Mode] #

Generic Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Associated Types

type Rep Mode 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

type Rep Mode = D1 ('MetaData "Mode" "Napkin.Spec.Yaml.Preprocessors.DatasetHygiene" "napkin-runtime-2.0.0-9enTD0RIrF78Wys8ycSC3g" 'False) (C1 ('MetaCons "ModeAutomatic" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "ModeManual" 'PrefixI 'False) (U1 :: Type -> Type))

Methods

from :: Mode -> Rep Mode x #

to :: Rep Mode x -> Mode #

Show Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Methods

showsPrec :: Int -> Mode -> ShowS #

show :: Mode -> String #

showList :: [Mode] -> ShowS #

Eq Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

Methods

(==) :: Mode -> Mode -> Bool #

(/=) :: Mode -> Mode -> Bool #

type Rep Mode # 
Instance details

Defined in Napkin.Spec.Yaml.Preprocessors.DatasetHygiene

type Rep Mode = D1 ('MetaData "Mode" "Napkin.Spec.Yaml.Preprocessors.DatasetHygiene" "napkin-runtime-2.0.0-9enTD0RIrF78Wys8ycSC3g" 'False) (C1 ('MetaCons "ModeAutomatic" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "ModeManual" 'PrefixI 'False) (U1 :: Type -> Type))

preprocessor :: PreprocessorForYaml b #

Validates the consistency of dataset management states (managed vs unmanaged) for tables in a BigQuery-like environment. It supports two modes of operation:

  • Automatic: Infers managed/unmanaged state from the graph of tables and their references.
  • Manual: Uses explicit lists of managed and unmanaged datasets, with an optional strictness flag.

The check ensures:

  • No dataset is simultaneously marked as both managed and unmanaged.
  • All tables in a dataset are consistently managed (optionally final, i.e. having no dependencies in it) or unmanaged.
  • In strict mode, all datasets used in the napkin's spec must be explicitly listed as managed or unmanaged.
  • Inconsistencies and configuration errors are reported with detailed messages.

YAML

dataset_hygiene:
  mode: manual # default is automatic
  strict: false # optional, only in manual mode, default is true
  managed:
    - derived
    - training
  final:
    - training
  unmanaged:
    - inputs