Safe Haskell | None |
---|---|
Language | GHC2024 |
Napkin.Run.BigQuery.AEDATypes
Synopsis
- data AEDANumerical b a = Numerical {}
- data AEDANumberColumn
- = NumericalDouble { }
- | NumericalInt { }
- | NumericalDateTime { }
- | NumericalDate {
- date :: AEDANumerical Day Day }
- | NumericalTime { }
- data AEDAHistogramColumn
- = Categorical { }
- | Histogram { }
- | Time { }
- | Decile { }
- class AdditionNumber a where
- addition :: a -> a -> a
- toUTCTime :: Int -> UTCTime
- fromUTCTime :: UTCTime -> Int
- timeToSeconds' :: TimeOfDay -> Int
- secondsToTime' :: Int -> TimeOfDay
- class AdditionNumber a => ZeroNumber a where
- additionIdentity :: a
- data TableDiagnostics = TableDiagnostics {}
- data NumericalStat
- data CategoricalStat
- data ColumnName = ColumnName (Ref Table) Name
- toColumnName :: Value -> Value -> Value -> Value -> ColumnName
- textValueToName :: Value -> Name
- data DecileGram
- data CountGram
- class Stats a where
- class Stats a => NumericalStats a where
- timeToSeconds :: SExp -> SExp
- secondsToTime :: SExp -> SExp
- data BucketArgument
- isDec :: BucketArgument -> Bool
- isDatePartition :: DatePart -> Bool
- isDayPartition :: DatePart -> Bool
- isTimePartition :: DatePart -> Bool
- class Stats a => Histogram a (b :: k) where
- bucketStrategy :: a -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp)
- bucketSize :: a -> SExp -> SExp -> SExp
- additionalArguments :: a -> [BucketArgument]
- countingStrategy :: a -> BucketArgument -> ColumnName -> SExp -> Q SExp
- decileQ :: Int -> ColumnName -> Query
- minMaxQuery :: ColumnName -> Query
- bucketCountSubQuery :: forall {k} a (b :: k). Histogram a b => a -> ColumnName -> Q (Ref Relation)
- bucketDecileSubQuery :: forall {k} a (b :: k). (NumericalStats a, Histogram a b) => a -> Int -> ColumnName -> Q (Ref Relation)
- datePartList :: [DatePart]
- truncateDate :: DatePart -> SExp -> (SExp, SExp)
- truncateDay :: DatePart -> SExp -> (SExp, SExp)
- nameDatePart :: DatePart -> SExp
- class HistogramCleanUp a where
- bucketStrategyCU :: a -> a -> a -> a
- bucketSizeCU :: a -> a -> a
- statQuery :: Bool -> Relation -> UTCTime -> NumericalStat -> ColumnName -> Query
- subQ :: NumericalStats a => a -> ColumnName -> Query
- skewAndKurtQ :: NumericalStats a => a -> ColumnName -> Query
- histogramQuery :: forall {k} a (b :: k). Histogram a b => BucketArgument -> Relation -> UTCTime -> a -> ColumnName -> Query
- queryColumnNameSpace :: Ref Table -> Query
- mColumnToNumStatsQuery :: Bool -> NumericalStat -> Relation -> UTCTime -> [Value] -> Maybe Query
- mColumnToHistogramQuery :: forall {k} a (b :: k). (Histogram a b, Eq a) => [a] -> Relation -> UTCTime -> [Value] -> Maybe Query
- deconstructStatQueryResult :: [Map Text Value] -> [AEDANumberColumn]
- toNumericalRow :: [(Text, Value)] -> Maybe AEDANumberColumn
- toRow :: Map Text Value -> Maybe AEDAHistogramColumn
- deconstructHistogramQueryResult :: [Map Text Value] -> [AEDAHistogramColumn]
- type NumberHistogramUpdateStrategy = (Name, Double, Double)
- numberHistogramUpdateStrategy :: AEDANumberColumn -> Maybe NumberHistogramUpdateStrategy
- getNumberHistogramUpdateStrategies :: [AEDANumberColumn] -> [NumberHistogramUpdateStrategy]
- updateNumberHistogram :: Int -> NumberHistogramUpdateStrategy -> [AEDAHistogramColumn] -> [AEDAHistogramColumn]
- type TimeHistogramUpdateStrategy = (Name, Day, Name)
- timeHistogramUpdateStrategy :: AEDANumberColumn -> [TimeHistogramUpdateStrategy]
- getTimeHistogramUpdateStrategies :: [AEDANumberColumn] -> [TimeHistogramUpdateStrategy]
- updateTimeHistogram :: Int -> TimeHistogramUpdateStrategy -> [AEDAHistogramColumn] -> [AEDAHistogramColumn]
- weekFirstDay :: DayOfWeek -> Day -> Day
- monthFirstDay :: Day -> Day
- quarterFirstDay :: Day -> Day
- yearFirstDay :: Day -> Day
- nextMonth :: Day -> Day
- successorMonths :: Int -> Day -> Day
- retrieveReport :: Relation -> Day -> Ref Table -> Query
- retrieveReportHistogram :: Relation -> Day -> Ref Table -> Query
- retrieveReportNumerical :: Relation -> Day -> Ref Table -> Query
Documentation
data AEDANumerical b a #
Constructors
Numerical | |
Instances
data AEDANumberColumn #
Constructors
NumericalDouble | |
Fields | |
NumericalInt | |
Fields | |
NumericalDateTime | |
Fields | |
NumericalDate | |
Fields
| |
NumericalTime | |
Fields |
Instances
DefaultOrdered AEDANumberColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods headerOrder :: AEDANumberColumn -> Header # | |||||
FromRecord AEDANumberColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods parseRecord :: Record -> Parser AEDANumberColumn # | |||||
ToNamedRecord AEDANumberColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods | |||||
ToRecord AEDANumberColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods toRecord :: AEDANumberColumn -> Record # | |||||
Generic AEDANumberColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Associated Types
Methods from :: AEDANumberColumn -> Rep AEDANumberColumn x # to :: Rep AEDANumberColumn x -> AEDANumberColumn # | |||||
Show AEDANumberColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods showsPrec :: Int -> AEDANumberColumn -> ShowS # show :: AEDANumberColumn -> String # showList :: [AEDANumberColumn] -> ShowS # | |||||
Eq AEDANumberColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods (==) :: AEDANumberColumn -> AEDANumberColumn -> Bool # (/=) :: AEDANumberColumn -> AEDANumberColumn -> Bool # | |||||
Ord AEDANumberColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods compare :: AEDANumberColumn -> AEDANumberColumn -> Ordering # (<) :: AEDANumberColumn -> AEDANumberColumn -> Bool # (<=) :: AEDANumberColumn -> AEDANumberColumn -> Bool # (>) :: AEDANumberColumn -> AEDANumberColumn -> Bool # (>=) :: AEDANumberColumn -> AEDANumberColumn -> Bool # max :: AEDANumberColumn -> AEDANumberColumn -> AEDANumberColumn # min :: AEDANumberColumn -> AEDANumberColumn -> AEDANumberColumn # | |||||
type Rep AEDANumberColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes type Rep AEDANumberColumn = D1 ('MetaData "AEDANumberColumn" "Napkin.Run.BigQuery.AEDATypes" "napkin-backend-bigquery-aeda-2.0.0-AtiaEOUJx0p3WTdo4jBggo" 'False) ((C1 ('MetaCons "NumericalDouble" 'PrefixI 'True) (S1 ('MetaSel ('Just "double") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical Double Double))) :+: C1 ('MetaCons "NumericalInt" 'PrefixI 'True) (S1 ('MetaSel ('Just "int") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical Double Int)))) :+: (C1 ('MetaCons "NumericalDateTime" 'PrefixI 'True) (S1 ('MetaSel ('Just "dateTime") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical UTCTime UTCTime))) :+: (C1 ('MetaCons "NumericalDate" 'PrefixI 'True) (S1 ('MetaSel ('Just "date") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical Day Day))) :+: C1 ('MetaCons "NumericalTime" 'PrefixI 'True) (S1 ('MetaSel ('Just "time") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical TimeOfDay TimeOfDay)))))) |
data AEDAHistogramColumn #
Constructors
Categorical | |
Fields | |
Histogram | |
Fields
| |
Time | |
Fields
| |
Decile | |
Fields
|
Instances
DefaultOrdered AEDAHistogramColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods | |||||
FromRecord AEDAHistogramColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods | |||||
ToNamedRecord AEDAHistogramColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods | |||||
ToRecord AEDAHistogramColumn # | |||||
Defined in Napkin.Parse.AEDA.CSVParser Methods toRecord :: AEDAHistogramColumn -> Record # | |||||
Generic AEDAHistogramColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Associated Types
Methods from :: AEDAHistogramColumn -> Rep AEDAHistogramColumn x # to :: Rep AEDAHistogramColumn x -> AEDAHistogramColumn # | |||||
Show AEDAHistogramColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods showsPrec :: Int -> AEDAHistogramColumn -> ShowS # show :: AEDAHistogramColumn -> String # showList :: [AEDAHistogramColumn] -> ShowS # | |||||
Eq AEDAHistogramColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods (==) :: AEDAHistogramColumn -> AEDAHistogramColumn -> Bool # (/=) :: AEDAHistogramColumn -> AEDAHistogramColumn -> Bool # | |||||
Ord AEDAHistogramColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods compare :: AEDAHistogramColumn -> AEDAHistogramColumn -> Ordering # (<) :: AEDAHistogramColumn -> AEDAHistogramColumn -> Bool # (<=) :: AEDAHistogramColumn -> AEDAHistogramColumn -> Bool # (>) :: AEDAHistogramColumn -> AEDAHistogramColumn -> Bool # (>=) :: AEDAHistogramColumn -> AEDAHistogramColumn -> Bool # max :: AEDAHistogramColumn -> AEDAHistogramColumn -> AEDAHistogramColumn # min :: AEDAHistogramColumn -> AEDAHistogramColumn -> AEDAHistogramColumn # | |||||
type Rep AEDAHistogramColumn # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes type Rep AEDAHistogramColumn = D1 ('MetaData "AEDAHistogramColumn" "Napkin.Run.BigQuery.AEDATypes" "napkin-backend-bigquery-aeda-2.0.0-AtiaEOUJx0p3WTdo4jBggo" 'False) ((C1 ('MetaCons "Categorical" 'PrefixI 'True) ((S1 ('MetaSel ('Just "runCategoricalRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just "runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "distinctHistogram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Int)) :*: S1 ('MetaSel ('Just "timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime))) :+: C1 ('MetaCons "Histogram" 'PrefixI 'True) ((S1 ('MetaSel ('Just "runHistogramRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just "runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "numHistogram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Int)) :*: S1 ('MetaSel ('Just "timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)))) :+: (C1 ('MetaCons "Time" 'PrefixI 'True) ((S1 ('MetaSel ('Just "runTimeRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just "runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "runDatePart") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: (S1 ('MetaSel ('Just "timeHistoGram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Int)) :*: S1 ('MetaSel ('Just "timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)))) :+: C1 ('MetaCons "Decile" 'PrefixI 'True) ((S1 ('MetaSel ('Just "runDecileRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) 
(Rec0 Name) :*: S1 ('MetaSel ('Just "runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "decile") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Int) :*: (S1 ('MetaSel ('Just "decileHistogram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Double)) :*: S1 ('MetaSel ('Just "timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)))))) |
class AdditionNumber a where #
fromUTCTime :: UTCTime -> Int #
timeToSeconds' :: TimeOfDay -> Int #
secondsToTime' :: Int -> TimeOfDay #
class AdditionNumber a => ZeroNumber a where #
Methods
additionIdentity :: a #
Instances
ZeroNumber Day # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods additionIdentity :: Day # | |
ZeroNumber UTCTime # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods | |
ZeroNumber TimeOfDay # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods | |
ZeroNumber Double # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods | |
ZeroNumber Int # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods additionIdentity :: Int # |
data TableDiagnostics #
Constructors
TableDiagnostics | |
Fields |
Instances
Generic TableDiagnostics # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Associated Types
Methods from :: TableDiagnostics -> Rep TableDiagnostics x # to :: Rep TableDiagnostics x -> TableDiagnostics # | |||||
Show TableDiagnostics # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods showsPrec :: Int -> TableDiagnostics -> ShowS # show :: TableDiagnostics -> String # showList :: [TableDiagnostics] -> ShowS # | |||||
Eq TableDiagnostics # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods (==) :: TableDiagnostics -> TableDiagnostics -> Bool # (/=) :: TableDiagnostics -> TableDiagnostics -> Bool # | |||||
Ord TableDiagnostics # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes Methods compare :: TableDiagnostics -> TableDiagnostics -> Ordering # (<) :: TableDiagnostics -> TableDiagnostics -> Bool # (<=) :: TableDiagnostics -> TableDiagnostics -> Bool # (>) :: TableDiagnostics -> TableDiagnostics -> Bool # (>=) :: TableDiagnostics -> TableDiagnostics -> Bool # max :: TableDiagnostics -> TableDiagnostics -> TableDiagnostics # min :: TableDiagnostics -> TableDiagnostics -> TableDiagnostics # | |||||
type Rep TableDiagnostics # | |||||
Defined in Napkin.Run.BigQuery.AEDATypes type Rep TableDiagnostics = D1 ('MetaData "TableDiagnostics" "Napkin.Run.BigQuery.AEDATypes" "napkin-backend-bigquery-aeda-2.0.0-AtiaEOUJx0p3WTdo4jBggo" 'False) (C1 ('MetaCons "TableDiagnostics" 'PrefixI 'True) ((S1 ('MetaSel ('Just "relation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Relation) :*: S1 ('MetaSel ('Just "diagnosticDate") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)) :*: (S1 ('MetaSel ('Just "numberColumns") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [AEDANumberColumn]) :*: (S1 ('MetaSel ('Just "categoricalHistograms") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [AEDAHistogramColumn]) :*: S1 ('MetaSel ('Just "histograms") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [AEDAHistogramColumn]))))) |
data NumericalStat #
Constructors
IntStat | |
FloatStat | |
TimeStat | |
DateStat | |
DateTimeStat |
Instances
Show NumericalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods showsPrec :: Int -> NumericalStat -> ShowS # show :: NumericalStat -> String # showList :: [NumericalStat] -> ShowS # | |
Eq NumericalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods (==) :: NumericalStat -> NumericalStat -> Bool # (/=) :: NumericalStat -> NumericalStat -> Bool # | |
NumericalStats NumericalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes | |
Stats NumericalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes | |
Histogram NumericalStat CountGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: NumericalStat -> SExp -> SExp -> SExp # additionalArguments :: NumericalStat -> [BucketArgument] # countingStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # | |
Histogram NumericalStat DecileGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: NumericalStat -> SExp -> SExp -> SExp # additionalArguments :: NumericalStat -> [BucketArgument] # countingStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # |
data CategoricalStat #
Constructors
StringStat | |
BoolStat |
Instances
Show CategoricalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods showsPrec :: Int -> CategoricalStat -> ShowS # show :: CategoricalStat -> String # showList :: [CategoricalStat] -> ShowS # | |
Eq CategoricalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods (==) :: CategoricalStat -> CategoricalStat -> Bool # (/=) :: CategoricalStat -> CategoricalStat -> Bool # | |
Stats CategoricalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes | |
Histogram CategoricalStat CountGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: CategoricalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: CategoricalStat -> SExp -> SExp -> SExp # additionalArguments :: CategoricalStat -> [BucketArgument] # countingStrategy :: CategoricalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # |
data ColumnName #
Constructors
ColumnName (Ref Table) Name |
toColumnName :: Value -> Value -> Value -> Value -> ColumnName #
textValueToName :: Value -> Name #
Helper function to transform the result of a Query into a Name.
data DecileGram #
Instances
Histogram NumericalStat DecileGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: NumericalStat -> SExp -> SExp -> SExp # additionalArguments :: NumericalStat -> [BucketArgument] # countingStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # |
data CountGram #
Instances
Histogram CategoricalStat CountGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: CategoricalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: CategoricalStat -> SExp -> SExp -> SExp # additionalArguments :: CategoricalStat -> [BucketArgument] # countingStrategy :: CategoricalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # | |
Histogram NumericalStat CountGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: NumericalStat -> SExp -> SExp -> SExp # additionalArguments :: NumericalStat -> [BucketArgument] # countingStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # |
class Stats a where #
Instances
Stats CategoricalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes | |
Stats NumericalStat # | |
Defined in Napkin.Run.BigQuery.AEDATypes |
class Stats a => NumericalStats a where #
Instances
timeToSeconds :: SExp -> SExp #
secondsToTime :: SExp -> SExp #
isDec :: BucketArgument -> Bool #
isDatePartition :: DatePart -> Bool #
isDayPartition :: DatePart -> Bool #
isTimePartition :: DatePart -> Bool #
class Stats a => Histogram a (b :: k) where #
Methods
bucketStrategy :: a -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) #
In order to get a histogram there must be some kind of bucketing strategy and counting strategy, one of which depends on the bucket size. bucketStrategy returns a (bucket, histogram name) tuple (the name of the histogram is based on the bucket). countingStrategy returns the count of the bucket (either a plain count, or the particular strategy devised). additionalArguments defines what additional parameters will be included in the histogram query (date partitions in the case of Date NumericalStats with a Count histogram, deciles in the case of decile histograms).
bucketSize :: a -> SExp -> SExp -> SExp #
additionalArguments :: a -> [BucketArgument] #
countingStrategy :: a -> BucketArgument -> ColumnName -> SExp -> Q SExp #
Instances
Histogram CategoricalStat CountGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: CategoricalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: CategoricalStat -> SExp -> SExp -> SExp # additionalArguments :: CategoricalStat -> [BucketArgument] # countingStrategy :: CategoricalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # | |
Histogram NumericalStat CountGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: NumericalStat -> SExp -> SExp -> SExp # additionalArguments :: NumericalStat -> [BucketArgument] # countingStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # | |
Histogram NumericalStat DecileGram # | |
Defined in Napkin.Run.BigQuery.AEDATypes Methods bucketStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) # bucketSize :: NumericalStat -> SExp -> SExp -> SExp # additionalArguments :: NumericalStat -> [BucketArgument] # countingStrategy :: NumericalStat -> BucketArgument -> ColumnName -> SExp -> Q SExp # |
decileQ :: Int -> ColumnName -> Query #
minMaxQuery :: ColumnName -> Query #
bucketCountSubQuery :: forall {k} a (b :: k). Histogram a b => a -> ColumnName -> Q (Ref Relation) #
bucketDecileSubQuery :: forall {k} a (b :: k). (NumericalStats a, Histogram a b) => a -> Int -> ColumnName -> Q (Ref Relation) #
datePartList :: [DatePart] #
nameDatePart :: DatePart -> SExp #
class HistogramCleanUp a where #
Methods
bucketStrategyCU :: a -> a -> a -> a #
In order to clean up a histogram we need the bucket size and the bucketing strategy.
bucketSizeCU :: a -> a -> a #
Instances
HistogramCleanUp Double # | |
Defined in Napkin.Run.BigQuery.AEDATypes | |
HistogramCleanUp Int # | |
Defined in Napkin.Run.BigQuery.AEDATypes |
statQuery :: Bool -> Relation -> UTCTime -> NumericalStat -> ColumnName -> Query #
Creates a query to gather statistical information from a column (skew, stDev, kurtosis, etc.). It is intended to be used on columns of Int, Float, Time, Day, TimeStamp or DateTime (they may be nullable). The function takes a NumericalStat to indicate how it should handle casting in the case of DateTime, TimeStamp, and Day columns.
subQ :: NumericalStats a => a -> ColumnName -> Query #
Gathers the statistical primitives that will be used to calculate the various statistics that are supposed to be gathered. Rather than calculating Skew and Kurtosis outright, the various summed powers are calculated first. By using the fact that multiplication distributes over addition, and by expanding the powered subtraction in the numerator, we reduce the number of calculations performed overall.
skewAndKurtQ :: NumericalStats a => a -> ColumnName -> Query #
This function relies on the statistical primitives gathered by subQ to calculate skew and kurtosis. As a motivating example, the (unbiased) variance is the sum, over each member x of X, of square (x - average) / (n - 1), where n is the number of members of X. Rather than perform this operation for each member of X, we can note that Sum (square (x - average) / (n - 1)) = 1 / (n - 1) * Sum (square (x - average)) = 1 / (n - 1) * Sum (square x - 2 * average * x + square (average)) = 1 / (n - 1) * (Sum (square x) - 2 * average * (Sum x) + n * square (average)). For very large X we can forgo many unneeded divisions and subtractions and merely calculate based on the summed powers of the members of X. We do a similar trick for skew and kurtosis in this function.
histogramQuery :: forall {k} a (b :: k). Histogram a b => BucketArgument -> Relation -> UTCTime -> a -> ColumnName -> Query #
Takes a Histogram statType and creates a histogram depending on the BucketArgument used (which will be defined in Histogram instance). This accepts both Count Histograms and Decile Histograms.
queryColumnNameSpace :: Ref Table -> Query #
In order for statQuery
and histogramQuery
to work they need to be given a
ColumnName (Ref projectName :| [dataSetName, tableName]) columnName. In order to decide which to perform, each
ColumnName will also need an associated data type. For each dataset BigQuery has a view INFORMATION_SCHEMA.
The INFORMATION_SCHEMA.COLUMNS view can be queried for the information needed. queryColumnNameSpace produces this query.
In the case of a QueryRelation or table sampling, the relation is first created as a table in the given
dataset so the relation's info can be obtained.
mColumnToNumStatsQuery :: Bool -> NumericalStat -> Relation -> UTCTime -> [Value] -> Maybe Query #
These functions take a single [Value] and return a Just Query. These are filtered based on the stat-type(s) provided to them. They will be Nothing in the case of a malformed response. mColumnToNumStatsQuery produces stat queries and mColumnToHistogramQuery produces histogram queries.
mColumnToHistogramQuery :: forall {k} a (b :: k). (Histogram a b, Eq a) => [a] -> Relation -> UTCTime -> [Value] -> Maybe Query #
deconstructStatQueryResult :: [Map Text Value] -> [AEDANumberColumn] #
toNumericalRow :: [(Text, Value)] -> Maybe AEDANumberColumn #
type NumberHistogramUpdateStrategy = (Name, Double, Double) #
The histogram is updated iteratively, starting at the bottom bucket and adding the bucket size recursively. For Int and Double the bucket size is calculated using the min and max of the table, either to guarantee 20 equal buckets or, in the case of Ints with little difference between min and max, just 1. The update strategy is therefore the column name, the bottom bucket, and the bucket size.
updateNumberHistogram :: Int -> NumberHistogramUpdateStrategy -> [AEDAHistogramColumn] -> [AEDAHistogramColumn] #
type TimeHistogramUpdateStrategy = (Name, Day, Name) #
For time and date columns the bucket size is determined by the date partition. Therefore the update strategy is the column name, minimum value, and date partition.
updateTimeHistogram :: Int -> TimeHistogramUpdateStrategy -> [AEDAHistogramColumn] -> [AEDAHistogramColumn] #
weekFirstDay :: DayOfWeek -> Day -> Day #
monthFirstDay :: Day -> Day #
quarterFirstDay :: Day -> Day #
yearFirstDay :: Day -> Day #
successorMonths :: Int -> Day -> Day #