napkin-0.5.14
Safe HaskellSafe-Inferred
LanguageGHC2021

Napkin.Run.BigQuery.AEDATypes

Synopsis

Documentation

data AEDANumerical b a #

Instances

Instances details
Bifunctor AEDANumerical # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

bimap :: (a -> b) -> (c -> d) -> AEDANumerical a c -> AEDANumerical b d #

first :: (a -> b) -> AEDANumerical a c -> AEDANumerical b c #

second :: (b -> c) -> AEDANumerical a b -> AEDANumerical a c #

Foldable (AEDANumerical a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

fold :: Monoid m => AEDANumerical a m -> m #

foldMap :: Monoid m => (a0 -> m) -> AEDANumerical a a0 -> m #

foldMap' :: Monoid m => (a0 -> m) -> AEDANumerical a a0 -> m #

foldr :: (a0 -> b -> b) -> b -> AEDANumerical a a0 -> b #

foldr' :: (a0 -> b -> b) -> b -> AEDANumerical a a0 -> b #

foldl :: (b -> a0 -> b) -> b -> AEDANumerical a a0 -> b #

foldl' :: (b -> a0 -> b) -> b -> AEDANumerical a a0 -> b #

foldr1 :: (a0 -> a0 -> a0) -> AEDANumerical a a0 -> a0 #

foldl1 :: (a0 -> a0 -> a0) -> AEDANumerical a a0 -> a0 #

toList :: AEDANumerical a a0 -> [a0] #

null :: AEDANumerical a a0 -> Bool #

length :: AEDANumerical a a0 -> Int #

elem :: Eq a0 => a0 -> AEDANumerical a a0 -> Bool #

maximum :: Ord a0 => AEDANumerical a a0 -> a0 #

minimum :: Ord a0 => AEDANumerical a a0 -> a0 #

sum :: Num a0 => AEDANumerical a a0 -> a0 #

product :: Num a0 => AEDANumerical a a0 -> a0 #

Traversable (AEDANumerical a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

traverse :: Applicative f => (a0 -> f b) -> AEDANumerical a a0 -> f (AEDANumerical a b) #

sequenceA :: Applicative f => AEDANumerical a (f a0) -> f (AEDANumerical a a0) #

mapM :: Monad m => (a0 -> m b) -> AEDANumerical a a0 -> m (AEDANumerical a b) #

sequence :: Monad m => AEDANumerical a (m a0) -> m (AEDANumerical a a0) #

ZeroNumber a => Applicative (AEDANumerical a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

pure :: a0 -> AEDANumerical a a0 #

(<*>) :: AEDANumerical a (a0 -> b) -> AEDANumerical a a0 -> AEDANumerical a b #

liftA2 :: (a0 -> b -> c) -> AEDANumerical a a0 -> AEDANumerical a b -> AEDANumerical a c #

(*>) :: AEDANumerical a a0 -> AEDANumerical a b -> AEDANumerical a b #

(<*) :: AEDANumerical a a0 -> AEDANumerical a b -> AEDANumerical a a0 #

Functor (AEDANumerical a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

fmap :: (a0 -> b) -> AEDANumerical a a0 -> AEDANumerical a b #

(<$) :: a0 -> AEDANumerical a b -> AEDANumerical a a0 #

(ZeroNumber a, ZeroNumber b) => Monoid (AEDANumerical a b) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

(ZeroNumber a, ZeroNumber b) => Semigroup (AEDANumerical a b) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Generic (AEDANumerical b a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Associated Types

type Rep (AEDANumerical b a) :: Type -> Type #

Methods

from :: AEDANumerical b a -> Rep (AEDANumerical b a) x #

to :: Rep (AEDANumerical b a) x -> AEDANumerical b a #

(Show b, Show a) => Show (AEDANumerical b a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

(Eq b, Eq a) => Eq (AEDANumerical b a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

(==) :: AEDANumerical b a -> AEDANumerical b a -> Bool #

(/=) :: AEDANumerical b a -> AEDANumerical b a -> Bool #

(Ord b, Ord a) => Ord (AEDANumerical b a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

type Rep (AEDANumerical b a) # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

data AEDANumberColumn #

Instances

Instances details
Generic AEDANumberColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Associated Types

type Rep AEDANumberColumn :: Type -> Type #

Show AEDANumberColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

DefaultOrdered AEDANumberColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

FromRecord AEDANumberColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

ToNamedRecord AEDANumberColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

ToRecord AEDANumberColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

Eq AEDANumberColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Ord AEDANumberColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

type Rep AEDANumberColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

type Rep AEDANumberColumn = D1 ('MetaData "AEDANumberColumn" "Napkin.Run.BigQuery.AEDATypes" "napkin-0.5.14-JrXUGmKUOt9J0meJSj0Kh4" 'False) ((C1 ('MetaCons "NumericalDouble" 'PrefixI 'True) (S1 ('MetaSel ('Just "_runNDouble") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical Double Double))) :+: C1 ('MetaCons "NumericalInt" 'PrefixI 'True) (S1 ('MetaSel ('Just "_runNInt") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical Double Int)))) :+: (C1 ('MetaCons "NumericalDateTime" 'PrefixI 'True) (S1 ('MetaSel ('Just "_runNDateTime") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical UTCTime UTCTime))) :+: (C1 ('MetaCons "NumericalDate" 'PrefixI 'True) (S1 ('MetaSel ('Just "_runNDate") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical Day Day))) :+: C1 ('MetaCons "NumericalTime" 'PrefixI 'True) (S1 ('MetaSel ('Just "_runNTime") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (AEDANumerical TimeOfDay TimeOfDay))))))

data AEDAHistogramColumn #

Instances

Instances details
Generic AEDAHistogramColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Associated Types

type Rep AEDAHistogramColumn :: Type -> Type #

Show AEDAHistogramColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

DefaultOrdered AEDAHistogramColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

FromRecord AEDAHistogramColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

ToNamedRecord AEDAHistogramColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

ToRecord AEDAHistogramColumn # 
Instance details

Defined in Napkin.Parse.AEDA.CSVParser

Eq AEDAHistogramColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Ord AEDAHistogramColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

type Rep AEDAHistogramColumn # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

type Rep AEDAHistogramColumn = D1 ('MetaData "AEDAHistogramColumn" "Napkin.Run.BigQuery.AEDATypes" "napkin-0.5.14-JrXUGmKUOt9J0meJSj0Kh4" 'False) ((C1 ('MetaCons "Categorical" 'PrefixI 'True) ((S1 ('MetaSel ('Just "_runCategoricalRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just "_runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "_distinctHistogram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Int)) :*: S1 ('MetaSel ('Just "_timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime))) :+: C1 ('MetaCons "Histogram" 'PrefixI 'True) ((S1 ('MetaSel ('Just "_runHistogramRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just "_runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "_numHistogram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Int)) :*: S1 ('MetaSel ('Just "_timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)))) :+: (C1 ('MetaCons "Time" 'PrefixI 'True) ((S1 ('MetaSel ('Just "_runTimeRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just "_runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "_runDatePart") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: (S1 ('MetaSel ('Just "_timeHistoGram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Int)) :*: S1 ('MetaSel ('Just "_timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)))) :+: C1 ('MetaCons "Decile" 'PrefixI 'True) ((S1 ('MetaSel ('Just "_runDecileRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name) :*: S1 ('MetaSel ('Just 
"_runHistogramColumnName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Name)) :*: (S1 ('MetaSel ('Just "_decile") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Int) :*: (S1 ('MetaSel ('Just "_decileHistogram") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Text, Double)) :*: S1 ('MetaSel ('Just "_timeHistogramRan") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime))))))

class AdditionNumber a where #

Methods

addition :: a -> a -> a #

Instances

Instances details
AdditionNumber Day # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

addition :: Day -> Day -> Day #

AdditionNumber UTCTime # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

addition :: UTCTime -> UTCTime -> UTCTime #

AdditionNumber TimeOfDay # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

AdditionNumber Double # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

addition :: Double -> Double -> Double #

AdditionNumber Int # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

addition :: Int -> Int -> Int #

class AdditionNumber a => ZeroNumber a where #

Methods

additionIdentity :: a #

Instances

Instances details
ZeroNumber Day # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

ZeroNumber UTCTime # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

ZeroNumber TimeOfDay # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

ZeroNumber Double # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

ZeroNumber Int # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

data TableDiagnostics #

Instances

Instances details
Generic TableDiagnostics # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Associated Types

type Rep TableDiagnostics :: Type -> Type #

Show TableDiagnostics # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Eq TableDiagnostics # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Ord TableDiagnostics # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Command AEDA TableDiagnostics BigQuery # 
Instance details

Defined in Napkin.Run.BigQuery

Command AEDA TableDiagnostics MsSql # 
Instance details

Defined in Napkin.Run.MsSql

Command AEDA TableDiagnostics Postgres # 
Instance details

Defined in Napkin.Run.Postgres

Command AEDA TableDiagnostics Redshift # 
Instance details

Defined in Napkin.Run.Redshift

Command AEDA TableDiagnostics Sqlite # 
Instance details

Defined in Napkin.Run.Sqlite

type Rep TableDiagnostics # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

type Rep TableDiagnostics = D1 ('MetaData "TableDiagnostics" "Napkin.Run.BigQuery.AEDATypes" "napkin-0.5.14-JrXUGmKUOt9J0meJSj0Kh4" 'False) (C1 ('MetaCons "TableDiagnostics" 'PrefixI 'True) ((S1 ('MetaSel ('Just "_runRelation") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Relation) :*: S1 ('MetaSel ('Just "_runDiagnosticDate") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 UTCTime)) :*: (S1 ('MetaSel ('Just "_runNumberColumns") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [AEDANumberColumn]) :*: (S1 ('MetaSel ('Just "_runCategoricalHistograms") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [AEDAHistogramColumn]) :*: S1 ('MetaSel ('Just "_runHistograms") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [AEDAHistogramColumn])))))

data NumericalStat #

Instances

Instances details
Show NumericalStat # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Eq NumericalStat # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

NumericalStats NumericalStat # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Stats NumericalStat # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Histogram NumericalStat CountGram # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Histogram NumericalStat DecileGram # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

data ColumnName #

Constructors

ColumnName (Ref Table) Name 

textValueToName :: Value -> Name #

Helper function to transform the result of a Query into a Name.

class Stats a where #

Methods

fromValue :: Value -> Maybe a #

toValue :: a -> Value #

class Stats a => NumericalStats a where #

Methods

fromNumber :: a -> SExp -> SExp #

toNumber :: a -> SExp -> SExp #

Instances

Instances details
NumericalStats NumericalStat # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

data BucketArgument #

Constructors

Date DatePart 
None 
Dec Int 

class Stats a => Histogram a b where #

Methods

bucketStrategy :: a -> BucketArgument -> ColumnName -> SExp -> Q (SExp, SExp) #

In order to get a histogram there must be some kind of bucketing strategy and counting strategy, one of which depends on the bucket size. bucketStrategy returns a (bucket, histogram name) tuple (the name of the histogram is based on the bucket). countingStrategy returns the count of the bucket (either a count, or whatever the particular strategy devises). additionalArguments defines which additional parameters will be included in the histogram query (date partitions in the case of Date NumericalStats with a Count histogram, deciles in the case of decile histograms).

bucketSize :: a -> SExp -> SExp -> SExp #

additionalArguments :: a -> [BucketArgument] #

countingStrategy :: a -> BucketArgument -> ColumnName -> SExp -> Q SExp #

bucketCountSubQuery :: forall a b. Histogram a b => a -> ColumnName -> Q (Ref Relation) #

bucketDecileSubQuery :: forall a b. (NumericalStats a, Histogram a b) => a -> Int -> ColumnName -> Q (Ref Relation) #

class HistogramCleanUp a where #

Methods

bucketStrategyCU :: a -> a -> a -> a #

In order to clean up a histogram we need the bucket size and the bucketing strategy.

bucketSizeCU :: a -> a -> a #

Instances

Instances details
HistogramCleanUp Double # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

HistogramCleanUp Int # 
Instance details

Defined in Napkin.Run.BigQuery.AEDATypes

Methods

bucketStrategyCU :: Int -> Int -> Int -> Int #

bucketSizeCU :: Int -> Int -> Int #

statQuery :: Bool -> Relation -> UTCTime -> NumericalStat -> ColumnName -> Query #

Creates a query to gather statistical information from a column (skew, stDev, kurtosis, etc). It is intended to be used on columns of Int, Float, Time, Day, TimeStamp or DateTime (they may be nullable). The function takes a NumericalStat to indicate how it should handle casting in the case of DateTime, TimeStamp, and Day columns.

subQ :: NumericalStats a => a -> ColumnName -> Query #

Gathers statistical primitives that will be used to calculate the various statistics that are supposed to be gathered. Rather than calculating Skew and Kurtosis outright, the various summed powers are calculated first. We can use the fact that multiplication distributes over addition, and expanding the powered subtraction in the numerator, to reduce the number of calculations performed overall.

skewAndKurtQ :: NumericalStats a => a -> ColumnName -> Query #

This function relies on the statistical primitives gathered by subQ to calculate skew and kurtosis. As a motivating example, the (unbiased) variance of a set X with n members is $\frac{1}{n-1}\sum_{x \in X}(x - \bar{x})^2$, where $\bar{x}$ is the average of X. Rather than perform this operation for each member of X, we can note that $\frac{1}{n-1}\sum_{x \in X}(x - \bar{x})^2 = \frac{1}{n-1}\sum_{x \in X}\left(x^2 - 2\bar{x}x + \bar{x}^2\right) = \frac{1}{n-1}\left(\sum_{x \in X}x^2 - 2\bar{x}\sum_{x \in X}x + n\bar{x}^2\right)$. For very large X we can forgo many unneeded divisions and subtractions and merely calculate based on the summed powers of the members of X. We do a similar trick for skew and kurtosis in this function.

histogramQuery :: forall a b. Histogram a b => BucketArgument -> Relation -> UTCTime -> a -> ColumnName -> Query #

Takes a Histogram stat type and creates a histogram depending on the BucketArgument used (which is defined in the Histogram instance). This accepts both Count histograms and Decile histograms.

queryColumnNameSpace :: Ref Table -> Query #

In order for statQuery and histogramQuery to work they need to be given a ColumnName (Ref projectName :| [dataSetName, tableName]) columnName. In order to decide which of the two to perform, each ColumnName also needs an associated data type. For each data set, BigQuery has a view INFORMATION_SCHEMA. INFORMATION_SCHEMA.COLUMNS can be queried for the information needed; queryColumnNameSpace produces this query. In the case of a QueryRelation or table sampling, the relation is first created as a table in the given data set so that the relation's info can be obtained.

mColumnToNumStatsQuery :: Bool -> NumericalStat -> Relation -> UTCTime -> [Value] -> Maybe Query #

These functions take a single [Value] and return a Just Query. These are filtered based on the stat type(s) provided to them. The result is Nothing in the case of a malformed response. mColumnToNumStatsQuery produces stat queries and mColumnToHistogramQuery produces histogram queries.

mColumnToHistogramQuery :: forall a b. (Histogram a b, Eq a) => [a] -> Relation -> UTCTime -> [Value] -> Maybe Query #

type NumberHistogramUpdateStrategy = (Name, Double, Double) #

The histogram is updated iteratively, starting at the bottom bucket and adding the bucket size recursively. For Int and Double the bucket size is calculated using the min and max of the table, either to guarantee 20 equal buckets or, in the case of Ints whose min and max differ only slightly, a bucket size of just 1. The update strategy is therefore the column name, the bottom bucket, and the bucket size.

type TimeHistogramUpdateStrategy = (Name, Day, Name) #

For time and date columns the bucket size is determined by the date partition. Therefore the update strategy is the column name, minimum value, and date partition.