From d2e0dc12c72fa5d5721cf08bbc1e3b9e7cde309f Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe Date: Thu, 5 Aug 2021 22:30:51 -0700 Subject: [PATCH] v38 (Statistics): WIP --- BlockingQueuePoisonApple_stats.R | 44 ++++++++ BlockingQueuePoisonApple_stats.tla | 156 +++++++++++++++++++++++++++ BlockingQueuePoisonApple_statsSC.tla | 63 +++++++++++ 3 files changed, 263 insertions(+) create mode 100644 BlockingQueuePoisonApple_stats.R create mode 100644 BlockingQueuePoisonApple_stats.tla create mode 100644 BlockingQueuePoisonApple_statsSC.tla diff --git a/BlockingQueuePoisonApple_stats.R b/BlockingQueuePoisonApple_stats.R new file mode 100644 index 0000000..f1f3fa1 --- /dev/null +++ b/BlockingQueuePoisonApple_stats.R @@ -0,0 +1,44 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE) + +library(ggplot2) +library(stringr) +library(svglite) + +data <- read.csv(header=TRUE, sep = ",", file = args[1]) + +## Poor-mans version of grouping the data according to 'P', 'B', and 'C' by concatenating the 0-padded values. +data <- within(data, config <- paste(sep = "/", str_pad(data$P, 3, pad = "0"), str_pad(data$C, 3, pad = "0"),str_pad(data$B, 3, pad = "0"))) + +## Group data by the 'config' column and calculate mean for 'over' and 'under'. +ag.means <- aggregate(cbind(data$over, data$under), by=list(config=data$config), FUN=mean) +names(ag.means)[names(ag.means)=="V1"] <- "mean_over" +names(ag.means)[names(ag.means)=="V2"] <- "mean_under" + +## Group data by the 'config' column and calculate standard deviation for 'over' and 'under'. +ag.sd <- aggregate(cbind(data$over, data$under), by=list(config=data$config), FUN=sd) +names(ag.sd)[names(ag.sd)=="V1"] <- "sd_over" +names(ag.sd)[names(ag.sd)=="V2"] <- "sd_under" + +## Merge the two data frames by the common 'config' column. +df <- merge(ag.means, ag.sd) + +p <- ggplot(df, aes(x = config, y = mean_over, fill = config)) + + geom_errorbar(aes(ymin=mean_over-sd_over, ymax=mean_over+sd_over), width=.2, position=position_dodge(.9)) + + geom_bar(stat = "identity") + + geom_bar(aes(x= config, y = mean_under, fill=config), stat="identity",position="dodge") + + geom_errorbar(aes(ymin=mean_under-sd_under, ymax=mean_under+sd_under), width=.2, position=position_dodge(.9)) + + geom_hline(yintercept = 0, alpha = 0.3) + + #scale_x_discrete(guide = guide_axis(n.dodge=3))+ + scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+ + coord_flip() + + theme_minimal() + + theme(legend.position = "none") + + labs( + x = "Configuration |P|/|C|/B", + y = "Average under- and over-provisioning of consumers (positive |ac|>|ap|)", + title = paste( + "Traces:", nrow(data), format(Sys.time(), "(%a %b %d %X %Y)") + ) + ) +ggsave(gsub(".csv$", ".svg", args[1]), plot = last_plot(), bg = "white", device = "svg") \ No newline at end of file diff --git a/BlockingQueuePoisonApple_stats.tla b/BlockingQueuePoisonApple_stats.tla new file mode 100644 index 0000000..d788825 --- /dev/null +++ b/BlockingQueuePoisonApple_stats.tla @@ -0,0 +1,156 @@ +-------------------- MODULE BlockingQueuePoisonApple_stats ------------------- +EXTENDS BlockingQueuePoisonApple, CSV, TLC, TLCExt, Integers, IOUtils + +Max(a,b) == + IF a > b THEN a ELSE b + +Min(a,b) == + IF a < b THEN a ELSE b + +----------------------------------------------------------------------------- + +\* Collecting statistics does not work with an ordinary TLA+ spec such as +\* BlockingQueuePoisonApple because of its non-determinism. The statistics +\* would be all over the board and not meaningful. Instead, we break the +\* non-determinism by mimiking a real-world workload s.t. each Producer adds +\* a (fixed) number of elements to the queue. In other words, we remove +\* those behaviors from the state space that we do not expect to see in the +\* executions of an implementation of the TLA+ spec. +\* It would be more convenient to conjoin a liveness property that stipulates +\* that each producer adds N elements to the queue instead of amending the +\* original next-state relation Next with SNext below. However, I don't +\* see a way to get around adding the (auxiliary) pending variable that keeps +\* track of the number of elements added by each producer. +\* Alternatively, a more variable approach, compared to producers adding a +\* fixed number of elements to the queue, would be to add probabilities that +\* model the likelyhood of a Put and Terminate actions to occur (with +\* Put having a much higher probability). This is straighforward to model +\* with TLC's RandomElement operator by conjoining it to Put and +\* Terminate: +\* RandomElement(1..10) \in 1..9 +\* and +\* RandomElement(1..10) \in 10..10 +\* A more sophisticated example is at +\* https://github.com/lemmy/PageQueue/blob/master/PageQueue.tla + +\* Number of elements to put into the queue by each producer. +CONSTANT Work +ASSUME Work \in (Nat \ {0}) + +\* Count how many elements have been added by each producer. This variable +\* is how we model workloads. +VARIABLES pending +auxVars == <> + +\* The two auxilary variables are used to measure the over- and under-provisioning +\* of consumers. +\* TODO: Make Welford's algorithm for variance and standard deviation available +\* here (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) +\* It would free users from doing things in R, ... , and probably be useful +\* in 99% of collecting statistics. +VARIABLE over, under +statsVars == <> + +STypeInv == + /\ pending \in [ Producers -> 0..Work ] + /\ over \in Int + /\ under \in Int + +SInit == + \* Verbatim copy (redundant) of the original spec. + /\ buffer = <<>> + /\ waitSet = {} + /\ cons = [ c \in Consumers |-> Cardinality(Producers) ] + /\ prod = [ p \in Producers |-> Cardinality(Consumers) ] + \* Workload: + /\ pending = [ p \in Producers |-> Work ] + \* Statistics: + /\ over = 0 + /\ under = 0 + +SNext == + /\ \/ \E p \in Producers: /\ Put(p, p) + \* Decrement pending iff the buffer changed. + /\ IF buffer # buffer' + THEN pending' = [pending EXCEPT ![p]=@-1] + ELSE UNCHANGED pending + \/ \E c \in Consumers: /\ Get(c) + /\ UNCHANGED pending + \* Update the statistics. + /\ over' = Max(over, Cardinality(ac) - Cardinality(ap)) + /\ under' = Min(under, Cardinality(ac) - Cardinality(ap)) + +StatsSpec == + SInit /\ [][SNext]_<> + +----------------------------------------------------------------------------- + +CONSTANT CSVFile + +StatsInv == + \* Per-behavior stats written to CSVfile on global termination. Global + \* termination is defined by the union of the active producers and consumers + \* being the empty set. + /\ (ap \cup ac = {}) => + CSVWrite("%1$s,%2$s,%3$s,%4$s,%5$s,%6$s,%7$s", + << + \* TLCGet("stats").traces equals the number of traces generated + \* so far. Thus, individual records in the CSV can be aggregated + \* later. TLCGet("level") can be seen as the timestamp of the + \* record. + TLCGet("stats").traces, TLCGet("level"), + \* + Cardinality(Producers), Cardinality(Consumers), BufCapacity, + \* ...the actual statistics. + over, under + >>, CSVFile) + +----------------------------------------------------------------------------- + +\* Below, we read the values from the OS environment and turn them into constants +\* of this spec. The operators s2n and SetOfNModelValues should probably be +\* moved into IOUtils. + +s2n(str) == + CHOOSE n \in 0..2^16: ToString(n) = str + +SetOfNModelValues(lbl, N) == + { TLCModelValue(lbl \o ToString(i)) : i \in 1..N } + +----------------------------------------------------------------------------- + +B == + s2n(IOEnv.B) + +P == + SetOfNModelValues("p", s2n(IOEnv.P)) + +C == + SetOfNModelValues("c", s2n(IOEnv.C)) + +W == + s2n(IOEnv.W) + +Out == + IOEnv.Out + +=========================================================================== + +--------------------- CONFIG BlockingQueuePoisonApple_stats --------------------- +CONSTANTS + BufCapacity <- B + Producers <- P + Consumers <- C + Work <- W + CSVFile <- Out + Poison = Poison + +SPECIFICATION StatsSpec + +CHECK_DEADLOCK FALSE + +INVARIANT STypeInv + +INVARIANT StatsInv + +============================================================================= diff --git a/BlockingQueuePoisonApple_statsSC.tla b/BlockingQueuePoisonApple_statsSC.tla new file mode 100644 index 0000000..6e446c4 --- /dev/null +++ b/BlockingQueuePoisonApple_statsSC.tla @@ -0,0 +1,63 @@ +--------------------- MODULE BlockingQueuePoisonApple_statsSC --------------------- +EXTENDS TLC, IOUtils, Naturals, Sequences, CSV + +\* Assume a recent version of TLC that has support for all shenanigans below. +ASSUME TLCGet("revision").timestamp >= 1628118195 + +----------------------------------------------------------------------------- + +\* Filename for the CSV file that appears also in the R script and is passed +\* to the nested TLC instances that are forked below. +CSVFile == + "BQPA_B_" \o ToString(JavaTime) \o ".csv" + +\* Write column headers to CSV file at startup of TLC instance that "runs" +\* this script and forks the nested instances of TLC that simulate the spec +\* and collect the statistics. +ASSUME + CSVWrite("trace,level,P,C,B,over,under", <<>>, CSVFile) + +PlotStatistics == + \* Have TLC execute the R script on the generated CSV file. + LET proc == IOExec(<< + \* Find R on the current system (known to work on macOS and Linux). + "/usr/bin/env", "Rscript", + "BlockingQueuePoisonApple_stats.R", CSVFile>>) + IN \/ proc.exitValue = 0 + \/ PrintT(proc) \* Print stdout and stderr if R script fails. + +----------------------------------------------------------------------------- + +\* Command to fork nested TLC instances that simulate the spec and collect the +\* statistics. TLCGet("config").install gives the path to the TLC jar also +\* running this script. +Cmd == LET absolutePathOfTLC == TLCGet("config").install + IN <<"java", "-jar", + absolutePathOfTLC, + "-generate", "num=100", + "-config", "BlockingQueuePoisonApple_stats.tla", + "BlockingQueuePoisonApple_stats">> + +Success(e) == + /\ PrintT(e) + /\ PlotStatistics + +ASSUME \A conf \in + { r \in [ {"P","C","B"} -> {1,2,4,8,16,32}]: + \* Check only symmetric configs of Producers and Consumers. + \* Over/Under-provisioning for asymmetrics numbers of + \* Producers and Consumers. + r.P = r.C }: + LET ret == IOEnvExec(conf @@ [W |-> 16, Out |-> CSVFile], Cmd).exitValue + IN CASE ret = 0 -> Success(conf) + [] ret = 10 -> PrintT(<>) + [] ret = 12 -> PrintT(<>) + [] ret = 13 -> PrintT(<>) + [] OTHER -> Print(<>, FALSE) + +============================================================================= +---- CONFIG BlockingQueuePoisonAppleSC ---- +\* TLC always expects a configuration file, but an empty one will do in this case. +\* If this approach proves useful, TLC should be extended to launch without a config +\* file. +====