From 96cdee31c657877c229000e0c4cab40d04f74d1f Mon Sep 17 00:00:00 2001 From: Anton Date: Fri, 22 Aug 2014 16:14:10 +0100 Subject: [PATCH 01/26] ignore sublime text project files --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index f89ab74..a3ef85e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,8 @@ npm-debug.log clar.suite clar.cache ssmtest + +# Sublime text 2 files + +*.sublime-project +*.sublime-workspace From 771ccb192f695a58d7bd8aade4109b2a54db68f0 Mon Sep 17 00:00:00 2001 From: Anton Date: Sat, 23 Aug 2014 10:40:42 +0100 Subject: [PATCH 02/26] new sigmoid special function --- lib/validate.js | 2 +- src/C/core/special_functions.c | 8 ++++++++ src/C/core/ssm.h | 3 ++- src/Cmodel.py | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/validate.js b/lib/validate.js index 3b21b09..7523cc9 100644 --- a/lib/validate.js +++ b/lib/validate.js @@ -5,7 +5,7 @@ var fs = require('fs') var op = [ '+', '-', '*', '/', ',', '(', ')' ] , modelSchema = require('../json-schema/model-schema.json') - , funcsSSM = ['heaviside', 'ramp', 'correct_rate'] + , funcsSSM = ['heaviside', 'ramp', 'correct_rate', 'sigmoid'] , jsmaths = Object.getOwnPropertyNames(Math); function parseRate(rate){ diff --git a/src/C/core/special_functions.c b/src/C/core/special_functions.c index 43f73c4..722ae7c 100644 --- a/src/C/core/special_functions.c +++ b/src/C/core/special_functions.c @@ -46,3 +46,11 @@ double slowstep(double x, double d) { return ( x >= 0.0 ? ( x >= d ? d : x ) : 0.0 ); } + +/** +* Sigmoid function +*/ +double sigmoid(double x, double shape, double shift) +{ + return ( 1 / ( 1 + exp( - shape * ( x - shift ) ) ) ); +} diff --git a/src/C/core/ssm.h b/src/C/core/ssm.h index d5d9c52..998eed9 100644 --- a/src/C/core/ssm.h +++ b/src/C/core/ssm.h @@ -764,7 +764,8 @@ void ssm_workers_stop(ssm_workers_t *workers); double heaviside(double x); double ramp(double x); double slowstep(double x, double d); - +double sigmoid(double x, double shape, double shift); + /******************************/ /* kalman function signatures */ /******************************/ diff --git a/src/Cmodel.py b/src/Cmodel.py index 70305b6..df5036c 100755 --- a/src/Cmodel.py +++ b/src/Cmodel.py @@ -43,7 +43,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): self.op = set(['+', '-', '*', '/', ',', '(', ')']) ##!!!CAN'T contain square bracket '[' ']' self.reserved = set(['U', 'x', 't', 'E', 'LN2', 'LN10','LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2']) #JS Math Global Object - self.special_functions = set(['terms_forcing', 'heaviside', 'ramp', 'slowstep', 'sin', 'cos', 'correct_rate', 'ssm_correct_rate', 'sqrt', 'pow']) + self.special_functions = set(['terms_forcing', 'heaviside', 'ramp', 'slowstep', 'sigmoid', 'sin', 'cos', 'correct_rate', 'ssm_correct_rate', 'sqrt', 'pow']) self.remainder = sorted([x['remainder']['name'] for x in self.model['populations'] if 'remainder' in x]) self.ur = ['U'] + self.remainder From bd6981cb958b598a505a47cc7e28cfb2a98d4537 Mon Sep 17 00:00:00 2001 From: Anton Date: Thu, 28 Aug 2014 13:14:19 +0100 Subject: [PATCH 03/26] fix typo --- doc/doc.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc.tex b/doc/doc.tex index 23747c0..0b62f18 100644 --- a/doc/doc.tex +++ b/doc/doc.tex @@ -392,7 +392,7 @@ \subsubsection{\label{subsub:sde}Stochastic differential equations} {2pt} ][c]{rCl} dS_t &= &-\beta S_t \frac{I_t}{N} dt - \sigma \beta S_t \frac{I_t}{N} dB^{(1)}_t \\ -dI_t &=& (\beta S_t \frac{I_t}{N} d\Gamma_t -\gamma I_t)dt + \sigma \beta S_t \frac{I_t}{N} dB^{(1)}_t\\ +dI_t &=& (\beta S_t \frac{I_t}{N} - \gamma I_t)dt + \sigma \beta S_t \frac{I_t}{N} dB^{(1)}_t\\ dR_t &=& \gamma I_t dt \end{IEEEeqnarraybox} \right. From 8538934bba56ecaa1a6e241ef9f2a5aa6965f24c Mon Sep 17 00:00:00 2001 From: Anton Date: Sat, 30 Aug 2014 09:53:45 +0100 Subject: [PATCH 04/26] doc sigmoid function --- src/C/core/special_functions.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/C/core/special_functions.c b/src/C/core/special_functions.c index 722ae7c..4e701f3 100644 --- a/src/C/core/special_functions.c +++ b/src/C/core/special_functions.c @@ -48,7 +48,10 @@ double slowstep(double x, double d) } /** -* Sigmoid function + * Sigmoid function: decreases from 1 to 0. + * x is typically t + * shape controls the steep of the sigmoid (the greater the steeper) + * shift controls how far from 0 the midpoint is shifted */ double sigmoid(double x, double shape, double shift) { From f66d60d724ba2406f1f9f7cc45491f1d0d95519e Mon Sep 17 00:00:00 2001 From: Anton Date: Mon, 1 Sep 2014 11:50:12 +0100 Subject: [PATCH 05/26] new observation process SSM now understand the poisson distribution --- lib/validate.js | 124 ++++++++++++++++------------ src/C/templates/observed_template.c | 31 ++++++- src/Ccoder.py | 12 +-- src/Cmodel.py | 3 + 4 files changed, 108 insertions(+), 62 deletions(-) diff --git a/lib/validate.js b/lib/validate.js index 7523cc9..30d3bb1 100644 --- a/lib/validate.js +++ b/lib/validate.js @@ -1,18 +1,18 @@ var fs = require('fs') - , tv4 = require("tv4") - , _ = require('underscore') - , util = require('util'); +, tv4 = require("tv4") +, _ = require('underscore') +, util = require('util'); var op = [ '+', '-', '*', '/', ',', '(', ')' ] - , modelSchema = require('../json-schema/model-schema.json') - , funcsSSM = ['heaviside', 'ramp', 'correct_rate', 'sigmoid'] - , jsmaths = Object.getOwnPropertyNames(Math); +, modelSchema = require('../json-schema/model-schema.json') +, funcsSSM = ['heaviside', 'ramp', 'correct_rate', 'sigmoid'] +, jsmaths = Object.getOwnPropertyNames(Math); function parseRate(rate){ rate = rate.replace(/\s+/g, ''); var s = '' - , l = []; + , l = []; for (var i = 0; i< rate.length; i++){ if (op.indexOf(rate[i]) !== -1){ @@ -36,7 +36,7 @@ function parseRate(rate){ /** * check that the model is semantically complete and correct */ -module.exports = function(model){ + module.exports = function(model){ if( !tv4.validate(model, modelSchema) ){ // check that model complies with corresponding schema-json @@ -75,7 +75,7 @@ module.exports = function(model){ } }); incidences = _.uniq(incidences); - + // no ode for the moment if('ode' in model){ throw new Error("For the moment, SSM does not support ode's. This will come sortly."); @@ -121,10 +121,10 @@ module.exports = function(model){ model.reactions.forEach(function(r){ if( (remainders.indexOf(r.from) != -1) && ('accumulators' in r) ){ if( r.accumulators.length > 0){ - throw new Error(util.format("The incidence variable %s is ill-defined. As %s has been defined as a remainder state variable, reactions leaving from it will be ignored.",_.first(r.accumulators),r.from)); - } - } - }); + throw new Error(util.format("The incidence variable %s is ill-defined. As %s has been defined as a remainder state variable, reactions leaving from it will be ignored.",_.first(r.accumulators),r.from)); + } + } + }); // all t0's are the same. var t0s = model.observations.map(function(o){return o.start;}); @@ -153,7 +153,7 @@ module.exports = function(model){ } } }); - + // observations exactly map to data if (!_.isEqual(observations,data)){ @@ -162,69 +162,85 @@ module.exports = function(model){ // Observed variables either correspond to state variables or accumulators flows. var allowedTerms = op.concat(parameters, funcsSSM, incidences, jsmaths, 't'); //terms allowed in rates of the process model + model.observations.forEach(function(obs){ - parseRate(obs.mean).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); - }; - }); - parseRate(obs.sd).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In 'observations', 'sd' %s, the term %s cannot be understood by SSM. Please define it.",obs.sd,t)); - }; - }); + + if(obs.distribution == 'discretized_normal'){ + + parseRate(obs.mean).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); + }; + }); + + parseRate(obs.sd).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'sd' %s, the term %s cannot be understood by SSM. Please define it.",obs.sd,t)); + }; + }); + + } else if (obs.distribution == 'poisson'){ + + parseRate(obs.mean).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); + }; + + }); + + }; + }); - - // SSM only supports discretised_normal observation distribution so far + + // SSM only supports discretised_normal and poisson observation distributions so far model.observations.forEach(function(obs){ - if(obs.distribution != 'discretized_normal'){ - throw new Error("For the moment, SSM only supports 'discretized_normal' distributions for observations."); + if( (obs.distribution != 'discretized_normal') && (obs.distribution != 'poisson')){ + throw new Error("For the moment, SSM only supports 'discretized_normal' and 'poisson' distributions for observations."); } }); - - + if('sde' in model){ // Zero SDE drifts model.sde.drift.forEach(function(d){ if( d.f != 0.0 ){ - throw new Error("For the moment, SSM does not support non-zero drifts in SDE's."); - } - }); + throw new Error("For the moment, SSM does not support non-zero drifts in SDE's."); + } + }); // SDE transformations are understood by SSM allowedTerms = op.concat(parameters, funcsSSM, jsmaths,'t'); //terms allowed in rates of the process model model.sde.drift.forEach(function(r){ if ('transformation' in r){ - parseRate(r.transformation).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In SDE's, 'transformation' %s, the term %s cannot be understood by SSM. Please define it.",r.transformation, t)); - } - }); - } - }); + parseRate(r.transformation).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In SDE's, 'transformation' %s, the term %s cannot be understood by SSM. Please define it.",r.transformation, t)); + } + }); + } + }); // SDE dispersions are understood by SSM model.sde.dispersion.forEach(function(tmp){ tmp.forEach(function(r){ - if (isNaN(parseFloat(r))){ - parseRate(r).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In SDE's, the dispersion term %s cannot be understood by SSM. Please define it.",t)); - } - }); - } - }); + if (isNaN(parseFloat(r))){ + parseRate(r).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In SDE's, the dispersion term %s cannot be understood by SSM. Please define it.",t)); + } + }); + } + }); }); // Variables on which SDE's are defined must belong to parameters, but cannot be state variables. model.sde.drift.forEach(function(d){ if ( parameters.indexOf(d.name) == -1 ){ - throw new Error(util.format("The variable %s on which you define an SDE is not defined in 'inputs'. Please define it.",d.name)); - } - if ( (stateVariables.indexOf(d.name) != -1) || (popSizes.indexOf(d.name) != -1) ){ - throw new Error(util.format("SDE's cannot be defined on a state variable or population size (%s).",d.name)); - } - }); + throw new Error(util.format("The variable %s on which you define an SDE is not defined in 'inputs'. Please define it.",d.name)); + } + if ( (stateVariables.indexOf(d.name) != -1) || (popSizes.indexOf(d.name) != -1) ){ + throw new Error(util.format("SDE's cannot be defined on a state variable or population size (%s).",d.name)); + } + }); } }; diff --git a/src/C/templates/observed_template.c b/src/C/templates/observed_template.c index 19c4263..841a846 100644 --- a/src/C/templates/observed_template.c +++ b/src/C/templates/observed_template.c @@ -8,10 +8,12 @@ static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *p { double like; double *X = p_X->proj; + + {% if x.distribution == 'discretized_normal' %} + double gsl_mu = {{ x.mean }}; double gsl_sd = {{ x.sd }}; - // printf("mu %f sd %f y %f\n",gsl_mu, gsl_sd,y); if (y > 0.0) { like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd) - gsl_cdf_gaussian_P(y - 0.5 - gsl_mu, gsl_sd); @@ -19,9 +21,18 @@ static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *p like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd); } + {% elif x.distribution == 'poisson' %} + + double gsl_mu = {{ x.mean }}; + + like = gsl_ran_poisson_pdf(rint(y), gsl_mu); + + {% endif %} + return like; } + static double f_obs_mean_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t) { double *X = p_X->proj; @@ -34,15 +45,29 @@ static double f_obs_var_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_ return pow({{ x.sd }}, 2); } + static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t) { double *X = p_X->proj; + + {% if x.distribution == 'discretized_normal' %} + double gsl_mu = {{ x.mean }}; double gsl_sd = {{ x.sd }}; - double yobs= gsl_mu+gsl_ran_gaussian(calc->randgsl, gsl_sd); + double yobs = gsl_mu + gsl_ran_gaussian(calc->randgsl, gsl_sd); + + yobs = (yobs >0) ? yobs : 0.0; + + {% elif x.distribution == 'poisson' %} + + double gsl_mu = {{ x.mean }}; + + double yobs = gsl_ran_poisson(calc->randgsl, gsl_mu); + + {% endif %} - return (yobs >0) ? yobs : 0.0; + return yobs; } {% endfor %} diff --git a/src/Ccoder.py b/src/Ccoder.py index 99cd4bc..25e86b4 100755 --- a/src/Ccoder.py +++ b/src/Ccoder.py @@ -138,14 +138,16 @@ def parameters(self): def observed(self): - ##WARNING right now only the discretized normal is supported. - ##TODO: generalization for different distribution obs = copy.deepcopy(self.obs_model) - + for x in obs: - x['mean'] = self.make_C_term(x['mean'], True) - x['sd'] = self.make_C_term(x['sd'], True) + if x['distribution'] == 'discretized_normal': + x['mean'] = self.make_C_term(x['mean'], True) + x['sd'] = self.make_C_term(x['sd'], True) + elif x['distribution'] == 'poisson': + x['mean'] = self.make_C_term(x['mean'], True) + x['sd'] = self.make_C_term(x['sd'], True) return {'observed': obs} diff --git a/src/Cmodel.py b/src/Cmodel.py index df5036c..f6347c6 100755 --- a/src/Cmodel.py +++ b/src/Cmodel.py @@ -125,6 +125,9 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): #par_obs par_obs = set(); for o in observations: + if o['distribution'] == 'poisson': + o['sd'] = 'sqrt('+o['mean']+')' + for p in [o['mean'], o['sd']]: el = self.change_user_input(p) for e in el: From c66e16267b4fee5a6a857d137b97406c63fec566 Mon Sep 17 00:00:00 2001 From: Anton Date: Mon, 1 Sep 2014 11:51:25 +0100 Subject: [PATCH 06/26] developer documentation the README now links to the dev help page, useful for external contributors and for us to remember how to do stuff ;) --- README.md | 1 + doc/developers/dev_help.md | 161 +++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 doc/developers/dev_help.md diff --git a/README.md b/README.md index 77167c8..502c5b2 100644 --- a/README.md +++ b/README.md @@ -274,6 +274,7 @@ One observation model has to be defined per observed time-series. } ] +SSM currently implements the `discretized_normal` and `poisson` observation processes. See the developer [documentation](doc/developers/dev_help.md) to contribute and add your favourite distribution. ### Initial conditions diff --git a/doc/developers/dev_help.md b/doc/developers/dev_help.md new file mode 100644 index 0000000..095a658 --- /dev/null +++ b/doc/developers/dev_help.md @@ -0,0 +1,161 @@ +# Help documentation for SSM developers + +This document provides some help for developers who wants to contribute to the SSM library by adding new functionalities that require to modify the source code. + +## How to add a new observation process? + +SSM originally implements the `discretized_normal` observation process. Adding a new observation distribution require to modify four files: + +* `src/C/templates/observed_template.c` +* `lib/validate.js` +* `src/Ccoder.py` +* `src/Cmodel.py` + +Let's illustrate this by adding the `poisson` distribution, which is fully parametrized by its `mean` and would be specified in `ssm.json` as follows: + +```json +"observations" : [ + { + "name" : "incidence_observed", + "start" : "1789-07-14", + "distribution" : "poisson", + "mean" : "reporting_rate * incidence" + } + ] +``` + +where `reporting_rate` is the rate at which new cases are reported and would be defined in the `inputs`. + +### Modify `src/C/templates/observed_template.c` + +This file contains the template for the likelihood (`f_likelihood_tpl_`) and random generator (`f_obs_ran_tpl_`) functions of the observation process. Both need to be modified by adding `elif` commands to switch between distributions and make calls to suitable functions of the [GSL](https://www.gnu.org/software/gsl/manual/html_node/The-Poisson-Distribution.html) library to perform the core calculation. + +```C +static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t) +{ + double like; + double *X = p_X->proj; + + {% if x.distribution == 'discretized_normal' %} + + double gsl_mu = {{ x.mean }}; + double gsl_sd = {{ x.sd }}; + if (y > 0.0) { + like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd) - gsl_cdf_gaussian_P(y - 0.5 - gsl_mu, gsl_sd); + } else { + like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd); + } + + {% elif x.distribution == 'poisson' %} + + double gsl_mu = {{ x.mean }}; + like = gsl_ran_poisson_pdf(rint(y), gsl_mu); + + {% endif %} + + return like; +} +``` +```C +static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t) +{ + double *X = p_X->proj; + + {% if x.distribution == 'discretized_normal' %} + + double gsl_mu = {{ x.mean }}; + double gsl_sd = {{ x.sd }}; + double yobs = gsl_mu + gsl_ran_gaussian(calc->randgsl, gsl_sd); + yobs = (yobs >0) ? yobs : 0.0; + + {% elif x.distribution == 'poisson' %} + + double gsl_mu = {{ x.mean }}; + double yobs = gsl_ran_poisson(calc->randgsl, gsl_mu); + + {% endif %} + + return yobs; +} +``` +### Modify `lib/validate.js` + +This file contains the function `module.exports` that checks that the model is semantically complete and correct, and throw error messages otherwise. + +```javascript +model.observations.forEach(function(obs){ + + if(obs.distribution == 'discretized_normal'){ + + parseRate(obs.mean).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); + }; + }); + + parseRate(obs.sd).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'sd' %s, the term %s cannot be understood by SSM. Please define it.",obs.sd,t)); + }; + }); + + } else if (obs.distribution == 'poisson'){ + + parseRate(obs.mean).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); + }; + }); + }; +}); + +model.observations.forEach(function(obs){ + + if( (obs.distribution != 'discretized_normal') && (obs.distribution != 'poisson')){ + throw new Error("For the moment, SSM only supports 'discretized_normal' and 'poisson' distributions for observations."); + } +}); +``` + +### Modify `src/Ccoder.py` + +This file contains the method `observed` which defines all the parameters needed by SSM to handle the distribution. In particular, several algorithms in SSM perform Gaussian approximations, which require us to define the `mean` and `sd` (standard deviation) parameters. + +```python +def observed(self): + + obs = copy.deepcopy(self.obs_model) + + for x in obs: + if x['distribution'] == 'discretized_normal': + x['mean'] = self.make_C_term(x['mean'], True) + x['sd'] = self.make_C_term(x['sd'], True) + elif x['distribution'] == 'poisson': + x['mean'] = self.make_C_term(x['mean'], True) + x['sd'] = self.make_C_term(x['sd'], True) + + return {'observed': obs} +``` +Note that all the parameters of the distribution need to appear in addition to `mean` and `sd`. For instance, if one wants to add a binomial distribution parametrized by its probability `p` and sample size `n`, these two parameters must also be defined here. + +### Modify `src/Cmodel.py` + +Finally, the appropriate `mean` and `sd` of the Gaussian approximation must be defined in this file. We can use the fact that when `mean > 1000`, the Poisson distribution can be approximated by a normal distribution with the same `mean` and `sd = sqrt(mean)`. Thus, we only need to compute the standard deviation: + +```python +for o in observations: + if o['distribution'] == 'poisson': + o['sd'] = 'sqrt('+o['mean']+')' + + for p in [o['mean'], o['sd']]: + el = self.change_user_input(p) + for e in el: + if e not in self.op and e not in self.reserved and e not in self.special_functions and e not in self.par_sv and e not in self.par_noise and e not in self.par_proc and e not in self.par_forced and e not in self.par_inc: + try: + float(e) + except ValueError: + par_obs.add(e) + + self.par_obs = sorted(list(par_obs)) +``` + From dbcdf8239e1c66a04bd18b766dfe891663525f49 Mon Sep 17 00:00:00 2001 From: Anton Date: Mon, 1 Sep 2014 11:59:26 +0100 Subject: [PATCH 07/26] add json color syntaxing --- README.md | 267 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 137 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index 502c5b2..141c2aa 100644 --- a/README.md +++ b/README.md @@ -118,15 +118,16 @@ Parameters (priors) have to be specified in [JSON](http://json.org/) or [JSON-LD $ cat data/pr_v.json - { - "name": "normal", - "distributionParameter" : [ - { "name" : "mean", "value" : 12.5, "unitCode": "DAY" }, - { "name" : "sd", "value" : 3.8265, "unitCode": "DAY" }, - { "name" : "lower", "value" : 0, "unitCode": "DAY" } - ] - } - +```json +{ + "name": "normal", + "distributionParameter" : [ + { "name" : "mean", "value" : 12.5, "unitCode": "DAY" }, + { "name" : "sd", "value" : 3.8265, "unitCode": "DAY" }, + { "name" : "lower", "value" : 0, "unitCode": "DAY" } + ] +} +``` ## Model @@ -147,13 +148,14 @@ The first thing to do when writting a model is to _link_ it to the data it explains. $ cat ssm.json | json data - - "data": [ - { - "name": "cases", - "require": { "path": "data/data.csv", "fields": ["date", "cases"] }, - } - ] +```json +"data": [ + { + "name": "cases", + "require": { "path": "data/data.csv", "fields": ["date", "cases"] }, + } +] +``` The ```data.require``` property is a link pointing to a time-series. A link is an object with 3 properties: @@ -173,41 +175,42 @@ The same link objects are used to point to the resources that will be used as priors or covariate of the model. $ cat ssm.json | json inputs - - "inputs": [ - { - "name": "r0", - "description": "Basic reproduction number", - "require": { "name": "r0", "path": "data/r0.json" } - }, - { - "name": "v", - "description": "Recovery rate", - "require": { "name": "pr_v", "path": "data/pr_v.json" }, - "transformation": "1/pr_v", - "to_resource": "1/v" - }, - { - "name": "S", - "description": "Number of susceptible", - "require": { "name": "S", "path": "data/S.json" } - }, - { - "name": "I", - "description": "Number of infectious", - "require": { "name": "I", "path": "data/I.json" } - }, - { - "name": "R", - "description": "Number of recovered", - "require": { "name": "R", "path": "data/R.json" } - }, - { - "name": "rep", - "description": "Reporting rate", - "require": { "name": "rep", "path": "data/rep.json" } - } - ], +```json +"inputs": [ + { + "name": "r0", + "description": "Basic reproduction number", + "require": { "name": "r0", "path": "data/r0.json" } + }, + { + "name": "v", + "description": "Recovery rate", + "require": { "name": "pr_v", "path": "data/pr_v.json" }, + "transformation": "1/pr_v", + "to_resource": "1/v" + }, + { + "name": "S", + "description": "Number of susceptible", + "require": { "name": "S", "path": "data/S.json" } + }, + { + "name": "I", + "description": "Number of infectious", + "require": { "name": "I", "path": "data/I.json" } + }, + { + "name": "R", + "description": "Number of recovered", + "require": { "name": "R", "path": "data/R.json" } + }, + { + "name": "rep", + "description": "Reporting rate", + "require": { "name": "rep", "path": "data/rep.json" } + } +], +``` Note that this linking stage also allows to include some _transformations_ so that a relation can be established between your @@ -227,20 +230,20 @@ contains the following properties: the populations $ cat ssm.json | json populations - - "populations": [ - {"name": "NYC", "composition": ["S", "I", "R"]} - ] - +```json +"populations": [ + {"name": "NYC", "composition": ["S", "I", "R"]} +] +``` and the reactions, defining the process model $ cat ssm.json | json reactions - - "reactions": [ - {"from": "S", "to": "I", "rate": "r0/(S+I+R)*v*I", "description": "infection", "accumulators": ["Inc"]}, - {"from": "I", "to": "R", "rate": "v", "description":"recovery"} - ] - +```json +"reactions": [ + {"from": "S", "to": "I", "rate": "r0/(S+I+R)*v*I", "description": "infection", "accumulators": ["Inc"]}, + {"from": "I", "to": "R", "rate": "v", "description":"recovery"} +] +``` Note that the populations object is a list. Structured populatiols can be defined by appending terms to the list. @@ -263,17 +266,17 @@ point related to the accumulator state. One observation model has to be defined per observed time-series. $ cat ssm.json | json observations - - "observations": [ - { - "name": "cases", - "start": "2012-07-26", - "distribution": "discretized_normal", - "mean": "rep * Inc", - "sd": "sqrt(rep * ( 1.0 - rep ) * Inc )" - } - ] - +```json +"observations": [ + { + "name": "cases", + "start": "2012-07-26", + "distribution": "discretized_normal", + "mean": "rep * Inc", + "sd": "sqrt(rep * ( 1.0 - rep ) * Inc )" + } +] +``` SSM currently implements the `discretized_normal` and `poisson` observation processes. See the developer [documentation](doc/developers/dev_help.md) to contribute and add your favourite distribution. ### Initial conditions @@ -284,26 +287,26 @@ them need need to be defined in a separate JSON file typicaly named inference algorithms: $ cat theta.json - - "resources": [ - { - "name": "values", - "description": "initial values for the parameters", - "data": { - "r0": 25.0, - "pr_v": 11.0 - } - }, - { - "name": "covariance", - "description": "covariance matrix", - "data": { - "r0": {"r0": 0.04, "pr_v": 0.01}, - "pr_v": {"pr_v": 0.02, "r0": 0.01} - } - } - ] - +```json +"resources": [ + { + "name": "values", + "description": "initial values for the parameters", + "data": { + "r0": 25.0, + "pr_v": 11.0 + } + }, + { + "name": "covariance", + "description": "covariance matrix", + "data": { + "r0": {"r0": 0.04, "pr_v": 0.01}, + "pr_v": {"pr_v": 0.02, "r0": 0.01} + } + } +] +``` Only the diagonal terms are mandatory for the covariance matrix. @@ -362,15 +365,17 @@ Let's infer the parameters to get a better fit let's read the values found: $ cat mle.json | json resources | json -c "this.name=='values'" - [ - { - "name": "values", - "data": { - "pr_v": 19.379285906561037, - "r0": 29.528755614881494 - } - } - ] +```json +[ + { + "name": "values", + "data": { + "pr_v": 19.379285906561037, + "r0": 29.528755614881494 + } + } +] +``` Let's plot the evolution of the parameters: @@ -395,39 +400,41 @@ to realize that the fit is now much better. And now in one line: $ cat theta.json | ./simplex -M 10000 --trace | ./simul --traj | json resources | json -c "this.name=='values'" - [ - { - "name": "values", - "data": { - "r0": 29.528755614881494, - "pr_v": 19.379285906561037 - } - } - ] - +```json +[ + { + "name": "values", + "data": { + "r0": 29.528755614881494, + "pr_v": 19.379285906561037 + } + } +] +``` + Let's get some posteriors and sample some trajectories by adding a pmcmc at the end of our pipeline (we actualy add 2 of them to skip the convergence of the mcmc algorithm). $ cat theta.json | ./simplex -M 10000 | ./pmcmc -M 10000 | ./pmcmc -M 100000 --trace --traj | json resources | json -c 'this.name=="summary"' - - [ - { - "name": "summary", - "data": { - "id": 0, - "log_ltp": -186.70579009197556, - "AICc": 363.94320971360844, - "n_parameters": 2, - "AIC": 363.6765430469418, - "DIC": 363.6802334782078, - "log_likelihood": -179.8382715234709, - "sum_squares": null, - "n_data": 48 - } - } - ] - +```json +[ + { + "name": "summary", + "data": { + "id": 0, + "log_ltp": -186.70579009197556, + "AICc": 363.94320971360844, + "n_parameters": 2, + "AIC": 363.6765430469418, + "DIC": 363.6802334782078, + "log_likelihood": -179.8382715234709, + "sum_squares": null, + "n_data": 48 + } + } +] +``` Some posteriors plots (still with R) trace <- read.csv('trace_0.csv') From baa83c68badc49af1099a48cefcafe7f4ce13a0c Mon Sep 17 00:00:00 2001 From: Anton Date: Mon, 1 Sep 2014 12:04:22 +0100 Subject: [PATCH 08/26] add R color syntaxing --- README.md | 120 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 141c2aa..2ba02ab 100644 --- a/README.md +++ b/README.md @@ -346,8 +346,10 @@ Let's start by plotting the data with [R](http://www.r-project.org/): - data <- read.csv('../data/data.csv', na.strings='null') - plot(as.Date(data$date), data$cases, type='s') +```R +data <- read.csv('../data/data.csv', na.strings='null') +plot(as.Date(data$date), data$cases, type='s') +``` Let's run a first simulation: @@ -355,8 +357,10 @@ Let's run a first simulation: And add the simulated trajectory to our first plot - traj <- read.csv('X_0.csv') - lines(as.Date(traj$date), traj$cases, type='s', col='red') +```R +traj <- read.csv('X_0.csv') +lines(as.Date(traj$date), traj$cases, type='s', col='red') +``` Let's infer the parameters to get a better fit @@ -379,11 +383,13 @@ let's read the values found: Let's plot the evolution of the parameters: - trace <- read.csv('trace_0.csv') - layout(matrix(1:3,1,3)) - plot(trace$index, trace$r0, type='l') - plot(trace$index, trace$pr_v, type='l') - plot(trace$index, trace$fitness, type='l') +```R +trace <- read.csv('trace_0.csv') +layout(matrix(1:3,1,3)) +plot(trace$index, trace$r0, type='l') +plot(trace$index, trace$pr_v, type='l') +plot(trace$index, trace$fitness, type='l') +``` Now let's redo a simulation with these values (```mle.json```): @@ -391,9 +397,11 @@ Now let's redo a simulation with these values (```mle.json```): and replot the results: - plot(as.Date(data$date), data$cases, type='s') - traj <- read.csv('X_0.csv') - lines(as.Date(traj$date), traj$cases, type='s', col='red') +```R +plot(as.Date(data$date), data$cases, type='s') +traj <- read.csv('X_0.csv') +lines(as.Date(traj$date), traj$cases, type='s', col='red') +``` to realize that the fit is now much better. @@ -434,22 +442,27 @@ the convergence of the mcmc algorithm). } } ] -``` +``` + Some posteriors plots (still with R) - trace <- read.csv('trace_0.csv') - layout(matrix(1:2,1,2)) - hist(trace$r0) - hist(trace$pr_v) +```R + trace <- read.csv('trace_0.csv') + layout(matrix(1:2,1,2)) + hist(trace$r0) + hist(trace$pr_v) +``` The sampled trajectories - traj <- read.csv('X_0.csv') - plot(as.Date(data$date), data$cases, type='s') - samples <- unique(traj$index) - for(i in samples){ - lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') - } +```R + traj <- read.csv('X_0.csv') + plot(as.Date(data$date), data$cases, type='s') + samples <- unique(traj$index) + for(i in samples){ + lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') + } +``` ## Be cautious @@ -468,19 +481,21 @@ diagnostic outputs. For instance let's run a particle filter with the ```--diag``` option give us access to the prediction residuals and the effective sample size. Let's plot these quantities - diag <- read.csv('diag_0.csv') - layout(matrix(1:3,3,1)) +```R + diag <- read.csv('diag_0.csv') + layout(matrix(1:3,3,1)) - #data vs prediction - plot(as.Date(data$date), data$cases, type='p') - lines(as.Date(diag$date), diag$pred_cases, type='p', col='red') + #data vs prediction + plot(as.Date(data$date), data$cases, type='p') + lines(as.Date(diag$date), diag$pred_cases, type='p', col='red') - #prediction residuals - plot(as.Date(diag$date), diag$res_cases, type='p') - abline(h=0, lty=2) + #prediction residuals + plot(as.Date(diag$date), diag$res_cases, type='p') + abline(h=0, lty=2) - #effective sample size - plot(as.Date(diag$date), diag$ess, type='s') + #effective sample size + plot(as.Date(diag$date), diag$ess, type='s') +``` ## Parallel computing @@ -494,10 +509,12 @@ envelopes (```--hat```). Let's plot the trajectories - hat <- read.csv('hat_0.csv') - plot(as.Date(hat$date), hat$mean_cases, type='s') - lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2) - lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2) +```R + hat <- read.csv('hat_0.csv') + plot(as.Date(hat$date), hat$mean_cases, type='s') + lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2) + lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2) +``` Your machine is not enough ? You can use several. First let's transform our ```smc``` into a _server_ that will dispatch some work to @@ -540,20 +557,21 @@ xlim on our first plot. For the prediction we ran ```simul``` with the ```--hat``` option that will output empirical credible envelop instead of all the projected trajectories (as does ```--traj```). - - data <- read.csv('../data/data.csv', na.strings='null') - plot(as.Date(data$date), data$cases, type='s', xlim=c(min(as.Date(data$date)), as.Date('2013-12-25'))) - - traj <- read.csv('X_0.csv') #from the previous run - samples <- unique(traj$index) - for(i in samples){ - lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') - } - - hat <- read.csv('hat_0.csv') #from the current run - lines(as.Date(hat$date), hat$mean_cases, type='s' , col='blue') - lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2, col='blue') - lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2, col='blue') +```R + data <- read.csv('../data/data.csv', na.strings='null') + plot(as.Date(data$date), data$cases, type='s', xlim=c(min(as.Date(data$date)), as.Date('2013-12-25'))) + + traj <- read.csv('X_0.csv') #from the previous run + samples <- unique(traj$index) + for(i in samples){ + lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') + } + + hat <- read.csv('hat_0.csv') #from the current run + lines(as.Date(hat$date), hat$mean_cases, type='s' , col='blue') + lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2, col='blue') + lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2, col='blue') +``` License From 4e24ef720e356e62a5083f8283e3bb4d0826903d Mon Sep 17 00:00:00 2001 From: JDureau Date: Wed, 8 Oct 2014 00:23:01 +0200 Subject: [PATCH 09/26] bugfix in parallel version of pmcmc --- src/C/pmcmc/main_pmcmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/C/pmcmc/main_pmcmc.c b/src/C/pmcmc/main_pmcmc.c index aa556cb..a5be75d 100644 --- a/src/C/pmcmc/main_pmcmc.c +++ b/src/C/pmcmc/main_pmcmc.c @@ -48,7 +48,7 @@ static ssm_err_code_t run_smc(ssm_err_code_t (*f_pred) (ssm_X_t *, double, doubl zmq_send(workers->sender, &n, sizeof (int), ZMQ_SNDMORE); ssm_zmq_send_par(workers->sender, par, ZMQ_SNDMORE); - zmq_send(workers->sender, &j, sizeof (int), ZMQ_SNDMORE); + zmq_send(workers->sender, &j, sizeof (int), ZMQ_SNDMORE); ssm_zmq_send_X(workers->sender, D_J_X[n][j], ZMQ_SNDMORE); zmq_send(workers->sender, &(fitness->cum_status[j]), sizeof (ssm_err_code_t), 0); } @@ -83,7 +83,7 @@ static ssm_err_code_t run_smc(ssm_err_code_t (*f_pred) (ssm_X_t *, double, doubl } } } - + if(data->rows[n]->ts_nonan_length) { if(ssm_weight(fitness, data->rows[n], nav, n)) { ssm_systematic_sampling(fitness, calc[0], n); @@ -131,7 +131,7 @@ int main(int argc, char *argv[]) ssm_f_pred_t f_pred = ssm_get_f_pred(nav); - ssm_workers_t *workers = ssm_workers_start(D_J_X, &par, data, calc, fitness, f_pred, nav, opts, SSM_WORKER_D_X | SSM_WORKER_FITNESS); + ssm_workers_t *workers = ssm_workers_start(D_J_X, &par_proposed, data, calc, fitness, f_pred, nav, opts, SSM_WORKER_D_X | SSM_WORKER_FITNESS); ///////////////////////// // initialization step // From d1d7ac9e03f69e7508efbad42b0ac1f4f8bae86d Mon Sep 17 00:00:00 2001 From: Anton Date: Tue, 14 Oct 2014 13:17:46 +0100 Subject: [PATCH 10/26] fix bug when parsing the dispersion matrix Dispersion matrix must be readen in the same order as the drift list. Accordingly, the drift variables must not be sorted. --- src/Cmodel.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Cmodel.py b/src/Cmodel.py index f6347c6..94e9060 100755 --- a/src/Cmodel.py +++ b/src/Cmodel.py @@ -139,7 +139,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): self.par_obs = sorted(list(par_obs)) - ##par_disp (parameter involve **only** in dispertion (nowhere else) + ##par_disp (parameter involve **only** in dispersion (nowhere else)) disp = [x for subl in sde['dispersion'] for x in subl if x != 0] if 'dispersion' in sde else [] par_disp = set() for x in disp: @@ -153,13 +153,14 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): self.par_disp = sorted(list(par_disp)) - #par_diff (state variable for diffusions) + ##par_diff (state variable for diffusions) par_diff = [] if sde: for x in sde.get('drift', []): par_diff.append(x['name']) - self.par_diff = ['diff__' + x for x in sorted(par_diff)] + ##NOTE: do not sort par_diff as the dispersion matrix is readen in the unsorted order + self.par_diff = ['diff__' + x for x in par_diff] ##par_other par_ssm = self.par_sv + self.par_inc + self.remainder + self.par_diff + self.par_noise + self.par_proc + self.par_obs + self.par_forced + self.par_disp From fd4f7a6ad3d61eec520ff9522eab463f7445e5b6 Mon Sep 17 00:00:00 2001 From: JDureau Date: Tue, 4 Nov 2014 23:13:34 +0100 Subject: [PATCH 11/26] exp and log in special functions --- src/Cmodel.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Cmodel.py b/src/Cmodel.py index 94e9060..5979d85 100755 --- a/src/Cmodel.py +++ b/src/Cmodel.py @@ -43,7 +43,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): self.op = set(['+', '-', '*', '/', ',', '(', ')']) ##!!!CAN'T contain square bracket '[' ']' self.reserved = set(['U', 'x', 't', 'E', 'LN2', 'LN10','LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2']) #JS Math Global Object - self.special_functions = set(['terms_forcing', 'heaviside', 'ramp', 'slowstep', 'sigmoid', 'sin', 'cos', 'correct_rate', 'ssm_correct_rate', 'sqrt', 'pow']) + self.special_functions = set(['terms_forcing', 'heaviside', 'ramp', 'slowstep', 'sigmoid', 'sin', 'cos', 'correct_rate', 'ssm_correct_rate', 'sqrt', 'pow', 'exp', 'log']) self.remainder = sorted([x['remainder']['name'] for x in self.model['populations'] if 'remainder' in x]) self.ur = ['U'] + self.remainder @@ -66,7 +66,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): p['require']['name'] = p['name'] #semantic to SSM transfo - prior = { 'distribution': resource['name'] if resource['name'] != 'dirac' else 'fixed' } + prior = { 'distribution': resource['name'] if resource['name'] != 'dirac' else 'fixed' } for x in resource['distributionParameter']: prior[x['name'] if 'name' in x else 'value'] = x['value'] @@ -183,7 +183,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): self.map_name2prior_name[p['name']] = p['require']['name'] else: self.map_name2prior_name[p['name']] = p['name'] - + # proc_model self.proc_model = copy.deepcopy(reactions) @@ -264,12 +264,12 @@ def pow2star(self, term): if not start and not terms[0] == 'pow': return term - pos = 1 #counter for open parenthesis + pos = 1 #counter for open parenthesis ind = start+2 #skip first parenthesis lhs = '' rhs = '' left = True - while (ind < len(terms)): + while (ind < len(terms)): if terms[ind] == '(': pos += 1 if terms[ind] == ')': @@ -341,7 +341,7 @@ def generator_C(self, term, no_correct_rate, force_par=False, xify=None, human=F ind += 2 #skip first parenthesis stack.append({"f": myf, "pos": 1}) #pos: counter for open parenthesis else: - if stack: + if stack: if terms[ind] == '(': stack[-1]['pos'] += 1 Cterm += '(' @@ -357,7 +357,7 @@ def generator_C(self, term, no_correct_rate, force_par=False, xify=None, human=F Cterm += ')' else: Cterm += ')' - + else: Cterm += ')' From 3e0953567bd662e1eeb7a908855ac38c64519cf0 Mon Sep 17 00:00:00 2001 From: JDureau Date: Sat, 24 Jan 2015 09:39:44 +0100 Subject: [PATCH 12/26] errors get printed --- examples/tutorial/ssm.json | 36 ++++++++++++++++++------------------ lib/install.js | 19 +++++++++++++++++-- src/Ccoder.py | 5 +++++ 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/examples/tutorial/ssm.json b/examples/tutorial/ssm.json index e9f0b4c..654a940 100644 --- a/examples/tutorial/ssm.json +++ b/examples/tutorial/ssm.json @@ -1,43 +1,43 @@ { "data": [ - { - "name": "cases", + { + "name": "cases", "require": { "path": "data/data.csv", "fields": ["date", "cases"] } } ], - + "inputs": [ { - "name": "r0", - "description": "Basic reproduction number", - "require": { "name": "r0", "path": "data/r0.json" } + "name": "r0", + "description": "Basic reproduction number", + "require": { "name": "r0", "path": "data/r0.json" } }, - { + { "name": "v", "description": "Recovery rate", "require": { "name": "pr_v", "path": "data/pr_v.json" }, "transformation": "1/pr_v", - "to_resource": "1/v" + "to_resource": "1/v" }, { - "name": "S", + "name": "S", "description": "Number of susceptible", - "require": { "name": "S", "path": "data/S.json" } + "require": { "name": "S", "path": "data/S.json" } }, - { + { "name": "I", - "description": "Number of infectious", - "require": { "name": "I", "path": "data/I.json" } + "description": "Number of infectious", + "require": { "name": "I", "path": "data/I.json" } }, - { - "name": "R", + { + "name": "R", "description": "Number of recovered", - "require": { "name": "R", "path": "data/R.json" } + "require": { "name": "R", "path": "data/R.json" } }, - { + { "name": "rep", "description": "Reporting rate", - "require": { "name": "rep", "path": "data/rep.json" } + "require": { "name": "rep", "path": "data/rep.json" } } ], diff --git a/lib/install.js b/lib/install.js index 871d875..6df291a 100644 --- a/lib/install.js +++ b/lib/install.js @@ -28,17 +28,30 @@ module.exports = function(dpkgRoot, dpkg, pathModel, keepSources, emitter, callb "import os", "import sys", "import json", + "class SsmError(Exception):", + "\tdef __init__(self, value):", + "\t\tself.value = value", + "\tdef __str__(self):", + "\t\treturn repr(self.value)", "sys.path.append('" + path.resolve(__dirname, '..', 'src') + "')", "from Builder import Builder", "path_model_coded = '" + pathModel + "'", "dpkg = json.load(open('" + path.join(dpkgRoot, 'ssm.json') + "'))", + // "b = Builder(path_model_coded, '"+ dpkgRoot + "', dpkg)", + // "b.prepare()", + // "b.code()", + // "b.write_data()", "try:", "\tb = Builder(path_model_coded, '"+ dpkgRoot + "', dpkg)", "\tb.prepare()", "\tb.code()", "\tb.write_data()", - "except:", - "\tsys.exit(1)" + "except SsmError as err:", + // "\tprint err", + "\tsys.exit(1)"//, + // "except:", + // "\tprint 'in default'", + // "\tsys.exit(1)" ].join('\n'); var templater = spawn('python', ['-c', tplter]); @@ -53,6 +66,8 @@ module.exports = function(dpkgRoot, dpkg, pathModel, keepSources, emitter, callb templater.on('exit', function (code) { + // console.log('CODE', code); + if(code === 0) { var make = spawn('make', ['install'], {cwd: path.join(pathModel, 'C', 'templates')}); make.stdout.setEncoding('utf8'); diff --git a/src/Ccoder.py b/src/Ccoder.py index 25e86b4..8c7cb33 100755 --- a/src/Ccoder.py +++ b/src/Ccoder.py @@ -95,6 +95,11 @@ def parameters(self): pdict = {x['name']:x for x in parameters} sdict = {'diff__' + x['name']: x for x in drifts} + for x in pars: + if x not in pdict: + raise SsmError('An input is missing for parameter %s' %x) + + f_remainders = {} f_remainders_par = {} f_remainders_var = {} From 7acf9f23d67a87d7b6295d30b79d9c5264d8ce28 Mon Sep 17 00:00:00 2001 From: JDureau Date: Sat, 24 Jan 2015 09:40:47 +0100 Subject: [PATCH 13/26] cleanup --- lib/install.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/install.js b/lib/install.js index 6df291a..573e915 100644 --- a/lib/install.js +++ b/lib/install.js @@ -37,21 +37,13 @@ module.exports = function(dpkgRoot, dpkg, pathModel, keepSources, emitter, callb "from Builder import Builder", "path_model_coded = '" + pathModel + "'", "dpkg = json.load(open('" + path.join(dpkgRoot, 'ssm.json') + "'))", - // "b = Builder(path_model_coded, '"+ dpkgRoot + "', dpkg)", - // "b.prepare()", - // "b.code()", - // "b.write_data()", "try:", "\tb = Builder(path_model_coded, '"+ dpkgRoot + "', dpkg)", "\tb.prepare()", "\tb.code()", "\tb.write_data()", "except SsmError as err:", - // "\tprint err", "\tsys.exit(1)"//, - // "except:", - // "\tprint 'in default'", - // "\tsys.exit(1)" ].join('\n'); var templater = spawn('python', ['-c', tplter]); From f66bd7094299fee9cdcfabf8eb028158ee3c6399 Mon Sep 17 00:00:00 2001 From: Joseph Dureau Date: Tue, 27 Jan 2015 08:49:56 +0100 Subject: [PATCH 14/26] Update README.md --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index 2ba02ab..96de22f 100644 --- a/README.md +++ b/README.md @@ -80,14 +80,6 @@ We also recomend that you install [jsontool](http://trentm.com/json/) npm install -g jsontool -Tests -===== - - npm test - -Notes: -The C code is tested with [clar](https://github.com/vmg/clar) (shipped with this package) - Usage ===== From 2cf4da90beeb8339ac443e5aa25f5f91b36b4006 Mon Sep 17 00:00:00 2001 From: Joseph Dureau Date: Tue, 27 Jan 2015 21:17:56 +0100 Subject: [PATCH 15/26] Update README.md --- README.md | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 96de22f..3f0bce7 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Maths, methods and algorithms ============================= For more details on the modeling framework and on the algorithms -available in SSM, see the [documentation](https://github.com/standard-analytics/ssm/raw/master/doc/doc.pdf). +available in SSM, see the [documentation](https://github.com/JDureau/ssm/raw/master/doc/doc.pdf). Installation @@ -53,12 +53,14 @@ On OSX with [homebrew](http://brew.sh/) and [pip](https://pypi.python.org/pypi/p brew install jansson zmq gsl node sudo pip install jinja2 sympy python-dateutil - ## Installing S|S|M itself -With [npm](https://npmjs.org/) +Download the library from [this link](http://5.135.176.187:3000/). + +Unzip it, and do the following in the terminal: - npm install -g ssm + cd ssm + npm install -g Note: requires that all the C and python dependencies have been installed _before_ as this will also build the standalone C libraries. @@ -67,24 +69,31 @@ We recommend _not_ to use ```sudo``` for this command. If (and only if) you _have to_ use ```sudo``` to install package globaly (```-g```) then proceed differently: - git clone https://github.com/standard-analytics/ssm.git + git clone https://github.com/JDureau/ssm.git cd ssm - npm install - sudo npm link + npm install -g Pull requests are welcome for a .gyp file and windows support! -We also recomend that you install [jsontool](http://trentm.com/json/) +## Last check - npm install -g jsontool +To make sure that everything is properly installed and ssm runs properly, +try the following: + + cd ssm/examples/tutorial + ssm + +If the model compiles successfully, you're good to go! +## Any problem with the install? +Check out the _Issues_ section of this repository (top right of this webpage), and if no issue answers your question, post a new one describing your problem. Usage ===== -What follows use [this example](https://github.com/standard-analytics/ssm/tree/master/examples/tutorial). +What follows use [this example](https://github.com/JDureau/ssm/tree/master/examples/tutorial). All the paths will be relative to this directory. ## Data and parameters (priors) @@ -132,7 +141,7 @@ combination thereof. The syntax to define a model is fully described as JSON [schema](http://json-schema.org/) -[here](https://raw.github.com/standard-analytics/ssm/master/json-schema/model-schema.json). +[here](https://raw.github.com/JDureau/ssm/master/json-schema/model-schema.json). ### Link to the data @@ -242,10 +251,10 @@ defined by appending terms to the list. An ```sde``` property can be added in case you want that some parameters follow diffusions (see -[here](https://github.com/standard-analytics/ssm/blob/master/examples/foo/package.json) +[here](https://github.com/JDureau/ssm/blob/master/examples/foo/package.json) for an example, and [here](http://arxiv.org/abs/1203.5950) for references). White environmental noise can also be added to the reaction -as in this [example](https://raw.github.com/standard-analytics/ssm/master/examples/noise/package.json) +as in this [example](https://raw.github.com/JDureau/ssm/master/examples/noise/package.json) (references [here](http://arxiv.org/abs/0802.0021)). The ```accumulators``` property allows to defined new state variable From a383e2687cb0f5338a1fb1ed1b1e52bc26ecda4f Mon Sep 17 00:00:00 2001 From: Joseph Dureau Date: Tue, 27 Jan 2015 21:18:26 +0100 Subject: [PATCH 16/26] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 3f0bce7..98555bc 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,6 @@ playing with duplo blocks. cat theta.json | ./simplex -M 10000 | ./ksimplex -M 10000 > mle.json cat mle.json | ./kmcmc -M 100000 | ./pmcmc -J 1000 -M 500000 --trace > yeaaah.json -[![NPM](https://nodei.co/npm/ssm.png)](https://nodei.co/npm/ssm/) - Maths, methods and algorithms ============================= From 39b40ae3c0a6751ec7d08b5c14ae9e783da1c611 Mon Sep 17 00:00:00 2001 From: Joseph Dureau Date: Tue, 27 Jan 2015 21:20:45 +0100 Subject: [PATCH 17/26] Update README.md --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 98555bc..573a3cd 100644 --- a/README.md +++ b/README.md @@ -55,13 +55,13 @@ On OSX with [homebrew](http://brew.sh/) and [pip](https://pypi.python.org/pypi/p Download the library from [this link](http://5.135.176.187:3000/). -Unzip it, and do the following in the terminal: +Unzip it, and run the following in the terminal: cd ssm npm install -g -Note: requires that all the C and python dependencies have been -installed _before_ as this will also build the standalone C libraries. +Note: this step requires that all the C and python dependencies have been +installed _before_ as it also builds the standalone C libraries. We recommend _not_ to use ```sudo``` for this command. If (and only if) you _have to_ use ```sudo``` to install package @@ -71,12 +71,12 @@ globaly (```-g```) then proceed differently: cd ssm npm install -g - Pull requests are welcome for a .gyp file and windows support! + ## Last check -To make sure that everything is properly installed and ssm runs properly, +To make sure that everything is properly installed and ssm runs smoothly, try the following: cd ssm/examples/tutorial @@ -84,9 +84,10 @@ try the following: If the model compiles successfully, you're good to go! + ## Any problem with the install? -Check out the _Issues_ section of this repository (top right of this webpage), and if no issue answers your question, post a new one describing your problem. +Check out the _Issues_ section of this repository (top right of this webpage), and if no issue solves your problem, post a new one describing it. Usage ===== From 1a62c5ab1721e4ce938f043f231cc0d3a927c17e Mon Sep 17 00:00:00 2001 From: Anton Date: Thu, 28 May 2015 17:46:38 +0100 Subject: [PATCH 18/26] fix typo --- src/Cmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cmodel.py b/src/Cmodel.py index 5979d85..9c6e645 100755 --- a/src/Cmodel.py +++ b/src/Cmodel.py @@ -159,7 +159,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): for x in sde.get('drift', []): par_diff.append(x['name']) - ##NOTE: do not sort par_diff as the dispersion matrix is readen in the unsorted order + ##NOTE: do not sort par_diff as the dispersion matrix is read in the unsorted order self.par_diff = ['diff__' + x for x in par_diff] ##par_other From 757e46a819141247413e4bd46651dcb8dd0a62e6 Mon Sep 17 00:00:00 2001 From: Anton Date: Wed, 19 Aug 2015 22:08:34 +0100 Subject: [PATCH 19/26] fix typo in sde description --- json-schema/model-schema.json | 434 ++++++++++++++++++---------------- 1 file changed, 233 insertions(+), 201 deletions(-) diff --git a/json-schema/model-schema.json b/json-schema/model-schema.json index d959d35..eaae255 100644 --- a/json-schema/model-schema.json +++ b/json-schema/model-schema.json @@ -1,10 +1,7 @@ { "$schema": "http://json-schema.org/draft-04/schema#", - "title": "SSM model", - "description": "Description of an state space model following the grammar of the SSM inference package", - "definitions": { "require": { "type": "object", @@ -12,252 +9,287 @@ "datapackage": { "type": "string" }, - "resource": { - "type": "string" - }, - "fields": { - "type": "array", - "items": { - "type": "string" - } - } + "resource": { + "type": "string" + }, + "fields": { + "type": "array", + "items": { + "type": "string" + } + } } } - }, - - + }, "type": "object", - "properties": { - "populations": { "type": "array", "description": "Grouping of the state variables comprised in your compartmental model. For each group, provide a name and the list of corresponding states. By adding an optional remainder field, you will ensure the population size remains positive.", "items": { "type": "object", "properties": { - "name": { - "type": "string" - }, - "composition": { - "type": "array", - "items": { - "type": "string" - } - }, - "remainder": { - "type": "object", - "description": "Following this instruction, the value of the remainder variable at every time will be determined by the size of the population minus the sum of the other compartments. Note that SSM will discard scenarios where this variable becomes negative.", - "properties": { - "name": { - "type": "string" - }, - "pop_size": { - "type": "string" - } - }, - "required": ["name", "pop_size"] - } - }, - "required": ["name", "composition"] - } + "name": { + "type": "string" + }, + "composition": { + "type": "array", + "items": { + "type": "string" + } + }, + "remainder": { + "type": "object", + "description": "Following this instruction, the value of the remainder variable at every time will be determined by the size of the population minus the sum of the other compartments. Note that SSM will discard scenarios where this variable becomes negative.", + "properties": { + "name": { + "type": "string" + }, + "pop_size": { + "type": "string" + } + }, + "required": [ + "name", + "pop_size" + ] + } + }, + "required": [ + "name", + "composition" + ] + } }, - "reactions": { "type": "array", "description": "Description of the dynamics of your compartmental model. We only consider density-dependent reactions: each rate will be multiplied by the size of the compartment individuals are leaving. See the accumulators and white_noise optional fields for additional options.", "items": { "type": "object", - "properties": { - "from": { - "type": "string" - }, - "to": { - "type": "string" - }, - "rate": { - "type": "string" - }, - "description": { - "type": "string" - }, - "white_noise": { - "type": "object", - "description": "In order to cope with the absence or mis-specification of environmental factors in the model, you can multiply the rate of each reaction by a white gamma noise. To correlate these noises, use the same name. For more information, see Breto et al. 2009, Time series analysis via mechanistic models.", - "properties": { - "name": { - "type": "string" - }, - "sd": { - "type": "string" - } - }, - "required": ["name","sd"] - }, - "accumulators": { - "type": "array", - "description": "When your are monitoring or fitting the integrated flow of a given reaction, you can store it in one or several variables defined in this list. If a given flow variable is repeated in several reactions, it will correspond of the sum of the flows of these reactions.", - "items": { - "type": "string" - } - }, + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + }, + "rate": { + "type": "string" + }, + "description": { + "type": "string" + }, + "white_noise": { + "type": "object", + "description": "In order to cope with the absence or mis-specification of environmental factors in the model, you can multiply the rate of each reaction by a white gamma noise. To correlate these noises, use the same name. For more information, see Breto et al. 2009, Time series analysis via mechanistic models.", + "properties": { + "name": { + "type": "string" + }, + "sd": { + "type": "string" + } + }, + "required": [ + "name", + "sd" + ] + }, + "accumulators": { + "type": "array", + "description": "When your are monitoring or fitting the integrated flow of a given reaction, you can store it in one or several variables defined in this list. If a given flow variable is repeated in several reactions, it will correspond of the sum of the flows of these reactions.", + "items": { + "type": "string" + } + }, "keywords": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": ["from", "to","rate"] + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "from", + "to", + "rate" + ] } }, - "sde": { "type": "object", "description": "System of stochastic differential equations, defined by their drift vector and dispersion matrix", - "properties":{ + "properties": { "drift": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "State variable which dynamic is determined by this line of the system." - }, - "f": { - "type": ["number", "string"], - "description": "Deterministic component of the equation." - }, - "transformation": { - "type": "string", - "description": "In case you want to express the equation in terms of a function of X and avoid applying the Ito formula." - } - }, - "required": ["name","f"] - } - }, - "dispersion": { - "type": "array", - "description": "Dispersion matrix L. It needs to have as many rows as objects in drifts, and as many rows as independent sources of noise.", - "items": { - "type": "array", - "items": { - "type": ["number", "string"] - } - } - } + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "State variable which dynamic is determined by this line of the system." + }, + "f": { + "type": [ + "number", + "string" + ], + "description": "Deterministic component of the equation." + }, + "transformation": { + "type": "string", + "description": "In case you want to express the equation in terms of a function of X and avoid applying the Ito formula." + } + }, + "required": [ + "name", + "f" + ] + } + }, + "dispersion": { + "type": "array", + "description": "Dispersion matrix L. It needs to have as many rows as objects in drifts, and as many colums as independent sources of noise.", + "items": { + "type": "array", + "items": { + "type": [ + "number", + "string" + ] + } + } + } }, - "required": ["drift", "dispersion"] + "required": [ + "drift", + "dispersion" + ] }, - "ode": { "type": "array", "description": "System of ordinary differential equations.", - "items": { + "items": { "type": "object", - "properties": { - "name": { - "type": "string", - "description": "State variable which dynamic is determined by this line of the system." - }, - "f": { - "type": ["number", "string"], - "description": "Deterministic component of the equation." - }, - "transformation": { - "type": "string", - "description": "In case you want to express the equation in terms of a function of X and avoid applying the Ito formula." - } - }, - "required": ["name","f"] + "properties": { + "name": { + "type": "string", + "description": "State variable which dynamic is determined by this line of the system." + }, + "f": { + "type": [ + "number", + "string" + ], + "description": "Deterministic component of the equation." + }, + "transformation": { + "type": "string", + "description": "In case you want to express the equation in terms of a function of X and avoid applying the Ito formula." + } + }, + "required": [ + "name", + "f" + ] } }, - - "observations": { "type": "array", "description": "Name each of the observed time series, determine when observations started to be collected and what is the distribution of the observation process.", "items": { "type": "object", - "properties": { - "name": { - "type": "string" - }, - "start": { - "type": "string", - "format": "date-time" - }, - "distribution": { - "type": "string" - }, - "mean": { - "type": "string" - }, - "sd": { - "type": "string" - } - }, - "required": ["name", "start","distribution","mean","sd"] + "properties": { + "name": { + "type": "string" + }, + "start": { + "type": "string", + "format": "date-time" + }, + "distribution": { + "type": "string" + }, + "mean": { + "type": "string" + }, + "sd": { + "type": "string" + } + }, + "required": [ + "name", + "start", + "distribution", + "mean", + "sd" + ] } }, - "data": { "type": "array", "description": "Link each of the observed variables to a data resource.", "items": { - "type": "object", - "anyOf": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "require": {"$ref": "#/definitions/require"}, - "transformation": { - "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", - "type": "string" - } - }, - "required": ["name", "require"] - } + "type": "object", + "anyOf": [ + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "require": { + "$ref": "#/definitions/require" + }, + "transformation": { + "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", + "type": "string" + } + }, + "required": [ + "name", + "require" + ] + } ] } }, - "inputs": { "type": "array", "description": "Link each of the parameters or covariates at stake in your model to a data resource.", "items": { - "type": "object", - "anyOf": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "require": {"$ref": "#/definitions/require"}, - "transformation": { - "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", - "type": "string" - }, - "to_resource": { - "description": "In order to make predictions after fitting your data, specify how to invert the transformation relation at a later time than t0.", - "type": "string" - } - }, - "required": ["name"] - } - ] - } + "type": "object", + "anyOf": [ + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "require": { + "$ref": "#/definitions/require" + }, + "transformation": { + "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", + "type": "string" + }, + "to_resource": { + "description": "In order to make predictions after fitting your data, specify how to invert the transformation relation at a later time than t0.", + "type": "string" + } + }, + "required": [ + "name" + ] + } + ] + } } } -} +} \ No newline at end of file From 892fd49621f1b811c87190e6ce2ab0ebf6d82e1b Mon Sep 17 00:00:00 2001 From: Anton Date: Wed, 29 Mar 2017 16:33:35 +0200 Subject: [PATCH 20/26] add binomial observation process --- README.md | 2 +- lib/validate.js | 19 +++++++++++++++++-- src/C/templates/observed_template.c | 22 ++++++++++++++++++---- src/Ccoder.py | 5 +++++ src/Cmodel.py | 11 +++++++++-- 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 573a3cd..be1f886 100644 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ One observation model has to be defined per observed time-series. } ] ``` -SSM currently implements the `discretized_normal` and `poisson` observation processes. See the developer [documentation](doc/developers/dev_help.md) to contribute and add your favourite distribution. +SSM currently implements the `discretized_normal`, `poisson` and `binomial`observation processes. See the developer [documentation](doc/developers/dev_help.md) to contribute and add your favourite distribution. ### Initial conditions diff --git a/lib/validate.js b/lib/validate.js index 30d3bb1..48d49b4 100644 --- a/lib/validate.js +++ b/lib/validate.js @@ -188,14 +188,29 @@ function parseRate(rate){ }); + } else if (obs.distribution == 'binomial'){ + + + parseRate(obs.p).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'p' %s, the term %s cannot be understood by SSM. Please define it.",obs.p,t)); + }; + }); + + parseRate(obs.n).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'n' %s, the term %s cannot be understood by SSM. Please define it.",obs.n,t)); + }; + }); + }; }); // SSM only supports discretised_normal and poisson observation distributions so far model.observations.forEach(function(obs){ - if( (obs.distribution != 'discretized_normal') && (obs.distribution != 'poisson')){ - throw new Error("For the moment, SSM only supports 'discretized_normal' and 'poisson' distributions for observations."); + if( (obs.distribution != 'discretized_normal') && (obs.distribution != 'poisson') && (obs.distribution != 'binomial')){ + throw new Error("For the moment, SSM only supports 'discretized_normal', 'poisson' and 'binomial' distributions for observations."); } }); diff --git a/src/C/templates/observed_template.c b/src/C/templates/observed_template.c index 841a846..b4908dd 100644 --- a/src/C/templates/observed_template.c +++ b/src/C/templates/observed_template.c @@ -27,6 +27,13 @@ static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *p like = gsl_ran_poisson_pdf(rint(y), gsl_mu); + {% elif x.distribution == 'binomial' %} + + double gsl_p = {{ x.p }}; + double gsl_n = {{ x.n }}; + + like = gsl_ran_binomial_pdf(rint(y), gsl_p, gsl_n); + {% endif %} return like; @@ -65,6 +72,13 @@ static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_ double yobs = gsl_ran_poisson(calc->randgsl, gsl_mu); + {% elif x.distribution == 'binomial' %} + + double gsl_p = {{ x.p }}; + double gsl_n = {{ x.n }}; + + like = gsl_ran_binomial(calc->randgsl, gsl_p, gsl_n); + {% endif %} return yobs; @@ -78,10 +92,10 @@ static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_ * First order Taylor expansion * Var(f(X))= \sum_i \frac{\partial f(E(X_i))}{\partial x_i}Var(X_i)+\sum_{i\neqj}\frac{\partial f(E(X_i))}{\partial x_i}\frac{\partial f(E(X_j))}{\partial x_ij}Cov(X_i,X_j) */ -{% for h, x in h_grads.items() %} -{% for t, y in x.items() %} -static double f_var_pred_tpl_{{ y.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, ssm_nav_t *nav, double t) -{ + {% for h, x in h_grads.items() %} + {% for t, y in x.items() %} + static double f_var_pred_tpl_{{ y.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, ssm_nav_t *nav, double t) + { double res = 0; int m = nav->states_sv->length + nav->states_inc->length + nav->states_diff->length; gsl_matrix_const_view Ct = gsl_matrix_const_view_array(&p_X->proj[m], m, m); diff --git a/src/Ccoder.py b/src/Ccoder.py index 8c7cb33..aa0b5d6 100755 --- a/src/Ccoder.py +++ b/src/Ccoder.py @@ -153,6 +153,11 @@ def observed(self): elif x['distribution'] == 'poisson': x['mean'] = self.make_C_term(x['mean'], True) x['sd'] = self.make_C_term(x['sd'], True) + elif x['distribution'] == 'binomial': + x['mean'] = self.make_C_term(x['mean'], True) + x['sd'] = self.make_C_term(x['sd'], True) + x['p'] = self.make_C_term(x['p'], True) + x['n'] = self.make_C_term(x['n'], True) return {'observed': obs} diff --git a/src/Cmodel.py b/src/Cmodel.py index 9c6e645..397d9de 100755 --- a/src/Cmodel.py +++ b/src/Cmodel.py @@ -125,10 +125,17 @@ def __init__(self, dpkgRoot, dpkg, **kwargs): #par_obs par_obs = set(); for o in observations: - if o['distribution'] == 'poisson': + if o['distribution'] == 'discretized_normal': + pars = [o['mean'], o['sd']] + elif o['distribution'] == 'poisson': o['sd'] = 'sqrt('+o['mean']+')' + pars = [o['mean'], o['sd']] + elif o['distribution'] == 'binomial': + o['mean'] = o['p']+'*'+o['n'] + o['sd'] = 'sqrt('+o['p']+'*(1-'+o['p']+')*'+o['n']+')' + pars = [o['n'], o['p'],o['mean'], o['sd']] - for p in [o['mean'], o['sd']]: + for p in pars: el = self.change_user_input(p) for e in el: if e not in self.op and e not in self.reserved and e not in self.special_functions and e not in self.par_sv and e not in self.par_noise and e not in self.par_proc and e not in self.par_forced and e not in self.par_inc: From ebafd8f53309f094e36ffcf72221296cfffd8aae Mon Sep 17 00:00:00 2001 From: Anton Date: Wed, 29 Mar 2017 16:34:02 +0200 Subject: [PATCH 21/26] update dev doc for adding obs. processes --- doc/developers/dev_help.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/developers/dev_help.md b/doc/developers/dev_help.md index 095a658..1ebe0d0 100644 --- a/doc/developers/dev_help.md +++ b/doc/developers/dev_help.md @@ -144,10 +144,13 @@ Finally, the appropriate `mean` and `sd` of the Gaussian approximation must be d ```python for o in observations: - if o['distribution'] == 'poisson': + if o['distribution'] == 'discretized_normal': + pars = [o['mean'], o['sd']] + elif o['distribution'] == 'poisson': o['sd'] = 'sqrt('+o['mean']+')' + pars = [o['mean'], o['sd']] - for p in [o['mean'], o['sd']]: + for p in pars: el = self.change_user_input(p) for e in el: if e not in self.op and e not in self.reserved and e not in self.special_functions and e not in self.par_sv and e not in self.par_noise and e not in self.par_proc and e not in self.par_forced and e not in self.par_inc: From 20e628124ee0e81d3c655834442f47273e990699 Mon Sep 17 00:00:00 2001 From: Anton Date: Wed, 29 Mar 2017 16:50:13 +0200 Subject: [PATCH 22/26] update schema and doc --- doc/developers/dev_help.md | 46 ++++++++++++++++++++++++++++++++++- json-schema/model-schema.json | 10 +++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/doc/developers/dev_help.md b/doc/developers/dev_help.md index 1ebe0d0..6866dfb 100644 --- a/doc/developers/dev_help.md +++ b/doc/developers/dev_help.md @@ -140,7 +140,7 @@ Note that all the parameters of the distribution need to appear in addition to ` ### Modify `src/Cmodel.py` -Finally, the appropriate `mean` and `sd` of the Gaussian approximation must be defined in this file. We can use the fact that when `mean > 1000`, the Poisson distribution can be approximated by a normal distribution with the same `mean` and `sd = sqrt(mean)`. Thus, we only need to compute the standard deviation: +The appropriate `mean` and `sd` of the Gaussian approximation must be defined in this file. We can use the fact that when `mean > 1000`, the Poisson distribution can be approximated by a normal distribution with the same `mean` and `sd = sqrt(mean)`. Thus, we only need to compute the standard deviation: ```python for o in observations: @@ -162,3 +162,47 @@ for o in observations: self.par_obs = sorted(list(par_obs)) ``` +### Modify `json-schema/model-schema.json` + +Finally, if the distribution requires parameters names other than `means` and `sd`, you need to specify these in the `observation` field of the schema file. Since the Poisson distribution is parameterized by the `mean` there is no need to edit the file. However, for a binomial distribution with parameters `p` and `n` this would become: + +```json + "observations": { + "type": "array", + "description": "Name each of the observed time series, determine when observations started to be collected and what is the distribution of the observation process.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "start": { + "type": "string", + "format": "date-time" + }, + "distribution": { + "type": "string" + }, + "mean": { + "type": "string" + }, + "sd": { + "type": "string" + }, + "n": { + "type": "string" + }, + "p": { + "type": "string" + } + }, + "required": [ + "name", + "start", + "distribution" + ] + } + } +``` + + diff --git a/json-schema/model-schema.json b/json-schema/model-schema.json index eaae255..2c79028 100644 --- a/json-schema/model-schema.json +++ b/json-schema/model-schema.json @@ -215,14 +215,18 @@ }, "sd": { "type": "string" + }, + "n": { + "type": "string" + }, + "p": { + "type": "string" } }, "required": [ "name", "start", - "distribution", - "mean", - "sd" + "distribution" ] } }, From 09905be1252b2cf5a58e3b0ab0041745c499b394 Mon Sep 17 00:00:00 2001 From: Anton Date: Wed, 29 Mar 2017 22:32:48 +0200 Subject: [PATCH 23/26] fix bug in binom obs --- json-schema/model-schema.json | 2 +- src/C/templates/observed_template.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/json-schema/model-schema.json b/json-schema/model-schema.json index 2c79028..3719ed8 100644 --- a/json-schema/model-schema.json +++ b/json-schema/model-schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-04/schema#", "title": "SSM model", - "description": "Description of an state space model following the grammar of the SSM inference package", + "description": "Description of a state space model following the grammar of the SSM inference package", "definitions": { "require": { "type": "object", diff --git a/src/C/templates/observed_template.c b/src/C/templates/observed_template.c index b4908dd..f557f14 100644 --- a/src/C/templates/observed_template.c +++ b/src/C/templates/observed_template.c @@ -77,7 +77,7 @@ static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_ double gsl_p = {{ x.p }}; double gsl_n = {{ x.n }}; - like = gsl_ran_binomial(calc->randgsl, gsl_p, gsl_n); + double yobs = gsl_ran_binomial(calc->randgsl, gsl_p, gsl_n); {% endif %} From fb7bd425e70efe6f3e16f7de0cc72cfe69db11d1 Mon Sep 17 00:00:00 2001 From: Anton Date: Wed, 26 Apr 2017 14:56:24 +0200 Subject: [PATCH 24/26] fix bug that prevented to combine several time-series with different time-step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the reset for observation t was done at the beginning of the prediction step leading to time t, which was doomed to occur on the time t’ of the time-series with smallest time step (t-1 < t’ < t). Now, reset for observation t is done immediately after the prediction at time t-1, thus ensuring that prediction at time t doesn’t depend on intermediate time-step of other time series. --- src/Data.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Data.py b/src/Data.py index 460529c..1832986 100644 --- a/src/Data.py +++ b/src/Data.py @@ -107,7 +107,8 @@ def prepare_data(self): dates.sort() data_C = [] - for d in dates: + + for i, d in enumerate(dates): pd = parse_date(d) row = { 'date': pd.isoformat(), @@ -117,9 +118,13 @@ def prepare_data(self): 'time': (pd-self.t0).days } + if i > 0: + for x in obs_id: + if dates[i-1] in data[x]['dict']: + row['reset'].extend(data[x]['ind_inc_reset']) + for x in obs_id: - if d in data[x]['dict']: - row['reset'].extend(data[x]['ind_inc_reset']) + if d in data[x]['dict']: if data[x]['dict'][d] is not None: row['observed'].append(data[x]['order']) row['values'].append(data[x]['f'](data[x]['dict'][d])) From 30aeaba6220cacd418acccf163aa9e1d08305457 Mon Sep 17 00:00:00 2001 From: Anton Date: Tue, 9 May 2017 22:48:51 +0200 Subject: [PATCH 25/26] fix bug in sample traj Fix a bug in the selection of ancestor for the sampled particle. Since resampling happen after observation is made, then ancestor must be chosen (when looking backward in time) just after the observation time and not before. --- src/C/core/bayes.c | 120 +++++++++++-- src/C/core/fitness.c | 27 +-- src/C/core/print.c | 287 ++++++++++++++++++------------ src/C/core/ssm.h | 2 + src/C/pmcmc/main_pmcmc.c | 368 +++++++++++++++++++++++---------------- 5 files changed, 515 insertions(+), 289 deletions(-) diff --git a/src/C/core/bayes.c b/src/C/core/bayes.c index aa4e341..c1d9988 100644 --- a/src/C/core/bayes.c +++ b/src/C/core/bayes.c @@ -21,8 +21,8 @@ /** * compute the log of the prob of the proposal */ -ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed, ssm_theta_t *theta, ssm_var_t *var, double sd_fac, ssm_nav_t *nav, int is_mvn) -{ + ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed, ssm_theta_t *theta, ssm_var_t *var, double sd_fac, ssm_nav_t *nav, int is_mvn) + { int i; ssm_parameter_t *p; @@ -59,10 +59,10 @@ ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed diagonal terms so everything generalizes nicely */ - p_tmp /= p->f_der_inv(gsl_vector_get(proposed, p->offset_theta)); + p_tmp /= p->f_der_inv(gsl_vector_get(proposed, p->offset_theta)); //check for numerical issues - if( (isnan(p_tmp)==1) || (isinf(p_tmp)==1) || (p_tmp<0.0) ) { + if( (isnan(p_tmp)==1) || (isinf(p_tmp)==1) || (p_tmp<0.0) ) { return SSM_ERR_PROPOSAL; } @@ -92,8 +92,8 @@ ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed * means it doesn't immediatly return on failure). This is usefull for * the --prior option. */ -ssm_err_code_t ssm_log_prob_prior(double *log_prior, ssm_theta_t *theta, ssm_nav_t *nav, ssm_fitness_t *fitness) -{ + ssm_err_code_t ssm_log_prob_prior(double *log_prior, ssm_theta_t *theta, ssm_nav_t *nav, ssm_fitness_t *fitness) + { int i; ssm_parameter_t *p; int is_err = 0; @@ -129,8 +129,8 @@ ssm_err_code_t ssm_log_prob_prior(double *log_prior, ssm_theta_t *theta, ssm_nav * return accepted (SSM_SUCCESS) or rejected (SSM_MH_REJECTED) or * combination of prob errors and assign fitness->log_prior */ -ssm_err_code_t ssm_metropolis_hastings(ssm_fitness_t *fitness, double *alpha, ssm_theta_t *proposed, ssm_theta_t *theta, gsl_matrix *var, double sd_fac , ssm_nav_t *nav, ssm_calc_t *calc, int is_mvn) -{ + ssm_err_code_t ssm_metropolis_hastings(ssm_fitness_t *fitness, double *alpha, ssm_theta_t *proposed, ssm_theta_t *theta, gsl_matrix *var, double sd_fac , ssm_nav_t *nav, ssm_calc_t *calc, int is_mvn) + { double ran; ssm_err_code_t success = SSM_SUCCESS; double lproposal, lproposal_prev, lprior_prev; @@ -162,8 +162,8 @@ ssm_err_code_t ssm_metropolis_hastings(ssm_fitness_t *fitness, double *alpha, ss * (depending on the iteration value and options) and the evaluated * tuning factor sd_fac */ -ssm_var_t *ssm_adapt_eps_var_sd_fac(double *sd_fac, ssm_adapt_t *a, ssm_var_t *var, ssm_nav_t *nav, int m) -{ + ssm_var_t *ssm_adapt_eps_var_sd_fac(double *sd_fac, ssm_adapt_t *a, ssm_var_t *var, ssm_nav_t *nav, int m) + { // evaluate epsilon(m) = epsilon(m-1) * exp(a^(m-1) * (acceptance_rate(m-1) - 0.234)) if ( (m > a->eps_switch) && ( m * a->ar < a->m_switch) ) { @@ -234,8 +234,8 @@ int ssm_par_copy(ssm_par_t *dest, ssm_par_t *src) * Other caveat: D_J_p_X are in [N_DATA+1] ([0] contains the initial conditions) * select is in [N_DATA], times is in [N_DATA] */ -void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness) -{ + void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness) + { int j_sel; //the selected particle int n, nn, indn; @@ -252,9 +252,12 @@ void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data //print traj of ancestors of particle j_sel; - //!!! we assume that the last data point contain information' + //!!! below we assume that the last data point contain information' ssm_X_copy(D_X[data->n_obs], D_J_X[data->n_obs][j_sel]); + // take ancestor of last data point + j_sel = fitness->select[data->n_obs - 1][j_sel]; + //printing all ancesters up to previous observation time for(nn = (data->ind_nonan[data->n_obs_nonan-1]-1); nn > data->ind_nonan[data->n_obs_nonan-2]; nn--) { ssm_X_copy(D_X[nn + 1], D_J_X[nn + 1][j_sel]); @@ -263,9 +266,92 @@ void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data for(n = (data->n_obs_nonan-2); n >= 1; n--) { //indentifying index of the path that led to sampled particule indn = data->ind_nonan[n]; + ssm_X_copy(D_X[indn + 1], D_J_X[indn + 1][j_sel]); + j_sel = fitness->select[indn][j_sel]; + + //printing all ancesters up to previous observation time + for(nn= (indn-1); nn > data->ind_nonan[n-1]; nn--) { + ssm_X_copy(D_X[nn + 1], D_J_X[nn + 1][j_sel]); + } + } + + + indn = data->ind_nonan[0]; + ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); + + j_sel = fitness->select[indn][j_sel]; + + for(nn=indn; nn>=-1; nn--) { + ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); + } + +} + + +// this is a function used for debugging a bug in retrieving particle genealogy +// it's similar to ssm_sample_traj but take the j_select as input +// j_select is returned by ssm_sample_traj_print2 and allow to compare the print and non-print version of this function +// we keep it in case we need to debugg it in the future +void ssm_sample_traj2(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness,const int j_select) +{ + int j_sel; //the selected particle + int n, nn, indn; + + double ran, cum_weights; + + ran=gsl_ran_flat(calc->randgsl, 0.0, 1.0); + + j_sel=0; + cum_weights=fitness->weights[0]; + + while (cum_weights < ran) { + cum_weights += fitness->weights[++j_sel]; + } + + j_sel = j_select; + + // /*test*/ + // FILE *test_file = fopen("/Users/Tonton/work/projects/hev-modelling/ssm/SIR_ssm/pmcmc/X_sampled_0.csv", "a"); + // if (test_file == NULL) + // { + // printf("Error opening file!\n"); + // exit(1); + // } + + // fprintf(test_file, "%i\n", j_sel); + // fclose(test_file); + // /*test*/ + + //print traj of ancestors of particle j_sel; + + //!!! we assume that the last data point contain information' + fprintf(stderr, "data->n_obs = %i\n", data->n_obs); + ssm_X_copy(D_X[data->n_obs], D_J_X[data->n_obs][j_sel]); + + //printing all ancesters up to previous observation time + fprintf(stderr, "data->n_obs_nonan = %i\n", data->n_obs_nonan); + fprintf(stderr, "(data->ind_nonan[data->n_obs_nonan - 1] - 1) = %i\n", (data->ind_nonan[data->n_obs_nonan - 1] - 1)); + fprintf(stderr, "data->ind_nonan[data->n_obs_nonan-2] = %i\n", data->ind_nonan[data->n_obs_nonan-2]); + + // for(nn = 0; nn < data->n_obs_nonan; nn++){ + // fprintf(stderr, "data->ind_nonan[%i] = %i\n", nn, data->ind_nonan[nn]); + // } + + j_sel = fitness->select[data->n_obs - 1][j_sel]; + + for(nn = (data->ind_nonan[data->n_obs_nonan - 1] - 1); nn > data->ind_nonan[data->n_obs_nonan-2]; nn--) { + fprintf(stderr, "nn = %i\n", nn); + ssm_X_copy(D_X[nn+1], D_J_X[nn+1][j_sel]); + } + + for(n = (data->n_obs_nonan-2); n >= 1; n--) { + //indentifying index of the path that led to sampled particule + indn = data->ind_nonan[n]; ssm_X_copy(D_X[indn + 1], D_J_X[indn + 1][j_sel]); + j_sel = fitness->select[indn][j_sel]; + //printing all ancesters up to previous observation time for(nn= (indn-1); nn > data->ind_nonan[n-1]; nn--) { ssm_X_copy(D_X[nn + 1], D_J_X[nn + 1][j_sel]); @@ -273,12 +359,18 @@ void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data } indn = data->ind_nonan[0]; + ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); + j_sel = fitness->select[indn][j_sel]; - for(nn=indn; nn>=0; nn--) { + for(nn=indn; nn>=-1; nn--) { ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); } + // j_sel = fitness->select[indn][j_sel]; + + //TODO nn=-1 (for initial conditions) } + diff --git a/src/C/core/fitness.c b/src/C/core/fitness.c index d9f1b11..08f0383 100644 --- a/src/C/core/fitness.c +++ b/src/C/core/fitness.c @@ -24,11 +24,12 @@ * scale by making sure that everything below fitness->log_like_min * is fitness->log_like_min*row->ts_nonan_length. */ -double ssm_sanitize_log_likelihood(double log_like, ssm_row_t *row, ssm_fitness_t *fitness, ssm_nav_t *nav) -{ + double ssm_sanitize_log_likelihood(double log_like, ssm_row_t *row, ssm_fitness_t *fitness, ssm_nav_t *nav) + { if ((isinf(log_like)==1) || (isnan(log_like)==1) ) { //error if (nav->print & SSM_PRINT_WARNING) { ssm_print_warning("error likelihood computation"); + // fprintf(stderr, "at time %u\n", row->time); } return fitness->log_like_min * row->ts_nonan_length; } else { @@ -88,17 +89,17 @@ void ssm_dic_init(ssm_fitness_t *fitness, double log_like, double log_prior) void ssm_dic_update(ssm_fitness_t *fitness, double log_like, double log_prior) { if( log_like > fitness->summary_log_likelihood){ - fitness->summary_log_likelihood = log_like; - } - if( (log_like+log_prior) > fitness->summary_log_ltp){ - fitness->summary_log_ltp = (log_like+log_prior); - } - - double deviance = -2*log_like; - fitness->_deviance_cum += deviance; - if( deviance < fitness->_min_deviance){ - fitness->_min_deviance = deviance; - } + fitness->summary_log_likelihood = log_like; + } + if( (log_like+log_prior) > fitness->summary_log_ltp){ + fitness->summary_log_ltp = (log_like+log_prior); + } + + double deviance = -2*log_like; + fitness->_deviance_cum += deviance; + if( deviance < fitness->_min_deviance){ + fitness->_min_deviance = deviance; + } } void ssm_dic_end(ssm_fitness_t *fitness, ssm_nav_t *nav, int m) diff --git a/src/C/core/print.c b/src/C/core/print.c index f1a3510..29b464e 100644 --- a/src/C/core/print.c +++ b/src/C/core/print.c @@ -18,8 +18,8 @@ #include "ssm.h" -void ssm_print_log(char *data) -{ + void ssm_print_log(char *data) + { #if SSM_JSON json_t *root; root = json_pack("{s,s,s,s}", "id", "log", "data", data); @@ -88,79 +88,79 @@ void ssm_pipe_theta(FILE *stream, json_t *jparameters, ssm_theta_t *theta, ssm_v } else if ((strcmp(name, "covariance") == 0)) { jcovariance = el; - } else if ((strcmp(name, "summary") == 0)) { - jsummary = el; - } - } - - json_t *jsummarydata = json_object(); - json_object_set_new(jsummarydata, "id", json_integer(opts->id)); - json_object_set_new(jsummarydata, "AIC", isnan(fitness->AIC) ? json_null(): json_real(fitness->AIC)); - json_object_set_new(jsummarydata, "AICc", isnan(fitness->AICc) ? json_null(): json_real(fitness->AICc)); - json_object_set_new(jsummarydata, "DIC", isnan(fitness->DIC) ? json_null(): json_real(fitness->DIC)); - json_object_set_new(jsummarydata, "log_likelihood", isnan(fitness->summary_log_likelihood) ? json_null(): json_real(fitness->summary_log_likelihood)); - json_object_set_new(jsummarydata, "log_ltp", isnan(fitness->summary_log_ltp) ? json_null(): json_real(fitness->summary_log_ltp)); - json_object_set_new(jsummarydata, "sum_squares", isnan(fitness->summary_sum_squares) ? json_null(): json_real(fitness->summary_sum_squares)); - json_object_set_new(jsummarydata, "n_parameters", json_integer(nav->theta_all->length)); - json_object_set_new(jsummarydata, "n_data", json_integer(fitness->n)); - - if(!jsummary){ - json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "summary", "data", jsummarydata)); - } else{ - json_object_set_new(jsummary, "data", jsummarydata); - } - - if(var){ - json_t *jdata = json_object(); - - for(i=0; itheta_all->length; i++){ - json_t *jrow = json_object(); - for(j=0; jtheta_all->length; j++){ - x = gsl_matrix_get(var, nav->theta_all->p[i]->offset_theta, nav->theta_all->p[j]->offset_theta); - if(x){ - json_object_set_new(jrow, nav->theta_all->p[j]->name, json_real(x)); - } - } - if(json_object_size(jrow)){ - json_object_set_new(jdata, nav->theta_all->p[i]->name, jrow); - } else { - json_decref(jrow); + } else if ((strcmp(name, "summary") == 0)) { + jsummary = el; + } + } + + json_t *jsummarydata = json_object(); + json_object_set_new(jsummarydata, "id", json_integer(opts->id)); + json_object_set_new(jsummarydata, "AIC", isnan(fitness->AIC) ? json_null(): json_real(fitness->AIC)); + json_object_set_new(jsummarydata, "AICc", isnan(fitness->AICc) ? json_null(): json_real(fitness->AICc)); + json_object_set_new(jsummarydata, "DIC", isnan(fitness->DIC) ? json_null(): json_real(fitness->DIC)); + json_object_set_new(jsummarydata, "log_likelihood", isnan(fitness->summary_log_likelihood) ? json_null(): json_real(fitness->summary_log_likelihood)); + json_object_set_new(jsummarydata, "log_ltp", isnan(fitness->summary_log_ltp) ? json_null(): json_real(fitness->summary_log_ltp)); + json_object_set_new(jsummarydata, "sum_squares", isnan(fitness->summary_sum_squares) ? json_null(): json_real(fitness->summary_sum_squares)); + json_object_set_new(jsummarydata, "n_parameters", json_integer(nav->theta_all->length)); + json_object_set_new(jsummarydata, "n_data", json_integer(fitness->n)); + + if(!jsummary){ + json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "summary", "data", jsummarydata)); + } else{ + json_object_set_new(jsummary, "data", jsummarydata); + } + + if(var){ + json_t *jdata = json_object(); + + for(i=0; itheta_all->length; i++){ + json_t *jrow = json_object(); + for(j=0; jtheta_all->length; j++){ + x = gsl_matrix_get(var, nav->theta_all->p[i]->offset_theta, nav->theta_all->p[j]->offset_theta); + if(x){ + json_object_set_new(jrow, nav->theta_all->p[j]->name, json_real(x)); } } - - if(json_object_size(jdata)){ - if(!jcovariance){ - json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "covariance", "data", jdata)); - } else{ - json_object_set_new(jcovariance, "data", jdata); - } + if(json_object_size(jrow)){ + json_object_set_new(jdata, nav->theta_all->p[i]->name, jrow); } else { - json_decref(jdata); + json_decref(jrow); } - } - - if(strcmp(opts->next, "") != 0){ + } + + if(json_object_size(jdata)){ + if(!jcovariance){ + json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "covariance", "data", jdata)); + } else{ + json_object_set_new(jcovariance, "data", jdata); + } + } else { + json_decref(jdata); + } +} + +if(strcmp(opts->next, "") != 0){ char path[SSM_STR_BUFFSIZE]; snprintf(path, SSM_STR_BUFFSIZE, "%s/%s%d.json", opts->root, opts->next, opts->id); json_dump_file(jparameters, path, JSON_INDENT(2)); - } else { +} else { json_dumpf(jparameters, stdout, JSON_COMPACT); printf("\n"); fflush(stdout); - } +} } /** * remove summary (if any) and pipe hat. This is typicaly used for simulations */ -void ssm_pipe_hat(FILE *stream, json_t *jparameters, ssm_input_t *input, ssm_hat_t *hat, ssm_par_t *par, ssm_calc_t *calc, ssm_nav_t *nav, ssm_options_t *opts, double t) -{ + void ssm_pipe_hat(FILE *stream, json_t *jparameters, ssm_input_t *input, ssm_hat_t *hat, ssm_par_t *par, ssm_calc_t *calc, ssm_nav_t *nav, ssm_options_t *opts, double t) + { int i, index; double x; json_t *jresources = json_object_get(jparameters, "resources"); json_t *jsummary = NULL; int index_summary; - + for(index=0; index< json_array_size(jresources); index++){ json_t *el = json_array_get(jresources, index); @@ -173,23 +173,23 @@ void ssm_pipe_hat(FILE *stream, json_t *jparameters, ssm_input_t *input, ssm_hat json_object_set_new(values, nav->theta_all->p[i]->name, json_real(x)); } } else if (strcmp(name, "summary") == 0){ - jsummary = el; - index_summary = index; - } - } - - if(jsummary){ - json_array_remove(jresources, index_summary); - } - - if(strcmp(opts->next, "") != 0){ - char path[SSM_STR_BUFFSIZE]; - snprintf(path, SSM_STR_BUFFSIZE, "%s/%s%d.json", opts->root, opts->next, opts->id); - json_dump_file(jparameters, path, JSON_INDENT(2)); - } else { - json_dumpf(jparameters, stdout, JSON_COMPACT); printf("\n"); - fflush(stdout); - } + jsummary = el; + index_summary = index; + } + } + + if(jsummary){ + json_array_remove(jresources, index_summary); + } + + if(strcmp(opts->next, "") != 0){ + char path[SSM_STR_BUFFSIZE]; + snprintf(path, SSM_STR_BUFFSIZE, "%s/%s%d.json", opts->root, opts->next, opts->id); + json_dump_file(jparameters, path, JSON_INDENT(2)); + } else { + json_dumpf(jparameters, stdout, JSON_COMPACT); printf("\n"); + fflush(stdout); + } } @@ -264,7 +264,7 @@ void ssm_print_X(FILE *stream, ssm_X_t *p_X, ssm_par_t *par, ssm_nav_t *nav, ssm #endif } - for(i=0; iobserved_length; i++){ + for(i=0; iobserved_length; i++){ observed = nav->observed[i]; #if SSM_JSON json_object_set_new(jout, observed->name, json_real(observed->f_obs_mean(p_X, par, calc, t))); @@ -273,8 +273,8 @@ void ssm_print_X(FILE *stream, ssm_X_t *p_X, ssm_par_t *par, ssm_nav_t *nav, ssm #endif } - char key[SSM_STR_BUFFSIZE]; - for(i=0; iobserved_length; i++){ + char key[SSM_STR_BUFFSIZE]; + for(i=0; iobserved_length; i++){ observed = nav->observed[i]; snprintf(key, SSM_STR_BUFFSIZE, "ran_%s", observed->name); #if SSM_JSON @@ -295,20 +295,20 @@ void ssm_print_X(FILE *stream, ssm_X_t *p_X, ssm_par_t *par, ssm_nav_t *nav, ssm -void ssm_print_header_trace(FILE *stream, ssm_nav_t *nav) -{ - int i; - for(i=0; i < nav->theta_all->length; i++) { - fprintf(stream, "%s,", nav->theta_all->p[i]->name); + void ssm_print_header_trace(FILE *stream, ssm_nav_t *nav) + { + int i; + for(i=0; i < nav->theta_all->length; i++) { + fprintf(stream, "%s,", nav->theta_all->p[i]->name); + } + fprintf(stream, "fitness,index\n"); } - fprintf(stream, "fitness,index\n"); -} /** * fitness is either log likelihood or sum of square */ -void ssm_print_trace(FILE *stream, ssm_theta_t *theta, ssm_nav_t *nav, const double fitness, const int index) -{ + void ssm_print_trace(FILE *stream, ssm_theta_t *theta, ssm_nav_t *nav, const double fitness, const int index) + { int i; ssm_parameter_t *parameter; @@ -409,37 +409,37 @@ void ssm_print_pred_res(FILE *stream, ssm_X_t **J_X, ssm_par_t *par, ssm_nav_t * var_obs += observed->f_obs_var(J_X[j], par, calc, t); } - if(fitness->J > 1){ - var_state = M2/(kn - 1.0); - } else { - var_state = 0; - } + if(fitness->J > 1){ + var_state = M2/(kn - 1.0); + } else { + var_state = 0; + } - var_obs /= ((double) fitness->J); + var_obs /= ((double) fitness->J); - res = (y - pred)/sqrt(var_state + var_obs); - } + res = (y - pred)/sqrt(var_state + var_obs); + } #if SSM_JSON - snprintf(key, SSM_STR_BUFFSIZE, "pred_%s", observed->name); - json_object_set_new(jout, key, json_real(pred)); + snprintf(key, SSM_STR_BUFFSIZE, "pred_%s", observed->name); + json_object_set_new(jout, key, json_real(pred)); - snprintf(key, SSM_STR_BUFFSIZE, "res_%s", observed->name); - json_object_set_new(jout, key, (isnan(res)==1)? json_null(): json_real(res)); + snprintf(key, SSM_STR_BUFFSIZE, "res_%s", observed->name); + json_object_set_new(jout, key, (isnan(res)==1)? json_null(): json_real(res)); #else - tmp_pred[observed->offset] = pred; - tmp_res[observed->offset] = res; + tmp_pred[observed->offset] = pred; + tmp_res[observed->offset] = res; #endif - } + } #if SSM_JSON - json_object_set_new(jout, "ess", (isnan(fitness->ess_n)==1)? json_null(): json_real(fitness->ess_n)); - ssm_json_dumpf(stream, "predres", jout); + json_object_set_new(jout, "ess", (isnan(fitness->ess_n)==1)? json_null(): json_real(fitness->ess_n)); + ssm_json_dumpf(stream, "predres", jout); #else - for(ts=0; tsts_length; ts++){ - fprintf(stream, "%g,%g,", tmp_pred[ts], tmp_res[ts]); - } - fprintf(stream, "%g\n", fitness->ess_n); + for(ts=0; tsts_length; ts++){ + fprintf(stream, "%g,%g,", tmp_pred[ts], tmp_res[ts]); +} +fprintf(stream, "%g\n", fitness->ess_n); #endif } @@ -463,10 +463,10 @@ void ssm_print_header_hat(FILE *stream, ssm_nav_t *nav) } for(i=0; iobserved_length; i++){ - fprintf(stream, ",mean_%s,lower_%s,upper_%s", nav->observed[i]->name, nav->observed[i]->name, nav->observed[i]->name); - } + fprintf(stream, ",mean_%s,lower_%s,upper_%s", nav->observed[i]->name, nav->observed[i]->name, nav->observed[i]->name); + } - fprintf(stream, "\n"); + fprintf(stream, "\n"); } @@ -563,8 +563,8 @@ void ssm_print_hat(FILE *stream, ssm_hat_t *hat, ssm_nav_t *nav, ssm_row_t *row) * Other caveat: D_J_p_X are in [N_DATA+1] ([0] contains the initial conditions) * select is in [N_DATA], times is in [N_DATA] */ -void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index) -{ + void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index) + { int j_sel; int n, nn, indn; @@ -622,6 +622,73 @@ void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_n } +// this is a function used for debugging a bug in retrieving particle genealogy +// it's similar to ssm_sample_traj but print the sampled particle +// it's similar to ssm_sample_traj_print but return j_sel_start +// j_sel_start is then used by by ssm_sample_traj2 and allow to compare the print and non-print version of this function +// we keep it in case we need to debugg it in the future +int ssm_sample_traj_print2(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index) + { + int j_sel, j_sel_start; + int n, nn, indn; + + double ran, cum_weights; + + ssm_X_t *X_sel; + + ran=gsl_ran_flat(calc->randgsl, 0.0, 1.0); + + j_sel=0; + cum_weights=fitness->weights[0]; + + while (cum_weights < ran) { + cum_weights += fitness->weights[++j_sel]; + } + + j_sel_start = j_sel; + + //print traj of ancestors of particle j_sel; + + //!!! we assume that the last data point contain information' + X_sel = D_J_X[data->n_obs][j_sel]; // N_DATA-1 <=> data->indn_data_nonan[N_DATA_NONAN-1] + ssm_print_X(stream, X_sel, par, nav, calc, data->rows[data->n_obs-1], index); + + //printing all ancesters up to previous observation time + for(nn = (data->ind_nonan[data->n_obs_nonan-1]-1); nn > data->ind_nonan[data->n_obs_nonan-2]; nn--) { + X_sel = D_J_X[ nn + 1 ][j_sel]; + ssm_print_X(stream, X_sel, par, nav, calc, data->rows[nn], index); + } + + for(n = (data->n_obs_nonan-2); n >= 1; n--) { + //indentifying index of the path that led to sampled particule + indn = data->ind_nonan[n]; + j_sel = fitness->select[indn][j_sel]; + X_sel = D_J_X[ indn + 1 ][j_sel]; + + ssm_print_X(stream, X_sel, par, nav, calc, data->rows[indn], index); + + //printing all ancesters up to previous observation time + for(nn= (indn-1); nn > data->ind_nonan[n-1]; nn--) { + X_sel = D_J_X[ nn + 1 ][j_sel]; + ssm_print_X(stream, X_sel, par, nav, calc, data->rows[nn], index); + } + } + + indn = data->ind_nonan[0]; + j_sel = fitness->select[indn][j_sel]; + X_sel = D_J_X[indn+1][j_sel]; + + for(nn=indn; nn>=0; nn--) { + X_sel = D_J_X[ nn + 1 ][j_sel]; + ssm_print_X(stream, X_sel, par, nav, calc, data->rows[nn], index); + } + + //TODO nn=-1 (for initial conditions) + + return(j_sel_start); + +} + void ssm_print_header_ar(FILE *stream) { fprintf(stream, "ar,ar_smoothed,eps,index\n"); diff --git a/src/C/core/ssm.h b/src/C/core/ssm.h index 998eed9..d2857ab 100644 --- a/src/C/core/ssm.h +++ b/src/C/core/ssm.h @@ -733,6 +733,7 @@ void ssm_print_pred_res(FILE *stream, ssm_X_t **J_X, ssm_par_t *par, ssm_nav_t * void ssm_print_header_hat(FILE *stream, ssm_nav_t *nav); void ssm_print_hat(FILE *stream, ssm_hat_t *hat, ssm_nav_t *nav, ssm_row_t *row); void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index); +int ssm_sample_traj_print2(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index); void ssm_print_header_ar(FILE *stream); void ssm_print_ar(FILE *stream, ssm_adapt_t *adapt, const int index); @@ -751,6 +752,7 @@ void ssm_theta_ran(ssm_theta_t *proposed, ssm_theta_t *theta, ssm_var_t *var, do int ssm_theta_copy(ssm_theta_t *dest, ssm_theta_t *src); int ssm_par_copy(ssm_par_t *dest, ssm_par_t *src); void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness); +void ssm_sample_traj2(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int j_select); /* simplex.c */ double ssm_simplex(ssm_theta_t *theta, ssm_var_t *var, void *params, double (*f_simplex)(const gsl_vector *x, void *params), ssm_nav_t *nav, ssm_options_t *opts); diff --git a/src/C/pmcmc/main_pmcmc.c b/src/C/pmcmc/main_pmcmc.c index a5be75d..421498d 100644 --- a/src/C/pmcmc/main_pmcmc.c +++ b/src/C/pmcmc/main_pmcmc.c @@ -18,98 +18,98 @@ #include "ssm.h" -static ssm_err_code_t run_smc(ssm_err_code_t (*f_pred) (ssm_X_t *, double, double, ssm_par_t *, ssm_nav_t *, ssm_calc_t *), ssm_X_t ***D_J_X, ssm_X_t ***D_J_X_tmp, ssm_par_t *par, ssm_calc_t **calc, ssm_data_t *data, ssm_fitness_t *fitness, ssm_nav_t *nav, ssm_workers_t *workers) -{ - int i, j, n, np1, id, the_j; - double t0, t1; - - fitness->log_like = 0.0; - fitness->log_prior = 0.0; - fitness->n_all_fail = 0; - - for(j=0; jJ; j++){ - fitness->cum_status[j] = SSM_SUCCESS; - } - - for(n=0; nn_obs; n++) { - np1 = n+1; - t0 = (n) ? data->rows[n-1]->time: 0; - t1 = data->rows[n]->time; - - if(!workers->flag_tcp){ - for(j=0; jJ; j++){ - ssm_X_copy(D_J_X[np1][j], D_J_X[n][j]); - } - } + static ssm_err_code_t run_smc(ssm_err_code_t (*f_pred) (ssm_X_t *, double, double, ssm_par_t *, ssm_nav_t *, ssm_calc_t *), ssm_X_t ***D_J_X, ssm_X_t ***D_J_X_tmp, ssm_par_t *par, ssm_calc_t **calc, ssm_data_t *data, ssm_fitness_t *fitness, ssm_nav_t *nav, ssm_workers_t *workers) + { + int i, j, n, np1, id, the_j; + double t0, t1; + + fitness->log_like = 0.0; + fitness->log_prior = 0.0; + fitness->n_all_fail = 0; + + for(j=0; jJ; j++){ + fitness->cum_status[j] = SSM_SUCCESS; + } + + for(n=0; nn_obs; n++) { + np1 = n+1; + t0 = (n) ? data->rows[n-1]->time: 0; + t1 = data->rows[n]->time; + + if(!workers->flag_tcp){ + for(j=0; jJ; j++){ + ssm_X_copy(D_J_X[np1][j], D_J_X[n][j]); + } +} - if(workers->flag_tcp){ +if(workers->flag_tcp){ //send work - for (j=0;jJ;j++) { - zmq_send(workers->sender, &n, sizeof (int), ZMQ_SNDMORE); - ssm_zmq_send_par(workers->sender, par, ZMQ_SNDMORE); + for (j=0;jJ;j++) { + zmq_send(workers->sender, &n, sizeof (int), ZMQ_SNDMORE); + ssm_zmq_send_par(workers->sender, par, ZMQ_SNDMORE); - zmq_send(workers->sender, &j, sizeof (int), ZMQ_SNDMORE); - ssm_zmq_send_X(workers->sender, D_J_X[n][j], ZMQ_SNDMORE); - zmq_send(workers->sender, &(fitness->cum_status[j]), sizeof (ssm_err_code_t), 0); - } + zmq_send(workers->sender, &j, sizeof (int), ZMQ_SNDMORE); + ssm_zmq_send_X(workers->sender, D_J_X[n][j], ZMQ_SNDMORE); + zmq_send(workers->sender, &(fitness->cum_status[j]), sizeof (ssm_err_code_t), 0); +} //get results from the workers - for (j=0; jJ; j++) { - zmq_recv(workers->receiver, &the_j, sizeof (int), 0); - ssm_zmq_recv_X(D_J_X[ np1 ][ the_j ], workers->receiver); - zmq_recv(workers->receiver, &(fitness->weights[the_j]), sizeof (double), 0); - zmq_recv(workers->receiver, &(fitness->cum_status[the_j]), sizeof (ssm_err_code_t), 0); - } - - } else if(calc[0]->threads_length > 1){ +for (j=0; jJ; j++) { + zmq_recv(workers->receiver, &the_j, sizeof (int), 0); + ssm_zmq_recv_X(D_J_X[ np1 ][ the_j ], workers->receiver); + zmq_recv(workers->receiver, &(fitness->weights[the_j]), sizeof (double), 0); + zmq_recv(workers->receiver, &(fitness->cum_status[the_j]), sizeof (ssm_err_code_t), 0); +} + +} else if(calc[0]->threads_length > 1){ //send work - for (i=0; ithreads_length; i++) { - zmq_send(workers->sender, &i, sizeof (int), ZMQ_SNDMORE); - zmq_send(workers->sender, &n, sizeof (int), 0); - } + for (i=0; ithreads_length; i++) { + zmq_send(workers->sender, &i, sizeof (int), ZMQ_SNDMORE); + zmq_send(workers->sender, &n, sizeof (int), 0); + } //get results from the workers - for (i=0; ithreads_length; i++) { - zmq_recv(workers->receiver, &id, sizeof (int), 0); - } - } else { - - for(j=0;jJ;j++) { - ssm_X_reset_inc(D_J_X[np1][j], data->rows[n], nav); - fitness->cum_status[j] |= (*f_pred)(D_J_X[np1][j], t0, t1, par, nav, calc[0]); - if(data->rows[n]->ts_nonan_length) { - fitness->weights[j] = (fitness->cum_status[j] == SSM_SUCCESS) ? exp(ssm_log_likelihood(data->rows[n], D_J_X[np1][j], par, calc[0], nav, fitness)) : 0.0; - fitness->cum_status[j] = SSM_SUCCESS; - } - } - } - - if(data->rows[n]->ts_nonan_length) { - if(ssm_weight(fitness, data->rows[n], nav, n)) { - ssm_systematic_sampling(fitness, calc[0], n); - } - ssm_resample_X(fitness, &D_J_X[np1], &D_J_X_tmp[np1], n); - } - } - return ( (data->n_obs != 0) && (fitness->n_all_fail == data->n_obs) ) ? SSM_ERR_PRED: SSM_SUCCESS; + for (i=0; ithreads_length; i++) { + zmq_recv(workers->receiver, &id, sizeof (int), 0); + } +} else { + + for(j=0;jJ;j++) { + ssm_X_reset_inc(D_J_X[np1][j], data->rows[n], nav); + fitness->cum_status[j] |= (*f_pred)(D_J_X[np1][j], t0, t1, par, nav, calc[0]); + if(data->rows[n]->ts_nonan_length) { + fitness->weights[j] = (fitness->cum_status[j] == SSM_SUCCESS) ? exp(ssm_log_likelihood(data->rows[n], D_J_X[np1][j], par, calc[0], nav, fitness)) : 0.0; + fitness->cum_status[j] = SSM_SUCCESS; + } +} +} + +if(data->rows[n]->ts_nonan_length) { + if(ssm_weight(fitness, data->rows[n], nav, n)) { + ssm_systematic_sampling(fitness, calc[0], n); + } + ssm_resample_X(fitness, &D_J_X[np1], &D_J_X_tmp[np1], n); +} +} +return ( (data->n_obs != 0) && (fitness->n_all_fail == data->n_obs) ) ? SSM_ERR_PRED: SSM_SUCCESS; } int main(int argc, char *argv[]) { - char str[SSM_STR_BUFFSIZE]; + char str[SSM_STR_BUFFSIZE]; - ssm_options_t *opts = ssm_options_new(); - ssm_options_load(opts, SSM_PMCMC, argc, argv); + ssm_options_t *opts = ssm_options_new(); + ssm_options_load(opts, SSM_PMCMC, argc, argv); - json_t *jparameters = ssm_load_json_stream(stdin); - json_t *jdata = ssm_load_data(opts); + json_t *jparameters = ssm_load_json_stream(stdin); + json_t *jdata = ssm_load_data(opts); - ssm_nav_t *nav = ssm_nav_new(jparameters, opts); - ssm_data_t *data = ssm_data_new(jdata, nav, opts); - ssm_fitness_t *fitness = ssm_fitness_new(data, opts); - ssm_calc_t **calc = ssm_N_calc_new(jdata, nav, data, fitness, opts); - ssm_X_t ***D_J_X = ssm_D_J_X_new(data, fitness, nav, opts); - ssm_X_t ***D_J_X_tmp = ssm_D_J_X_new(data, fitness, nav, opts); + ssm_nav_t *nav = ssm_nav_new(jparameters, opts); + ssm_data_t *data = ssm_data_new(jdata, nav, opts); + ssm_fitness_t *fitness = ssm_fitness_new(data, opts); + ssm_calc_t **calc = ssm_N_calc_new(jdata, nav, data, fitness, opts); + ssm_X_t ***D_J_X = ssm_D_J_X_new(data, fitness, nav, opts); + ssm_X_t ***D_J_X_tmp = ssm_D_J_X_new(data, fitness, nav, opts); ssm_X_t **D_X = ssm_D_X_new(data, nav, opts); //to store sampled trajectories ssm_X_t **D_X_prev = ssm_D_X_new(data, nav, opts); @@ -133,23 +133,63 @@ int main(int argc, char *argv[]) ssm_workers_t *workers = ssm_workers_start(D_J_X, &par_proposed, data, calc, fitness, f_pred, nav, opts, SSM_WORKER_D_X | SSM_WORKER_FITNESS); + +// test + // snprintf(str, SSM_STR_BUFFSIZE, "%s/X_sampled_%d.csv", opts->root, opts->id); + // FILE *sampled_traj_file = fopen(str, "w"); + // if (sampled_traj_file == NULL) + // { + // printf("Error opening file!\n"); + // exit(1); + // } + // ssm_print_header_X(sampled_traj_file, nav); + + // snprintf(str, SSM_STR_BUFFSIZE, "%s/X_particles_%d.csv", opts->root, opts->id); + // FILE *part_traj_file = fopen(str, "w"); + // if (part_traj_file == NULL) + // { + // printf("Error opening file!\n"); + // exit(1); + // } + // ssm_print_header_X(part_traj_file, nav); + + // snprintf(str, SSM_STR_BUFFSIZE, "%s/select_%d.csv", opts->root, opts->id); + // FILE *select_file = fopen(str, "w"); + // if (select_file == NULL) + // { + // printf("Error opening file!\n"); + // exit(1); + // } + // fprintf(select_file, "data\tparticle\tancestor\n"); + + // snprintf(str, SSM_STR_BUFFSIZE, "%s/part_sampled_%d.csv", opts->root, opts->id); + // FILE *sampled_part_file = fopen(str, "w"); + // if (sampled_part_file == NULL) + // { + // printf("Error opening file!\n"); + // exit(1); + // } + /*test*/ + +// test + ///////////////////////// // initialization step // ///////////////////////// - int j, n; + int j, n, j_select; int m = 0; ssm_par2X(D_J_X[0][0], par, calc[0], nav); for(j=1; jJ; j++){ - ssm_X_copy(D_J_X[0][j], D_J_X[0][0]); + ssm_X_copy(D_J_X[0][j], D_J_X[0][0]); } ssm_err_code_t success = run_smc(f_pred, D_J_X, D_J_X_tmp, par_proposed, calc, data, fitness, nav, workers); success |= ssm_log_prob_prior(&fitness->log_prior, proposed, nav, fitness); if(success != SSM_SUCCESS){ - ssm_print_err("epic fail, initialization step failed"); - exit(EXIT_FAILURE); + ssm_print_err("epic fail, initialization step failed"); + exit(EXIT_FAILURE); } //the first run is accepted @@ -157,116 +197,140 @@ int main(int argc, char *argv[]) fitness->log_prior_prev = fitness->log_prior; if ( ( nav->print & SSM_PRINT_X ) && data->n_obs ) { + ssm_sample_traj(D_X, D_J_X, calc[0], data, fitness); - for(n=0; nn_obs; n++){ - ssm_X_copy(D_X_prev[n+1], D_X[n+1]); - ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m); - } + + // j_select = ssm_sample_traj_print2(sampled_traj_file, D_J_X, par, nav, calc[0], data, fitness, m); + // ssm_sample_traj2(D_X, D_J_X, calc[0], data, fitness, j_select); + + // fprintf(sampled_part_file, "%i\n", j_select); + // create a file, print all particle trajectories (D_J_X) + // for(j=0; jJ; j++){ + // for(n=0; nn_obs; n++){ + // ssm_print_X(part_traj_file, D_J_X[n+1][j], par, nav, calc[0], data->rows[n], j); + // fprintf(select_file, "%i\t%i\t%i\n", n, j, fitness->select[n][j]); + // } + // } + // create another file and print the resampled trajectory (D_X) + // actually this already printed in the X_0.csv + // try to understand where is the error in the resampling + + for(n=0; nn_obs; n++){ + ssm_X_copy(D_X_prev[n+1], D_X[n+1]); + ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m); + } + } if(nav->print & SSM_PRINT_TRACE){ - ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m); + ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m); } ssm_dic_init(fitness, fitness->log_like_prev, fitness->log_prior_prev); if (nav->print & SSM_PRINT_LOG) { - snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); - ssm_print_log(str); - } + snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); + ssm_print_log(str); + } //////////////// // iterations // //////////////// - double sd_fac; - double ratio; - for(m=1; mdt = D_J_X[0][0]->dt0; - for(j=1; jJ; j++){ - ssm_X_copy(D_J_X[0][j], D_J_X[0][0]); - } + double sd_fac; + double ratio; + for(m=1; mdt = D_J_X[0][0]->dt0; + for(j=1; jJ; j++){ + ssm_X_copy(D_J_X[0][j], D_J_X[0][0]); + } - success |= run_smc(f_pred, D_J_X, D_J_X_tmp, par_proposed, calc, data, fitness, nav, workers); - success |= ssm_metropolis_hastings(fitness, &ratio, proposed, theta, var, sd_fac, nav, calc[0], 1); - } + success |= run_smc(f_pred, D_J_X, D_J_X_tmp, par_proposed, calc, data, fitness, nav, workers); + success |= ssm_metropolis_hastings(fitness, &ratio, proposed, theta, var, sd_fac, nav, calc[0], 1); + } if(success == SSM_SUCCESS){ //everything went well and the proposed theta was accepted - fitness->log_like_prev = fitness->log_like; - fitness->log_prior_prev = fitness->log_prior; - ssm_theta_copy(theta, proposed); - ssm_par_copy(par, par_proposed); - - if ( (nav->print & SSM_PRINT_X) && data->n_obs ) { - ssm_sample_traj(D_X, D_J_X, calc[0], data, fitness); - for(n=0; nn_obs; n++){ - ssm_X_copy(D_X_prev[n+1], D_X[n+1]); - } + fitness->log_like_prev = fitness->log_like; + fitness->log_prior_prev = fitness->log_prior; + ssm_theta_copy(theta, proposed); + ssm_par_copy(par, par_proposed); + + if ( (nav->print & SSM_PRINT_X) && data->n_obs ) { + ssm_sample_traj(D_X, D_J_X, calc[0], data, fitness); + for(n=0; nn_obs; n++){ + ssm_X_copy(D_X_prev[n+1], D_X[n+1]); } + } } ssm_adapt_ar(adapt, (success == SSM_SUCCESS) ? 1: 0, m); //compute acceptance rate ssm_adapt_var(adapt, theta, m); //compute empirical variance if ( (nav->print & SSM_PRINT_X) && ( (m % thin_traj) == 0) ) { - for(n=0; nn_obs; n++){ - ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m); - } + for(n=0; nn_obs; n++){ + ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m); + } } if (nav->print & SSM_PRINT_TRACE){ - ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m); + ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m); } - ssm_dic_update(fitness, fitness->log_like_prev, fitness->log_prior_prev); + ssm_dic_update(fitness, fitness->log_like_prev, fitness->log_prior_prev); if (nav->print & SSM_PRINT_DIAG) { - ssm_print_ar(nav->diag, adapt, m); + ssm_print_ar(nav->diag, adapt, m); } - if (nav->print & SSM_PRINT_LOG) { - snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); - ssm_print_log(str); - } - } + if (nav->print & SSM_PRINT_LOG) { + snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); + ssm_print_log(str); + } + } - if (!(nav->print & SSM_PRINT_LOG)) { - ssm_dic_end(fitness, nav, m); - ssm_pipe_theta(stdout, jparameters, theta, var, fitness, nav, opts); - } + if (!(nav->print & SSM_PRINT_LOG)) { + ssm_dic_end(fitness, nav, m); + ssm_pipe_theta(stdout, jparameters, theta, var, fitness, nav, opts); + } - json_decref(jparameters); + // test + // fclose(sampled_traj_file); + // fclose(part_traj_file); + // fclose(select_file); + // fclose(sampled_part_file); +// test + json_decref(jparameters); - ssm_workers_stop(workers); + ssm_workers_stop(workers); - ssm_D_J_X_free(D_J_X, data, fitness); - ssm_D_J_X_free(D_J_X_tmp, data, fitness); - ssm_D_X_free(D_X, data); - ssm_D_X_free(D_X_prev, data); + ssm_D_J_X_free(D_J_X, data, fitness); + ssm_D_J_X_free(D_J_X_tmp, data, fitness); + ssm_D_X_free(D_X, data); + ssm_D_X_free(D_X_prev, data); - ssm_N_calc_free(calc, nav); + ssm_N_calc_free(calc, nav); - ssm_data_free(data); - ssm_nav_free(nav); + ssm_data_free(data); + ssm_nav_free(nav); - ssm_fitness_free(fitness); + ssm_fitness_free(fitness); - ssm_input_free(input); - ssm_par_free(par_proposed); - ssm_par_free(par); + ssm_input_free(input); + ssm_par_free(par_proposed); + ssm_par_free(par); - ssm_theta_free(theta); - ssm_theta_free(proposed); - ssm_var_free(var_input); - ssm_adapt_free(adapt); + ssm_theta_free(theta); + ssm_theta_free(proposed); + ssm_var_free(var_input); + ssm_adapt_free(adapt); - return 0; -} + return 0; + } From 4a94dd6ac09c9d91a854d899bfa443fb93d5050e Mon Sep 17 00:00:00 2001 From: Anton Date: Tue, 9 May 2017 22:51:28 +0200 Subject: [PATCH 26/26] comment unused variable --- src/C/pmcmc/main_pmcmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/C/pmcmc/main_pmcmc.c b/src/C/pmcmc/main_pmcmc.c index 421498d..d4941e8 100644 --- a/src/C/pmcmc/main_pmcmc.c +++ b/src/C/pmcmc/main_pmcmc.c @@ -176,7 +176,8 @@ int main(int argc, char *argv[]) ///////////////////////// // initialization step // ///////////////////////// - int j, n, j_select; + int j, n; + // int j_select; int m = 0; ssm_par2X(D_J_X[0][0], par, calc[0], nav);