
Commit bd942c3

Merge pull request #7 from zhongwencool/v0.5.1
Check if the job's name is duplicated.
2 parents e5a9b86 + 9f807b5 commit bd942c3

File tree: 14 files changed, +163 −90 lines changed

README.md

Lines changed: 46 additions & 45 deletions

````diff
@@ -5,24 +5,27 @@
 
 A lightweight/efficient cron-like job scheduling library for Erlang.
 
-Ecron does not poll the system on a minute-by-minute basis like cron does.
-All jobs is assigned to a single process, just run as same as the [timer](http://erlang.org/doc/man/timer.html).
+All Ecron's jobs is assigned to one single gen_server process, just run as same as the [stdlib's timer](http://erlang.org/doc/man/timer.html).
 
-It organize the tasks to be run in a ordered_set ets with the next time to run as key.
+It organize the jobs to be run in a ordered_set ets with the next time to run as key.
 This way, you only need one process that calculates the time to wait until the next task should be executed,
-then spawn the process to execute that task. Saves lots of processes.
+then spawn the process to execute that task.
 more detail see [Implementation](#Implementation).
 
-This implementation also prevents a lot of messages from flying around.
+Ecron does not poll the system on a second-by-second basis like cron does.
+The advantage of not doing this is to avoid lots of messages flying around.
+
+All jobs are managed in one process, rather than running one job per process,
+which saves lots of processes and avoids taking up a lot of memory.
+After all, most of the time the process is waiting(do nothing but eat memory).
 
 It offers:
 
 * Both cron-like scheduling and interval-based scheduling.
-* Well tested by `PropTest` ![Coverage Status](https://coveralls.io/repos/github/zhongwencool/ecron/badge.svg?branch=master).
-* Use gen_server timeout(`receive after`) at any given time (rather than reevaluating upcoming jobs every second/minute).
-* Minimal overhead. ecron aims to keep its code base small.
+* Well tested by [PropTest](https://github.com/proper-testing/proper) ![Coverage Status](https://coveralls.io/repos/github/zhongwencool/ecron/badge.svg?branch=master).
+* Using gen_server timeout(`receive after`) at any given time (rather than reevaluating upcoming jobs every second/minute).
 
-You can find a collection of general best practices in [Full Erlang Examples](https://github.com/zhongwencool/ecron/blob/master/examples/titan_erlang) and [Full Elixir Examples](https://github.com/zhongwencool/ecron/blob/master/examples/titan_elixir).
+You can find a collection of general practices in [Full Erlang Examples](https://github.com/zhongwencool/ecron/blob/master/examples/titan_erlang) and [Full Elixir Examples](https://github.com/zhongwencool/ecron/blob/master/examples/titan_elixir).
 
 ## Installation
 
````
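The hunk above restates the key design point: every job lives in one ordered_set ets table keyed by its next run time. A minimal sketch of why the table head is always the next job due — module and job names here are hypothetical illustrations, not ecron's actual code:

```erlang
%% Hypothetical sketch (not ecron's real module): jobs keyed by
%% {NextRunTime, Name} in an ordered_set table sort by due time first,
%% so ets:first/1 always yields the job that must run soonest.
-module(sched_sketch).
-export([demo/0]).

demo() ->
    Tab = ets:new(jobs, [ordered_set]),
    Now = erlang:system_time(second),
    %% Two fake jobs: job_b due in 5s, job_a due in 1s.
    true = ets:insert(Tab, {{Now + 5, job_b}, {io, format, ["b~n"]}}),
    true = ets:insert(Tab, {{Now + 1, job_a}, {io, format, ["a~n"]}}),
    %% The head of the table is the next job to run.
    {NextTime, Name} = ets:first(Tab),
    SleepMs = max(NextTime - Now, 0) * 1000,
    {Name, SleepMs}.
```

Here `demo/0` returns the soonest job and the milliseconds to sleep before it is due.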

````diff
@@ -66,24 +69,25 @@ You can find a collection of general best practices in [Full Erlang Examples](ht
 {no_singleton_job, "@minutely", {timer, sleep, [61000]}, unlimited, unlimited, [{singleton, false}]}
 ]},
 {global_jobs, []}, %% Global Spec has the same format as local_jobs.
-{cluster_quorum_size, 1} %% Minimum number of nodes which run ecron. Global_jobs only run on majority cluster when it > ClusterNode/2.
+{global_quorum_size, 1} %% Minimum number of nodes which run ecron. Global_jobs only run on majority cluster when it > ClusterNode/2.
 }
 ].
 ```
 
-* When `time_zone` is `local`, current datetime is [calendar:local_time()](http://erlang.org/doc/man/calendar.html#local_time-0).
-* When `time_zone` is `utc`, current datetime is [calendar:universal_time()](http://erlang.org/doc/man/calendar.html#universal_time-0).
-* The job will be auto remove at the end of the time.
+* Default `time_zone` is `local`, the current datetime is [calendar:local_time()](http://erlang.org/doc/man/calendar.html#local_time-0).
+* The current datetime is [calendar:universal_time()](http://erlang.org/doc/man/calendar.html#universal_time-0) when `{time_zone, utc}`.
+* The job will be auto remove at `EndDateTime`, the default value of `EndDateTime` is `unlimited`.
 * Default job is singleton, Each task cannot be executed concurrently.
 * If the system clock suddenly alter a lot(such as sleep your laptop for two hours or modify system time manually),
 it will skip the tasks which are supposed be running during the sudden lapse of time,
 then recalculate the next running time by the latest system time.
-You can also reload task manually by `ecron:reload().`
-* Global jobs depend on [global](http://erlang.org/doc/man/global.html), only allowed to be added statically, [check for more detail](https://github.com/zhongwencool/ecron/blob/master/doc/global.md).
+You can also reload task manually by `ecron:reload().` when the system time is manually modified.
+* Global jobs depend on [global](http://erlang.org/doc/man/global.html), only allowed to be added statically, [check this for more detail](https://github.com/zhongwencool/ecron/blob/master/doc/global.md).
 
 ## Advanced Usage
 
 ```erlang
+%% Same as: Spec = "0 * 0-5,18 * * 0-5",
 Spec = #{second => [0],
         minute => '*',
         hour => [{0,5}, 18], %% same as [0,1,2,3,4,5,18]
````
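The map spec in this hunk uses `{Min, Max}` tuples as ranges: the commit's own comment says hour `[{0,5}, 18]` equals `[0,1,2,3,4,5,18]`. A hypothetical helper (not part of ecron's API) that performs that expansion:

```erlang
%% Illustrative only: flatten a crontab field, turning {Min,Max} range
%% tuples into enumerated values while keeping plain integers as-is.
-module(field_sketch).
-export([expand/1]).

expand(Fields) ->
    lists:append(
      [case F of
           {Min, Max}            -> lists:seq(Min, Max);
           N when is_integer(N)  -> [N]
       end || F <- Fields]).
```

So `field_sketch:expand([{0,5}, 18])` yields the enumerated list the README comment shows.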
````diff
@@ -109,7 +113,7 @@ EveryMFA = {io, format, ["Runs every 120 second.~n"]},
 ```
 ## Debug Support
 
-There are some function to get information for a Job and to handle the Job and Invocations.
+There are some function to get information for debugging jobs.
 ````erlang
 1> ecron:deactivate(CrontabName).
 ok
````
````diff
@@ -126,8 +130,8 @@
   start_time => unlimited,end_time => unlimited,
   failed => 0,mfa => {io,format,["ddd"]},
   name => test,status => activate,type => cron,
-  ok => 0,results => [],run_microsecond => [],
-  opts => [{singleton,true}],
+  ok => 1,results => [ok],run_microsecond => [12],
+  opts => [{singleton,true}], node => 'test@127.0.0.1',
   next =>
   ["2019-09-27T01:00:00+08:00","2019-09-27T13:00:00+08:00",
   "2019-09-30T01:00:00+08:00","2019-09-30T13:00:00+08:00",
````
`````diff
@@ -153,6 +157,26 @@
   type => cron}
 }
 ````
+## Implementation
+The local_jobs workflow is as follows:
+1. `ecron_sup` (supervisor) would Start a standalone gen_server `ecron`, when application starts.
+2. Look for configuration `{jobs, Jobs}` when `ecron` process initialization.
+3. For each crontab job found, determine the next time in the future that each command must run.
+4. Place those commands on the ordered_set ets with their `{Corresponding_time, Name}` to run as key.
+5. Enter main loop:
+   * Examine the task entry at the head of the ets, compute how far in the future it must run.
+   * Sleep for that period of time by gen_server timeout feature.
+   * On awakening and after verifying the correct time, execute the task at the head of the ets (spawn in background).
+   * Delete old key in ets.
+   * Determine the next time in the future to run this command and place it back on the ets at that time value.
+
+Additionally, `ecron` also collect job's latest 16 results and execute times, you can observe by `ecron:statistic(Name)`.
+
+[Check this for global_jobs workflow](https://github.com/zhongwencool/ecron/blob/master/doc/global.md#Implementation).
+
+## Telemetry
+Ecron publish events through telemetry, you can handle those events by [this guide](https://github.com/zhongwencool/ecron/blob/master/doc/telemetry.md),
+such as you can monitor events dispatch duration and failures to create alerts which notify somebody.
 
 ## CRON Expression Format
 
`````
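The "sleep until the head job is due" step of the main loop described in the README's new Implementation section can be sketched as a timeout computation; `loop_sketch` and its names are illustrative assumptions, not ecron's real module:

```erlang
%% Hypothetical sketch of the gen_server-timeout idea: compute how many
%% milliseconds the single scheduler process should sleep before the job
%% at the head of the ordered_set table is due.
-module(loop_sketch).
-export([next_timeout/2]).

next_timeout(Tab, NowMs) ->
    case ets:first(Tab) of
        '$end_of_table' -> infinity;              %% no jobs: wait forever
        {DueMs, _Name}  -> max(DueMs - NowMs, 0)  %% clamp at zero if overdue
    end.
```

A gen_server would return this value as its timeout, wake on `timeout`, run the head job, and re-insert it under its next due time.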
````diff
@@ -223,10 +247,11 @@ Entry | Description | Equivalent
 >You might find something like [https://crontab.guru/](https://crontab.guru/) or [https://cronjob.xyz/](https://cronjob.xyz/) helpful.
 >But, note that these don't necessarily accept the exact same syntax as this library,
 >for instance, it doesn't accept the seconds field, so keep that in mind.
+>The best way to verify the spec format is `ecron:parse_spec("0 0 1 1 1-6 1", 10).`.
 
 ## Intervals
 
-You may also schedule a job to execute at fixed intervals, starting at the time it's added or cron is run.
+You may also execute job at fixed intervals, starting at the time it's added or cron is run.
 This is supported by formatting the cron spec like this:
 ```shell
 @every <duration>
````
````diff
@@ -235,34 +260,10 @@ For example, "@every 1h30m10s" would indicate a schedule that activates after 1
 
 >Note: The interval doesn't take the job runtime into account.
 >For example, if a job takes 3 minutes to run, and it is scheduled to run every 5 minutes,
->it will have 5 minutes of idle time between each run.
+>it also has 5 minutes of idle time between each run.
 
-## Implementation
-
-1. On application start-up, start a standalone gen_server `ecron` under supervision tree(`ecron_sup`).
-2. Look for configuration `{jobs, Jobs}` when ecron process initialization.
-3. For each crontab job found, determine the next time in the future that each command must run.
-4. Place those commands on the ordered_set ets with their `{Corresponding_time, Name}` to run as key.
-5. Enter main loop:
-   * Examine the task entry at the head of the ets, compute how far in the future it must run.
-   * Sleep for that period of time by gen_server timeout feature.
-   * On awakening and after verifying the correct time, execute the task at the head of the ets (spawn in background).
-   * Delete old key in ets.
-   * Determine the next time in the future to run this command and place it back on the ets at that time value.
-
-Additionally, this ecron also collect the MFA latest 16 results and execute times, you can observe by `ecron:statistic(Name)`.
-
-## Telemetry
-Ecron publish events through telemetry, you can handle those events by [this guide](https://github.com/zhongwencool/ecron/blob/master/doc/telemetry.md),
-such as you can monitor events dispatch duration and failures to create alerts which notify somebody.
-
-## Proper Test
-
+## Test
 
 ```shell
 $ rebar3 do proper -c, ct -c, cover -v
 ```
-
-## TODO
-
-* support the last day of a month.
````

changelog.md

Lines changed: 6 additions & 2 deletions

````diff
@@ -1,6 +1,10 @@
+### 0.5.1
+- Replace `cluster_quorum_size` by `global_quorum_size`.
+- Check if job's name is duplicate.
+
 ### 0.5.0
-- support global_jobs by `global`.
-- add global/up/down telemetry metrics.
+- Support global_jobs by `global`.
+- Add global/up/down telemetry metrics.
 
 ### 0.4.0
````

doc/global.md

Lines changed: 64 additions & 24 deletions

````diff
@@ -1,33 +1,73 @@
 ### Precondition
-Because it depends on `global`'s name registration service.
-The name server also maintains a fully connected network.
-For example, if node N1 connects to node N2 (which is already connected to N3),
-the global name servers on the nodes N1 and N3 ensure that also N1 and N3 are connected.
-In other words, command-line flag -connect_all false can not be used
+
+1. Fully Connected Cluster
+
+Because it depends on `global`'s name registration service.
+The name server must maintains a fully connected network.
+
+For example, if node N1 connects to node N2 (which is already connected to N3),
+the global name servers on the nodes N1 and N3 ensure that also N1 and N3 are connected.
+In other words, command-line flag `-connect_all false` can not be used.
+
+2. Same Global Configuration
+
+All node's `global_quorum_size` and `global_jobs` must keep the same value.
+This ensures that the global task manager can transfer between nodes when the network splits.
 
 ### Configuration
-`cluster_quorum_size` - A majority of the ecron must respond, default is 1.
 
-If you want to make sure always one global task manager run in cluster even at brain split.
+1. `global_jobs`
+
+the same format as `local_jobs`, default is `[]`.
+This means only run local jobs without running global task manager and monitor processes.
+
+2. `global_quorum_size`
+
+ecron application live on at least `global_quorum_size` nodes in the same cluster, can be regarded as a healthy cluster.
+
+Global task manager only running on a healthy cluster.
+
+If you want to guarantee always no more than **one** global task manager even when the cluster has network split,
+you should set it to **"half plus one"**. For example:
 
-You should set it to “half plus one”.
+Run on majority:
+1. `ABC` 3 nodes in one cluster.
+2. `global_quorum_size=2`.
+3. (`ABC`) cluster split into 2 part(`AB` =|= `C`).
+4. the global task manager would run on `AB` cluster(`AB` is the healthy cluster now).
+5. `C` node only running local jobs without global jobs.
 
-for example:
+Run on majority
+1. `ABC` 3 nodes in one cluster.
+2. `global_quorum_size=2`.
+3. (`ABC`) cluster split into 3 part(`A` =|= `B` =|= `C`).
+4. every node only running local jobs without global jobs(all nodes is unhealthy).
 
-### Run on majority
-1. Set up 3 nodes in on cluster.
-2. `cluster_quorum_size=2`.
-3. (`ABC`) nodes cluster split into 2 part(`AB` =/= `C`).
-4. the global task manager will run on `AB` cluster.
+Run on every node if brain split.
+1. `ABC` nodes in one cluster.
+2. `global_quorum_size=1`.
+3. (`ABC`) cluster split into 3 part(`A` =|= `B` =|= `C`).
+4. the global task manager would run on every nodes(we have three healthy cluster now).
+5. But the global task manager only running one in the same cluster.
 
-### Don't run
-1. Set up 3 nodes in on cluster.
-2. `cluster_quorum_size=2`.
-3. (`ABC`) nodes cluster split into 3 part(`A` =/= `B` =/= `C`).
-4. the global task manager doesn't run.
+### Implementation
+1. The top supervisor `ecron_sup` start at first.
+2. Nothing will happen if the `global_jobs` is empty.
+3. When `global_jobs` is not empty, `ecron_sup` would start_link `ecron_monitor` worker (gen_server).
+4. `ecron_monitor` subscribes node's up/down messages by [net_kernel:monitor_nodes(true)](http://erlang.org/doc/man/net_kernel.html#monitor_nodes-1), when it initializes.
+5. Checking if there is enough `ecron` process in the cluster(`global_quorum_size`).
+6. Trying to terminate global job manager process when cluster's `ecron` number less than `global_quorum_size`.
+7. Otherwise, trying to start a global job manager process, This gen_server register by [global:register_name/2](http://erlang.org/doc/man/global.html#register_name-2).
+8. All the nodes are rushing to register this global jobs manager process, only one node will success, other node's `ecron_monitor` would link this process if the process already exists.
+9. The `ecron_monitor` will receive notification, when node down/up or the global job manager dies.
+10. Enter step 5 again, When notified.
 
-### Run on every node if brain split.
-1. Set up 3 nodes in on cluster.
-1. `cluster_quorum_size=1`.
-2. (`ABC`) nodes cluster split into 3 part(`A` =/= `B` =/= `C`).
-3. the global task manager will run on every nodes.
+```
+NodeA         NodeB          NodeC
+ sup           sup            sup
+  |             | \            |
+monitor         |  monitor   monitor
+  |             |     |        |
+  |             |    link      |
+  |____link____GlobalJob__|____link____|
+```
````
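The healthy-cluster rule this diff documents ("half plus one" of the cluster guarantees at most one partition can host the global job manager) can be illustrated with a small helper; the module and function names are hypothetical, not ecron's API:

```erlang
%% Illustrative quorum arithmetic for the global_quorum_size rule.
-module(quorum_sketch).
-export([majority/1, healthy/2]).

%% "Half plus one" of the full cluster size: at most one partition can
%% ever reach this many nodes after a split.
majority(ClusterSize) -> ClusterSize div 2 + 1.

%% A partition may host the global job manager only if it still sees at
%% least QuorumSize nodes running ecron.
healthy(EcronNodes, QuorumSize) -> length(EcronNodes) >= QuorumSize.
```

With 3 nodes and `global_quorum_size = majority(3) = 2`, the `AB` partition is healthy while the lone `C` node is not, matching the first scenario above.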

doc/telemetry.md

Lines changed: 8 additions & 2 deletions

````diff
@@ -11,7 +11,9 @@ you can use your own event handler. For example, you can create a module to hand
 ```erlang
 -module(my_ecron_telemetry_logger).
 -include_lib("kernel/include/logger.hrl").
--define(Events, [[ecron, success], [ecron, failure], [ecron, activate], [ecron, deactivate], [ecron, delete]]).
+-define(Events, [[ecron, success], [ecron, failure], [ecron, activate],
+                 [ecron, deactivate], [ecron, delete],
+                 [ecron, global, up], [ecron, global, down]]).
 %% API
 -export([attach/0, detach/0]).
 -define(TELEMETRY_HANDLE, ecron_telemetry_metrics).
@@ -35,7 +37,11 @@ handle_event([ecron, success], #{run_microsecond := Ms, run_result := Res},
 handle_event([ecron, failure], #{run_microsecond := Ms, run_result := {Error, Reason, Stack}},
              #{name := Name, mfa := MFA}, _Config) ->
     ?LOG_ERROR("EcronJob(~p)-~p CRASH in ~p microsecond. {Error, Reason}: {~p, ~p}. Stack:~p",
-               [Name, MFA, Ms, Error, Reason, Stack]).
+               [Name, MFA, Ms, Error, Reason, Stack]);
+handle_event([ecron, global, up], #{action_ms := Time, reason := Reason}, #{node := Node}, _Config) ->
+    ?LOG_INFO("Ecron Global UP on ~p at -~p ms because of ~p.", [Node, Time, Reason]);
+handle_event([ecron, global, down], #{action_ms := Time, reason := Reason}, #{node := Node}, _Config) ->
+    ?LOG_INFO("Ecron Global DOWN on ~p at -~p ms because of ~p.", [Node, Time, Reason]).
 ```
 
 Once you have a module like this, you can attach it when your application starts:
````

examples/titan_elixir/config/config.exs

Lines changed: 1 addition & 1 deletion

````diff
@@ -59,7 +59,7 @@ config :ecron, :local_jobs,
   {:no_singleton_job, "@minutely", {Process, :sleep, [61000]}, :unlimited, :unlimited, singleton: false}
 ]
 
-config :ecron, :cluster_quorum_size, 1
+config :ecron, :global_quorum_size, 1
 config :ecron, :global_jobs,
 [
   {:global_crontab_job, "*/15 * * * * *", {StatelessCron, :inspect, ["Runs on 0, 15, 30, 45 seconds"]}},
````

examples/titan_elixir/mix.exs

Lines changed: 1 addition & 1 deletion

````diff
@@ -16,6 +16,6 @@ defmodule TitanElixir.MixProject do
   #
   # Run "mix help deps" for examples and options.
   defp deps do
-    [{:ecron, ">= 0.5.0"}]
+    [{:ecron, ">= 0.5.1"}]
   end
 end
````

examples/titan_elixir/mix.lock

Lines changed: 1 addition & 1 deletion

````diff
@@ -1,4 +1,4 @@
 %{
-  "ecron": {:hex, :ecron, "0.5.0", "9d457d548c0f7b09acd558270f5ad02bb10a5db89e3d9a109e17fa13620042e8", [:rebar3], [{:telemetry, "~>0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
+  "ecron": {:hex, :ecron, "0.5.1", "08a1e05486da327f9277e8c512ad0f7f7a5cc231b52f1c870cee90de319dbb9b", [:rebar3], [{:telemetry, "~>0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
   "telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"},
 }
````

examples/titan_erlang/config/sys.config

Lines changed: 1 addition & 1 deletion

````diff
@@ -2,7 +2,7 @@
 {titan, []},
 {ecron, [
    {time_zone, local}, %% local or utc
-   {cluster_quorum_size, 1},
+   {global_quorum_size, 1},
    {global_jobs, [
      {global_crontab_job, "*/15 * * * * *", {stateless_cron, inspect, ["Runs on 0, 15, 30, 45 seconds"]}}
    ]},
````

examples/titan_erlang/rebar.lock

Lines changed: 2 additions & 2 deletions

````diff
@@ -1,8 +1,8 @@
 {"1.1.0",
-[{<<"ecron">>,{pkg,<<"ecron">>,<<"0.5.0">>},0},
+[{<<"ecron">>,{pkg,<<"ecron">>,<<"0.5.1">>},0},
  {<<"telemetry">>,{pkg,<<"telemetry">>,<<"0.4.0">>},1}]}.
 [
 {pkg_hash,[
- {<<"ecron">>, <<"9D457D548C0F7B09ACD558270F5AD02BB10A5DB89E3D9A109E17FA13620042E8">>},
+ {<<"ecron">>, <<"08A1E05486DA327F9277E8C512AD0F7F7A5CC231B52F1C870CEE90DE319DBB9B">>},
  {<<"telemetry">>, <<"8339BEE3FA8B91CB84D14C2935F8ECF399CCD87301AD6DA6B71C09553834B2AB">>}]}
 ].
````

src/ecron.app.src

Lines changed: 2 additions & 2 deletions

````diff
@@ -1,13 +1,13 @@
 {application, ecron,
  [{description, "cron-like/crontab job scheduling library"},
-  {vsn, "0.5.0"},
+  {vsn, "0.5.1"},
   {registered, [ecron_sup, ecron]},
   {mod, {ecron_app, []}},
   {applications, [kernel, stdlib, telemetry]},
   {env, [
     {adjusting_time_second, 604800}, %7*24*3600
     {time_zone, local}, %% local or utc
-    {cluster_quorum_size, 1}, %% A majority of the nodes must connect.
+    {global_quorum_size, 1}, %% A majority of the nodes must connect.
     {local_jobs, [
       %% {JobName, CrontabSpec, {M, F, A}}
       %% {JobName, CrontabSpec, {M, F, A}, StartDateTime, EndDateTime}
````
