Skip to content

Commit 90a0079

Browse files
committed
usrloc HA: Improve management for replicated "labels"
This patch aims to fix the occasional "differring rlabels" debugging error messages which may occur after a restart in usrloc HA scenarios, especially if the active SIP box is restarted while processing REGISTER requests in parallel. Since both record and contact labels are dynamic, per-instance and lost on restart, conflicts are to be expected. The idea behind the fix is simply to adapt the replicated contact_id to the local instance, thus preferring the local labels over the remote labels whenever a conflict is detected.
1 parent 91c523c commit 90a0079

File tree

5 files changed

+34
-43
lines changed

5 files changed

+34
-43
lines changed

modules/usrloc/dlist.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ udomain_t* get_next_udomain(udomain_t *_d);
110110

111111
/*contact label may not be higher than 14 bits*/
112112
#define CLABEL_MASK ((1<<14)-1)
113-
#define CLABEL_INC_AND_TEST(_clabel_) ((_clabel_+1)&CLABEL_MASK)
113+
#define CLABEL_NEXT(_clabel_) ((_clabel_+1)&CLABEL_MASK)
114114
#define CID_GET_CLABEL(_cid) (_cid&CLABEL_MASK)
115115
#define CID_NEXT_RLABEL(_dom, _sl) (_dom->table[_sl].next_label++)
116116

@@ -123,7 +123,7 @@ static inline void init_urecord_labels(urecord_t *r, udomain_t *d)
123123
static inline uint64_t
124124
pack_indexes(unsigned short aorhash, unsigned int rlabel, unsigned short clabel)
125125
{
126-
return (clabel & CLABEL_MASK) +
126+
return ((uint64_t)clabel & CLABEL_MASK) +
127127
((uint64_t)rlabel << 14) + ((uint64_t)aorhash << 46);
128128
}
129129

modules/usrloc/ucontact.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,10 @@ struct ct_match {
101101
*/
102102
typedef struct ucontact {
103103
uint64_t contact_id; /*!< 64 bit Contact identifier
104-
0-------0-------------0---------------0
105-
|0 - 13 | 14 - 45 | 46 - 61 |
106-
|aorhash| record label| contact label |
107-
0-------0-------------0---------------0
104+
0---------------0--------------0---------------0
105+
| 0 - 13 | 14 - 45 | 46 - 63 |
106+
| contact label | record label | aorhash |
107+
0---------------0--------------0---------------0
108108
*/
109109
str* domain; /*!< Pointer to domain name (NULL terminated) */
110110
str* aor; /*!< Pointer to the AOR string in record structure*/
@@ -127,7 +127,7 @@ typedef struct ucontact {
127127
unsigned int methods; /*!< Supported methods */
128128
str attr; /*!< Additional registration info */
129129
struct proxy_l next_hop;/*!< SIP-wise determined next hop */
130-
unsigned int label; /*!< label to find the contact in contact list>*/
130+
unsigned short label; /*!< label to find the contact in contact list>*/
131131
int sipping_latency; /*!< useconds; not restart-persistent >*/
132132
str shtag; /*!< helps determine the logical owner node */
133133
str cdb_key; /*!< the key of the contact in cache_db; makes
@@ -144,10 +144,10 @@ typedef struct ucontact {
144144

145145
typedef struct ucontact_info {
146146
uint64_t contact_id; /*!< 64 bit Contact identifier
147-
0-------0-------------0---------------0
148-
|0 - 15 | 16 - 47 | 48 - 63 |
149-
|aorhash| record label| contact label |
150-
0-------0-------------0---------------0
147+
0---------------0--------------0---------------0
148+
| 0 - 13 | 14 - 45 | 46 - 63 |
149+
| contact label | record label | aorhash |
150+
0---------------0--------------0---------------0
151151
*/
152152
str* c;
153153
str received;

modules/usrloc/udomain.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ int preload_udomain(db_con_t* _c, udomain_t* _d)
618618
_d->table[sl].next_label = rlabel + 1;
619619

620620
if (r->next_clabel <= clabel || r->next_clabel == 0)
621-
r->next_clabel = CLABEL_INC_AND_TEST(clabel);
621+
r->next_clabel = CLABEL_NEXT(clabel);
622622

623623
r->label = rlabel;
624624
}
@@ -658,7 +658,7 @@ int preload_udomain(db_con_t* _c, udomain_t* _d)
658658
if (cid_regen && old_expires) {
659659
/* rebuild the contact id for this contact */
660660
ci->contact_id = pack_indexes(r->aorhash, r->label, r->next_clabel);
661-
r->next_clabel = CLABEL_INC_AND_TEST(r->next_clabel);
661+
r->next_clabel = CLABEL_NEXT(r->next_clabel);
662662

663663
ci->expires = old_expires;
664664

modules/usrloc/ul_cluster.c

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ static int receive_ucontact_insert(bin_packet_t *packet)
506506
user_agent, path, attr, st, sock, kv_str, cflags_str;
507507
udomain_t *domain;
508508
urecord_t *record;
509-
ucontact_t *contact, *ct;
509+
ucontact_t *contact;
510510
int rc, sl;
511511
unsigned short _, clabel;
512512
unsigned int rlabel;
@@ -604,34 +604,10 @@ static int receive_ucontact_insert(bin_packet_t *packet)
604604

605605
record->label = rlabel;
606606
sl = record->aorhash & (domain->size - 1);
607-
if (domain->table[sl].next_label <= rlabel)
607+
if (rlabel >= domain->table[sl].next_label)
608608
domain->table[sl].next_label = rlabel + 1;
609609
}
610610

611-
if (record->label != rlabel) {
612-
int has_good_cts = 0;
613-
614-
for (ct = record->contacts; ct; ct = ct->next)
615-
if (ct->expires != UL_EXPIRED_TIME) {
616-
has_good_cts = 1;
617-
break;
618-
}
619-
620-
if (has_good_cts) {
621-
LM_BUG("differring rlabels (%u vs. %u, ci: '%.*s')",
622-
record->label, rlabel, callid.len, callid.s);
623-
} else {
624-
/* no contacts -> it's safe to inherit the active node's rlabel */
625-
record->label = rlabel;
626-
sl = record->aorhash & (domain->size - 1);
627-
if (domain->table[sl].next_label <= rlabel)
628-
domain->table[sl].next_label = rlabel + 1;
629-
}
630-
}
631-
632-
if (record->next_clabel <= clabel)
633-
record->next_clabel = CLABEL_INC_AND_TEST(clabel);
634-
635611
rc = get_ucontact(record, &contact_str, &callid, ci.cseq, &cmatch,
636612
&contact);
637613

@@ -641,15 +617,30 @@ static int receive_ucontact_insert(bin_packet_t *packet)
641617
case -1:
642618
/* received data is older than what we have */
643619
break;
620+
644621
case 0:
622+
ci.contact_id = pack_indexes((unsigned short)record->aorhash,
623+
record->label, (unsigned short)contact->label);
624+
645625
/* received data is newer than what we have */
646626
if (update_ucontact(record, contact, &ci, NULL, 1) != 0) {
647627
LM_ERR("failed to update ucontact (ci: '%.*s')\n", callid.len, callid.s);
648628
unlock_udomain(domain, &aor);
649629
goto error;
650630
}
651631
break;
632+
652633
case 1:
634+
if (clabel >= record->next_clabel) {
635+
record->next_clabel = CLABEL_NEXT(clabel);
636+
} else {
637+
clabel = record->next_clabel;
638+
record->next_clabel = CLABEL_NEXT(record->next_clabel);
639+
}
640+
641+
ci.contact_id = pack_indexes((unsigned short)record->aorhash,
642+
record->label, (unsigned short)clabel);
643+
653644
if (insert_ucontact(record, &contact_str, &ci, NULL, 1, &contact) != 0) {
654645
LM_ERR("failed to insert ucontact (ci: '%.*s')\n", callid.len, callid.s);
655646
unlock_udomain(domain, &aor);
@@ -787,7 +778,7 @@ static int receive_ucontact_update(bin_packet_t *packet)
787778
}
788779

789780
if (record->next_clabel <= clabel)
790-
record->next_clabel = CLABEL_INC_AND_TEST(clabel);
781+
record->next_clabel = CLABEL_NEXT(clabel);
791782
} else {
792783
rc = get_ucontact(record, &contact_str, &callid, ci.cseq + 1, &cmatch,
793784
&contact);
@@ -803,7 +794,7 @@ static int receive_ucontact_update(bin_packet_t *packet)
803794
}
804795

805796
if (record->next_clabel <= clabel)
806-
record->next_clabel = CLABEL_INC_AND_TEST(clabel);
797+
record->next_clabel = CLABEL_NEXT(clabel);
807798

808799
} else if (rc == 0) {
809800
if (update_ucontact(record, contact, &ci, NULL, 1) != 0) {

modules/usrloc/urecord.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,7 @@ int insert_ucontact(urecord_t* _r, str* _contact, ucontact_info_t* _ci,
856856
pack_indexes((unsigned short)_r->aorhash,
857857
_r->label,
858858
((unsigned short)_r->next_clabel));
859-
_r->next_clabel = CLABEL_INC_AND_TEST(_r->next_clabel);
859+
_r->next_clabel = CLABEL_NEXT(_r->next_clabel);
860860
}
861861

862862
if (cluster_mode == CM_FULL_SHARING_CACHEDB && !_ci->cdb_key.s) {
@@ -1104,7 +1104,7 @@ uint64_t next_contact_id(urecord_t* _r)
11041104
pack_indexes((unsigned short)_r->aorhash,
11051105
_r->label,
11061106
((unsigned short)_r->next_clabel));
1107-
_r->next_clabel = CLABEL_INC_AND_TEST(_r->next_clabel);
1107+
_r->next_clabel = CLABEL_NEXT(_r->next_clabel);
11081108

11091109
return contact_id;
11101110
}

0 commit comments

Comments
 (0)