admin管理员组文章数量:1666858
HandleFtsWalRepPromote函数位于 src/backend/fts/ftsmessagehandler.c,用于在postgres后端处理FTS进程发送的Promote请求。后续内容欢迎关注公众号或者充值CSDN VIP阅读。
static void HandleFtsWalRepPromote(void) {
FtsResponse response = {
false, /* IsMirrorUp */
false, /* IsInSync */
false, /* IsSyncRepEnabled */
am_mirror, /* IsRoleMirror */
false, /* RequestRetry */
};
ereport(LOG, (errmsg("promoting mirror to primary due to FTS request")));
FTS 向mirror发送提升消息。mirror可能正在进行提升。 因此,应该以幂等的方式处理提升消息。数据库的状态只有在DB_IN_ARCHIVE_RECOVERY状态下,才能进行promote。在promote时,重置 sync_standby_names。 这是为了避免提交(commits)挂起/等待复制(hanging/waiting for replication)直到下一次 FTS 探测。 下一个 FTS 探测将检测到该节点不同步并重置相同的节点,这可能需要一分钟。 由于我们知道在mirror promotion中它在gp_segment_configuration 中被标记为不同步,因此最好立即清理 sync_standby_names。
/* FTS sends promote message to a mirror. The mirror may be undergoing
* promotion. Promote messages should therefore be handled in an
* idempotent way. */
DBState state = GetCurrentDBState(); // 取出ControlFile->state,也就是数据库的状态
if (state == DB_IN_ARCHIVE_RECOVERY) {
/* Reset sync_standby_names on promotion. This is to avoid commits
* hanging/waiting for replication till next FTS probe. Next FTS probe
* will detect this node to be not in sync and reset the same which
* can take a min. Since we know on mirror promotion its marked as not
* in sync in gp_segment_configuration, best to right away clean the
* sync_standby_names. */
UnsetSyncStandbysDefined();
CreateReplicationSlotOnPromote(INTERNAL_WAL_REPLICATION_SLOT_NAME);
SignalPromote();
} else {
elog(LOG, "ignoring promote request, walreceiver not running, DBState = %d", state);
}
SendFtsResponse(&response, FTS_MSG_PROMOTE);
}
/*
 * GetCurrentDBState
 *
 * Return the database state currently stored in ControlFile->state.
 * ControlFile must be non-NULL; this is verified by assertion only.
 */
DBState GetCurrentDBState(void) {
    DBState current_state;

    Assert(ControlFile);
    current_state = ControlFile->state;
    return current_state;
}
CreateReplicationSlotOnPromote("internal_wal_replication_slot")函数先在ReplicationSlotCtl->replication_slots数组中查找名为"internal_wal_replication_slot"的复制槽;如果没有找到(mirror上本来就不应该存在该复制槽),则调用ReplicationSlotCreate(name, false, RS_PERSISTENT)创建。只有在收到提升(promote)信号时,才会在mirror上创建复制槽;节点作为mirror运行期间,其上不应存在复制槽。因此,restart_lsn不为零说明它是由之前某次提升尝试设置的,无需覆盖。restart_lsn为零时所做的工作,留待后续分析复制槽时再讲解(TODO)。
/*
 * CreateReplicationSlotOnPromote
 *
 * Make sure a replication slot called 'name' exists and, if it has no
 * restart_lsn yet, reserve WAL for it and write it to disk.
 *
 * Works through the global MyReplicationSlot, which is reset to NULL before
 * returning.
 * NOTE(review): the existence test below only works if MyReplicationSlot is
 * NULL on entry -- confirm against callers.
 */
static void CreateReplicationSlotOnPromote(const char *name) {
    int slotno = 0;

    /*
     * Search the slot array for an in-use slot with the requested name.
     * ReplicationSlotControlLock is held in shared mode so that nobody else
     * can change the in_use flags while we're looking at them.  The scan
     * always runs over the whole array; it does not stop at the first match.
     */
    LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
    while (slotno < max_replication_slots) {
        ReplicationSlot *candidate = &ReplicationSlotCtl->replication_slots[slotno];

        if (candidate->in_use && strcmp(name, NameStr(candidate->data.name)) == 0) {
            MyReplicationSlot = candidate;
        }
        slotno++;
    }
    LWLockRelease(ReplicationSlotControlLock);

    if (MyReplicationSlot != NULL) {
        ereport(LOG, (errmsg("replication slot %s exists", name)));
    } else {
        ereport(LOG, (errmsg("creating replication slot %s", name)));
        /*
         * Presumably ReplicationSlotCreate() leaves MyReplicationSlot
         * pointing at the new slot, since it is dereferenced right below --
         * TODO confirm.
         */
        ReplicationSlotCreate(name, false, RS_PERSISTENT);
    }

    /*
     * A replication slot is created on a mirror only in response to a
     * promote signal; while the node acts as a mirror no slot should exist
     * on it.  A non-zero restart_lsn therefore means an earlier promote
     * attempt already set it, and it must not be overwritten.
     */
    if (MyReplicationSlot->data.restart_lsn == 0) {
        /* Start reserving WAL right away so that pg_rewind can work later. */
        ReplicationSlotReserveWal();

        /* Write this slot to disk. */
        ReplicationSlotMarkDirty();
        ReplicationSlotSave();

        if (MyReplicationSlot->active) {
            ReplicationSlotRelease();
        }
    }

    MyReplicationSlot = NULL;
}
SignalPromote函数以写模式打开PROMOTE_SIGNAL_FILE文件(即创建该文件)后立即关闭,然后向postmaster发送SIGUSR1信号;如果文件打开失败,则不会发送信号。
/* GPDB_90_MERGE_FIXME: This function should be removed once hot standby can and will be enabled for mirrors. */
void SignalPromote(void) {
FILE *fd;
if ((fd = fopen(PROMOTE_SIGNAL_FILE, "w"))) {
fclose(fd);
kill(PostmasterPid, SIGUSR1);
}
}
版权声明:本文标题:Greenplum数据库源码学习——FTS HandleFtsWalRepPromote函数 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://www.elefans.com/dianzi/1730075652a1221741.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论