LevelDB源码解读——Log日志文件

LevelDB源码解读——Log日志文件
日志文件是LevelDB架构中最重要的一个部分。由于LSM-Tree是将写入的数据先存放在内存中的Memtable,当Memtable的大小达到临界值时,就将其变成只读的Immutable Memtable,随后后台进程将其flush到磁盘上的sstable;如果在这个过程中系统发生错误,内存中的数据就会受到破坏,这时候就需要用日志文件进行恢复。因此LevelDB在将数据写入内存的时候,就会同时将其记录在日志文件中。
日志格式
每一条日志记录由头部和数据两部分组成,头部依次为checksum(4字节)、length(2字节)、type(1字节)。每一个block包含若干条记录,block的大小默认为32KB。type有四种类型:FULL、FIRST、MIDDLE、LAST。FULL表示一条记录完整地存放在一个块内;FIRST、MIDDLE、LAST分别表示一条记录被拆分到多个块时的第一段、中间段和最后一段。
// Type tag stored in the header of every physical log record.
enum RecordType {
  // 0 is reserved for log files that have not been allocated yet.
  kZeroType = 0,

  // The fragment carries a complete record.
  kFullType = 1,

  // Fragment types for a record that is split across multiple blocks.
  kFirstType = 2,
  kMiddleType = 3,
  kLastType = 4
};

// Largest valid record type value.
static const int kMaxRecordType = kLastType;

// Block size in bytes (32 KB by default).
static const int kBlockSize = 32768;

// Record header: checksum (4 bytes), length (2 bytes), type (1 byte).
static const int kHeaderSize = 4 + 2 + 1;
日志的读过程
LevelDB整体的读过程如下图。可以看到查询过程要先从内存中查找,如果没有找到再到磁盘上查找。步骤分成三步:
1. 先从内存中的MemTable中查找,找到则返回
2. 然后从Immutable Memtable中查找,找到则返回
3. 如果内存中没有找到,再从磁盘上的sstable中查找
接下来看Reader的源码实现。Reader类定义。
class Reader {
public:
// 抽象类Reporter,⽤来报告错误
class Reporter {
public:
virtual~Reporter();
virtual void Corruption(size_t bytes,const Status& status)=0; };
// Create a reader that will return log records from "*file".
// "*file" must remain live while this Reader is in use.
//
// If "reporter" is non-null, it is notified whenever some data is
// dropped due to a detected corruption.  "*reporter" must remain // live while this Reader is in use.
//
/
/ If "checksum" is true, verify checksums if available.
//
// The Reader will start reading at the first record located at physical // position >= initial_offset within the file.
Reader(SequentialFile* file, Reporter* reporter,bool checksum, uint64_t initial_offset);
烤花炉
Reader(const Reader&)=delete;
Reader&operator=(const Reader&)=delete;
~Reader();
// 读取下⼀条记录到record中,scratch是临时存储,成功返回true bool ReadRecord(Slice* record, std::string* scratch);
8gggg// 返回最后⼀条记录的偏移量,要在ReadRecord之后调⽤
uint64_t LastRecordOffset();
private:
enum{
kEof = kMaxRecordType +1,
// Returned whenever we find an invalid physical record.
// Currently there are three situations in which this happens:
// * The record has an invalid CRC (ReadPhysicalRecord reports a drop)
// * The record is a 0-length record (No drop is reported)
// * The record is below constructor's initial_offset (No drop is reported)
kBadRecord = kMaxRecordType +2
};
//直接跳到initial_offeset
bool SkipToInitialBlock();
// 读取物理空间上的记录
unsigned int ReadPhysicalRecord(Slice* result);
// 当出现问题时候,将其原因放到reporter
void ReportCorruption(uint64_t bytes,const char* reason);
void ReportDrop(uint64_t bytes,const Status& reason);
SequentialFile*const file_;//读取的⽂件
Reporter*const reporter_;//错误报告对象
bool const checksum_;//校验和
char*const backing_store_;//备份存储
Slice buffer_;//缓存
bool eof_;// Last Read() indicated EOF by returning < kBlockSize
uint64_t last_record_offset_;//最后⼀个记录的offset
uint64_t end_of_buffer_offset_;//buffer中的offset
uint64_t const initial_offset_;//第⼀个记录的初始offset
bool resyncing_;
};
ReadRecord函数将读到的记录放到record中:首先调用ReadPhysicalRecord读取一条物理记录;如果一条逻辑记录被拆分在不同的block中,则先将各个片段依次追加到scratch中,直到读到kLastType类型的片段,才将整个scratch作为record返回。
// Reads the next logical record into *record.
//
// Physical records are pulled one at a time via ReadPhysicalRecord(). A
// kFullType fragment is returned directly. A record split into
// kFirstType / kMiddleType / kLastType fragments is accumulated in *scratch
// and *record is pointed at *scratch once the kLastType fragment arrives.
//
// Returns true when a record was produced, and false when kEof is reached or
// when skipping to the initial block fails. On success last_record_offset_ is
// set to the offset of the first physical record of the returned record.
bool Reader::ReadRecord(Slice* record, std::string* scratch) {
  if (last_record_offset_ < initial_offset_) {
    if (!SkipToInitialBlock()) {
      return false;
    }
  }

  // Start from a clean state.
  scratch->clear();
  record->clear();
  bool in_fragmented_record = false;
  // Record offset of the logical record that we're reading
  // 0 is a dummy value to make compilers happy
  uint64_t prospective_record_offset = 0;

  Slice fragment;
  while (true) {
    const unsigned int record_type = ReadPhysicalRecord(&fragment);

    // ReadPhysicalRecord may have only had an empty trailer remaining in its
    // internal buffer. Calculate the offset of the next physical record now
    // that it has returned, properly accounting for its header size.
    uint64_t physical_record_offset =
        end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();

    if (resyncing_) {
      // Drop the tail of a record whose beginning was skipped.
      if (record_type == kMiddleType) {
        continue;
      } else if (record_type == kLastType) {
        resyncing_ = false;
        continue;
      } else {
        resyncing_ = false;
      }
    }

    switch (record_type) {
      case kFullType:
        if (in_fragmented_record) {
          // Handle bug in earlier versions of log::Writer where
          // it could emit an empty kFirstType record at the tail end
          // of a block followed by a kFullType or kFirstType record
          // at the beginning of the next block.
          if (!scratch->empty()) {
            ReportCorruption(scratch->size(), "partial record without end(1)");
          }
        }
        prospective_record_offset = physical_record_offset;
        scratch->clear();
        *record = fragment;
        last_record_offset_ = prospective_record_offset;
        return true;

      case kFirstType:
        if (in_fragmented_record) {
          // Handle bug in earlier versions of log::Writer where
          // it could emit an empty kFirstType record at the tail end
          // of a block followed by a kFullType or kFirstType record
          // at the beginning of the next block.
          if (!scratch->empty()) {
            ReportCorruption(scratch->size(), "partial record without end(2)");
          }
        }
        prospective_record_offset = physical_record_offset;
        scratch->assign(fragment.data(), fragment.size());
        in_fragmented_record = true;
        break;

      case kMiddleType:
        if (!in_fragmented_record) {
          ReportCorruption(fragment.size(),
                           "missing start of fragmented record(1)");
        } else {
          scratch->append(fragment.data(), fragment.size());
        }
        break;

      case kLastType:
        if (!in_fragmented_record) {
          ReportCorruption(fragment.size(),
                           "missing start of fragmented record(2)");
        } else {
          scratch->append(fragment.data(), fragment.size());
          *record = Slice(*scratch);
          last_record_offset_ = prospective_record_offset;
          return true;
        }
        break;

      case kEof:
        if (in_fragmented_record) {
          // This can be caused by the writer dying immediately after
          // writing a physical record but before completing the next; don't
          // treat it as a corruption, just ignore the entire logical record.
          scratch->clear();
        }
        return false;

      case kBadRecord:
        if (in_fragmented_record) {
          ReportCorruption(scratch->size(), "error in middle of record");
          in_fragmented_record = false;
          scratch->clear();
        }
        break;

      default: {
        char buf[40];
        snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
        ReportCorruption(
            (fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
            buf);
        in_fragmented_record = false;
        scratch->clear();
        break;
      }
    }
  }
  return false;
}
ReadPhysicalRecord函数就是从磁盘上读取
unsigned int Reader::ReadPhysicalRecord(Slice* result){
while(true){
if(buffer_.size()< kHeaderSize){
if(!eof_){
/
/ Last read was a full read, so this is a trailer to skip
buffer_.clear();
//从⽂件中读取⼀个block放到buffer中
Status status = file_->Read(kBlockSize,&buffer_, backing_store_);
end_of_buffer_offset_ += buffer_.size();
if(!status.ok()){
buffer_.clear();
ReportDrop(kBlockSize, status);
eof_ =true;
return kEof;
}else if(buffer_.size()< kBlockSize){
eof_ =true;
}
continue;//这⾥是block读取完成(status.ok() == true)
}else{
// Note that if buffer_ is non-empty, we have a truncated header at the // end of the file, which can be caused by the writer crashing in the
// middle of writing the header. Instead of considering this an error,
// just report EOF.
buffer_.clear();
return kEof;
}
}
const char* header = buffer_.data();
// [0,1,2,3]是crc [4,5]是长度 [6]是type
//解析长度
const uint32_t a =static_cast<uint32_t>(header[4])&0xff;
const uint32_t b =static_cast<uint32_t>(header[5])&0xff;
//解析类型
const unsigned int type = header[6];
const uint32_t length = a |(b <<8);
if(kHeaderSize + length > buffer_.size()){
size_t drop_size = buffer_.size();

本文发布于:2024-09-22 16:40:08,感谢您对本站的认可!

本文链接:https://www.17tex.com/tex/4/211864.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

上一篇:加班日记
下一篇:日志结构
标签:记录   内存   查找
留言与评论(共有 0 条评论)
   
验证码:
Copyright ©2019-2024 Comsenz Inc.Powered by © 易纺专利技术学习网 豫ICP备2022007602号 豫公网安备41160202000603 站长QQ:729038198 关于我们 投诉建议