c++实现解压+解包tar.gz文件

编程入门 行业动态 更新时间:2024-10-07 06:40:11

c++实现解压+解包tar.gz<a href="https://www.elefans.com/category/jswz/34/1771438.html">文件</a>

c++实现解压+解包tar.gz文件

事情是这样的:

之前一直用libarchive来提取tar.gz文件,但是在QNX系统下偶尔会解压失败(不是每次都失败)。后来分析了一下libarchive源码,发现libarchive在处理gz解压时(archive_read_support_filter_gzip)会fork子进程,随后调用系统的read函数时失败,具体原因不明。libarchive的处理逻辑比较复杂,所以就想针对tar.gz单独写一个功能,按具体需求来实现。

实现逻辑还是比较简单的:

1.依赖zlib库,用来将xxx.tar.gz解压成xxx.tar。(使用gzopen,gzread)

2.根据tar格式从xxx.tar中提取(extract)出文件。tar格式以512字节的块为单位:每个条目由一个512字节的头部和按512字节对齐的数据区组成,具体可以搜索tar格式说明。

 

我的tar.gz是USTAR格式的,可以通过二进制查看工具打开xxx.tar文件,会看到"ustar"标识。下面是具体的结构,需要注意的是tar文件末尾至少有1024个'\0'字符(即两个全零的512字节块)。

 

 tar存放的类型如下:

For USTAR, the typeflag field is a compatible extension of the link field of the older tar format. The following values are recognized:
Flag
File type
0 or null
Regular file
1
Link to another file already archived
2
Symbolic link
3
Character special file
4
Block special file (not supported)
5
Directory
6
FIFO special file
7
Reserved
S
z/OS extended USTAR special header
T
z/OS extended USTAR special header summary (S and T are z/OS extensions. See z/OS-extended USTAR support for more information.)
A-Z
Available for custom usage

​​​​​​​

// targz.h
#ifndef XXX_TARGZ_H
#define XXX_TARGZ_H

#include <cstddef>
#include <fstream>
#include <string>
#include <vector>

namespace targz {

/// Entry kinds, named after the value of the typeflag byte of a tar header.
enum tarEntryType {
    FileCompatibilityType = '\0',  ///< regular file (pre-POSIX archives use NUL)
    FileType = '0',                ///< regular file
    HardlinkType = '1',            ///< link to another file already archived
    SymlinkType = '2',             ///< symbolic link
    CharacterDeviceType = '3',     ///< character special file
    BlockDeviceType = '4',         ///< block special file
    DirType = '5',                 ///< directory
    FifoType = '6',                ///< FIFO special file
    ReservedType = '7',            ///< reserved
    OtherType                      ///< ReservedType + 1, i.e. '8'
};

/// Decompresses a .tar.gz with zlib and unpacks the contained tar archive.
///
/// Two workflows are offered:
///  - in memory: decompressGzFile(path, buffer) followed by extractSource(buffer);
///  - via a temporary .tar file: decompressGzFile(path) followed by extractSource().
class tarGzFile {
public:
    tarGzFile() = default;
    ~tarGzFile() = default;

    /// Sets the directory that archive entries are extracted into.
    void setExtractedPath(const std::string &path);

    /// Returns the directory previously stored by setExtractedPath().
    std::string getExtractedPath();

    /// Decompresses a tar.gz file with zlib into @p tar_buffer.
    /// @return true on success, false otherwise.
    bool decompressGzFile(const std::string &targz_file_path, std::vector<char> &tar_buffer);

    /// Decompresses a tar.gz file with zlib into a tar file on disk.
    /// @return true on success, false otherwise.
    bool decompressGzFile(const std::string &targz_file_path);

    /// Extracts files and directories from an in-memory tar image.
    /// @return true on success, false otherwise.
    bool extractSource(const std::vector<char> &tar_buffer);

    /// Extracts files and directories from the tar file written by
    /// decompressGzFile(const std::string &).
    /// @return true on success, false otherwise.
    bool extractSource();

private:
    /// The tar file ends on two null headers; checks whether @p p is one.
    bool is_end_of_archive(const char *p);

    /// Checks whether the header block @p p is entirely null.
    bool is_null_of_header(const char *p);

    /// Verifies the header checksum; extraction stops when it does not match.
    /// @return non-zero when the checksum matches.
    int verifyChecksum(const char *p);

    /// Parses an octal header field of at most @p n bytes.
    int parseoct(const char *p, std::size_t n);

    /// Opens @p f for writing at @p pathname.
    void createFileStream(char *pathname, std::ofstream &f);

    /// Writes @p filesize bytes of @p file_buff to @p pathname.
    /// (The misspelled name is kept because the implementation file uses it.)
    void wirteTarFile(char *pathname, const char *file_buff, int filesize);

    /// Directory that entries are extracted into.
    std::string savepath_;

    /// Path of the intermediate tar file (xxx.tar.gz -> xxx.tar).
    std::string tar_file_path_;
};

}  // namespace targz

#endif  // XXX_TARGZ_H
// targz.cpp
#include "targz.h"

#include <dirent.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <zlib.h>
#if defined(_WIN32) && !defined(__CYGWIN__)
#include <direct.h>
#endif

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
static const ushort TAR_FORMAT_CELL_SIZE = 512;
static const ushort TAR_END_TWO_NULL_HEADER_SIZE = 1024;namespace targz {static void createDir(char *pathname, int mode) {char *p;int r = -1;/* Strip trailing '/' */if (pathname[strlen(pathname) - 1] == '/') {pathname[strlen(pathname) - 1] = '\0';}if (nullptr != opendir(pathname)) {return;}/* Try creating the directory. */
#if defined(_WIN32) && !defined(__CYGWIN__)r = _mkdir(pathname);
#elser = mkdir(pathname, mode);
#endifif (r != 0) {/* On failure, try creating parent directory. */p = strrchr(pathname, '/');if (p != NULL) {*p = '\0';createDir(pathname, 0755);*p = '/';
#if defined(_WIN32) && !defined(__CYGWIN__)r = _mkdir(pathname);
#elser = mkdir(pathname, mode);
#endif}}if (r != 0) {ERROR("[targz] create_dir failed :{}", pathname);return;}INFO("[targz] create dir: {}", pathname);
}void tarGzFile::createFileStream(char *pathname, std::ofstream &f) {f.open(pathname, std::ios::out | std::ios::binary);if (!f.is_open()) {/* Try creating parent dir and then creating file. */char *p = strrchr(pathname, '/');if (p != NULL) {*p = '\0';createDir(pathname, 0755);*p = '/';f.open(pathname, std::ios::app | std::ios::binary);if (!f.is_open()) {ERROR("[targz] create file stream failed: {}", pathname);return;}}}
}void tarGzFile::wirteTarFile(char *pathname, const char *file_buff, int filesize) {INFO("[targz] write Tar File: {}", pathname);std::ofstream f;createFileStream(pathname, f);if (f.is_open()) {f.write(file_buff, filesize);}f.close();
}bool tarGzFile::decompressGzFile(const std::string &targz_file_path, std::vector<char> &tar_buffer) {tar_buffer.clear();gzFile inFileZ = gzopen(targz_file_path.c_str(), "rb");if (!inFileZ) {ERROR("[targz]decompressGzFile open tar.gz failed: {}", targz_file_path);return false;}char unzipBuffer[TAR_FORMAT_CELL_SIZE] = {0};unsigned int unzippedBytes;std::vector<char> unzippedData;while (true) {unzippedBytes = gzread(inFileZ, unzipBuffer, TAR_FORMAT_CELL_SIZE);if (unzippedBytes > 0) {tar_buffer.insert(tar_buffer.end(), unzipBuffer, unzipBuffer + unzippedBytes);} else {break;}}gzclose(inFileZ);return true;
}bool tarGzFile::decompressGzFile(const std::string &targz_file_path) {gzFile inFileZ = gzopen(targz_file_path.c_str(), "rb");if (!inFileZ) {ERROR("[targz]decompressGzFile open tar.gz failed: {}", targz_file_path);return false;}auto find_index = targz_file_path.find_last_of(".");tar_file_path_ = targz_file_path.substr(0, find_index);std::ofstream writer(tar_file_path_.c_str(), std::ios::app | std::ios::binary);if (!writer.is_open()) {ERROR("[targz] decompressGzFile Open file {} error!", tar_file_path_);return false;}char unzipBuffer[TAR_FORMAT_CELL_SIZE] = {0};unsigned int unzippedBytes;while (true) {unzippedBytes = gzread(inFileZ, unzipBuffer, TAR_FORMAT_CELL_SIZE);if (unzippedBytes > 0) {writer.write(unzipBuffer, unzippedBytes);} else {break;}}gzclose(inFileZ);writer.close();return true;
}bool tarGzFile::extractSource(const std::vector<char> &tar_buffer) {if (tar_buffer.size() < TAR_FORMAT_CELL_SIZE) {ERROR("[targz] targz file size less than 512, format error!");return false;}std::size_t bytes_read = 0;std::vector<char> buff;std::size_t tar_buffer_size = tar_buffer.size();std::size_t vaild_buffer = tar_buffer_size - TAR_END_TWO_NULL_HEADER_SIZE;buff.reserve(TAR_FORMAT_CELL_SIZE);do {buff.clear();buff = {tar_buffer.begin() + bytes_read, tar_buffer.begin() + bytes_read + TAR_FORMAT_CELL_SIZE};// read the headerif (is_null_of_header(&buff[0])) {ERROR("[targz]extractSource read header all null!");return false;}// Checksum verificationif (!verifyChecksum(&buff[0])) {ERROR("[targz]extractSource checksum verification wrong!");return false;}// read the filesize at buff[124], 12 bytesint filesize = parseoct(&buff[0] + 124, 12);// The actual size on disk of the file is a multiple of 512 in tar formatstd::size_t nextEntry = TAR_FORMAT_CELL_SIZE;if (filesize) { // filesize > 0if (filesize < TAR_FORMAT_CELL_SIZE) {nextEntry += TAR_FORMAT_CELL_SIZE;} else if (filesize % TAR_FORMAT_CELL_SIZE == 0) {nextEntry += filesize;} else {nextEntry += (TAR_FORMAT_CELL_SIZE * (1 + filesize / TAR_FORMAT_CELL_SIZE));}}tarEntryType type = static_cast<tarEntryType>(buff[156]);std::string filename(&buff[0], &buff[99]);switch (type) {case FileCompatibilityType:case FileType: {std::string tmp_path = getExtractedPath() + filename.c_str();char tmpname[tmp_path.size() + 1] = {0};strncpy(tmpname, tmp_path.c_str(), tmp_path.length());wirteTarFile(tmpname, (&tar_buffer[0] + bytes_read + TAR_FORMAT_CELL_SIZE), filesize);break;}case DirType: {std::string tmp_path = getExtractedPath() + filename.c_str();char dirname[tmp_path.size() + 1] = {0};strncpy(dirname, tmp_path.c_str(), tmp_path.length());createDir(dirname, parseoct(&buff[0] + 100, 8)); // File modebreak;}default:WARN("[targz]extractSource default type warn:{}", (int)type);break;}bytes_read += nextEntry;} while (bytes_read < 
vaild_buffer);if (bytes_read != vaild_buffer) {ERROR("[targz]extractSource  extract size wrong:{} vs {}", bytes_read, tar_buffer.size());return false;}return true;
}bool tarGzFile::extractSource() {FILE *a = fopen(tar_file_path_.c_str(), "rb");if (!a) {ERROR("[targz] open tar file failed: {}", tar_file_path_);return false;}bool res = false;char buff[TAR_FORMAT_CELL_SIZE]{};size_t bytes_read = 0;int filesize = 0;for (;;) {std::ofstream writer;bytes_read = fread(buff, 1, TAR_FORMAT_CELL_SIZE, a);if (bytes_read < TAR_FORMAT_CELL_SIZE) {ERROR("[targz] targz file size less than 512, format error!");break;}if (is_end_of_archive(buff)) {INFO("[targz] End tar gz file extract finish!");res = true;break;}if (!verifyChecksum(buff)) {ERROR("[targz]extractSource checksum verification wrong!");break;}std::string filename(&buff[0], &buff[99]);filesize = parseoct(buff + 124, 12);tarEntryType type = static_cast<tarEntryType>(buff[156]);switch (type) {case FileCompatibilityType:case FileType: {std::string tmp_path = getExtractedPath() + filename.c_str();char tmpname[tmp_path.size() + 1] = {0};strncpy(tmpname, tmp_path.c_str(), tmp_path.length());createFileStream(tmpname, writer);break;}case DirType: {std::string tmp_path = getExtractedPath() + filename.c_str();char dirname[tmp_path.size() + 1] = {0};strncpy(dirname, tmp_path.c_str(), tmp_path.length());createDir(dirname, parseoct(buff + 100, 8));filesize = 0;break;}default:WARN("[targz]extractSource default type warn:{}", (int)type);break;}while (filesize > 0) {bytes_read = fread(buff, 1, TAR_FORMAT_CELL_SIZE, a);if (bytes_read < TAR_FORMAT_CELL_SIZE) {ERROR("[targz] extractSource write failed: {}", filename);return false;}if (filesize < TAR_FORMAT_CELL_SIZE) {bytes_read = filesize;}if (writer.is_open()) {writer.write(buff, bytes_read);}else {ERROR("[targz] extractSource write file failed: {}", filename);break;}filesize -= bytes_read;}writer.close();}return res;
}bool tarGzFile::is_end_of_archive(const char *p) {int n;for (n = 511; n >= 0; --n) {if (p[n] != '\0')return false;}return true;
}bool tarGzFile::is_null_of_header(const char *p) {int n;for (n = 511; n >= 0; --n) {if (p[n] != '\0')return false;}return true;
}/* Parse an octal number, ignoring leading and trailing nonsense. */
int tarGzFile::parseoct(const char *p, std::size_t n) {int i = 0;while ((*p < '0' || *p > '7') && n > 0) {++p;--n;}while (*p >= '0' && *p <= '7' && n > 0) {i *= 8;i += *p - '0';++p;--n;}return (i);
}/* Verify the tar checksum. */
int tarGzFile::verifyChecksum(const char *p) {int n, u = 0;for (n = 0; n < 512; ++n) {if (n < 148 || n > 155)/* Standard tar checksum adds unsigned bytes. */u += ((unsigned char *)p)[n];elseu += 0x20;}return (u == parseoct(p + 148, 8));
}void tarGzFile::setExtractedPath(const std::string &path) {if (path.empty()) {ERROR("[targz] not set path where extract to!");return;}if (nullptr == opendir(path.c_str())) {ERROR("[targz] extract path not exist: {}", path);return;}savepath_ = path;
}
std::string tarGzFile::getExtractedPath() { return savepath_; }} // namespace targz
// 测试
int main(int argc, char *argv[]) {if (argc < 3) {INFO("input param number missin,please input tar gz file path and save dir path(end add /)....");return -1;}std::string source_filename = argv[1];std::string save_dir = argv[2];std::unique_ptr<targz::tarGzFile> handle_tmp = std::make_unique<targz::tarGzFile>();handle_tmp->setExtractedPath(save_dir);std::vector<char> tar_buff;handle_tmp->decompressGzFile(source_filename, tar_buff);handle_tmp->extractSource(tar_buff);return 0;
}

更多推荐

c++实现解压+解包tar.gz文件

本文发布于:2024-02-27 23:01:35,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1766635.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:文件   tar   gz

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!