如何使用String解析Hex，Float(How to parse Hex with String, Float)

我有一些二进制文件，其中包含地点名称和坐标（纬度、经度）。每当我使用 .ascii 编码将其解析为 String 时，都无法得到正确的结果。我猜测是 Float 值（坐标）部分的解析失败了。

读取InputStream

extension Data {
    /// Creates a `Data` value by reading `input` until it reports no more bytes.
    ///
    /// The stream is opened before reading and closed before the initializer
    /// returns. Reading stops at end of stream (`read` returns 0) or on a stream
    /// error (`read` returns a negative value); in the error case the bytes read
    /// so far are kept and the caller can inspect `input.streamError`.
    ///
    /// - Parameter input: The stream to drain. It must not already be open.
    init(reading input: InputStream) {
        self.init()
        input.open()
        defer { input.close() }

        let bufferSize = 1024
        var buffer = [UInt8](repeating: 0, count: bufferSize)

        while input.hasBytesAvailable {
            let read = input.read(&buffer, maxLength: bufferSize)
            // A negative count signals an error and would trap if appended;
            // zero means end of stream even though bytes were reported available.
            guard read > 0 else { break }
            append(contentsOf: buffer[0..<read])
        }
    }
}

要解析的文件

// Resolve and open the bundled file. `Bundle.main.path` returns an optional,
// which must be unwrapped before it can be handed to `InputStream(fileAtPath:)`.
// A missing bundled resource is a packaging error, so fail loudly.
guard let filepath = Bundle.main.path(forResource: "MN", ofType: "dat"),
      let stream = InputStream(fileAtPath: filepath) else {
    fatalError("MN.dat could not be opened from the main bundle")
}
let data = Data(reading: stream)
// Decoding the whole binary payload as ASCII text is the step the question
// reports as not parsing well.
let parsedData = String(data: data, encoding: .ascii)

有什么办法可以正确地解析它吗？

例如，Java ObjectInputStream具有以下方法：

// Illustration only: the two Java-style stream accessors (read a string, read a float) that the asker wants a Swift equivalent for.
inputStreamObj.readUTF() inputStreamObj.readFloat()

Java的

I have binary files containing place names and coordinates (latitude, longitude). Whenever I parse one into a String using the .ascii encoding, it does not parse correctly. I assume that the parsing of the Float values (the coordinates) is what fails.

Reading InputStream

extension Data {
    /// Creates a `Data` value by reading `input` until it reports no more bytes.
    ///
    /// The stream is opened before reading and closed before the initializer
    /// returns. Reading stops at end of stream (`read` returns 0) or on a stream
    /// error (`read` returns a negative value); in the error case the bytes read
    /// so far are kept and the caller can inspect `input.streamError`.
    ///
    /// - Parameter input: The stream to drain. It must not already be open.
    init(reading input: InputStream) {
        self.init()
        input.open()
        defer { input.close() }

        let bufferSize = 1024
        var buffer = [UInt8](repeating: 0, count: bufferSize)

        while input.hasBytesAvailable {
            let read = input.read(&buffer, maxLength: bufferSize)
            // A negative count signals an error and would trap if appended;
            // zero means end of stream even though bytes were reported available.
            guard read > 0 else { break }
            append(contentsOf: buffer[0..<read])
        }
    }
}

File to parse

// Resolve and open the bundled file. `Bundle.main.path` returns an optional,
// which must be unwrapped before it can be handed to `InputStream(fileAtPath:)`.
// A missing bundled resource is a packaging error, so fail loudly.
guard let filepath = Bundle.main.path(forResource: "MN", ofType: "dat"),
      let stream = InputStream(fileAtPath: filepath) else {
    fatalError("MN.dat could not be opened from the main bundle")
}
let data = Data(reading: stream)
// Decoding the whole binary payload as ASCII text is the step the question
// reports as not parsing well.
let parsedData = String(data: data, encoding: .ascii)

Any ideas on how I could parse it correctly?

For example, Java's ObjectInputStream has these methods:

// Illustration only: the two Java-style stream accessors (read a string, read a float) that the asker wants a Swift equivalent for.
inputStreamObj.readUTF() inputStreamObj.readFloat()

Java

最满意答案

正如我在评论中所写，您需要阅读《对象序列化流协议》（Object Serialization Stream Protocol）规范。

因此，前4个字节表示STREAM_MAGIC和STREAM_VERSION，它们应当始终是相同的值。5字节序列0x7A 0xhh 0xhh 0xhh 0xhh表示TC_BLOCKDATALONG（0xhhhhhhhh），其后紧跟0xhhhhhhhh个字节的块数据。（短格式TC_BLOCKDATA为0x77加上1个字节的长度。）

在解析字符串和浮点数之前，需要连接所有块。

所以，准备DataReader ：

（几乎与Sulthan的实现相同，但这个版本能正确处理Modified UTF-8。）

/// A forward-only cursor over a `Data` buffer that decodes big-endian
/// primitives and length-prefixed Modified UTF-8 strings.
///
/// Every read either succeeds and advances `currentPosition`, or throws a
/// `DataReaderError`; malformed or truncated input never traps.
struct DataReader {
    enum DataReaderError: Error {
        /// The lead byte of a character is not a valid 1-, 2- or 3-byte lead.
        case invalidFirstByte(byte: UInt16, offset: Int)
        /// A continuation byte does not have the `10xxxxxx` form.
        case invalidFollowingByte
        /// A multi-byte character runs past the declared string length.
        case missingFollowingByte
        /// Fewer bytes remain in the buffer than the read requires.
        case insufficientData
    }

    /// The buffer being read.
    var data: Data
    /// Offset of the next unread byte, counted from the start of `data`.
    var currentPosition: Int

    init(data: Data) {
        self.data = data
        self.currentPosition = 0
    }

    /// Advances the cursor by `n` bytes without reading them.
    mutating func skipBytes(_ n: Int) {
        currentPosition += n
    }

    /// Throws `insufficientData` unless `n` bytes can be read at the cursor.
    private func requireAvailable(_ n: Int) throws {
        // Written as a subtraction so a huge `n` cannot overflow the check.
        guard n >= 0, currentPosition >= 0, n <= data.count - currentPosition else {
            throw DataReaderError.insufficientData
        }
    }

    /// Index into `data` of the byte at the cursor. `Data` slices keep their
    /// parent's indices, so the offset must be rebased on `startIndex`.
    private var cursorIndex: Data.Index {
        return data.startIndex + currentPosition
    }

    /// Reads `MemoryLayout<T>.size` bytes as a big-endian integer.
    private mutating func readBigEndian<T: FixedWidthInteger>() throws -> T {
        let size = MemoryLayout<T>.size
        try requireAvailable(size)
        let start = cursorIndex
        var value: T = 0
        // Fold the bytes most-significant first; bits shifted out are discarded.
        for byte in data[start ..< start + size] {
            value = (value << 8) | T(truncatingIfNeeded: byte)
        }
        currentPosition += size
        return value
    }

    /// Reads a 4-byte big-endian IEEE 754 single-precision value.
    mutating func readFloat() throws -> Float {
        let floatBits: UInt32 = try readBigEndian()
        return Float(bitPattern: floatBits)
    }

    /// Reads a 2-byte big-endian unsigned integer.
    mutating func readUnsignedShort() throws -> Int {
        let ushortValue: UInt16 = try readBigEndian()
        return Int(ushortValue)
    }

    /// Reads a 4-byte big-endian signed integer (the result may be negative).
    mutating func readInt() throws -> Int {
        let intValue: Int32 = try readBigEndian()
        return Int(intValue)
    }

    /// Reads a single byte as a value in `0...255`.
    mutating func readUnsignedByte() throws -> Int {
        try requireAvailable(1)
        let byte = data[cursorIndex]
        currentPosition += 1
        return Int(byte)
    }

    /// Reads the next `n` bytes as a new, zero-indexed `Data`.
    ///
    /// - Throws: `DataReaderError.insufficientData` if `n` is negative or
    ///   exceeds the number of bytes remaining.
    mutating func readBytes(_ n: Int) throws -> Data {
        try requireAvailable(n)
        let start = cursorIndex
        let subdata = data.subdata(in: start ..< start + n)
        currentPosition += n
        return subdata
    }

    /// Reads a string stored as a 2-byte big-endian byte count followed by
    /// that many bytes of Modified UTF-8.
    ///
    /// Each 1-, 2- or 3-byte group decodes to exactly one UTF-16 code unit.
    ///
    /// - Throws: `insufficientData` if the declared byte count runs past the
    ///   end of the buffer, or one of the encoding errors for malformed bytes.
    mutating func readUTF() throws -> String {
        // Byte length of the encoded string.
        let count = try readUnsignedShort()
        // Without this check a truncated string would index out of bounds.
        try requireAvailable(count)

        let base = cursorIndex
        var utf16: [UInt16] = []
        utf16.reserveCapacity(count)
        var offset = 0
        while offset < count {
            let firstByte = UInt16(data[base + offset])
            if firstByte & 0b1_0000000 == 0b0_0000000 {
                // 0xxxxxxx: single byte
                utf16.append(firstByte)
                offset += 1
            } else if firstByte & 0b111_00000 == 0b110_00000 {
                // 110xxxxx 10xxxxxx
                guard offset + 1 < count else { throw DataReaderError.missingFollowingByte }
                let secondByte = UInt16(data[base + offset + 1])
                guard secondByte & 0b11_000000 == 0b10_000000 else { throw DataReaderError.invalidFollowingByte }
                let codeUnit = ((firstByte & 0b000_11111) << 6) | (secondByte & 0b00_111111)
                utf16.append(codeUnit)
                offset += 2
            } else if firstByte & 0b1111_0000 == 0b1110_0000 {
                // 1110xxxx 10xxxxxx 10xxxxxx
                guard offset + 2 < count else { throw DataReaderError.missingFollowingByte }
                let secondByte = UInt16(data[base + offset + 1])
                guard secondByte & 0b11_000000 == 0b10_000000 else { throw DataReaderError.invalidFollowingByte }
                let thirdByte = UInt16(data[base + offset + 2])
                guard thirdByte & 0b11_000000 == 0b10_000000 else { throw DataReaderError.invalidFollowingByte }
                let codeUnit = ((firstByte & 0b0000_1111) << 12) | ((secondByte & 0b00_111111) << 6) | (thirdByte & 0b00_111111)
                utf16.append(codeUnit)
                offset += 3
            } else {
                throw DataReaderError.invalidFirstByte(byte: firstByte, offset: currentPosition + offset)
            }
        }
        currentPosition += offset
        return String(utf16CodeUnits: &utf16, count: utf16.count)
    }

    /// `true` when the cursor sits exactly at the end of the buffer.
    var isAtEnd: Bool {
        return currentPosition == data.count
    }
}

我们可以解析你的MN.dat如下：

// Parse the bundled MN.dat: gather the payload of every block record, then
// decode the combined bytes as repeated (string, float, float) entries.
let mnUrl = Bundle.main.url(forResource: "MN", withExtension: "dat")!
do {
    let fileContents = try Data(contentsOf: mnUrl)
    var reader = DataReader(data: fileContents)
    // The first 4 bytes are STREAM_MAGIC and STREAM_VERSION.
    reader.skipBytes(4)

    // Pass 1: concatenate all block payloads.
    var blockData = Data()
    collectBlocks: while !reader.isAtEnd {
        switch try reader.readUnsignedByte() {
        case 0x7A: // TC_BLOCKDATALONG: length is a 4-byte int
            let length = try reader.readInt()
            try blockData.append(reader.readBytes(length))
        case 0x77: // TC_BLOCKDATA: length is a single byte
            let length = try reader.readUnsignedByte()
            try blockData.append(reader.readBytes(length))
        default:
            print("Unsupported content type")
            // Labeled so the break leaves the loop, not just the switch.
            break collectBlocks
        }
    }

    // Pass 2: decode entries from the joined payload.
    var blockReader = DataReader(data: blockData)
    while !blockReader.isAtEnd {
        let string = try blockReader.readUTF()
        print(string)
        let float1 = try blockReader.readFloat()
        print(float1)
        let float2 = try blockReader.readFloat()
        print(float2)
        // Use string, float1, float2 as you like
    }
} catch {
    print(error)
}

输出：

Albert Lea 43.648 -93.3683 Albertville 45.2377 -93.6544 Alexandria 45.8852 -95.3775 (... no errors...) Woodbury 44.9239 -92.9594 Worthington 43.62 -95.5964 Wyoming 45.3364 -92.9972 Zimmerman 45.4433 -93.59

如果二进制数据可能包含其他内容类型，则可能需要修改上面的代码。

As I wrote in the comment, you need to read the Object Serialization Stream Protocol specification.

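So, the first 4 bytes represent STREAM_MAGIC and STREAM_VERSION, which are expected to always have the same values. The 5-byte sequence 0x7A 0xhh 0xhh 0xhh 0xhh represents TC_BLOCKDATALONG(0xhhhhhhhh), and is followed by 0xhhhhhhhh bytes of block data. (The short form, TC_BLOCKDATA, is 0x77 followed by a 1-byte length.)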

All blocks need to be concatenated before parsing the strings and floats.

So, preparing the DataReader:

(Nearly the same as Sulthan's, but this treats Modified UTF-8 correctly.)

/// A forward-only cursor over a `Data` buffer that decodes big-endian
/// primitives and length-prefixed Modified UTF-8 strings.
///
/// Every read either succeeds and advances `currentPosition`, or throws a
/// `DataReaderError`; malformed or truncated input never traps.
struct DataReader {
    enum DataReaderError: Error {
        /// The lead byte of a character is not a valid 1-, 2- or 3-byte lead.
        case invalidFirstByte(byte: UInt16, offset: Int)
        /// A continuation byte does not have the `10xxxxxx` form.
        case invalidFollowingByte
        /// A multi-byte character runs past the declared string length.
        case missingFollowingByte
        /// Fewer bytes remain in the buffer than the read requires.
        case insufficientData
    }

    /// The buffer being read.
    var data: Data
    /// Offset of the next unread byte, counted from the start of `data`.
    var currentPosition: Int

    init(data: Data) {
        self.data = data
        self.currentPosition = 0
    }

    /// Advances the cursor by `n` bytes without reading them.
    mutating func skipBytes(_ n: Int) {
        currentPosition += n
    }

    /// Throws `insufficientData` unless `n` bytes can be read at the cursor.
    private func requireAvailable(_ n: Int) throws {
        // Written as a subtraction so a huge `n` cannot overflow the check.
        guard n >= 0, currentPosition >= 0, n <= data.count - currentPosition else {
            throw DataReaderError.insufficientData
        }
    }

    /// Index into `data` of the byte at the cursor. `Data` slices keep their
    /// parent's indices, so the offset must be rebased on `startIndex`.
    private var cursorIndex: Data.Index {
        return data.startIndex + currentPosition
    }

    /// Reads `MemoryLayout<T>.size` bytes as a big-endian integer.
    private mutating func readBigEndian<T: FixedWidthInteger>() throws -> T {
        let size = MemoryLayout<T>.size
        try requireAvailable(size)
        let start = cursorIndex
        var value: T = 0
        // Fold the bytes most-significant first; bits shifted out are discarded.
        for byte in data[start ..< start + size] {
            value = (value << 8) | T(truncatingIfNeeded: byte)
        }
        currentPosition += size
        return value
    }

    /// Reads a 4-byte big-endian IEEE 754 single-precision value.
    mutating func readFloat() throws -> Float {
        let floatBits: UInt32 = try readBigEndian()
        return Float(bitPattern: floatBits)
    }

    /// Reads a 2-byte big-endian unsigned integer.
    mutating func readUnsignedShort() throws -> Int {
        let ushortValue: UInt16 = try readBigEndian()
        return Int(ushortValue)
    }

    /// Reads a 4-byte big-endian signed integer (the result may be negative).
    mutating func readInt() throws -> Int {
        let intValue: Int32 = try readBigEndian()
        return Int(intValue)
    }

    /// Reads a single byte as a value in `0...255`.
    mutating func readUnsignedByte() throws -> Int {
        try requireAvailable(1)
        let byte = data[cursorIndex]
        currentPosition += 1
        return Int(byte)
    }

    /// Reads the next `n` bytes as a new, zero-indexed `Data`.
    ///
    /// - Throws: `DataReaderError.insufficientData` if `n` is negative or
    ///   exceeds the number of bytes remaining.
    mutating func readBytes(_ n: Int) throws -> Data {
        try requireAvailable(n)
        let start = cursorIndex
        let subdata = data.subdata(in: start ..< start + n)
        currentPosition += n
        return subdata
    }

    /// Reads a string stored as a 2-byte big-endian byte count followed by
    /// that many bytes of Modified UTF-8.
    ///
    /// Each 1-, 2- or 3-byte group decodes to exactly one UTF-16 code unit.
    ///
    /// - Throws: `insufficientData` if the declared byte count runs past the
    ///   end of the buffer, or one of the encoding errors for malformed bytes.
    mutating func readUTF() throws -> String {
        // Byte length of the encoded string.
        let count = try readUnsignedShort()
        // Without this check a truncated string would index out of bounds.
        try requireAvailable(count)

        let base = cursorIndex
        var utf16: [UInt16] = []
        utf16.reserveCapacity(count)
        var offset = 0
        while offset < count {
            let firstByte = UInt16(data[base + offset])
            if firstByte & 0b1_0000000 == 0b0_0000000 {
                // 0xxxxxxx: single byte
                utf16.append(firstByte)
                offset += 1
            } else if firstByte & 0b111_00000 == 0b110_00000 {
                // 110xxxxx 10xxxxxx
                guard offset + 1 < count else { throw DataReaderError.missingFollowingByte }
                let secondByte = UInt16(data[base + offset + 1])
                guard secondByte & 0b11_000000 == 0b10_000000 else { throw DataReaderError.invalidFollowingByte }
                let codeUnit = ((firstByte & 0b000_11111) << 6) | (secondByte & 0b00_111111)
                utf16.append(codeUnit)
                offset += 2
            } else if firstByte & 0b1111_0000 == 0b1110_0000 {
                // 1110xxxx 10xxxxxx 10xxxxxx
                guard offset + 2 < count else { throw DataReaderError.missingFollowingByte }
                let secondByte = UInt16(data[base + offset + 1])
                guard secondByte & 0b11_000000 == 0b10_000000 else { throw DataReaderError.invalidFollowingByte }
                let thirdByte = UInt16(data[base + offset + 2])
                guard thirdByte & 0b11_000000 == 0b10_000000 else { throw DataReaderError.invalidFollowingByte }
                let codeUnit = ((firstByte & 0b0000_1111) << 12) | ((secondByte & 0b00_111111) << 6) | (thirdByte & 0b00_111111)
                utf16.append(codeUnit)
                offset += 3
            } else {
                throw DataReaderError.invalidFirstByte(byte: firstByte, offset: currentPosition + offset)
            }
        }
        currentPosition += offset
        return String(utf16CodeUnits: &utf16, count: utf16.count)
    }

    /// `true` when the cursor sits exactly at the end of the buffer.
    var isAtEnd: Bool {
        return currentPosition == data.count
    }
}

We can parse your MN.dat as follows:

// Parse the bundled MN.dat: gather the payload of every block record, then
// decode the combined bytes as repeated (string, float, float) entries.
let mnUrl = Bundle.main.url(forResource: "MN", withExtension: "dat")!
do {
    let fileContents = try Data(contentsOf: mnUrl)
    var reader = DataReader(data: fileContents)
    // The first 4 bytes are STREAM_MAGIC and STREAM_VERSION.
    reader.skipBytes(4)

    // Pass 1: concatenate all block payloads.
    var blockData = Data()
    collectBlocks: while !reader.isAtEnd {
        switch try reader.readUnsignedByte() {
        case 0x7A: // TC_BLOCKDATALONG: length is a 4-byte int
            let length = try reader.readInt()
            try blockData.append(reader.readBytes(length))
        case 0x77: // TC_BLOCKDATA: length is a single byte
            let length = try reader.readUnsignedByte()
            try blockData.append(reader.readBytes(length))
        default:
            print("Unsupported content type")
            // Labeled so the break leaves the loop, not just the switch.
            break collectBlocks
        }
    }

    // Pass 2: decode entries from the joined payload.
    var blockReader = DataReader(data: blockData)
    while !blockReader.isAtEnd {
        let string = try blockReader.readUTF()
        print(string)
        let float1 = try blockReader.readFloat()
        print(float1)
        let float2 = try blockReader.readFloat()
        print(float2)
        // Use string, float1, float2 as you like
    }
} catch {
    print(error)
}

Output:

Albert Lea 43.648 -93.3683 Albertville 45.2377 -93.6544 Alexandria 45.8852 -95.3775 (... no errors...) Woodbury 44.9239 -92.9594 Worthington 43.62 -95.5964 Wyoming 45.3364 -92.9972 Zimmerman 45.4433 -93.59

You may need to modify the code above if your binary data may contain other content types.

更多推荐