iOS 小说字数统计

编程入门 行业动态 更新时间:2024-10-11 15:23:59

iOS 小说字数统计

iOS 小说字数统计

小说单词统计规则(泰语除外)

iOS 小说单词统计规则算法

import Foundation

/// Counts the words of a text for every language except Thai.
///
/// The text is walked one `Character` at a time and only the first Unicode
/// scalar of each character is classified:
/// - a *break space* ends the current word;
/// - a *full-width character* counts as one word by itself and also ends the
///   current word;
/// - any other character belongs to the current word.
///
/// The break-space test is applied first, so a code point listed in both
/// tables is treated as a break space.
final class WordCounter {
    /// Shared counter configured from `break_spaces.txt` and
    /// `fullwidth_chars.txt` in the main bundle.
    static let shared = WordCounter()

    /// Inclusive `(min, max)` scalar-value ranges of word separators.
    private let mBreakSpaceRanges: [(Int, Int)]
    /// Inclusive `(min, max)` scalar-value ranges of characters that count as a word each.
    private let mFullWidthCharRanges: [(Int, Int)]

    /// Loads both range tables from the main bundle; a missing or unreadable
    /// file yields an empty table.
    private convenience init() {
        self.init(
            breakSpaceRanges: WordCounter.readConfigFile(filePath: "break_spaces"),
            fullWidthCharRanges: WordCounter.readConfigFile(filePath: "fullwidth_chars")
        )
    }

    /// Creates a counter with explicit range tables (useful for tests or
    /// for configurations that do not live in the main bundle).
    ///
    /// - Parameters:
    ///   - breakSpaceRanges: Inclusive `(min, max)` ranges of separators.
    ///   - fullWidthCharRanges: Inclusive `(min, max)` ranges of characters
    ///     that each count as one word.
    init(breakSpaceRanges: [(Int, Int)], fullWidthCharRanges: [(Int, Int)]) {
        mBreakSpaceRanges = breakSpaceRanges
        mFullWidthCharRanges = fullWidthCharRanges
    }

    /// Returns the number of words in `content`.
    ///
    /// - Parameter content: The text to count; `nil` counts as zero words.
    /// - Returns: The number of full-width characters plus the number of
    ///   runs of other non-separator characters.
    func getWordCount(content: String?) -> Int {
        guard let content = content else {
            return 0
        }

        var wordCount = 0
        // True while a run of ordinary (non-separator, non-full-width)
        // characters is open and has not been counted yet.
        var hasPendingWord = false

        for c in content {
            if isBreakSpace(c: c) {
                if hasPendingWord {
                    wordCount += 1
                    hasPendingWord = false
                }
            } else if isFullWidthChar(c: c) {
                // The full-width character is a word of its own ...
                wordCount += 1
                // ... and it also terminates the run in front of it.
                if hasPendingWord {
                    wordCount += 1
                    hasPendingWord = false
                }
            } else {
                hasPendingWord = true
            }
        }

        // End of text closes the last open run.
        if hasPendingWord {
            wordCount += 1
        }
        return wordCount
    }

    /// Parses the text of a range configuration file.
    ///
    /// Each line holds either one hexadecimal value (`0x20`) or an inclusive
    /// range (`0x84-0x89`). Everything from `#` to the end of a line is a
    /// comment. Blank lines are ignored. Lines whose values cannot be parsed
    /// are skipped (and reported with `print`) instead of reusing a stale
    /// value or trapping.
    ///
    /// - Parameter text: Full contents of the configuration file.
    /// - Returns: The parsed `(min, max)` pairs in file order.
    static func parseRanges(from text: String) -> [(Int, Int)] {
        var ranges = [(Int, Int)]()
        for rawLine in text.components(separatedBy: .newlines) {
            // Drop the comment first so a "-" inside it is not taken for a range separator.
            let uncommented = String(rawLine.prefix(while: { $0 != "#" }))
            let line = uncommented.trimmingCharacters(in: .whitespaces)
            if line.isEmpty {
                continue
            }

            let bounds = line.split(separator: "-", maxSplits: 1, omittingEmptySubsequences: false)
            guard let lower = parseHex(String(bounds[0])) else {
                print("WordCounter: skipping malformed range line: \(rawLine)")
                continue
            }
            if bounds.count == 2 {
                guard let upper = parseHex(String(bounds[1])) else {
                    print("WordCounter: skipping malformed range line: \(rawLine)")
                    continue
                }
                ranges.append((lower, upper))
            } else {
                ranges.append((lower, lower))
            }
        }
        return ranges
    }

    /// Parses the leading hexadecimal number of `token`, with or without a
    /// `0x` prefix; returns `nil` when there are no hex digits or the value
    /// does not fit in `Int`.
    private static func parseHex(_ token: String) -> Int? {
        var digits = Substring(token.trimmingCharacters(in: .whitespaces))
        if digits.hasPrefix("0x") || digits.hasPrefix("0X") {
            digits = digits.dropFirst(2)
        }
        let hex = digits.prefix(while: { $0.isHexDigit })
        if hex.isEmpty {
            return nil
        }
        return Int(hex, radix: 16)
    }

    /// Reads `<filePath>.txt` from the main bundle and parses it with
    /// `parseRanges(from:)`. Returns an empty list when the file is missing
    /// or cannot be read as UTF-8.
    private static func readConfigFile(filePath: String) -> [(Int, Int)] {
        guard let path = Bundle.main.path(forResource: filePath, ofType: ".txt") else {
            return []
        }
        do {
            let text = try String(contentsOfFile: path, encoding: .utf8)
            return parseRanges(from: text)
        } catch {
            print(error.localizedDescription)
            return []
        }
    }

    /// Whether `c` is a word separator.
    private func isBreakSpace(c: Character) -> Bool {
        return compareCode(c: c, codeArr: mBreakSpaceRanges)
    }

    /// Whether `c` is a full-width character or symbol.
    private func isFullWidthChar(c: Character) -> Bool {
        return compareCode(c: c, codeArr: mFullWidthCharRanges)
    }

    /// Whether the first Unicode scalar of `c` lies in any of the inclusive ranges.
    private func compareCode(c: Character, codeArr: [(Int, Int)]) -> Bool {
        guard let scalar = String(c).unicodeScalars.first else {
            return false
        }
        let value = Int(scalar.value)
        return codeArr.contains { $0.0 <= value && value <= $0.1 }
    }
}
  • break_spaces.txt
0x20
0x3000
0x0A
0x09
0x0B
0x0D
0xA0
  • fullwidth_chars.txt
0x80
0x82
0x84-0x89
0x8B
0x91-0x99
0x9B
0xA1
0xA4
0xA7-0xA8
0xAF
0xB0-0xB1
0xB4-0xB8
0xBC-0xBF
0xF7
0xD7
# CJK_UNIFIED_IDEOGRAPHS
0x4E00-0x9FFF
# CJK_COMPATIBILITY_IDEOGRAPHS
0xF900-0xFAFF
# CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
0x3400-0x4DBF
# CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
0x20000-0x2A6DF
# GENERAL_PUNCTUATION
0x2000-0x2018
0x201A-0x206F
# SPACING_MODIFIER_LETTERS
0x02B0-0x02FF
# CJK_SYMBOLS_AND_PUNCTUATION
0x3000-0x303F
# CJK_COMPATIBILITY
0x3300-0x33FF
# CJK_COMPATIBILITY_FORMS
0xFE30-0xFE4F
# HANGUL_JAMO
0x1100-0x11FF
# CURRENCY_SYMBOLS
0x20A0-0x20CF
# DINGBATS
0x2700-0x27BF
# GEOMETRIC_SHAPES
0x25A0-0x25FF
# HANGUL_SYLLABLES
0xAC00-0xD7AF
# HANGUL_COMPATIBILITY_JAMO
0x3130-0x318F
# HANGUL_JAMO_EXTENDED_A
0xA960-0xA97F
# HANGUL_JAMO_EXTENDED_B
0xD7B0-0xD7FF
# VERTICAL_FORMS
0xFE10-0xFE1F
# KATAKANA_PHONETIC_EXTENSIONS
0x31F0-0x31FF
# KATAKANA
0x30A0-0x30FF
# HIRAGANA
0x3040-0x309F
# HALFWIDTH_AND_FULLWIDTH_FORMS
0xFF00-0xFFEF
# Combining Diacritical Marks
0x0300-0x036F

iOS 泰语字数统计规则算法 (需要泰语库)

import Foundation

/// Counts tokens in text that may contain Thai, using a dictionary-driven
/// word breaker for the Thai parts.
///
/// Text is lower-cased, every character outside `a-z`, `0-9` and the Thai
/// range `ก-๙` is replaced by a space, and the result is split on spaces.
/// Pieces that contain Thai characters are further split with the
/// dictionary; all other pieces count as one token each.
final class ThaiWordCount {
    /// Node of the prefix tree built from the dictionary words, keyed by `Character`.
    private final class TrieNode {
        var children = [Character: TrieNode]()
    }

    /// Shared counter backed by `thaiDicts.json` in the main bundle.
    static let shared = ThaiWordCount()

    // Both patterns are compile-time constants, so a failure to compile
    // them is a programming error; they are built once instead of per call.
    private static let thaiRegex = try! NSRegularExpression(pattern: "[ก-๙]", options: [])
    private static let symbolRegex = try! NSRegularExpression(pattern: "[^a-z0-9ก-๙]", options: .caseInsensitive)

    /// Every complete dictionary word.
    private var thaiWords = Set<String>()
    /// Prefix tree over all dictionary words.
    private let wordTree = TrieNode()
    /// Dictionary words that are reported as several tokens, mapped to those tokens.
    private var compoundWords = [String: [String]]()

    /// Loads the bundled dictionary; an unavailable dictionary leaves the
    /// counter with no known words instead of crashing.
    private convenience init() {
        self.init(words: ThaiWordCount.loadBundledDictionary())
    }

    /// Creates a counter from explicit dictionary entries.
    ///
    /// - Parameter words: Dictionary entries. A plain entry is a word; an
    ///   entry of the form `word:part1,part2` is a compound word that is
    ///   reported as `part1`, `part2`.
    init(words: [String]) {
        readDictionary(words)
    }

    /// Returns the number of tokens in `content`.
    ///
    /// - Parameter content: The text to tokenize; `nil` yields 0.
    func getTokenSize(content: String?) -> Int {
        guard let content = content else {
            return 0
        }
        return tokenize(content: content).count
    }

    /// Reads `thaiDicts.json` (a JSON array of strings) from the main
    /// bundle. Returns an empty list, after printing the reason, when the
    /// file is missing, unreadable or not an array of strings.
    private static func loadBundledDictionary() -> [String] {
        guard let url = Bundle.main.url(forResource: "thaiDicts.json", withExtension: nil) else {
            print("ThaiWordCount: thaiDicts.json was not found in the main bundle")
            return []
        }
        do {
            let data = try Data(contentsOf: url)
            let json = try JSONSerialization.jsonObject(with: data, options: [])
            guard let words = json as? [String] else {
                print("ThaiWordCount: thaiDicts.json is not a JSON array of strings")
                return []
            }
            return words
        } catch {
            print(error.localizedDescription)
            return []
        }
    }
}

// MARK: - Dictionary tree

extension ThaiWordCount {
    /// Registers every non-empty dictionary entry as a word, in the prefix
    /// tree, and (for `word:part1,part2` entries) as a compound word.
    private func readDictionary(_ words: [String]) {
        for entry in words where !entry.isEmpty {
            var word = entry
            if entry.contains(",") {
                let pieces = entry.split(separator: ":").map { String($0) }
                // An entry with "," but without a usable "word:parts" shape is
                // kept as a plain word instead of trapping on a missing index.
                if pieces.count >= 2 {
                    let parts = pieces[1].split(separator: ",").map { String($0) }
                    if !parts.isEmpty {
                        word = pieces[0]
                        compoundWords[word] = parts
                    }
                }
            }
            thaiWords.insert(word)
            generateWordTree(word: word)
        }
    }

    /// Inserts `word` into the prefix tree, one `Character` per level.
    private func generateWordTree(word: String) {
        var node = wordTree
        for c in word {
            if let child = node.children[c] {
                node = child
            } else {
                let child = TrieNode()
                node.children[c] = child
                node = child
            }
        }
    }

    /// Whether `word` is a prefix of (or equal to) some dictionary word.
    private func queryWordTree(word: String) -> Bool {
        var node = wordTree
        for c in word {
            guard let child = node.children[c] else {
                return false
            }
            node = child
        }
        return true
    }
}

// MARK: - Tokenizing

extension ThaiWordCount {
    /// Splits `content` into tokens (see the type documentation).
    private func tokenize(content: String) -> [String] {
        let filteredContent = filterSymbols(content: convertLowerCase(content: content))
        var result = [String]()
        for piece in filteredContent.split(separator: " ") {
            let str = String(piece)
            // NSRegularExpression works on UTF-16 offsets, so the range must
            // be derived from the string rather than from `count`.
            let fullRange = NSRange(str.startIndex..., in: str)
            if ThaiWordCount.thaiRegex.firstMatch(in: str, options: [], range: fullRange) != nil {
                result.append(contentsOf: breakThaiWords(word: str).filter { !$0.isEmpty })
            } else {
                result.append(str)
            }
        }
        return result
    }

    /// Splits a piece containing Thai into dictionary words.
    ///
    /// Walks the piece character by character, extending `currentWord`
    /// while it is still a prefix of a dictionary word. Characters that
    /// cannot be matched are collected in `badWord` and emitted as a token
    /// of their own.
    private func breakThaiWords(word: String) -> [String] {
        var words = [String]()
        var index = 0
        var currentWord = ""
        var spareWord = ""
        var badWord = ""
        var nextWordAble = false

        // Overwrites the slot at `index` when it exists, otherwise appends.
        func store(_ token: String) {
            if words.count > index {
                words[index] = token
            } else {
                words.append(token)
            }
        }

        for c in word {
            let checkWord = currentWord + String(c)
            if queryWordTree(word: checkWord) {
                currentWord = checkWord
                if thaiWords.contains(currentWord) {
                    if !badWord.isEmpty {
                        // The last character of badWord already started the
                        // word that was just recognised, so drop it (as a
                        // whole Character, not as a UTF-16 unit).
                        store(String(badWord.dropLast()))
                        badWord = ""
                        index += 1
                    }
                    if let brokenWords = compoundWords[checkWord], !brokenWords.isEmpty {
                        for brokenWord in brokenWords {
                            store(brokenWord)
                            index += 1
                        }
                        // Leave index on the last part that was written.
                        index -= 1
                    } else {
                        store(checkWord)
                    }
                    spareWord = ""
                } else {
                    spareWord += String(c)
                }
                nextWordAble = true
            } else if nextWordAble {
                nextWordAble = false
                currentWord = spareWord + String(c)
                spareWord = String(c)
                index += 1
            } else {
                if badWord.isEmpty {
                    badWord = currentWord + String(c)
                } else {
                    badWord += String(c)
                }
                currentWord = String(c)
            }
        }

        if !badWord.isEmpty {
            words.append(badWord)
        }
        return words
    }
}

// MARK: - Normalizing

extension ThaiWordCount {
    /// Replaces every character outside `a-z`, `0-9` and `ก-๙`
    /// (case-insensitively) with a space.
    private func filterSymbols(content: String) -> String {
        // The range covers the whole string in UTF-16 units; using `count`
        // here would leave the tail of the text unfiltered whenever a
        // character spans more than one UTF-16 unit.
        let fullRange = NSRange(content.startIndex..., in: content)
        return ThaiWordCount.symbolRegex.stringByReplacingMatches(
            in: content,
            options: [],
            range: fullRange,
            withTemplate: " "
        )
    }

    /// Lower-cases `content`.
    private func convertLowerCase(content: String) -> String {
        return content.lowercased()
    }
}

跨平台实现 Swift 调用 Javascript 算法

使用 JS 实现字数统计,三端统一算法。以下是 Swift 通过 JavaScriptCore 调用 JS 的方式:

import Foundation
import JavaScriptCore

/// Counts words by delegating to the JavaScript implementation in
/// `stary-wordcount.js`, evaluated once in a private `JSContext`.
final class WordCounterFromJS: NSObject {
    /// Shared counter; the script is loaded when this is first accessed.
    static let shared = WordCounterFromJS()

    private let vm = JSVirtualMachine()
    private let context: JSContext

    private override init() {
        context = JSContext(virtualMachine: vm)
        super.init()
        context.exceptionHandler = { _, exception in
            print("JS Error:\(exception.debugDescription)")
        }
        loadScript()
    }

    /// Evaluates `stary-wordcount.js` from the main bundle in `context`.
    ///
    /// A missing or unreadable script is reported with `print` instead of
    /// crashing or being silently ignored; the counter then has no
    /// `WordCount` object to call.
    private func loadScript() {
        guard let url = Bundle.main.url(forResource: "stary-wordcount", withExtension: "js") else {
            print("WordCounterFromJS: stary-wordcount.js was not found in the main bundle")
            return
        }
        do {
            let jsCode = try String(contentsOf: url)
            context.evaluateScript(jsCode)
        } catch {
            print("WordCounterFromJS: failed to read stary-wordcount.js: \(error.localizedDescription)")
        }
    }

    /// Returns the word count the script reports for `content`.
    ///
    /// Calls `WordCount.getWordCount(content, true)` in the JS context and
    /// converts the result with `toInt32()`.
    ///
    /// - Parameter content: The text to count; `nil` yields 0 without
    ///   calling into JavaScript.
    /// - Returns: The script's result, or 0 when the call produces no value.
    func getWordCount(content: String?) -> Int {
        guard let content = content else {
            return 0
        }
        let wordCounter = context.objectForKeyedSubscript("WordCount")
        let result = wordCounter?.invokeMethod("getWordCount", withArguments: [content, true])
        return Int(result?.toInt32() ?? 0)
    }
}

更多推荐

iOS 小说字数统计

本文发布于:2023-06-29 08:31:57,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/942144.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:字数   小说   iOS

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!