|
@@ -15,445 +15,25 @@ extension OSLog {
|
|
static let performance = OSLog(subsystem: subsystem, category: "Peformance")
|
|
static let performance = OSLog(subsystem: subsystem, category: "Peformance")
|
|
}
|
|
}
|
|
|
|
|
|
-// Tag definition
|
|
|
|
-public protocol CharacterStyling {
|
|
|
|
- func isEqualTo( _ other : CharacterStyling ) -> Bool
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-public enum SpaceAllowed {
|
|
|
|
- case no
|
|
|
|
- case bothSides
|
|
|
|
- case oneSide
|
|
|
|
- case leadingSide
|
|
|
|
- case trailingSide
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-public enum Cancel {
|
|
|
|
- case none
|
|
|
|
- case allRemaining
|
|
|
|
- case currentSet
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-public struct CharacterRule : CustomStringConvertible {
|
|
|
|
- public let openTag : String
|
|
|
|
- public let intermediateTag : String?
|
|
|
|
- public let closingTag : String?
|
|
|
|
- public let escapeCharacter : Character?
|
|
|
|
- public let styles : [Int : [CharacterStyling]]
|
|
|
|
- public var minTags : Int = 1
|
|
|
|
- public var maxTags : Int = 1
|
|
|
|
- public var spacesAllowed : SpaceAllowed = .oneSide
|
|
|
|
- public var cancels : Cancel = .none
|
|
|
|
-
|
|
|
|
- public var tagVarieties : [Int : String]
|
|
|
|
-
|
|
|
|
- public var description: String {
|
|
|
|
- return "Character Rule with Open tag: \(self.openTag) and current styles : \(self.styles) "
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- public init(openTag: String, intermediateTag: String? = nil, closingTag: String? = nil, escapeCharacter: Character? = nil, styles: [Int : [CharacterStyling]] = [:], minTags : Int = 1, maxTags : Int = 1, cancels : Cancel = .none) {
|
|
|
|
- self.openTag = openTag
|
|
|
|
- self.intermediateTag = intermediateTag
|
|
|
|
- self.closingTag = closingTag
|
|
|
|
- self.escapeCharacter = escapeCharacter
|
|
|
|
- self.styles = styles
|
|
|
|
- self.minTags = minTags
|
|
|
|
- self.maxTags = maxTags
|
|
|
|
- self.cancels = cancels
|
|
|
|
-
|
|
|
|
- self.tagVarieties = [:]
|
|
|
|
- for i in minTags...maxTags {
|
|
|
|
- self.tagVarieties[i] = openTag.repeating(i)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// Token definition
|
|
|
|
-public enum TokenType {
|
|
|
|
- case repeatingTag
|
|
|
|
- case openTag
|
|
|
|
- case intermediateTag
|
|
|
|
- case closeTag
|
|
|
|
- case string
|
|
|
|
- case escape
|
|
|
|
- case replacement
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-public struct Token {
|
|
|
|
- public let id = UUID().uuidString
|
|
|
|
- public let type : TokenType
|
|
|
|
- public let inputString : String
|
|
|
|
- public fileprivate(set) var group : Int = 0
|
|
|
|
- public fileprivate(set) var metadataString : String? = nil
|
|
|
|
- public fileprivate(set) var characterStyles : [CharacterStyling] = []
|
|
|
|
- public fileprivate(set) var count : Int = 0
|
|
|
|
- public fileprivate(set) var shouldSkip : Bool = false
|
|
|
|
- public fileprivate(set) var tokenIndex : Int = -1
|
|
|
|
- public fileprivate(set) var isProcessed : Bool = false
|
|
|
|
- public fileprivate(set) var isMetadata : Bool = false
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- public var outputString : String {
|
|
|
|
- get {
|
|
|
|
- switch self.type {
|
|
|
|
- case .repeatingTag:
|
|
|
|
- if count <= 0 {
|
|
|
|
- return ""
|
|
|
|
- } else {
|
|
|
|
- let range = inputString.startIndex..<inputString.index(inputString.startIndex, offsetBy: self.count)
|
|
|
|
- return String(inputString[range])
|
|
|
|
- }
|
|
|
|
- case .openTag, .closeTag, .intermediateTag:
|
|
|
|
- return (self.isProcessed || self.isMetadata) ? "" : inputString
|
|
|
|
- case .escape, .string:
|
|
|
|
- return (self.isProcessed || self.isMetadata) ? "" : inputString
|
|
|
|
- case .replacement:
|
|
|
|
- return self.inputString
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- public init( type : TokenType, inputString : String, characterStyles : [CharacterStyling] = []) {
|
|
|
|
- self.type = type
|
|
|
|
- self.inputString = inputString
|
|
|
|
- self.characterStyles = characterStyles
|
|
|
|
- if type == .repeatingTag {
|
|
|
|
- self.count = inputString.count
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- func newToken( fromSubstring string: String, isReplacement : Bool) -> Token {
|
|
|
|
- var newToken = Token(type: (isReplacement) ? .replacement : .string , inputString: string, characterStyles: self.characterStyles)
|
|
|
|
- newToken.metadataString = self.metadataString
|
|
|
|
- newToken.isMetadata = self.isMetadata
|
|
|
|
- newToken.isProcessed = self.isProcessed
|
|
|
|
- return newToken
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-extension Sequence where Iterator.Element == Token {
|
|
|
|
- var oslogDisplay: String {
|
|
|
|
- return "[\"\(self.map( { ($0.outputString.isEmpty) ? "\($0.type): \($0.inputString)" : $0.outputString }).joined(separator: "\", \""))\"]"
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-enum TagState {
|
|
|
|
- case none
|
|
|
|
- case open
|
|
|
|
- case intermediate
|
|
|
|
- case closed
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-struct TagString {
|
|
|
|
- var state : TagState = .none
|
|
|
|
- var preOpenString = ""
|
|
|
|
- var openTagString : [String] = []
|
|
|
|
- var intermediateString = ""
|
|
|
|
- var intermediateTagString = ""
|
|
|
|
- var metadataString = ""
|
|
|
|
- var closedTagString : [String] = []
|
|
|
|
- var postClosedString = ""
|
|
|
|
-
|
|
|
|
- let rule : CharacterRule
|
|
|
|
- var tokenGroup = 0
|
|
|
|
-
|
|
|
|
- init( with rule : CharacterRule ) {
|
|
|
|
- self.rule = rule
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func append( _ string : String? ) {
|
|
|
|
- guard let existentString = string else {
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
- switch self.state {
|
|
|
|
- case .none:
|
|
|
|
- self.preOpenString += existentString
|
|
|
|
- case .open:
|
|
|
|
- self.intermediateString += existentString
|
|
|
|
- case .intermediate:
|
|
|
|
- self.metadataString += existentString
|
|
|
|
- case .closed:
|
|
|
|
- self.postClosedString += existentString
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func handleRepeatingTags( _ tokenGroup : [TokenGroup] ) {
|
|
|
|
- var availableCount = self.rule.maxTags
|
|
|
|
- var sameOpenGroup = false
|
|
|
|
- for token in tokenGroup {
|
|
|
|
-
|
|
|
|
- switch token.state {
|
|
|
|
- case .none:
|
|
|
|
- self.append(token.string)
|
|
|
|
- if self.state == .closed {
|
|
|
|
- self.state = .none
|
|
|
|
- }
|
|
|
|
- case .open:
|
|
|
|
- switch self.state {
|
|
|
|
- case .none:
|
|
|
|
- self.openTagString.append(token.string)
|
|
|
|
- self.state = .open
|
|
|
|
- availableCount = self.rule.maxTags - token.string.count
|
|
|
|
- sameOpenGroup = true
|
|
|
|
- case .open:
|
|
|
|
- if availableCount > 0 {
|
|
|
|
- if sameOpenGroup {
|
|
|
|
- self.openTagString.append(token.string)
|
|
|
|
- availableCount = self.rule.maxTags - token.string.count
|
|
|
|
- } else {
|
|
|
|
- self.closedTagString.append(token.string)
|
|
|
|
- self.state = .closed
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- self.append(token.string)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- case .intermediate:
|
|
|
|
- self.preOpenString += self.openTagString.joined() + token.string
|
|
|
|
- case .closed:
|
|
|
|
- self.append(token.string)
|
|
|
|
- }
|
|
|
|
- case .intermediate:
|
|
|
|
- switch self.state {
|
|
|
|
- case .none:
|
|
|
|
- self.preOpenString += token.string
|
|
|
|
- case .open:
|
|
|
|
- self.intermediateTagString += token.string
|
|
|
|
- self.state = .intermediate
|
|
|
|
- case .intermediate:
|
|
|
|
- self.metadataString += token.string
|
|
|
|
- case .closed:
|
|
|
|
- self.postClosedString += token.string
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- case .closed:
|
|
|
|
- switch self.state {
|
|
|
|
- case .intermediate:
|
|
|
|
- self.closedTagString.append(token.string)
|
|
|
|
- self.state = .closed
|
|
|
|
- case .closed:
|
|
|
|
- self.postClosedString += token.string
|
|
|
|
- case .open:
|
|
|
|
- if self.rule.intermediateTag == nil {
|
|
|
|
- self.closedTagString.append(token.string)
|
|
|
|
- self.state = .closed
|
|
|
|
- } else {
|
|
|
|
- self.preOpenString += self.openTagString.joined()
|
|
|
|
- self.preOpenString += self.intermediateString
|
|
|
|
- self.preOpenString += token.string
|
|
|
|
- self.intermediateString = ""
|
|
|
|
- self.openTagString.removeAll()
|
|
|
|
- }
|
|
|
|
- case .none:
|
|
|
|
- self.preOpenString += token.string
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if !self.openTagString.isEmpty && self.rule.closingTag == nil && self.state != .closed {
|
|
|
|
- self.state = .open
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func handleRegularTags( _ tokenGroup : [TokenGroup] ) {
|
|
|
|
- for token in tokenGroup {
|
|
|
|
-
|
|
|
|
- switch token.state {
|
|
|
|
- case .none:
|
|
|
|
- self.append(token.string)
|
|
|
|
- if self.state == .closed {
|
|
|
|
- self.state = .none
|
|
|
|
- }
|
|
|
|
- case .open:
|
|
|
|
- switch self.state {
|
|
|
|
- case .none:
|
|
|
|
- self.openTagString.append(token.string)
|
|
|
|
- self.state = .open
|
|
|
|
- case .open:
|
|
|
|
- if self.rule.maxTags == 1, self.openTagString.first == rule.openTag {
|
|
|
|
- self.preOpenString = self.preOpenString + self.openTagString.joined() + self.intermediateString
|
|
|
|
- self.intermediateString = ""
|
|
|
|
- self.openTagString.removeAll()
|
|
|
|
- self.openTagString.append(token.string)
|
|
|
|
- } else {
|
|
|
|
- self.openTagString.append(token.string)
|
|
|
|
- }
|
|
|
|
- case .intermediate:
|
|
|
|
- self.preOpenString += self.openTagString.joined() + token.string
|
|
|
|
- case .closed:
|
|
|
|
- self.openTagString.append(token.string)
|
|
|
|
- }
|
|
|
|
- case .intermediate:
|
|
|
|
- switch self.state {
|
|
|
|
- case .none:
|
|
|
|
- self.preOpenString += token.string
|
|
|
|
- case .open:
|
|
|
|
- self.intermediateTagString += token.string
|
|
|
|
- self.state = .intermediate
|
|
|
|
- case .intermediate:
|
|
|
|
- self.metadataString += token.string
|
|
|
|
- case .closed:
|
|
|
|
- self.postClosedString += token.string
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- case .closed:
|
|
|
|
- switch self.state {
|
|
|
|
- case .intermediate:
|
|
|
|
- self.closedTagString.append(token.string)
|
|
|
|
- self.state = .closed
|
|
|
|
- case .closed:
|
|
|
|
- self.postClosedString += token.string
|
|
|
|
- case .open:
|
|
|
|
- if self.rule.intermediateTag == nil {
|
|
|
|
- self.closedTagString.append(token.string)
|
|
|
|
- self.state = .closed
|
|
|
|
- } else {
|
|
|
|
- self.preOpenString += self.openTagString.joined()
|
|
|
|
- self.preOpenString += self.intermediateString
|
|
|
|
- self.preOpenString += token.string
|
|
|
|
- self.intermediateString = ""
|
|
|
|
- self.openTagString.removeAll()
|
|
|
|
- }
|
|
|
|
- case .none:
|
|
|
|
- self.preOpenString += token.string
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func append( contentsOf tokenGroup: [TokenGroup] ) {
|
|
|
|
- if self.rule.closingTag == nil {
|
|
|
|
- self.handleRepeatingTags(tokenGroup)
|
|
|
|
- } else {
|
|
|
|
- self.handleRegularTags(tokenGroup)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- func configureToken(ofType type : TokenType = .string, with string : String ) -> Token {
|
|
|
|
- var token = Token(type: type, inputString: string)
|
|
|
|
- token.group = self.tokenGroup
|
|
|
|
- return token
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func reset() {
|
|
|
|
- self.preOpenString = ""
|
|
|
|
- self.openTagString.removeAll()
|
|
|
|
- self.intermediateString = ""
|
|
|
|
- self.intermediateTagString = ""
|
|
|
|
- self.metadataString = ""
|
|
|
|
- self.closedTagString.removeAll()
|
|
|
|
- self.postClosedString = ""
|
|
|
|
-
|
|
|
|
- self.state = .none
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func consolidate(with string : String, into tokens : inout [Token]) -> [Token] {
|
|
|
|
- self.reset()
|
|
|
|
- guard !string.isEmpty else {
|
|
|
|
- return tokens
|
|
|
|
- }
|
|
|
|
- tokens.append(self.configureToken(with: string))
|
|
|
|
- return tokens
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutating func tokens(beginningGroupNumberAt group : Int = 0) -> [Token] {
|
|
|
|
- self.tokenGroup = group
|
|
|
|
- var tokens : [Token] = []
|
|
|
|
-
|
|
|
|
- if self.intermediateString.isEmpty && self.intermediateTagString.isEmpty && self.metadataString.isEmpty {
|
|
|
|
- let actualString = self.preOpenString + self.openTagString.joined() + self.closedTagString.joined() + self.postClosedString
|
|
|
|
- return self.consolidate(with: actualString, into: &tokens)
|
|
|
|
- }
|
|
|
|
- if self.state == .open && !self.openTagString.isEmpty {
|
|
|
|
- let actualString = self.preOpenString + self.openTagString.joined() + self.intermediateString
|
|
|
|
- return self.consolidate(with: actualString, into: &tokens)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if !self.preOpenString.isEmpty {
|
|
|
|
- tokens.append(self.configureToken(with: self.preOpenString))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- for tag in self.openTagString {
|
|
|
|
- if self.rule.closingTag == nil {
|
|
|
|
- tokens.append(self.configureToken(ofType: .repeatingTag, with: tag))
|
|
|
|
- } else {
|
|
|
|
- tokens.append(self.configureToken(ofType: .openTag, with: tag))
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- self.tokenGroup += 1
|
|
|
|
- if !self.intermediateString.isEmpty {
|
|
|
|
- var token = self.configureToken(with: self.intermediateString)
|
|
|
|
- token.metadataString = (self.metadataString.isEmpty) ? nil : self.metadataString
|
|
|
|
- tokens.append(token)
|
|
|
|
- }
|
|
|
|
- if !self.intermediateTagString.isEmpty {
|
|
|
|
- tokens.append(self.configureToken(ofType: .intermediateTag, with: self.intermediateTagString))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- self.tokenGroup += 1
|
|
|
|
-
|
|
|
|
- if !self.metadataString.isEmpty {
|
|
|
|
- tokens.append(self.configureToken(with: self.metadataString))
|
|
|
|
- }
|
|
|
|
- var remainingTags = ( self.rule.closingTag == nil ) ? self.openTagString.joined() : ""
|
|
|
|
- for tag in self.closedTagString {
|
|
|
|
- if self.rule.closingTag == nil {
|
|
|
|
- remainingTags = remainingTags.replacingOccurrences(of: tag, with: "")
|
|
|
|
- tokens.append(self.configureToken(ofType: .repeatingTag, with: tag))
|
|
|
|
- } else {
|
|
|
|
- tokens.append(self.configureToken(ofType: .closeTag, with: tag))
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if !self.postClosedString.isEmpty {
|
|
|
|
- tokens.append(self.configureToken(with: self.postClosedString))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- self.reset()
|
|
|
|
-
|
|
|
|
- if !remainingTags.isEmpty {
|
|
|
|
- self.state = .open
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return tokens
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-struct TokenGroup {
|
|
|
|
- enum TokenGroupType {
|
|
|
|
- case string
|
|
|
|
- case tag
|
|
|
|
- case escape
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- let string : String
|
|
|
|
- let isEscaped : Bool
|
|
|
|
- let type : TokenGroupType
|
|
|
|
- var state : TagState = .none
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
public class SwiftyTokeniser {
|
|
public class SwiftyTokeniser {
|
|
let rules : [CharacterRule]
|
|
let rules : [CharacterRule]
|
|
var replacements : [String : [Token]] = [:]
|
|
var replacements : [String : [Token]] = [:]
|
|
|
|
|
|
- var currentRunTime : TimeInterval = 0
|
|
|
|
- var totalTime : TimeInterval = 0
|
|
|
|
var enableLog = (ProcessInfo.processInfo.environment["SwiftyTokeniserLogging"] != nil)
|
|
var enableLog = (ProcessInfo.processInfo.environment["SwiftyTokeniserLogging"] != nil)
|
|
- var enablePerformanceLog = (ProcessInfo.processInfo.environment["SwiftyTokeniserPerformanceLogging"] != nil)
|
|
|
|
|
|
+ let totalPerfomanceLog = PerformanceLog(with: "SwiftyTokeniserPerformanceLogging", identifier: "Tokeniser Total Run Time", log: OSLog.performance)
|
|
|
|
+ let currentPerfomanceLog = PerformanceLog(with: "SwiftyTokeniserPerformanceLogging", identifier: "Tokeniser Current", log: OSLog.performance)
|
|
|
|
+
|
|
|
|
+ public var scanner : SwiftyScanning
|
|
|
|
|
|
- public init( with rules : [CharacterRule] ) {
|
|
|
|
|
|
+ public init( with rules : [CharacterRule], scanner : SwiftyScanning ) {
|
|
self.rules = rules
|
|
self.rules = rules
|
|
- if enablePerformanceLog {
|
|
|
|
- self.totalTime = Date.timeIntervalSinceReferenceDate
|
|
|
|
- os_log("--- TIMER: Tokeniser initialised", log: .performance, type: .info)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+ self.scanner = scanner
|
|
|
|
+
|
|
|
|
+ self.totalPerfomanceLog.start()
|
|
}
|
|
}
|
|
|
|
|
|
deinit {
|
|
deinit {
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("--- TIMER (Tokeniser deinitialised): %f", log: .performance, type: .info, Date.timeIntervalSinceReferenceDate - self.totalTime)
|
|
|
|
- }
|
|
|
|
|
|
+ self.totalPerfomanceLog.end()
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -476,12 +56,8 @@ public class SwiftyTokeniser {
|
|
var currentTokens : [Token] = []
|
|
var currentTokens : [Token] = []
|
|
var mutableRules = self.rules
|
|
var mutableRules = self.rules
|
|
|
|
|
|
- self.totalTime = Date().timeIntervalSinceReferenceDate
|
|
|
|
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- self.currentRunTime = Date().timeIntervalSinceReferenceDate
|
|
|
|
- os_log("TIMER (total run time): %f", log: .performance, type: .info, Date().timeIntervalSinceReferenceDate - self.totalTime)
|
|
|
|
- }
|
|
|
|
|
|
+ self.currentPerfomanceLog.start()
|
|
|
|
|
|
while !mutableRules.isEmpty {
|
|
while !mutableRules.isEmpty {
|
|
let nextRule = mutableRules.removeFirst()
|
|
let nextRule = mutableRules.removeFirst()
|
|
@@ -490,14 +66,12 @@ public class SwiftyTokeniser {
|
|
os_log("------------------------------", log: .tokenising, type: .info)
|
|
os_log("------------------------------", log: .tokenising, type: .info)
|
|
os_log("RULE: %@", log: OSLog.tokenising, type:.info , nextRule.description)
|
|
os_log("RULE: %@", log: OSLog.tokenising, type:.info , nextRule.description)
|
|
}
|
|
}
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (start rule %@): %f", log: .performance, type: .info, nextRule.openTag, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
|
|
+ self.currentPerfomanceLog.tag(with: "(start rule %@)")
|
|
|
|
|
|
|
|
|
|
if currentTokens.isEmpty {
|
|
if currentTokens.isEmpty {
|
|
// This means it's the first time through
|
|
// This means it's the first time through
|
|
- currentTokens = self.applyStyles(to: self.scan(inputString, with: nextRule), usingRule: nextRule)
|
|
|
|
|
|
+ currentTokens = self.applyStyles(to: self.scanner.scan(inputString, with: nextRule), usingRule: nextRule)
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
|
|
|
|
@@ -555,9 +129,7 @@ public class SwiftyTokeniser {
|
|
// The one string token might then be exploded into multiple more tokens
|
|
// The one string token might then be exploded into multiple more tokens
|
|
}
|
|
}
|
|
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (finished all rules): %f", log: .performance, type: .info, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
|
|
+ self.currentPerfomanceLog.tag(with: "(finished all rules)")
|
|
|
|
|
|
if enableLog {
|
|
if enableLog {
|
|
os_log("=====RULE PROCESSING COMPLETE=====", log: .tokenising, type: .info)
|
|
os_log("=====RULE PROCESSING COMPLETE=====", log: .tokenising, type: .info)
|
|
@@ -682,7 +254,7 @@ public class SwiftyTokeniser {
|
|
|
|
|
|
let combinedString = tokens.map({ $0.outputString }).joined()
|
|
let combinedString = tokens.map({ $0.outputString }).joined()
|
|
|
|
|
|
- let nextTokens = self.scan(combinedString, with: rule)
|
|
|
|
|
|
+ let nextTokens = self.scanner.scan(combinedString, with: rule)
|
|
var replacedTokens = self.applyStyles(to: nextTokens, usingRule: rule)
|
|
var replacedTokens = self.applyStyles(to: nextTokens, usingRule: rule)
|
|
|
|
|
|
/// It's necessary here to check to see if the first token (which will always represent the styles
|
|
/// It's necessary here to check to see if the first token (which will always represent the styles
|
|
@@ -929,256 +501,7 @@ public class SwiftyTokeniser {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
- func scanSpacing( _ scanner : Scanner, usingCharactersIn set : CharacterSet ) -> (preTag : String?, foundChars : String?, postTag : String?) {
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (scan space) : %f", log: .performance, type: .info, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
- let lastChar : String?
|
|
|
|
- if #available(iOS 13.0, OSX 10.15, watchOS 6.0, tvOS 13.0, *) {
|
|
|
|
- lastChar = ( scanner.currentIndex > scanner.string.startIndex ) ? String(scanner.string[scanner.string.index(before: scanner.currentIndex)..<scanner.currentIndex]) : nil
|
|
|
|
- } else {
|
|
|
|
- if let scanLocation = scanner.string.index(scanner.string.startIndex, offsetBy: scanner.scanLocation, limitedBy: scanner.string.endIndex) {
|
|
|
|
- lastChar = ( scanLocation > scanner.string.startIndex ) ? String(scanner.string[scanner.string.index(before: scanLocation)..<scanLocation]) : nil
|
|
|
|
- } else {
|
|
|
|
- lastChar = nil
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
- let maybeFoundChars : String?
|
|
|
|
- if #available(iOS 13.0, OSX 10.15, watchOS 6.0, tvOS 13.0, *) {
|
|
|
|
- maybeFoundChars = scanner.scanCharacters(from: set )
|
|
|
|
- } else {
|
|
|
|
- var string : NSString?
|
|
|
|
- scanner.scanCharacters(from: set, into: &string)
|
|
|
|
- maybeFoundChars = string as String?
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- let nextChar : String?
|
|
|
|
- if #available(iOS 13.0, OSX 10.15, watchOS 6.0,tvOS 13.0, *) {
|
|
|
|
- nextChar = (scanner.currentIndex != scanner.string.endIndex) ? String(scanner.string[scanner.currentIndex]) : nil
|
|
|
|
- } else {
|
|
|
|
- if let scanLocation = scanner.string.index(scanner.string.startIndex, offsetBy: scanner.scanLocation, limitedBy: scanner.string.endIndex) {
|
|
|
|
- nextChar = (scanLocation != scanner.string.endIndex) ? String(scanner.string[scanLocation]) : nil
|
|
|
|
- } else {
|
|
|
|
- nextChar = nil
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (end space) : %f", log: .performance, type: .info, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return (lastChar, maybeFoundChars, nextChar)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- func getTokenGroups( for string : inout String, with rule : CharacterRule, shouldEmpty : Bool = false ) -> [TokenGroup] {
|
|
|
|
- if string.isEmpty {
|
|
|
|
- return []
|
|
|
|
- }
|
|
|
|
- var groups : [TokenGroup] = []
|
|
|
|
-
|
|
|
|
- if string.contains(rule.openTag) {
|
|
|
|
- if shouldEmpty || string == rule.tagVarieties[rule.maxTags]{
|
|
|
|
- var token = TokenGroup(string: string, isEscaped: false, type: .tag)
|
|
|
|
- token.state = .open
|
|
|
|
- groups.append(token)
|
|
|
|
- string.removeAll()
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- } else if let intermediateString = rule.intermediateTag, string.contains(intermediateString) {
|
|
|
|
-
|
|
|
|
- if let range = string.range(of: intermediateString) {
|
|
|
|
- let prior = string[string.startIndex..<range.lowerBound]
|
|
|
|
- let tag = string[range]
|
|
|
|
- let following = string[range.upperBound..<string.endIndex]
|
|
|
|
- if !prior.isEmpty {
|
|
|
|
- groups.append(TokenGroup(string: String(prior), isEscaped: false, type: .string))
|
|
|
|
- }
|
|
|
|
- var token = TokenGroup(string: String(tag), isEscaped: false, type: .tag)
|
|
|
|
- token.state = .intermediate
|
|
|
|
- groups.append(token)
|
|
|
|
- if !following.isEmpty {
|
|
|
|
- groups.append(TokenGroup(string: String(following), isEscaped: false, type: .string))
|
|
|
|
- }
|
|
|
|
- string.removeAll()
|
|
|
|
- }
|
|
|
|
- } else if let closingTag = rule.closingTag, closingTag.contains(string) {
|
|
|
|
- var token = TokenGroup(string: string, isEscaped: false, type: .tag)
|
|
|
|
- token.state = .closed
|
|
|
|
- groups.append(token)
|
|
|
|
- string.removeAll()
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if shouldEmpty && !string.isEmpty {
|
|
|
|
- let token = TokenGroup(string: string, isEscaped: false, type: .tag)
|
|
|
|
- groups.append(token)
|
|
|
|
- string.removeAll()
|
|
|
|
- }
|
|
|
|
- return groups
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- func scan( _ string : String, with rule : CharacterRule) -> [Token] {
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- let scanner = Scanner(string: string)
|
|
|
|
- scanner.charactersToBeSkipped = nil
|
|
|
|
- var tokens : [Token] = []
|
|
|
|
- var set = CharacterSet(charactersIn: "\(rule.openTag)\(rule.intermediateTag ?? "")\(rule.closingTag ?? "")")
|
|
|
|
- if let existentEscape = rule.escapeCharacter {
|
|
|
|
- set.insert(charactersIn: String(existentEscape))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- var tagString = TagString(with: rule)
|
|
|
|
- var tokenGroup = 0
|
|
|
|
-
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (start scan %@): %f (string: %@)", log: .performance, type: .info, rule.openTag, Date().timeIntervalSinceReferenceDate - self.currentRunTime, string)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if !string.contains( rule.openTag ) {
|
|
|
|
- return [Token(type: .string, inputString: string)]
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- while !scanner.isAtEnd {
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (loop start %@): %f", log: .performance, type: .info, rule.openTag, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
- tokenGroup += 1
|
|
|
|
- if #available(iOS 13.0, OSX 10.15, watchOS 6.0, tvOS 13.0, *) {
|
|
|
|
- if let start = scanner.scanUpToCharacters(from: set) {
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (first chars) : %f", log: .performance, type: .info, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
- tagString.append(start)
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- var string : NSString?
|
|
|
|
- scanner.scanUpToCharacters(from: set, into: &string)
|
|
|
|
- if let existentString = string as String? {
|
|
|
|
- tagString.append(existentString)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // The end of the string
|
|
|
|
- let spacing = self.scanSpacing(scanner, usingCharactersIn: set)
|
|
|
|
- guard let foundTag = spacing.foundChars else {
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if foundTag == rule.openTag && foundTag.count < rule.minTags {
|
|
|
|
- tagString.append(foundTag)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if !validateSpacing(nextCharacter: spacing.postTag, previousCharacter: spacing.preTag, with: rule) {
|
|
|
|
- let escapeString = String("\(rule.escapeCharacter ?? Character(""))")
|
|
|
|
- var escaped = foundTag.replacingOccurrences(of: "\(escapeString)\(rule.openTag)", with: rule.openTag)
|
|
|
|
- if let hasIntermediateTag = rule.intermediateTag {
|
|
|
|
- escaped = foundTag.replacingOccurrences(of: "\(escapeString)\(hasIntermediateTag)", with: hasIntermediateTag)
|
|
|
|
- }
|
|
|
|
- if let existentClosingTag = rule.closingTag {
|
|
|
|
- escaped = foundTag.replacingOccurrences(of: "\(escapeString)\(existentClosingTag)", with: existentClosingTag)
|
|
|
|
- }
|
|
|
|
- tagString.append(escaped)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (found tag %@) : %f", log: .performance, type: .info, rule.openTag, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if !foundTag.contains(rule.openTag) && !foundTag.contains(rule.intermediateTag ?? "") && !foundTag.contains(rule.closingTag ?? "") {
|
|
|
|
- tagString.append(foundTag)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- var tokenGroups : [TokenGroup] = []
|
|
|
|
- var escapeCharacter : Character? = nil
|
|
|
|
- var cumulatedString = ""
|
|
|
|
- for char in foundTag {
|
|
|
|
- if let existentEscapeCharacter = escapeCharacter {
|
|
|
|
-
|
|
|
|
- // If any of the tags feature the current character
|
|
|
|
- let escape = String(existentEscapeCharacter)
|
|
|
|
- let nextTagCharacter = String(char)
|
|
|
|
- if rule.openTag.contains(nextTagCharacter) || rule.intermediateTag?.contains(nextTagCharacter) ?? false || rule.closingTag?.contains(nextTagCharacter) ?? false {
|
|
|
|
- tokenGroups.append(TokenGroup(string: nextTagCharacter, isEscaped: true, type: .tag))
|
|
|
|
- escapeCharacter = nil
|
|
|
|
- } else if nextTagCharacter == escape {
|
|
|
|
- // Doesn't apply to this rule
|
|
|
|
- tokenGroups.append(TokenGroup(string: nextTagCharacter, isEscaped: false, type: .escape))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
- if let existentEscape = rule.escapeCharacter {
|
|
|
|
- if char == existentEscape {
|
|
|
|
- tokenGroups.append(contentsOf: getTokenGroups(for: &cumulatedString, with: rule, shouldEmpty: true))
|
|
|
|
- escapeCharacter = char
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- cumulatedString.append(char)
|
|
|
|
- tokenGroups.append(contentsOf: getTokenGroups(for: &cumulatedString, with: rule))
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
- if let remainingEscape = escapeCharacter {
|
|
|
|
- tokenGroups.append(TokenGroup(string: String(remainingEscape), isEscaped: false, type: .escape))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- tokenGroups.append(contentsOf: getTokenGroups(for: &cumulatedString, with: rule, shouldEmpty: true))
|
|
|
|
- tagString.append(contentsOf: tokenGroups)
|
|
|
|
-
|
|
|
|
- if tagString.state == .closed {
|
|
|
|
- tokens.append(contentsOf: tagString.tokens(beginningGroupNumberAt : tokenGroup))
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- tokens.append(contentsOf: tagString.tokens(beginningGroupNumberAt : tokenGroup))
|
|
|
|
- if enablePerformanceLog {
|
|
|
|
- os_log("TIMER (end scan %@) : %f", log: .performance, type: .info, rule.openTag, Date().timeIntervalSinceReferenceDate - self.currentRunTime)
|
|
|
|
- }
|
|
|
|
|
|
|
|
- return tokens
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- func validateSpacing( nextCharacter : String?, previousCharacter : String?, with rule : CharacterRule ) -> Bool {
|
|
|
|
- switch rule.spacesAllowed {
|
|
|
|
- case .leadingSide:
|
|
|
|
- guard nextCharacter != nil else {
|
|
|
|
- return true
|
|
|
|
- }
|
|
|
|
- if nextCharacter == " " {
|
|
|
|
- return false
|
|
|
|
- }
|
|
|
|
- case .trailingSide:
|
|
|
|
- guard previousCharacter != nil else {
|
|
|
|
- return true
|
|
|
|
- }
|
|
|
|
- if previousCharacter == " " {
|
|
|
|
- return false
|
|
|
|
- }
|
|
|
|
- case .no:
|
|
|
|
- switch (previousCharacter, nextCharacter) {
|
|
|
|
- case (nil, nil), ( " ", _ ), ( _, " " ):
|
|
|
|
- return false
|
|
|
|
- default:
|
|
|
|
- return true
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- case .oneSide:
|
|
|
|
- switch (previousCharacter, nextCharacter) {
|
|
|
|
- case (nil, " " ), (" ", nil), (" ", " " ):
|
|
|
|
- return false
|
|
|
|
- default:
|
|
|
|
- return true
|
|
|
|
- }
|
|
|
|
- default:
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- return true
|
|
|
|
- }
|
|
|
|
|
|
|
|
}
|
|
}
|
|
|
|
|