|
@@ -15,235 +15,557 @@
|
|
|
import Foundation
|
|
|
import os.log
|
|
|
|
|
|
-enum v2_TokenType {
|
|
|
- case string
|
|
|
- case link
|
|
|
- case metadata
|
|
|
- case tag
|
|
|
+
|
|
|
+extension OSLog {
|
|
|
+ private static var subsystem = "SwiftyScanner"
|
|
|
+ static let swiftyScannerScanner = OSLog(subsystem: subsystem, category: "Swifty Scanner Scanner")
|
|
|
+ static let swiftyScannerScannerPerformance = OSLog(subsystem: subsystem, category: "Swifty Scanner Scanner Peformance")
|
|
|
+}
|
|
|
+
|
|
|
+enum RepeatingTagType {
|
|
|
+ case open
|
|
|
+ case either
|
|
|
+ case close
|
|
|
+ case neither
|
|
|
}
|
|
|
|
|
|
-struct v2_Token {
|
|
|
- var type : v2_TokenType
|
|
|
- let string : String
|
|
|
- var metadata : String = ""
|
|
|
-// let startIndex : String.Index
|
|
|
+struct TagGroup {
|
|
|
+ let groupID = UUID().uuidString
|
|
|
+ var tagRanges : [ClosedRange<Int>]
|
|
|
+ var tagType : RepeatingTagType = .open
|
|
|
+ var count = 1
|
|
|
}
|
|
|
|
|
|
-class SwiftyScannerNonRepeating : SwiftyScanning {
|
|
|
- var metadataLookup: [String : String]
|
|
|
+class SwiftyScannerNonRepeating {
|
|
|
+ var elements : [Element]
|
|
|
+ let rule : CharacterRule
|
|
|
+ let metadata : [String : String]
|
|
|
+ var pointer : Int = 0
|
|
|
|
|
|
- var str : String
|
|
|
- var currentIndex : String.Index
|
|
|
+ var tagGroups : [TagGroup] = []
|
|
|
|
|
|
- var rule : CharacterRule
|
|
|
- var tokens : [Token]
|
|
|
-
|
|
|
- var openIndices : [Int] = []
|
|
|
- var accumulatedStr : String = ""
|
|
|
- var stringList : [v2_Token] = []
|
|
|
-
|
|
|
+ var isMetadataOpen = false
|
|
|
+
|
|
|
+ var enableLog = (ProcessInfo.processInfo.environment["SwiftyScannerScanner"] != nil)
|
|
|
|
|
|
- init( tokens : [Token], rule : CharacterRule, metadataLookup : [String : String] = [:] ) {
|
|
|
- self.tokens = tokens
|
|
|
+ let currentPerfomanceLog = PerformanceLog(with: "SwiftyScannerScannerPerformanceLogging", identifier: "Scanner", log: OSLog.swiftyScannerPerformance)
|
|
|
+ let log = PerformanceLog(with: "SwiftyScannerScanner", identifier: "Scanner", log: OSLog.swiftyScannerScanner)
|
|
|
+
|
|
|
+ enum Position {
|
|
|
+ case forward(Int)
|
|
|
+ case backward(Int)
|
|
|
+ }
|
|
|
+
|
|
|
+ init( withElements elements : [Element], rule : CharacterRule, metadata : [String : String]) {
|
|
|
+ self.elements = elements
|
|
|
self.rule = rule
|
|
|
- self.str = tokens.map({ $0.inputString }).joined()
|
|
|
- self.currentIndex = self.str.startIndex
|
|
|
- self.metadataLookup = metadataLookup
|
|
|
+ self.currentPerfomanceLog.start()
|
|
|
+ self.metadata = metadata
|
|
|
}
|
|
|
|
|
|
- func scan() -> [Token] {
|
|
|
+ func elementsBetweenCurrentPosition( and newPosition : Position ) -> [Element]? {
|
|
|
|
|
|
- if !self.str.contains(rule.primaryTag.tag) {
|
|
|
- return self.tokens
|
|
|
+ let newIdx : Int
|
|
|
+ var isForward = true
|
|
|
+ switch newPosition {
|
|
|
+ case .backward(let positions):
|
|
|
+ isForward = false
|
|
|
+ newIdx = pointer - positions
|
|
|
+ if newIdx < 0 {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ case .forward(let positions):
|
|
|
+ newIdx = pointer + positions
|
|
|
+ if newIdx >= self.elements.count {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
}
|
|
|
- self.process()
|
|
|
- return self.convertTokens()
|
|
|
+
|
|
|
+
|
|
|
+ let range : ClosedRange<Int> = ( isForward ) ? self.pointer...newIdx : newIdx...self.pointer
|
|
|
+ return Array(self.elements[range])
|
|
|
}
|
|
|
|
|
|
- func emptyAccumulatedString() {
|
|
|
- if !accumulatedStr.isEmpty {
|
|
|
- stringList.append(v2_Token(type: .string, string: accumulatedStr))
|
|
|
- accumulatedStr.removeAll()
|
|
|
+
|
|
|
+ func element( for position : Position ) -> Element? {
|
|
|
+ let newIdx : Int
|
|
|
+ switch position {
|
|
|
+ case .backward(let positions):
|
|
|
+ newIdx = pointer - positions
|
|
|
+ if newIdx < 0 {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ case .forward(let positions):
|
|
|
+ newIdx = pointer + positions
|
|
|
+ if newIdx >= self.elements.count {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return self.elements[newIdx]
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ func positionIsEqualTo( character : Character, direction : Position ) -> Bool {
|
|
|
+ guard let validElement = self.element(for: direction) else {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+ return validElement.character == character
|
|
|
+ }
|
|
|
+
|
|
|
+ func positionContains( characters : [Character], direction : Position ) -> Bool {
|
|
|
+ guard let validElement = self.element(for: direction) else {
|
|
|
+ return false
|
|
|
}
|
|
|
+ return characters.contains(validElement.character)
|
|
|
}
|
|
|
|
|
|
- func process() {
|
|
|
- var tokens : [Token] = []
|
|
|
+ func isEscaped() -> Bool {
|
|
|
+ let isEscaped = self.positionContains(characters: self.rule.escapeCharacters, direction: .backward(1))
|
|
|
+ if isEscaped {
|
|
|
+ self.elements[self.pointer - 1].type = .escape
|
|
|
+ }
|
|
|
+ return isEscaped
|
|
|
+ }
|
|
|
+
|
|
|
+ func range( for tag : String? ) -> ClosedRange<Int>? {
|
|
|
|
|
|
- let openTagStart = rule.primaryTag.tag[rule.primaryTag.tag.startIndex]
|
|
|
- let closeTagStart = ( rule.tag(for: .close)?.tag != nil ) ? rule.tag(for: .close)?.tag[rule.tag(for: .close)!.tag.startIndex] : nil
|
|
|
+ guard let tag = tag else {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
-
|
|
|
+ guard let openChar = tag.first else {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ if self.pointer == self.elements.count {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ if self.elements[self.pointer].character != openChar {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
|
|
|
+ if isEscaped() {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ let range : ClosedRange<Int>
|
|
|
+ if tag.count > 1 {
|
|
|
+ guard let elements = self.elementsBetweenCurrentPosition(and: .forward(tag.count - 1) ) else {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ // If it's already a tag, then it should be ignored
|
|
|
+ if elements.filter({ $0.type != .string }).count > 0 {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ if elements.map( { String($0.character) }).joined() != tag {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ let endIdx = (self.pointer + tag.count - 1)
|
|
|
+ for i in self.pointer...endIdx {
|
|
|
+ self.elements[i].type = .tag
|
|
|
+ }
|
|
|
+ range = self.pointer...endIdx
|
|
|
+ self.pointer += tag.count
|
|
|
+ } else {
|
|
|
+ // If it's already a tag, then it should be ignored
|
|
|
+ if self.elements[self.pointer].type != .string {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ self.elements[self.pointer].type = .tag
|
|
|
+ range = self.pointer...self.pointer
|
|
|
+ self.pointer += 1
|
|
|
+ }
|
|
|
+ return range
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ func resetTagGroup( withID id : String ) {
|
|
|
+ if let idx = self.tagGroups.firstIndex(where: { $0.groupID == id }) {
|
|
|
+ for range in self.tagGroups[idx].tagRanges {
|
|
|
+ self.resetTag(in: range)
|
|
|
+ }
|
|
|
+ self.tagGroups.remove(at: idx)
|
|
|
+ }
|
|
|
+ self.isMetadataOpen = false
|
|
|
+ }
|
|
|
+
|
|
|
+ func resetTag( in range : ClosedRange<Int>) {
|
|
|
+ for idx in range {
|
|
|
+ self.elements[idx].type = .string
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ func resetLastTag( for range : inout [ClosedRange<Int>]) {
|
|
|
+ guard let last = range.last else {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ for idx in last {
|
|
|
+ self.elements[idx].type = .string
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ func closeTag( _ tag : String, withGroupID id : String ) {
|
|
|
+ guard var tagGroup = self.tagGroups.first(where: { $0.groupID == id }) else {
|
|
|
+ return
|
|
|
+ }
|
|
|
|
|
|
- while currentIndex != str.endIndex {
|
|
|
- let char = str[currentIndex]
|
|
|
+ var metadataString = ""
|
|
|
+ if self.isMetadataOpen {
|
|
|
+ let metadataCloseRange = tagGroup.tagRanges.removeLast()
|
|
|
+ let metadataOpenRange = tagGroup.tagRanges.removeLast()
|
|
|
|
|
|
- if str[currentIndex] != openTagStart && str[currentIndex] != closeTagStart {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
+ if metadataOpenRange.upperBound + 1 == (metadataCloseRange.lowerBound) {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("Nothing between the tags", log: OSLog.swiftyScannerScanner, type:.info , self.rule.description)
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ for idx in (metadataOpenRange.upperBound)...(metadataCloseRange.lowerBound) {
|
|
|
+ self.elements[idx].type = .metadata
|
|
|
+ if self.rule.definesBoundary {
|
|
|
+ self.elements[idx].boundaryCount += 1
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ let key = self.elements[metadataOpenRange.upperBound + 1..<metadataCloseRange.lowerBound].map( { String( $0.character )}).joined()
|
|
|
+ if self.rule.metadataLookup {
|
|
|
+ metadataString = self.metadata[key] ?? ""
|
|
|
+ } else {
|
|
|
+ metadataString = key
|
|
|
+ }
|
|
|
}
|
|
|
+ }
|
|
|
+
|
|
|
+ let closeRange = tagGroup.tagRanges.removeLast()
|
|
|
+ let openRange = tagGroup.tagRanges.removeLast()
|
|
|
|
|
|
-
|
|
|
- // We have the first character of a possible open tag
|
|
|
- if char == openTagStart {
|
|
|
- // Checks to see if there is an escape character before this one
|
|
|
- if let prevIndex = str.index(currentIndex, offsetBy: -1, limitedBy: str.startIndex) {
|
|
|
- if let escapeChar = self.rule.primaryTag.escapeCharacter(for: str[prevIndex]) {
|
|
|
- switch escapeChar.rule {
|
|
|
- case .remove:
|
|
|
- if !accumulatedStr.isEmpty {
|
|
|
- accumulatedStr.removeLast()
|
|
|
- }
|
|
|
- case .keep:
|
|
|
- break
|
|
|
+ if self.rule.balancedTags && closeRange.count != openRange.count {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ var shouldRemove = true
|
|
|
+ var styles : [CharacterStyling] = []
|
|
|
+ if openRange.upperBound + 1 == (closeRange.lowerBound) {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("Nothing between the tags", log: OSLog.swiftyScannerScanner, type:.info , self.rule.description)
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ var remainingTags = min(openRange.upperBound - openRange.lowerBound, closeRange.upperBound - closeRange.lowerBound) + 1
|
|
|
+ while remainingTags > 0 {
|
|
|
+ if remainingTags >= self.rule.maxTags {
|
|
|
+ remainingTags -= self.rule.maxTags
|
|
|
+ if let style = self.rule.styles[ self.rule.maxTags ] {
|
|
|
+ if !styles.contains(where: { $0.isEqualTo(style)}) {
|
|
|
+ styles.append(style)
|
|
|
}
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
}
|
|
|
}
|
|
|
+ if let style = self.rule.styles[remainingTags] {
|
|
|
+ remainingTags -= remainingTags
|
|
|
+ if !styles.contains(where: { $0.isEqualTo(style)}) {
|
|
|
+ styles.append(style)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for idx in (openRange.upperBound)...(closeRange.lowerBound) {
|
|
|
+ self.elements[idx].styles.append(contentsOf: styles)
|
|
|
+ self.elements[idx].metadata.append(metadataString)
|
|
|
+ if self.rule.definesBoundary {
|
|
|
+ self.elements[idx].boundaryCount += 1
|
|
|
+ }
|
|
|
+ if self.rule.shouldCancelRemainingTags {
|
|
|
+ self.elements[idx].boundaryCount = 1000
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if self.rule.isRepeatingTag {
|
|
|
+ let difference = ( openRange.upperBound - openRange.lowerBound ) - (closeRange.upperBound - closeRange.lowerBound)
|
|
|
+ switch difference {
|
|
|
+ case 1...:
|
|
|
+ for idx in openRange.upperBound - (difference - 1)...openRange.upperBound {
|
|
|
+ self.elements[idx].type = .string
|
|
|
+ }
|
|
|
+ case ...(-1):
|
|
|
+ for idx in closeRange.upperBound - (abs(difference) - 1)...closeRange.upperBound{
|
|
|
+ self.elements[idx].type = .string
|
|
|
+ }
|
|
|
+ default:
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ if shouldRemove {
|
|
|
+ self.tagGroups.removeAll(where: { $0.groupID == id })
|
|
|
+ self.isMetadataOpen = false
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ func emptyRanges( _ ranges : inout [ClosedRange<Int>] ) {
|
|
|
+ while !ranges.isEmpty {
|
|
|
+ self.resetLastTag(for: &ranges)
|
|
|
+ ranges.removeLast()
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ func scanNonRepeatingTags() {
|
|
|
+ var groupID = ""
|
|
|
+ let closeTag = self.rule.tag(for: .close)?.tag
|
|
|
+ let metadataOpen = self.rule.tag(for: .metadataOpen)?.tag
|
|
|
+ let metadataClose = self.rule.tag(for: .metadataClose)?.tag
|
|
|
+
|
|
|
+ while self.pointer < self.elements.count {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("CHARACTER: %@", log: OSLog.swiftyScannerScanner, type:.info , String(self.elements[self.pointer].character))
|
|
|
+ }
|
|
|
+
|
|
|
+ if let range = self.range(for: metadataClose) {
|
|
|
+ if self.isMetadataOpen {
|
|
|
+ guard let groupIdx = self.tagGroups.firstIndex(where: { $0.groupID == groupID }) else {
|
|
|
+ self.pointer += 1
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ guard !self.tagGroups.isEmpty else {
|
|
|
+ self.resetTagGroup(withID: groupID)
|
|
|
+ continue
|
|
|
+ }
|
|
|
|
|
|
- emptyAccumulatedString()
|
|
|
-
|
|
|
- guard let nextIdx = str.index(currentIndex, offsetBy: rule.primaryTag.tag.count, limitedBy: str.endIndex) else {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
+ guard self.isMetadataOpen else {
|
|
|
+
|
|
|
+ self.resetTagGroup(withID: groupID)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("Closing metadata tag found. Closing tag with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+ self.tagGroups[groupIdx].tagRanges.append(range)
|
|
|
+ self.closeTag(closeTag!, withGroupID: groupID)
|
|
|
+ self.isMetadataOpen = false
|
|
|
continue
|
|
|
+ } else {
|
|
|
+ self.resetTag(in: range)
|
|
|
+ self.pointer -= metadataClose!.count
|
|
|
}
|
|
|
- let tag = String(str[currentIndex..<nextIdx])
|
|
|
- if tag != rule.primaryTag.tag {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ if let openRange = self.range(for: self.rule.primaryTag.tag) {
|
|
|
+ if self.isMetadataOpen {
|
|
|
+ self.resetTagGroup(withID: groupID)
|
|
|
}
|
|
|
|
|
|
- openIndices.append(stringList.count)
|
|
|
- stringList.append(v2_Token(type: .tag, string: tag))
|
|
|
- currentIndex = str.index(currentIndex, offsetBy: rule.primaryTag.tag.count, limitedBy: str.endIndex) ?? str.endIndex
|
|
|
+ let tagGroup = TagGroup(tagRanges: [openRange])
|
|
|
+ groupID = tagGroup.groupID
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("New open tag found. Starting new Group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+ if self.rule.isRepeatingTag {
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ self.tagGroups.append(tagGroup)
|
|
|
continue
|
|
|
}
|
|
|
- if char == closeTagStart {
|
|
|
-
|
|
|
- emptyAccumulatedString()
|
|
|
-
|
|
|
- guard let closeTag = rule.tag(for: .close)?.tag else {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
+
|
|
|
+ if let range = self.range(for: closeTag) {
|
|
|
+ guard !self.tagGroups.isEmpty else {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("No open tags exist, resetting this close tag", log: OSLog.swiftyScannerScanner, type:.info)
|
|
|
+ }
|
|
|
+ self.resetTag(in: range)
|
|
|
continue
|
|
|
}
|
|
|
-
|
|
|
- guard let nextIdx = str.index(currentIndex, offsetBy: closeTag.count, limitedBy: str.endIndex) else {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
+ self.tagGroups[self.tagGroups.count - 1].tagRanges.append(range)
|
|
|
+ groupID = self.tagGroups[self.tagGroups.count - 1].groupID
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("New close tag found. Appending to group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+ guard metadataOpen != nil else {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("No metadata tags exist, closing valid tag with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+ self.closeTag(closeTag!, withGroupID: groupID)
|
|
|
continue
|
|
|
}
|
|
|
- let tag = String(str[currentIndex..<nextIdx])
|
|
|
- if tag != closeTag {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
+
|
|
|
+ guard self.pointer != self.elements.count else {
|
|
|
continue
|
|
|
}
|
|
|
- if openIndices.isEmpty {
|
|
|
- stringList.append(v2_Token(type: .string, string: String(char)))
|
|
|
- movePointer(¤tIndex)
|
|
|
+
|
|
|
+ guard let range = self.range(for: metadataOpen) else {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("No metadata tag found, resetting group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+ self.resetTagGroup(withID: groupID)
|
|
|
continue
|
|
|
}
|
|
|
+ self.tagGroups[self.tagGroups.count - 1].tagRanges.append(range)
|
|
|
+ self.isMetadataOpen = true
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
- // At this point we have gathered a valid close tag and we have a valid open tag
|
|
|
+ if let range = self.range(for: metadataOpen) {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("Multiple open metadata tags found!", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+ self.resetTag(in: range)
|
|
|
+ self.resetTagGroup(withID: groupID)
|
|
|
+ self.isMetadataOpen = false
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ self.pointer += 1
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var spaceAndNewLine = CharacterSet.whitespacesAndNewlines
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ func scanRepeatingTags() {
|
|
|
+
|
|
|
+ var groupID = ""
|
|
|
+ let escapeCharacters = "" //self.rule.escapeCharacters.map( { String( $0 ) }).joined()
|
|
|
+ let unionSet = spaceAndNewLine.union(CharacterSet(charactersIn: escapeCharacters))
|
|
|
+ while self.pointer < self.elements.count {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("CHARACTER: %@", log: OSLog.swiftyScannerScanner, type:.info , String(self.elements[self.pointer].character))
|
|
|
+ }
|
|
|
+
|
|
|
+ if var openRange = self.range(for: self.rule.primaryTag.tag) {
|
|
|
|
|
|
- guard let metadataOpen = rule.tag(for: .metadataOpen), let metadataClose = rule.tag(for: .metadataClose) else {
|
|
|
- currentIndex = nextIdx
|
|
|
- addLink()
|
|
|
+ if self.elements[openRange].first?.boundaryCount == 1000 {
|
|
|
+ self.resetTag(in: openRange)
|
|
|
continue
|
|
|
}
|
|
|
- if nextIdx == str.endIndex {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
+
|
|
|
+ var count = 1
|
|
|
+ var tagType : RepeatingTagType = .open
|
|
|
+ if let prevElement = self.element(for: .backward(self.rule.primaryTag.tag.count + 1)) {
|
|
|
+ if !unionSet.containsUnicodeScalars(of: prevElement.character) {
|
|
|
+ tagType = .either
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ tagType = .open
|
|
|
}
|
|
|
- guard str[nextIdx] == metadataOpen.tag.first else {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
+
|
|
|
+ while let nextRange = self.range(for: self.rule.primaryTag.tag) {
|
|
|
+ count += 1
|
|
|
+ openRange = openRange.lowerBound...nextRange.upperBound
|
|
|
}
|
|
|
|
|
|
- let substr = str[nextIdx..<str.endIndex]
|
|
|
- guard let closeIdx = substr.firstIndex(of: metadataClose.tag.first!) else {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
+ if self.rule.minTags > 1 {
|
|
|
+ if (openRange.upperBound - openRange.lowerBound) + 1 < self.rule.minTags {
|
|
|
+ self.resetTag(in: openRange)
|
|
|
+ os_log("Tag does not meet minimum length", log: .swiftyScannerScanner, type: .info)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var validTagGroup = true
|
|
|
+ if let nextElement = self.element(for: .forward(0)) {
|
|
|
+ if unionSet.containsUnicodeScalars(of: nextElement.character) {
|
|
|
+ if tagType == .either {
|
|
|
+ tagType = .close
|
|
|
+ } else {
|
|
|
+ validTagGroup = tagType != .open
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if tagType == .either {
|
|
|
+ tagType = .close
|
|
|
+ } else {
|
|
|
+ validTagGroup = tagType != .open
|
|
|
+ }
|
|
|
}
|
|
|
- let open = substr.index(nextIdx, offsetBy: 1, limitedBy: substr.endIndex) ?? substr.endIndex
|
|
|
- let metadataStr = String(substr[open..<closeIdx])
|
|
|
|
|
|
- guard !metadataStr.contains(rule.primaryTag.tag) else {
|
|
|
- movePointer(¤tIndex, addCharacter: char)
|
|
|
- continue
|
|
|
|
|
|
+
|
|
|
+
|
|
|
+ if !validTagGroup {
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("Tag has whitespace on both sides", log: .swiftyScannerScanner, type: .info)
|
|
|
+ }
|
|
|
+ self.resetTag(in: openRange)
|
|
|
+ continue
|
|
|
}
|
|
|
|
|
|
- currentIndex = str.index(closeIdx, offsetBy: 1, limitedBy: str.endIndex) ?? closeIdx
|
|
|
+ if let idx = tagGroups.firstIndex(where: { $0.groupID == groupID }) {
|
|
|
+
|
|
|
+ if tagType == .either {
|
|
|
+ if tagGroups[idx].count == count {
|
|
|
+ self.tagGroups[idx].tagRanges.append(openRange)
|
|
|
+ self.closeTag(self.rule.primaryTag.tag, withGroupID: groupID)
|
|
|
+
|
|
|
+ if let last = self.tagGroups.last {
|
|
|
+ groupID = last.groupID
|
|
|
+ }
|
|
|
+
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if let prevRange = tagGroups[idx].tagRanges.first {
|
|
|
+ if self.elements[prevRange].first?.boundaryCount == self.elements[openRange].first?.boundaryCount {
|
|
|
+ self.tagGroups[idx].tagRanges.append(openRange)
|
|
|
+ self.closeTag(self.rule.primaryTag.tag, withGroupID: groupID)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
- addLink(with: metadataStr)
|
|
|
+
|
|
|
+ }
|
|
|
+ var tagGroup = TagGroup(tagRanges: [openRange])
|
|
|
+ groupID = tagGroup.groupID
|
|
|
+ tagGroup.tagType = tagType
|
|
|
+ tagGroup.count = count
|
|
|
+
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("New open tag found. Starting new Group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
|
|
|
+ }
|
|
|
+
|
|
|
+ self.tagGroups.append(tagGroup)
|
|
|
+ continue
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
- if !accumulatedStr.isEmpty {
|
|
|
- stringList.append(v2_Token(type: .string, string: accumulatedStr))
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- func movePointer( _ idx : inout String.Index, addCharacter char : Character? = nil ) {
|
|
|
- idx = str.index(idx, offsetBy: 1, limitedBy: str.endIndex) ?? str.endIndex
|
|
|
- if let character = char {
|
|
|
- accumulatedStr.append(character)
|
|
|
+ self.pointer += 1
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- func addLink(with metadataStr : String? = nil) {
|
|
|
- let openIndex = openIndices.removeLast()
|
|
|
- stringList.remove(at: openIndex)
|
|
|
- let subarray = stringList[openIndex..<stringList.count]
|
|
|
- stringList.removeSubrange(openIndex..<stringList.count)
|
|
|
- stringList.append(v2_Token(type: .link, string: subarray.map({ $0.string }).joined(), metadata: metadataStr ?? ""))
|
|
|
- }
|
|
|
|
|
|
- func convertTokens() -> [Token] {
|
|
|
- if !stringList.contains(where: { $0.type == .link }) {
|
|
|
- return [Token(type: .string, inputString: stringList.map({ $0.string}).joined())]
|
|
|
+ func scan() -> [Element] {
|
|
|
+
|
|
|
+ guard self.elements.filter({ $0.type == .string }).map({ String($0.character) }).joined().contains(self.rule.primaryTag.tag) else {
|
|
|
+ return self.elements
|
|
|
}
|
|
|
- var tokens : [Token] = []
|
|
|
- var allStrings : [v2_Token] = []
|
|
|
- for tok in stringList {
|
|
|
- if tok.type == .link {
|
|
|
- if !allStrings.isEmpty {
|
|
|
- tokens.append(Token(type: .string, inputString: allStrings.map({ $0.string }).joined()))
|
|
|
- allStrings.removeAll()
|
|
|
- }
|
|
|
- let ruleStyles = self.rule.styles[1] ?? []
|
|
|
- let charStyles = ( rule.isSelfContained ) ? [] : ruleStyles
|
|
|
- var token = Token(type: .string, inputString: tok.string, characterStyles: charStyles)
|
|
|
- token.metadataString = tok.metadata
|
|
|
-
|
|
|
- if rule.isSelfContained {
|
|
|
- var parentToken = Token(type: .string, inputString: token.id, characterStyles: ruleStyles)
|
|
|
- parentToken.children = [token]
|
|
|
- tokens.append(parentToken)
|
|
|
- } else {
|
|
|
- tokens.append(token)
|
|
|
- }
|
|
|
- } else {
|
|
|
- allStrings.append(tok)
|
|
|
- }
|
|
|
+
|
|
|
+ self.currentPerfomanceLog.tag(with: "Beginning \(self.rule.primaryTag.tag)")
|
|
|
+
|
|
|
+ if self.enableLog {
|
|
|
+ os_log("RULE: %@", log: OSLog.swiftyScannerScanner, type:.info , self.rule.description)
|
|
|
}
|
|
|
- if !allStrings.isEmpty {
|
|
|
- tokens.append(Token(type: .string, inputString: allStrings.map({ $0.string }).joined()))
|
|
|
+
|
|
|
+ if self.rule.isRepeatingTag {
|
|
|
+ self.scanRepeatingTags()
|
|
|
+ } else {
|
|
|
+ self.scanNonRepeatingTags()
|
|
|
}
|
|
|
|
|
|
- return tokens
|
|
|
- }
|
|
|
-
|
|
|
- // Old
|
|
|
-
|
|
|
- func scan( _ tokens : [Token], with rule : CharacterRule ) -> [Token] {
|
|
|
- self.tokens = tokens
|
|
|
- return self.scan(tokens.map({ $0.inputString }).joined(), with: rule)
|
|
|
- }
|
|
|
-
|
|
|
- func scan(_ string: String, with rule: CharacterRule) -> [Token] {
|
|
|
- return []
|
|
|
+ for tagGroup in self.tagGroups {
|
|
|
+ self.resetTagGroup(withID: tagGroup.groupID)
|
|
|
+ }
|
|
|
+
|
|
|
+ if self.enableLog {
|
|
|
+ for element in self.elements {
|
|
|
+ print(element)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return self.elements
|
|
|
}
|
|
|
}
|