SwiftyTokeniser.swift 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. //
  2. // SwiftyTokeniser.swift
  3. // SwiftyMarkdown
  4. //
  5. // Created by Simon Fairbairn on 16/12/2019.
  6. // Copyright © 2019 Voyage Travel Apps. All rights reserved.
  7. //
  8. import Foundation
  9. import os.log
  /// Log handles used by the tokeniser. All of them share one subsystem and
  /// differ only in their category.
  extension OSLog {
      /// Subsystem identifier shared by every log declared below.
      /// Declared `let` because it is never mutated; a constant also avoids
      /// exposing shared mutable static state.
      private static let subsystem = "SwiftyTokeniser"

      /// Log for the "Tokenising" category.
      static let tokenising = OSLog(subsystem: subsystem, category: "Tokenising")

      /// Log for the "Styling" category.
      static let styling = OSLog(subsystem: subsystem, category: "Styling")

      /// Log for the "Performance" category (correctly spelled so it can be
      /// found when filtering by category name).
      static let performance = OSLog(subsystem: subsystem, category: "Performance")
  }
  /// Turns an input string into an array of `Token`s by applying a list of
  /// `CharacterRule`s to it, one rule after another.
  public class SwiftyTokeniser {
      /// The rules applied, in order, by `process(_:)`.
      let rules : [CharacterRule]

      /// NOTE(review): not read or written anywhere in this class; kept because
      /// other files may reference it.
      var replacements : [String : [Token]] = [:]

      /// `true` whenever the `SwiftyTokeniserLogging` environment variable is set
      /// (to any value); enables the per-rule `os_log` output in `process(_:)`.
      var enableLog = (ProcessInfo.processInfo.environment["SwiftyTokeniserLogging"] != nil)

      /// Started in `init` and ended in `deinit`.
      let totalPerfomanceLog = PerformanceLog(with: "SwiftyTokeniserPerformanceLogging", identifier: "Tokeniser Total Run Time", log: OSLog.performance)

      /// Started and tagged during each call to `process(_:)`.
      let currentPerfomanceLog = PerformanceLog(with: "SwiftyTokeniserPerformanceLogging", identifier: "Tokeniser Current", log: OSLog.performance)

      /// Handed unchanged to every `SwiftyScanner` created by `process(_:)`.
      public var metadataLookup : [String : String] = [:]

      /// Character set used to classify a character as a newline element.
      let newlines = CharacterSet.newlines

      /// Character set used to classify a character as a space element.
      let spaces = CharacterSet.whitespaces

      /// Creates a tokeniser and starts the total-run-time performance log.
      ///
      /// - Parameter rules: The rules that `process(_:)` applies, in order.
      public init( with rules : [CharacterRule] ) {
          self.rules = rules
          self.totalPerfomanceLog.start()
      }

      deinit {
          self.totalPerfomanceLog.end()
      }

      /// This goes through every CharacterRule in order and applies it to the input string, tokenising the string
      /// if there are any matches.
      ///
      /// Each rule is run over the complete element list produced by the previous rule, which lets
      /// strings inside tags be separated from those outside them.
      ///
      /// e.g. "A string with a \[link\]\(url\) tag" would have the "link" text tokenised separately.
      ///
      /// This is to prevent situations like **\[link**\](url) from returning a bold string.
      ///
      /// - Parameter inputString: A string to have the CharacterRules in `self.rules` applied to
      /// - Returns: The resulting tokens. With no rules, a single string token wrapping the input;
      ///   with an empty input, a single empty string token without styles.
      public func process( _ inputString : String ) -> [Token] {
          guard !rules.isEmpty else {
              return [Token(type: .string, inputString: inputString)]
          }
          if inputString.isEmpty {
              return [Token(type: .string, inputString: "", characterStyles: [])]
          }

          currentPerfomanceLog.start()

          // Classify every character up front: newline first, then space, otherwise plain string.
          var elementArray : [Element] = inputString.map { char -> Element in
              if newlines.containsUnicodeScalars(of: char) {
                  return Element(character: char, type: .newline)
              }
              if spaces.containsUnicodeScalars(of: char) {
                  return Element(character: char, type: .space)
              }
              return Element(character: char, type: .string)
          }

          // Apply the rules in order; each scan consumes the previous scan's output.
          for nextRule in rules {
              if enableLog {
                  os_log("------------------------------", log: .tokenising, type: .info)
                  os_log("RULE: %@", log: OSLog.tokenising, type:.info , nextRule.description)
              }
              // Interpolate the rule description: the tag is passed as a finished string,
              // so a "%@" placeholder would never be substituted.
              currentPerfomanceLog.tag(with: "(start rule \(nextRule.description))")
              let scanner = SwiftyScanner(withElements: elementArray, rule: nextRule, metadata: metadataLookup)
              elementArray = scanner.scan()
          }

          var output : [Token] = []
          var accumulatedString = ""
          // The element most recently appended to `accumulatedString`. It stays `nil` until the
          // first append, which removes the need to force-unwrap `elementArray.first`.
          var lastElement : Element?

          // Emits the accumulated text as one string token that carries the metadata and
          // styles of the last accumulated element, then resets the accumulator.
          func flush() {
              guard !accumulatedString.isEmpty, let styledElement = lastElement else {
                  return
              }
              var token = Token(type: .string, inputString: accumulatedString)
              token.metadataStrings.append(contentsOf: styledElement.metadata)
              token.characterStyles = styledElement.styles
              accumulatedString.removeAll()
              output.append(token)
          }

          for element in elementArray {
              // Escape elements are dropped without interrupting the current run.
              guard element.type != .escape else {
                  continue
              }
              // Any other non-text element ends the current run and is not emitted itself.
              guard element.type == .string || element.type == .space || element.type == .newline else {
                  flush()
                  continue
              }
              // A change of styles starts a new token.
              if let previous = lastElement,
                 (previous.styles as? [CharacterStyle]) != (element.styles as? [CharacterStyle]) {
                  flush()
              }
              accumulatedString.append(element.character)
              lastElement = element
          }
          flush()

          currentPerfomanceLog.tag(with: "(finished all rules)")

          if enableLog {
              os_log("=====RULE PROCESSING COMPLETE=====", log: .tokenising, type: .info)
              os_log("==================================", log: .tokenising, type: .info)
          }
          return output
      }
  }
  extension String {
      /// Returns this string concatenated with itself so that it appears `max` times in total.
      ///
      /// - Parameter max: The total number of copies in the result. `1` returns the string
      ///   unchanged; zero or a negative value returns an empty string instead of trapping
      ///   on an invalid range.
      /// - Returns: The receiver repeated `max` times.
      func repeating( _ max : Int ) -> String {
          guard max > 0 else {
              return ""
          }
          return String(repeating: self, count: max)
      }
  }