// SwiftyScannerNonRepeating.swift
//
// File.swift
//
//
// Created by Simon Fairbairn on 04/04/2020.
//
//
// SwiftyScanner.swift
// SwiftyMarkdown
//
// Created by Simon Fairbairn on 04/02/2020.
//
  13. import Foundation
  14. import os.log
  15. extension OSLog {
  16. private static var subsystem = "SwiftyScanner"
  17. static let swiftyScannerScanner = OSLog(subsystem: subsystem, category: "Swifty Scanner Scanner")
  18. static let swiftyScannerScannerPerformance = OSLog(subsystem: subsystem, category: "Swifty Scanner Scanner Peformance")
  19. }
  20. enum RepeatingTagType {
  21. case open
  22. case either
  23. case close
  24. case neither
  25. }
  26. struct TagGroup {
  27. let groupID = UUID().uuidString
  28. var tagRanges : [ClosedRange<Int>]
  29. var tagType : RepeatingTagType = .open
  30. var count = 1
  31. }
  32. class SwiftyScannerNonRepeating {
  33. var elements : [Element]
  34. let rule : CharacterRule
  35. let metadata : [String : String]
  36. var pointer : Int = 0
  37. var spaceAndNewLine = CharacterSet.whitespacesAndNewlines
  38. var tagGroups : [TagGroup] = []
  39. var isMetadataOpen = false
  40. var enableLog = (ProcessInfo.processInfo.environment["SwiftyScannerScanner"] != nil)
  41. let currentPerfomanceLog = PerformanceLog(with: "SwiftyScannerScannerPerformanceLogging", identifier: "Scanner", log: OSLog.swiftyScannerPerformance)
  42. let log = PerformanceLog(with: "SwiftyScannerScanner", identifier: "Scanner", log: OSLog.swiftyScannerScanner)
  43. enum Position {
  44. case forward(Int)
  45. case backward(Int)
  46. }
  47. init( withElements elements : [Element], rule : CharacterRule, metadata : [String : String]) {
  48. self.elements = elements
  49. self.rule = rule
  50. self.currentPerfomanceLog.start()
  51. self.metadata = metadata
  52. }
  53. func elementsBetweenCurrentPosition( and newPosition : Position ) -> [Element]? {
  54. let newIdx : Int
  55. var isForward = true
  56. switch newPosition {
  57. case .backward(let positions):
  58. isForward = false
  59. newIdx = pointer - positions
  60. if newIdx < 0 {
  61. return nil
  62. }
  63. case .forward(let positions):
  64. newIdx = pointer + positions
  65. if newIdx >= self.elements.count {
  66. return nil
  67. }
  68. }
  69. let range : ClosedRange<Int> = ( isForward ) ? self.pointer...newIdx : newIdx...self.pointer
  70. return Array(self.elements[range])
  71. }
  72. func element( for position : Position ) -> Element? {
  73. let newIdx : Int
  74. switch position {
  75. case .backward(let positions):
  76. newIdx = pointer - positions
  77. if newIdx < 0 {
  78. return nil
  79. }
  80. case .forward(let positions):
  81. newIdx = pointer + positions
  82. if newIdx >= self.elements.count {
  83. return nil
  84. }
  85. }
  86. return self.elements[newIdx]
  87. }
  88. func positionIsEqualTo( character : Character, direction : Position ) -> Bool {
  89. guard let validElement = self.element(for: direction) else {
  90. return false
  91. }
  92. return validElement.character == character
  93. }
  94. func positionContains( characters : [Character], direction : Position ) -> Bool {
  95. guard let validElement = self.element(for: direction) else {
  96. return false
  97. }
  98. return characters.contains(validElement.character)
  99. }
  100. func isEscaped() -> Bool {
  101. let isEscaped = self.positionContains(characters: self.rule.escapeCharacters, direction: .backward(1))
  102. if isEscaped {
  103. self.elements[self.pointer - 1].type = .escape
  104. }
  105. return isEscaped
  106. }
  107. func range( for tag : String? ) -> ClosedRange<Int>? {
  108. guard let tag = tag else {
  109. return nil
  110. }
  111. guard let openChar = tag.first else {
  112. return nil
  113. }
  114. if self.pointer == self.elements.count {
  115. return nil
  116. }
  117. if self.elements[self.pointer].character != openChar {
  118. return nil
  119. }
  120. if isEscaped() {
  121. return nil
  122. }
  123. let range : ClosedRange<Int>
  124. if tag.count > 1 {
  125. guard let elements = self.elementsBetweenCurrentPosition(and: .forward(tag.count - 1) ) else {
  126. return nil
  127. }
  128. // If it's already a tag, then it should be ignored
  129. if elements.filter({ $0.type != .string }).count > 0 {
  130. return nil
  131. }
  132. if elements.map( { String($0.character) }).joined() != tag {
  133. return nil
  134. }
  135. let endIdx = (self.pointer + tag.count - 1)
  136. for i in self.pointer...endIdx {
  137. self.elements[i].type = .tag
  138. }
  139. range = self.pointer...endIdx
  140. self.pointer += tag.count
  141. } else {
  142. // If it's already a tag, then it should be ignored
  143. if self.elements[self.pointer].type != .string {
  144. return nil
  145. }
  146. self.elements[self.pointer].type = .tag
  147. range = self.pointer...self.pointer
  148. self.pointer += 1
  149. }
  150. return range
  151. }
  152. func resetTagGroup( withID id : String ) {
  153. if let idx = self.tagGroups.firstIndex(where: { $0.groupID == id }) {
  154. for range in self.tagGroups[idx].tagRanges {
  155. self.resetTag(in: range)
  156. }
  157. self.tagGroups.remove(at: idx)
  158. }
  159. self.isMetadataOpen = false
  160. }
  161. func resetTag( in range : ClosedRange<Int>) {
  162. for idx in range {
  163. self.elements[idx].type = .string
  164. }
  165. }
  166. func resetLastTag( for range : inout [ClosedRange<Int>]) {
  167. guard let last = range.last else {
  168. return
  169. }
  170. for idx in last {
  171. self.elements[idx].type = .string
  172. }
  173. }
  174. func closeTag( _ tag : String, withGroupID id : String ) {
  175. guard let tagIdx = self.tagGroups.firstIndex(where: { $0.groupID == id }) else {
  176. return
  177. }
  178. var metadataString = ""
  179. if self.isMetadataOpen {
  180. let metadataCloseRange = self.tagGroups[tagIdx].tagRanges.removeLast()
  181. let metadataOpenRange = self.tagGroups[tagIdx].tagRanges.removeLast()
  182. if metadataOpenRange.upperBound + 1 == (metadataCloseRange.lowerBound) {
  183. if self.enableLog {
  184. os_log("Nothing between the tags", log: OSLog.swiftyScannerScanner, type:.info , self.rule.description)
  185. }
  186. } else {
  187. for idx in (metadataOpenRange.upperBound)...(metadataCloseRange.lowerBound) {
  188. self.elements[idx].type = .metadata
  189. if self.rule.definesBoundary {
  190. self.elements[idx].boundaryCount += 1
  191. }
  192. }
  193. let key = self.elements[metadataOpenRange.upperBound + 1..<metadataCloseRange.lowerBound].map( { String( $0.character )}).joined()
  194. if self.rule.metadataLookup {
  195. metadataString = self.metadata[key] ?? ""
  196. } else {
  197. metadataString = key
  198. }
  199. }
  200. }
  201. let closeRange = self.tagGroups[tagIdx].tagRanges.removeLast()
  202. let openRange = self.tagGroups[tagIdx].tagRanges.removeLast()
  203. if self.rule.balancedTags && closeRange.count != openRange.count {
  204. self.tagGroups[tagIdx].tagRanges.append(openRange)
  205. self.tagGroups[tagIdx].tagRanges.append(closeRange)
  206. return
  207. }
  208. var shouldRemove = true
  209. var styles : [CharacterStyling] = []
  210. if openRange.upperBound + 1 == (closeRange.lowerBound) {
  211. if self.enableLog {
  212. os_log("Nothing between the tags", log: OSLog.swiftyScannerScanner, type:.info , self.rule.description)
  213. }
  214. } else {
  215. var remainingTags = min(openRange.upperBound - openRange.lowerBound, closeRange.upperBound - closeRange.lowerBound) + 1
  216. while remainingTags > 0 {
  217. if remainingTags >= self.rule.maxTags {
  218. remainingTags -= self.rule.maxTags
  219. if let style = self.rule.styles[ self.rule.maxTags ] {
  220. if !styles.contains(where: { $0.isEqualTo(style)}) {
  221. styles.append(style)
  222. }
  223. }
  224. }
  225. if let style = self.rule.styles[remainingTags] {
  226. remainingTags -= remainingTags
  227. if !styles.contains(where: { $0.isEqualTo(style)}) {
  228. styles.append(style)
  229. }
  230. }
  231. }
  232. for idx in (openRange.upperBound)...(closeRange.lowerBound) {
  233. self.elements[idx].styles.append(contentsOf: styles)
  234. self.elements[idx].metadata.append(metadataString)
  235. if self.rule.definesBoundary {
  236. self.elements[idx].boundaryCount += 1
  237. }
  238. if self.rule.shouldCancelRemainingRules {
  239. self.elements[idx].boundaryCount = 1000
  240. }
  241. }
  242. if self.rule.isRepeatingTag {
  243. let difference = ( openRange.upperBound - openRange.lowerBound ) - (closeRange.upperBound - closeRange.lowerBound)
  244. switch difference {
  245. case 1...:
  246. shouldRemove = false
  247. self.tagGroups[tagIdx].count = difference
  248. self.tagGroups[tagIdx].tagRanges.append( openRange.upperBound - (abs(difference) - 1)...openRange.upperBound )
  249. case ...(-1):
  250. for idx in closeRange.upperBound - (abs(difference) - 1)...closeRange.upperBound {
  251. self.elements[idx].type = .string
  252. }
  253. default:
  254. break
  255. }
  256. }
  257. }
  258. if shouldRemove {
  259. self.tagGroups.removeAll(where: { $0.groupID == id })
  260. }
  261. self.isMetadataOpen = false
  262. }
  263. func emptyRanges( _ ranges : inout [ClosedRange<Int>] ) {
  264. while !ranges.isEmpty {
  265. self.resetLastTag(for: &ranges)
  266. ranges.removeLast()
  267. }
  268. }
  269. func scanNonRepeatingTags() {
  270. var groupID = ""
  271. let closeTag = self.rule.tag(for: .close)?.tag
  272. let metadataOpen = self.rule.tag(for: .metadataOpen)?.tag
  273. let metadataClose = self.rule.tag(for: .metadataClose)?.tag
  274. while self.pointer < self.elements.count {
  275. if self.enableLog {
  276. os_log("CHARACTER: %@", log: OSLog.swiftyScannerScanner, type:.info , String(self.elements[self.pointer].character))
  277. }
  278. if let range = self.range(for: metadataClose) {
  279. if self.isMetadataOpen {
  280. guard let groupIdx = self.tagGroups.firstIndex(where: { $0.groupID == groupID }) else {
  281. self.pointer += 1
  282. continue
  283. }
  284. guard !self.tagGroups.isEmpty else {
  285. self.resetTagGroup(withID: groupID)
  286. continue
  287. }
  288. guard self.isMetadataOpen else {
  289. self.resetTagGroup(withID: groupID)
  290. continue
  291. }
  292. if self.enableLog {
  293. os_log("Closing metadata tag found. Closing tag with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
  294. }
  295. self.tagGroups[groupIdx].tagRanges.append(range)
  296. self.closeTag(closeTag!, withGroupID: groupID)
  297. self.isMetadataOpen = false
  298. continue
  299. } else {
  300. self.resetTag(in: range)
  301. self.pointer -= metadataClose!.count
  302. }
  303. }
  304. if let openRange = self.range(for: self.rule.primaryTag.tag) {
  305. if self.isMetadataOpen {
  306. self.resetTagGroup(withID: groupID)
  307. }
  308. let tagGroup = TagGroup(tagRanges: [openRange])
  309. groupID = tagGroup.groupID
  310. if self.enableLog {
  311. os_log("New open tag found. Starting new Group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
  312. }
  313. if self.rule.isRepeatingTag {
  314. }
  315. self.tagGroups.append(tagGroup)
  316. continue
  317. }
  318. if let range = self.range(for: closeTag) {
  319. guard !self.tagGroups.isEmpty else {
  320. if self.enableLog {
  321. os_log("No open tags exist, resetting this close tag", log: OSLog.swiftyScannerScanner, type:.info)
  322. }
  323. self.resetTag(in: range)
  324. continue
  325. }
  326. self.tagGroups[self.tagGroups.count - 1].tagRanges.append(range)
  327. groupID = self.tagGroups[self.tagGroups.count - 1].groupID
  328. if self.enableLog {
  329. os_log("New close tag found. Appending to group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
  330. }
  331. guard metadataOpen != nil else {
  332. if self.enableLog {
  333. os_log("No metadata tags exist, closing valid tag with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
  334. }
  335. self.closeTag(closeTag!, withGroupID: groupID)
  336. continue
  337. }
  338. guard self.pointer != self.elements.count else {
  339. continue
  340. }
  341. guard let range = self.range(for: metadataOpen) else {
  342. if self.enableLog {
  343. os_log("No metadata tag found, resetting group with ID %@", log: OSLog.swiftyScannerScanner, type:.info , groupID)
  344. }
  345. self.resetTagGroup(withID: groupID)
  346. continue
  347. }
  348. self.tagGroups[self.tagGroups.count - 1].tagRanges.append(range)
  349. self.isMetadataOpen = true
  350. continue
  351. }
  352. if let range = self.range(for: metadataOpen) {
  353. if self.enableLog {
  354. os_log("Multiple open metadata tags found!", log: OSLog.swiftyScannerScanner, type:.info , groupID)
  355. }
  356. self.resetTag(in: range)
  357. self.resetTagGroup(withID: groupID)
  358. self.isMetadataOpen = false
  359. continue
  360. }
  361. self.pointer += 1
  362. }
  363. }
  364. func scanRepeatingTags() {
  365. var groupID = ""
  366. let escapeCharacters = "" //self.rule.escapeCharacters.map( { String( $0 ) }).joined()
  367. let unionSet = spaceAndNewLine.union(CharacterSet(charactersIn: escapeCharacters))
  368. while self.pointer < self.elements.count {
  369. if self.enableLog {
  370. os_log("CHARACTER: %@", log: OSLog.swiftyScannerScanner, type:.info , String(self.elements[self.pointer].character))
  371. }
  372. if var openRange = self.range(for: self.rule.primaryTag.tag) {
  373. if self.elements[openRange].first?.boundaryCount == 1000 {
  374. self.resetTag(in: openRange)
  375. continue
  376. }
  377. var count = 1
  378. var tagType : RepeatingTagType = .open
  379. if let prevElement = self.element(for: .backward(self.rule.primaryTag.tag.count + 1)) {
  380. if !unionSet.containsUnicodeScalars(of: prevElement.character) {
  381. tagType = .either
  382. }
  383. } else {
  384. tagType = .open
  385. }
  386. while let nextRange = self.range(for: self.rule.primaryTag.tag) {
  387. count += 1
  388. openRange = openRange.lowerBound...nextRange.upperBound
  389. }
  390. if self.rule.minTags > 1 {
  391. if (openRange.upperBound - openRange.lowerBound) + 1 < self.rule.minTags {
  392. self.resetTag(in: openRange)
  393. os_log("Tag does not meet minimum length", log: .swiftyScannerScanner, type: .info)
  394. continue
  395. }
  396. }
  397. var validTagGroup = true
  398. if let nextElement = self.element(for: .forward(0)) {
  399. if unionSet.containsUnicodeScalars(of: nextElement.character) {
  400. if tagType == .either {
  401. tagType = .close
  402. } else {
  403. validTagGroup = tagType != .open
  404. }
  405. }
  406. } else {
  407. if tagType == .either {
  408. tagType = .close
  409. } else {
  410. validTagGroup = tagType != .open
  411. }
  412. }
  413. if !validTagGroup {
  414. if self.enableLog {
  415. os_log("Tag has whitespace on both sides", log: .swiftyScannerScanner, type: .info)
  416. }
  417. self.resetTag(in: openRange)
  418. continue
  419. }
  420. if let idx = tagGroups.firstIndex(where: { $0.groupID == groupID }) {
  421. if tagType == .either {
  422. if tagGroups[idx].count == count {
  423. self.tagGroups[idx].tagRanges.append(openRange)
  424. self.closeTag(self.rule.primaryTag.tag, withGroupID: groupID)
  425. if let last = self.tagGroups.last {
  426. groupID = last.groupID
  427. }
  428. continue
  429. }
  430. } else {
  431. if let prevRange = tagGroups[idx].tagRanges.first {
  432. if self.elements[prevRange].first?.boundaryCount == self.elements[openRange].first?.boundaryCount {
  433. self.tagGroups[idx].tagRanges.append(openRange)
  434. self.closeTag(self.rule.primaryTag.tag, withGroupID: groupID)
  435. }
  436. }
  437. continue
  438. }
  439. }
  440. var tagGroup = TagGroup(tagRanges: [openRange])
  441. groupID = tagGroup.groupID
  442. tagGroup.tagType = tagType
  443. tagGroup.count = count
  444. if self.enableLog {
  445. os_log("New open tag found with characters %@. Starting new Group with ID %@", log: OSLog.swiftyScannerScanner, type:.info, self.elements[openRange].map( { String($0.character) }).joined(), groupID)
  446. }
  447. self.tagGroups.append(tagGroup)
  448. continue
  449. }
  450. self.pointer += 1
  451. }
  452. }
  453. func scan() -> [Element] {
  454. guard self.elements.filter({ $0.type == .string }).map({ String($0.character) }).joined().contains(self.rule.primaryTag.tag) else {
  455. return self.elements
  456. }
  457. self.currentPerfomanceLog.tag(with: "Beginning \(self.rule.primaryTag.tag)")
  458. if self.enableLog {
  459. os_log("RULE: %@", log: OSLog.swiftyScannerScanner, type:.info , self.rule.description)
  460. }
  461. if self.rule.isRepeatingTag {
  462. self.scanRepeatingTags()
  463. } else {
  464. self.scanNonRepeatingTags()
  465. }
  466. for tagGroup in self.tagGroups {
  467. self.resetTagGroup(withID: tagGroup.groupID)
  468. }
  469. if self.enableLog {
  470. for element in self.elements {
  471. print(element)
  472. }
  473. }
  474. return self.elements
  475. }
  476. }