소스 검색

Initial commit

xcbosa-mini 2 년 전
커밋
51d0c296e4
49개의 변경된 파일, 4797개의 추가작업 그리고 0개의 파일을 삭제
  1. 36 0
      .gitignore
  2. 14 0
      .travis.yml
  3. 395 0
      Example/Jieba.xcodeproj/project.pbxproj
  4. 7 0
      Example/Jieba.xcodeproj/project.xcworkspace/contents.xcworkspacedata
  5. 101 0
      Example/Jieba.xcodeproj/xcshareddata/xcschemes/Jieba-Example.xcscheme
  6. 6 0
      Example/Podfile
  7. 22 0
      Example/Pods/Local Podspecs/Jieba.podspec.json
  8. 22 0
      Example/Tests/Tests-Info.plist
  9. 7 0
      Example/Tests/Tests-Prefix.pch
  10. 35 0
      Example/Tests/Tests.m
  11. 2 0
      Example/Tests/en.lproj/InfoPlist.strings
  12. 42 0
      Jieba.podspec
  13. 0 0
      Jieba/Assets/.gitkeep
  14. 0 0
      Jieba/Classes/.gitkeep
  15. 211 0
      Jieba/Classes/CppJieba/DictTrie.hpp
  16. 153 0
      Jieba/Classes/CppJieba/FullSegment.hpp
  17. 394 0
      Jieba/Classes/CppJieba/HMMSegment.hpp
  18. 17 0
      Jieba/Classes/CppJieba/ISegment.hpp
  19. 173 0
      Jieba/Classes/CppJieba/KeywordExtractor.hpp
  20. 84 0
      Jieba/Classes/CppJieba/Limonp/ArgvContext.hpp
  21. 128 0
      Jieba/Classes/CppJieba/Limonp/BlockingQueue.hpp
  22. 73 0
      Jieba/Classes/CppJieba/Limonp/BoundedQueue.hpp
  23. 90 0
      Jieba/Classes/CppJieba/Limonp/CastFloat.hpp
  24. 48 0
      Jieba/Classes/CppJieba/Limonp/Condition.hpp
  25. 118 0
      Jieba/Classes/CppJieba/Limonp/Config.hpp
  26. 31 0
      Jieba/Classes/CppJieba/Limonp/HandyMacro.hpp
  27. 21 0
      Jieba/Classes/CppJieba/Limonp/InitOnOff.hpp
  28. 171 0
      Jieba/Classes/CppJieba/Limonp/LocalVector.hpp
  29. 74 0
      Jieba/Classes/CppJieba/Limonp/Logger.hpp
  30. 432 0
      Jieba/Classes/CppJieba/Limonp/Md5.hpp
  31. 57 0
      Jieba/Classes/CppJieba/Limonp/MutexLock.hpp
  32. 125 0
      Jieba/Classes/CppJieba/Limonp/MysqlClient.hpp
  33. 22 0
      Jieba/Classes/CppJieba/Limonp/NonCopyable.hpp
  34. 139 0
      Jieba/Classes/CppJieba/Limonp/StdExtension.hpp
  35. 349 0
      Jieba/Classes/CppJieba/Limonp/StringUtil.hpp
  36. 50 0
      Jieba/Classes/CppJieba/Limonp/Thread.hpp
  37. 105 0
      Jieba/Classes/CppJieba/Limonp/ThreadPool.hpp
  38. 148 0
      Jieba/Classes/CppJieba/MPSegment.hpp
  39. 121 0
      Jieba/Classes/CppJieba/MixSegment.hpp
  40. 109 0
      Jieba/Classes/CppJieba/PosTagger.hpp
  41. 123 0
      Jieba/Classes/CppJieba/QuerySegment.hpp
  42. 78 0
      Jieba/Classes/CppJieba/SegmentBase.hpp
  43. 63 0
      Jieba/Classes/CppJieba/TransCode.hpp
  44. 297 0
      Jieba/Classes/CppJieba/Trie.hpp
  45. 30 0
      Jieba/Classes/Segmentor.cpp
  46. 25 0
      Jieba/Classes/Segmentor.h
  47. 19 0
      LICENSE
  48. 29 0
      README.md
  49. 1 0
      _Pods.xcodeproj

+ 36 - 0
.gitignore

@@ -0,0 +1,36 @@
+# macOS
+.DS_Store
+
+# Xcode
+build/
+*.pbxuser
+!default.pbxuser
+*.mode1v3
+!default.mode1v3
+*.mode2v3
+!default.mode2v3
+*.perspectivev3
+!default.perspectivev3
+xcuserdata/
+*.xccheckout
+*.moved-aside
+DerivedData
+*.hmap
+*.ipa
+
+# Bundler
+.bundle
+
+# Add this line if you want to avoid checking in source code from Carthage dependencies.
+# Carthage/Checkouts
+
+Carthage/Build
+
+# We recommend against adding the Pods directory to your .gitignore. However
+# you should judge for yourself, the pros and cons are mentioned at:
+# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-ignore-the-pods-directory-in-source-control
+# 
+# Note: if you ignore the Pods directory, make sure to uncomment
+# `pod install` in .travis.yml
+#
+# Pods/

+ 14 - 0
.travis.yml

@@ -0,0 +1,14 @@
+# references:
+# * https://www.objc.io/issues/6-build-tools/travis-ci/
+# * https://github.com/supermarin/xcpretty#usage
+
+osx_image: xcode7.3
+language: objective-c
+# cache: cocoapods
+# podfile: Example/Podfile
+# before_install:
+# - gem install cocoapods # Since Travis is not always on latest version
+# - pod install --project-directory=Example
+script:
+- set -o pipefail && xcodebuild test -enableCodeCoverage YES -workspace Example/Jieba.xcworkspace -scheme Jieba-Example -sdk iphonesimulator9.3 ONLY_ACTIVE_ARCH=NO | xcpretty
+- pod lib lint

+ 395 - 0
Example/Jieba.xcodeproj/project.pbxproj

@@ -0,0 +1,395 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 46;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		6003F58E195388D20070C39A /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6003F58D195388D20070C39A /* Foundation.framework */; };
+		6003F590195388D20070C39A /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6003F58F195388D20070C39A /* CoreGraphics.framework */; };
+		6003F592195388D20070C39A /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6003F591195388D20070C39A /* UIKit.framework */; };
+		6003F598195388D20070C39A /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6003F596195388D20070C39A /* InfoPlist.strings */; };
+		6003F59A195388D20070C39A /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 6003F599195388D20070C39A /* main.m */; };
+		6003F59E195388D20070C39A /* JBAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 6003F59D195388D20070C39A /* JBAppDelegate.m */; };
+		6003F5A7195388D20070C39A /* JBViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6003F5A6195388D20070C39A /* JBViewController.m */; };
+		6003F5A9195388D20070C39A /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 6003F5A8195388D20070C39A /* Images.xcassets */; };
+		6003F5B0195388D20070C39A /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6003F5AF195388D20070C39A /* XCTest.framework */; };
+		6003F5B1195388D20070C39A /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6003F58D195388D20070C39A /* Foundation.framework */; };
+		6003F5B2195388D20070C39A /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6003F591195388D20070C39A /* UIKit.framework */; };
+		6003F5BA195388D20070C39A /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6003F5B8195388D20070C39A /* InfoPlist.strings */; };
+		6003F5BC195388D20070C39A /* Tests.m in Sources */ = {isa = PBXBuildFile; fileRef = 6003F5BB195388D20070C39A /* Tests.m */; };
+		71719F9F1E33DC2100824A3D /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 71719F9D1E33DC2100824A3D /* LaunchScreen.storyboard */; };
+		873B8AEB1B1F5CCA007FD442 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 873B8AEA1B1F5CCA007FD442 /* Main.storyboard */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		6003F58D195388D20070C39A /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
+		6003F58F195388D20070C39A /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; };
+		6003F591195388D20070C39A /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; };
+		6003F595195388D20070C39A /* Jieba-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "Jieba-Info.plist"; sourceTree = "<group>"; };
+		6003F597195388D20070C39A /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
+		6003F599195388D20070C39A /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
+		6003F59B195388D20070C39A /* Jieba-Prefix.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "Jieba-Prefix.pch"; sourceTree = "<group>"; };
+		6003F59D195388D20070C39A /* JBAppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = JBAppDelegate.m; sourceTree = "<group>"; };
+		6003F5A6195388D20070C39A /* JBViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = JBViewController.m; sourceTree = "<group>"; };
+		6003F5A8195388D20070C39A /* Images.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Images.xcassets; sourceTree = "<group>"; };
+		6003F5AE195388D20070C39A /* Jieba_Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Jieba_Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
+		6003F5AF195388D20070C39A /* XCTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = XCTest.framework; path = Library/Frameworks/XCTest.framework; sourceTree = DEVELOPER_DIR; };
+		6003F5B7195388D20070C39A /* Tests-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "Tests-Info.plist"; sourceTree = "<group>"; };
+		6003F5B9195388D20070C39A /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
+		6003F5BB195388D20070C39A /* Tests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Tests.m; sourceTree = "<group>"; };
+		606FC2411953D9B200FFA9A0 /* Tests-Prefix.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "Tests-Prefix.pch"; sourceTree = "<group>"; };
+		71719F9E1E33DC2100824A3D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		873B8AEA1B1F5CCA007FD442 /* Main.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; name = Main.storyboard; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		B6C474C37D5F2C068FEC335E /* LICENSE */ = {isa = PBXFileReference; includeInIndex = 1; name = LICENSE; path = ../LICENSE; sourceTree = "<group>"; };
+		CB9586BE7F88E1BDCEDC4CAA /* Jieba.podspec */ = {isa = PBXFileReference; includeInIndex = 1; name = Jieba.podspec; path = ../Jieba.podspec; sourceTree = "<group>"; };
+		F45193E147A6B461A2805FF1 /* README.md */ = {isa = PBXFileReference; includeInIndex = 1; name = README.md; path = ../README.md; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		6003F5AB195388D20070C39A /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				6003F5B0195388D20070C39A /* XCTest.framework in Frameworks */,
+				6003F5B2195388D20070C39A /* UIKit.framework in Frameworks */,
+				6003F5B1195388D20070C39A /* Foundation.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		6003F581195388D10070C39A = {
+			isa = PBXGroup;
+			children = (
+				60FF7A9C1954A5C5007DD14C /* Podspec Metadata */,
+				6003F5B5195388D20070C39A /* Tests */,
+				6003F58C195388D20070C39A /* Frameworks */,
+				6003F58B195388D20070C39A /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		6003F58B195388D20070C39A /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				6003F5AE195388D20070C39A /* Jieba_Tests.xctest */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		6003F58C195388D20070C39A /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				6003F58D195388D20070C39A /* Foundation.framework */,
+				6003F58F195388D20070C39A /* CoreGraphics.framework */,
+				6003F591195388D20070C39A /* UIKit.framework */,
+				6003F5AF195388D20070C39A /* XCTest.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		6003F5B5195388D20070C39A /* Tests */ = {
+			isa = PBXGroup;
+			children = (
+				6003F5BB195388D20070C39A /* Tests.m */,
+				6003F5B6195388D20070C39A /* Supporting Files */,
+			);
+			path = Tests;
+			sourceTree = "<group>";
+		};
+		6003F5B6195388D20070C39A /* Supporting Files */ = {
+			isa = PBXGroup;
+			children = (
+				6003F5B7195388D20070C39A /* Tests-Info.plist */,
+				6003F5B8195388D20070C39A /* InfoPlist.strings */,
+				606FC2411953D9B200FFA9A0 /* Tests-Prefix.pch */,
+			);
+			name = "Supporting Files";
+			sourceTree = "<group>";
+		};
+		60FF7A9C1954A5C5007DD14C /* Podspec Metadata */ = {
+			isa = PBXGroup;
+			children = (
+				CB9586BE7F88E1BDCEDC4CAA /* Jieba.podspec */,
+				F45193E147A6B461A2805FF1 /* README.md */,
+				B6C474C37D5F2C068FEC335E /* LICENSE */,
+			);
+			name = "Podspec Metadata";
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		6003F5AD195388D20070C39A /* Jieba_Tests */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 6003F5C2195388D20070C39A /* Build configuration list for PBXNativeTarget "Jieba_Tests" */;
+			buildPhases = (
+				6003F5AA195388D20070C39A /* Sources */,
+				6003F5AB195388D20070C39A /* Frameworks */,
+				6003F5AC195388D20070C39A /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = Jieba_Tests;
+			productName = JiebaTests;
+			productReference = 6003F5AE195388D20070C39A /* Jieba_Tests.xctest */;
+			productType = "com.apple.product-type.bundle.unit-test";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		6003F582195388D10070C39A /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				CLASSPREFIX = JB;
+				LastUpgradeCheck = 0720;
+				ORGANIZATIONNAME = "xcbosa-mini";
+				TargetAttributes = {
+					6003F5AD195388D20070C39A = {
+						TestTargetID = 6003F589195388D20070C39A;
+					};
+				};
+			};
+			buildConfigurationList = 6003F585195388D10070C39A /* Build configuration list for PBXProject "PROJECT" */;
+			compatibilityVersion = "Xcode 3.2";
+			developmentRegion = English;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 6003F581195388D10070C39A;
+			productRefGroup = 6003F58B195388D20070C39A /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				6003F5AD195388D20070C39A /* Jieba_Tests */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		6003F5AC195388D20070C39A /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				6003F5BA195388D20070C39A /* InfoPlist.strings in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		6003F5AA195388D20070C39A /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				6003F5BC195388D20070C39A /* Tests.m in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		6003F596195388D20070C39A /* InfoPlist.strings */ = {
+			isa = PBXVariantGroup;
+			children = (
+				6003F597195388D20070C39A /* en */,
+			);
+			name = InfoPlist.strings;
+			sourceTree = "<group>";
+		};
+		6003F5B8195388D20070C39A /* InfoPlist.strings */ = {
+			isa = PBXVariantGroup;
+			children = (
+				6003F5B9195388D20070C39A /* en */,
+			);
+			name = InfoPlist.strings;
+			sourceTree = "<group>";
+		};
+		71719F9D1E33DC2100824A3D /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				71719F9E1E33DC2100824A3D /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		6003F5BD195388D20070C39A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 9.3;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		6003F5BE195388D20070C39A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = YES;
+				ENABLE_NS_ASSERTIONS = NO;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 9.3;
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		6003F5C0195388D20070C39A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				GCC_PRECOMPILE_PREFIX_HEADER = YES;
+				GCC_PREFIX_HEADER = "Jieba/Jieba-Prefix.pch";
+				INFOPLIST_FILE = "Jieba/Jieba-Info.plist";
+				MODULE_NAME = ExampleApp;
+				PRODUCT_BUNDLE_IDENTIFIER = "org.cocoapods.demo.${PRODUCT_NAME:rfc1034identifier}";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				WRAPPER_EXTENSION = app;
+			};
+			name = Debug;
+		};
+		6003F5C1195388D20070C39A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				GCC_PRECOMPILE_PREFIX_HEADER = YES;
+				GCC_PREFIX_HEADER = "Jieba/Jieba-Prefix.pch";
+				INFOPLIST_FILE = "Jieba/Jieba-Info.plist";
+				MODULE_NAME = ExampleApp;
+				PRODUCT_BUNDLE_IDENTIFIER = "org.cocoapods.demo.${PRODUCT_NAME:rfc1034identifier}";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				WRAPPER_EXTENSION = app;
+			};
+			name = Release;
+		};
+		6003F5C3195388D20070C39A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				FRAMEWORK_SEARCH_PATHS = (
+					"$(PLATFORM_DIR)/Developer/Library/Frameworks",
+					"$(inherited)",
+					"$(DEVELOPER_FRAMEWORKS_DIR)",
+				);
+				GCC_PRECOMPILE_PREFIX_HEADER = YES;
+				GCC_PREFIX_HEADER = "Tests/Tests-Prefix.pch";
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				INFOPLIST_FILE = "Tests/Tests-Info.plist";
+				PRODUCT_BUNDLE_IDENTIFIER = "org.cocoapods.demo.${PRODUCT_NAME:rfc1034identifier}";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Jieba_Example.app/Jieba_Example";
+				WRAPPER_EXTENSION = xctest;
+			};
+			name = Debug;
+		};
+		6003F5C4195388D20070C39A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				FRAMEWORK_SEARCH_PATHS = (
+					"$(PLATFORM_DIR)/Developer/Library/Frameworks",
+					"$(inherited)",
+					"$(DEVELOPER_FRAMEWORKS_DIR)",
+				);
+				GCC_PRECOMPILE_PREFIX_HEADER = YES;
+				GCC_PREFIX_HEADER = "Tests/Tests-Prefix.pch";
+				INFOPLIST_FILE = "Tests/Tests-Info.plist";
+				PRODUCT_BUNDLE_IDENTIFIER = "org.cocoapods.demo.${PRODUCT_NAME:rfc1034identifier}";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Jieba_Example.app/Jieba_Example";
+				WRAPPER_EXTENSION = xctest;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		6003F585195388D10070C39A /* Build configuration list for PBXProject "PROJECT" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				6003F5BD195388D20070C39A /* Debug */,
+				6003F5BE195388D20070C39A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		6003F5C2195388D20070C39A /* Build configuration list for PBXNativeTarget "Jieba_Tests" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				6003F5C3195388D20070C39A /* Debug */,
+				6003F5C4195388D20070C39A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 6003F582195388D10070C39A /* Project object */;
+}

+ 7 - 0
Example/Jieba.xcodeproj/project.xcworkspace/contents.xcworkspacedata

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:Jieba.xcodeproj">
+   </FileRef>
+</Workspace>

+ 101 - 0
Example/Jieba.xcodeproj/xcshareddata/xcschemes/Jieba-Example.xcscheme

@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "0720"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "6003F589195388D20070C39A"
+               BuildableName = "Jieba_Example.app"
+               BlueprintName = "Jieba_Example"
+               ReferencedContainer = "container:Jieba.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+         <TestableReference
+            skipped = "NO">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "6003F5AD195388D20070C39A"
+               BuildableName = "Jieba_Tests.xctest"
+               BlueprintName = "Jieba_Tests"
+               ReferencedContainer = "container:Jieba.xcodeproj">
+            </BuildableReference>
+         </TestableReference>
+      </Testables>
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "6003F589195388D20070C39A"
+            BuildableName = "Jieba_Example.app"
+            BlueprintName = "Jieba_Example"
+            ReferencedContainer = "container:Jieba.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "6003F589195388D20070C39A"
+            BuildableName = "Jieba_Example.app"
+            BlueprintName = "Jieba_Example"
+            ReferencedContainer = "container:Jieba.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "6003F589195388D20070C39A"
+            BuildableName = "Jieba_Example.app"
+            BlueprintName = "Jieba_Example"
+            ReferencedContainer = "container:Jieba.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>

+ 6 - 0
Example/Podfile

@@ -0,0 +1,6 @@
+use_frameworks!
+target 'Jieba_Tests' do
+  pod 'Jieba', :path => '../'
+  
+  
+end

+ 22 - 0
Example/Pods/Local Podspecs/Jieba.podspec.json

@@ -0,0 +1,22 @@
+{
+  "name": "Jieba",
+  "version": "0.1.0",
+  "summary": "A short description of Jieba.",
+  "description": "TODO: Add long description of the pod here.",
+  "homepage": "https://github.com/xcbosa-mini/Jieba",
+  "license": {
+    "type": "MIT",
+    "file": "LICENSE"
+  },
+  "authors": {
+    "xcbosa-mini": "xcbosa@forgetive.org"
+  },
+  "source": {
+    "git": "https://github.com/xcbosa-mini/Jieba.git",
+    "tag": "0.1.0"
+  },
+  "platforms": {
+    "ios": "10.0"
+  },
+  "source_files": "Jieba/Classes/**/*"
+}

+ 22 - 0
Example/Tests/Tests-Info.plist

@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundlePackageType</key>
+	<string>BNDL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+</dict>
+</plist>

+ 7 - 0
Example/Tests/Tests-Prefix.pch

@@ -0,0 +1,7 @@
+//  The contents of this file are implicitly included at the beginning of every test case source file.
+
+#ifdef __OBJC__
+
+  
+
+#endif

+ 35 - 0
Example/Tests/Tests.m

@@ -0,0 +1,35 @@
+//
+//  JiebaTests.m
+//  JiebaTests
+//
+//  Created by xcbosa-mini on 08/20/2023.
+//  Copyright (c) 2023 xcbosa-mini. All rights reserved.
+//
+
+@import XCTest;
+
+@interface Tests : XCTestCase // Template XCTest case for the Jieba pod; declares no members of its own.
+
+@end
+
+@implementation Tests
+
+- (void)setUp
+{
+    [super setUp];
+    // Per-test setup goes here, after the superclass setup above. This method is called before each test method in the class; it currently sets up nothing.
+}
+
+- (void)tearDown
+{
+    // Per-test teardown goes here, before the superclass teardown below. This method is called after each test method in the class; it currently tears down nothing.
+    [super tearDown];
+}
+
+- (void)testExample
+{
+    XCTFail(@"No implementation for \"%s\"", __PRETTY_FUNCTION__); // Placeholder: fails unconditionally, reporting this method's name, until a real test replaces it.
+}
+
+@end
+

+ 2 - 0
Example/Tests/en.lproj/InfoPlist.strings

@@ -0,0 +1,2 @@
+/* Localized versions of Info.plist keys */
+

+ 42 - 0
Jieba.podspec

@@ -0,0 +1,42 @@
+#
+# Be sure to run `pod lib lint Jieba.podspec` to ensure this is a
+# valid spec before submitting.
+#
+# Any lines starting with a # are optional, but their use is encouraged.
+# To learn more about a Podspec, see https://guides.cocoapods.org/syntax/podspec.html
+#
+
+Pod::Spec.new do |s|
+  s.name             = 'Jieba'
+  s.version          = '0.1.0'
+  s.summary          = 'A short description of Jieba.'
+
+# This description is used to generate tags and improve search results.
+#   * Think: What does it do? Why did you write it? What is the focus?
+#   * Try to keep it short, snappy and to the point.
+#   * Write the description between the DESC delimiters below.
+#   * Finally, don't worry about the indent, CocoaPods strips it!
+
+  s.description      = <<-DESC
+TODO: Add long description of the pod here.
+                       DESC
+
+  s.homepage         = 'https://github.com/xcbosa-mini/Jieba'
+  # s.screenshots     = 'www.example.com/screenshots_1', 'www.example.com/screenshots_2'
+  s.license          = { :type => 'MIT', :file => 'LICENSE' }
+  s.author           = { 'xcbosa-mini' => 'xcbosa@forgetive.org' }
+  s.source           = { :git => 'https://github.com/xcbosa-mini/Jieba.git', :tag => s.version.to_s }
+  # s.social_media_url = 'https://twitter.com/<TWITTER_USERNAME>'
+
+  s.ios.deployment_target = '10.0'
+
+  s.source_files = 'Jieba/Classes/**/*'
+  
+  # s.resource_bundles = {
+  #   'Jieba' => ['Jieba/Assets/*.png']
+  # }
+
+  # s.public_header_files = 'Pod/Classes/**/*.h'
+  # s.frameworks = 'UIKit', 'MapKit'
+  # s.dependency 'AFNetworking', '~> 2.3'
+end

+ 0 - 0
Jieba/Assets/.gitkeep


+ 0 - 0
Jieba/Classes/.gitkeep


+ 211 - 0
Jieba/Classes/CppJieba/DictTrie.hpp

@@ -0,0 +1,211 @@
+#ifndef CPPJIEBA_DICT_TRIE_HPP
+#define CPPJIEBA_DICT_TRIE_HPP
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <cstring>
+#include <stdint.h>
+#include <cmath>
+#include <limits>
+#include "Limonp/StringUtil.hpp"
+#include "Limonp/Logger.hpp"
+#include "TransCode.hpp"
+#include "Trie.hpp"
+
+
+
+namespace CppJieba
+{
+    using namespace Limonp;
+    // Sentinel used as the starting value when searching for a maximum
+    // weight and as the fallback score for unseen HMM emissions.
+    const double MIN_DOUBLE = -3.14e+100;
+    // Sentinel used as the starting value when searching for a minimum weight.
+    const double MAX_DOUBLE = 3.14e+100;
+    // Number of space-separated columns expected on each main dictionary line.
+    const size_t DICT_COLUMN_NUM = 3;
+    // Tag given to user dictionary entries that do not carry their own tag.
+    const char* const UNKNOWN_TAG = "";
+
+    /**
+     * Word dictionary backed by a Trie.
+     *
+     * init() loads the main dictionary (and optionally a user dictionary)
+     * into _nodeInfos, converts the raw weights of the main dictionary into
+     * log(weight / sum) and then builds a Trie whose values point at the
+     * elements of _nodeInfos. Because the Trie keeps those raw pointers,
+     * _nodeInfos must not be modified once init() has returned.
+     *
+     * NOTE(review): the class owns _trie through a raw pointer and relies on
+     * the compiler-generated copy operations; copying an initialized
+     * instance would delete the Trie twice - confirm nobody copies it.
+     */
+    class DictTrie
+    {
+        private:
+            vector<DictUnit> _nodeInfos;
+            Trie * _trie;
+
+            double _minWeight;
+        private:
+            // Single-character words that were added through the user dictionary.
+            unordered_set<Unicode::value_type> _userDictSingleChineseWord;
+        public:
+            /** Returns true when `word` was loaded as a one-character user dictionary entry. */
+            bool isUserDictSingleChineseWord(const Unicode::value_type& word) const
+            {
+                return isIn(_userDictSingleChineseWord, word);
+            }
+        public:
+            /** Smallest weight found in the main dictionary (MAX_DOUBLE before init()). */
+            double getMinWeight() const {return _minWeight;}
+
+        public:
+            /** Creates an empty dictionary; init() has to be called before use. */
+            DictTrie()
+                : _trie(NULL), _minWeight(MAX_DOUBLE)
+            {
+            }
+            /**
+             * Creates the dictionary and immediately loads it.
+             *
+             * The members are set through the initializer list instead of
+             * running `new (this) DictTrie()`, which re-constructed the
+             * already constructed container members without destroying them.
+             */
+            DictTrie(const string& dictPath, const string& userDictPath = "")
+                : _trie(NULL), _minWeight(MAX_DOUBLE)
+            {
+                init(dictPath, userDictPath);
+            }
+            ~DictTrie()
+            {
+                if(_trie)
+                {
+                    delete _trie;
+                }
+            }
+            
+        public:
+            /**
+             * Loads the dictionaries and builds the Trie. May be called only once.
+             *
+             * @param dictPath      path of the main dictionary file.
+             * @param userDictPath  optional path of a user dictionary; its
+             *                      entries get the maximum weight of the main
+             *                      dictionary.
+             * @return always true; failures are reported through assert().
+             */
+            bool init(const string& dictPath, const string& userDictPath = "")
+            {
+                assert(!_trie);
+                _loadDict(dictPath);
+                _calculateWeight(_nodeInfos);
+                _minWeight = _findMinWeight(_nodeInfos);
+                
+                if(userDictPath.size())
+                {
+                    // User words are loaded after the weights were converted,
+                    // so they receive an already converted weight.
+                    double maxWeight = _findMaxWeight(_nodeInfos);
+                    _loadUserDict(userDictPath, maxWeight, UNKNOWN_TAG);
+                }
+                _shrink(_nodeInfos);
+                _trie = _createTrie(_nodeInfos);
+                assert(_trie);
+                return true;
+            }
+
+        public:
+            /** Forwards to Trie::find for the range [begin, end). */
+            const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const
+            {
+                return _trie->find(begin, end);
+            }
+            /** Forwards to Trie::find, filling `dag`; `offset` is passed through unchanged. */
+            bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const
+            {
+                return _trie->find(begin, end, dag, offset);
+            }
+            /** Forwards to Trie::find, filling `res`. */
+            void find(
+                        Unicode::const_iterator begin, 
+                        Unicode::const_iterator end, 
+                        vector<SegmentChar>& res
+                        ) const
+            {
+                _trie->find(begin, end, res);
+            }
+
+
+        private:
+            /**
+             * Builds a Trie from `dictUnits`. The Trie receives the address of
+             * every element, so `dictUnits` has to outlive the returned Trie
+             * and must not reallocate afterwards.
+             */
+            Trie * _createTrie(const vector<DictUnit>& dictUnits)
+            {
+                assert(dictUnits.size());
+                vector<Unicode> words;
+                vector<const DictUnit*> valuePointers;
+                for(size_t i = 0 ; i < dictUnits.size(); i ++)
+                {
+                    words.push_back(dictUnits[i].word);
+                    valuePointers.push_back(&dictUnits[i]);
+                }
+
+                Trie * trie = new Trie(words, valuePointers);
+                return trie;
+            }
+            /**
+             * Appends the entries of a user dictionary to _nodeInfos.
+             * Each line is `word` or `word tag`, separated by a space.
+             */
+            void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag)
+            {
+                ifstream ifs(filePath.c_str());
+                assert(ifs);
+                string line;
+                DictUnit nodeInfo;
+                vector<string> buf;
+                size_t lineno;
+                for(lineno = 0; getline(ifs, line); lineno++)
+                {
+                    buf.clear();
+                    split(line, buf, " ");
+                    assert(buf.size() >= 1);
+                    if(buf.empty())
+                    {
+                        // With assertions compiled out, skip the line rather
+                        // than index an empty vector.
+                        LogError("line[%u:%s] illegal.", static_cast<unsigned int>(lineno), line.c_str());
+                        continue;
+                    }
+                    if(!TransCode::decode(buf[0], nodeInfo.word))
+                    {
+                        LogError("line[%u:%s] illegal.", static_cast<unsigned int>(lineno), line.c_str());
+                        continue;
+                    }
+                    if(nodeInfo.word.size() == 1)
+                    {
+                        _userDictSingleChineseWord.insert(nodeInfo.word[0]);
+                    }
+                    nodeInfo.weight = defaultWeight;
+                    nodeInfo.tag = (buf.size() == 2 ? buf[1] : defaultTag);
+                    _nodeInfos.push_back(nodeInfo);
+                }
+                LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), static_cast<unsigned int>(lineno));
+            }
+            /**
+             * Appends the entries of the main dictionary to _nodeInfos.
+             * Each line is `word weight tag`, separated by spaces.
+             */
+            void _loadDict(const string& filePath) 
+            {
+                ifstream ifs(filePath.c_str());
+                assert(ifs);
+                string line;
+                vector<string> buf;
+
+                DictUnit nodeInfo;
+                for(size_t lineno = 0 ; getline(ifs, line); lineno++)
+                {
+                    split(line, buf, " ");
+                    assert(buf.size() == DICT_COLUMN_NUM);
+                    if(buf.size() != DICT_COLUMN_NUM)
+                    {
+                        // With assertions compiled out, skip the line rather
+                        // than read buf[1] / buf[2] out of range.
+                        LogError("line[%u:%s] illegal.", static_cast<unsigned int>(lineno), line.c_str());
+                        continue;
+                    }
+                    
+                    if(!TransCode::decode(buf[0], nodeInfo.word))
+                    {
+                        LogError("line[%u:%s] illegal.", static_cast<unsigned int>(lineno), line.c_str());
+                        continue;
+                    }
+                    nodeInfo.weight = atof(buf[1].c_str());
+                    nodeInfo.tag = buf[2];
+                    
+                    _nodeInfos.push_back(nodeInfo);
+                }
+            }
+            /** Returns the smallest weight in `nodeInfos`, or MAX_DOUBLE when it is empty. */
+            double _findMinWeight(const vector<DictUnit>& nodeInfos) const
+            {
+                double ret = MAX_DOUBLE;
+                for(size_t i = 0; i < nodeInfos.size(); i++)
+                {
+                    ret = min(nodeInfos[i].weight, ret);
+                }
+                return ret;
+            }
+            /** Returns the largest weight in `nodeInfos`, or MIN_DOUBLE when it is empty. */
+            double _findMaxWeight(const vector<DictUnit>& nodeInfos) const
+            {
+                double ret = MIN_DOUBLE;
+                for(size_t i = 0; i < nodeInfos.size(); i++)
+                {
+                    ret = max(nodeInfos[i].weight, ret);
+                }
+                return ret;
+            }
+
+            /** Replaces every weight w by log(w / sum of all weights). */
+            void _calculateWeight(vector<DictUnit>& nodeInfos) const
+            {
+                double sum = 0.0;
+                for(size_t i = 0; i < nodeInfos.size(); i++)
+                {
+                    sum += nodeInfos[i].weight;
+                }
+                assert(sum);
+                for(size_t i = 0; i < nodeInfos.size(); i++)
+                {
+                    DictUnit& nodeInfo = nodeInfos[i];
+                    assert(nodeInfo.weight);
+                    nodeInfo.weight = log(double(nodeInfo.weight)/double(sum));
+                }
+            }
+
+            /** Releases spare capacity by swapping with an exactly sized copy. */
+            void _shrink(vector<DictUnit>& units) const
+            {
+                vector<DictUnit>(units.begin(), units.end()).swap(units);
+            }
+
+
+    };
+}
+
+#endif

+ 153 - 0
Jieba/Classes/CppJieba/FullSegment.hpp

@@ -0,0 +1,153 @@
+#ifndef CPPJIEBA_FULLSEGMENT_H
+#define CPPJIEBA_FULLSEGMENT_H
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "Limonp/Logger.hpp"
+#include "DictTrie.hpp"
+#include "ISegment.hpp"
+#include "SegmentBase.hpp"
+#include "TransCode.hpp"
+
+namespace CppJieba
+{
+    /**
+     * Segmenter that emits every dictionary word found at every position of
+     * the input, plus single characters that no dictionary word covers.
+     * The DictTrie is either created (and owned) by the segmenter or
+     * borrowed from the caller.
+     */
+    class FullSegment: public SegmentBase
+    {
+        private:
+            const DictTrie* _dictTrie;
+            // true when _dictTrie belongs to the caller and must not be deleted
+            bool _isBorrowed;
+        public:
+            FullSegment()
+                : _dictTrie(NULL), _isBorrowed(false)
+            {
+            }
+            /** Builds and owns a DictTrie loaded from `dictPath`. */
+            explicit FullSegment(const string& dictPath)
+                : _dictTrie(NULL)
+            {
+                init(dictPath);
+            }
+            /** Uses the caller's DictTrie without taking ownership. */
+            explicit FullSegment(const DictTrie* dictTrie) 
+                : _dictTrie(NULL)
+            {
+                init(dictTrie);
+            }
+            virtual ~FullSegment()
+            {
+                if(!_dictTrie || _isBorrowed)
+                {
+                    return;
+                }
+                delete _dictTrie;
+            }
+        public:
+            /** Creates an owned DictTrie from `dictPath`; always returns true. */
+            bool init(const string& dictPath)
+            {
+                assert(_dictTrie == NULL);
+                _isBorrowed = false;
+                _dictTrie = new DictTrie(dictPath);
+                return true;
+            }
+            /** Borrows `dictTrie`; always returns true. */
+            bool init(const DictTrie* dictTrie) 
+            {
+                assert(_dictTrie == NULL);
+                assert(dictTrie);
+                _isBorrowed = true;
+                _dictTrie = dictTrie;
+                return true;
+            }
+
+        public:
+            using SegmentBase::cut;
+
+        public:
+            /**
+             * Appends to `res` every word found in [begin, end).
+             * Returns false (after logging) when the range is empty.
+             */
+            bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
+            {
+                assert(_dictTrie);
+                if (begin >= end)
+                {
+                    LogError("begin >= end");
+                    return false;
+                }
+
+                // result of the trie lookup at the current position
+                DagType matches;
+
+                // one past the last input index covered by an emitted word
+                int coveredEnd = 0;
+
+                // index of `cur` relative to `begin`
+                int pos = 0;
+
+                for (Unicode::const_iterator cur = begin; cur != end; ++cur, ++pos)
+                {
+                    if (!_dictTrie->find(cur, end, matches, 0))
+                    {
+                        // nothing starts here: emit the character itself
+                        // unless an earlier word already covers it
+                        if (coveredEnd <= pos)
+                        {
+                            res.push_back(Unicode(1, *cur));
+                            ++coveredEnd;
+                        }
+                        continue;
+                    }
+
+                    for (DagType::const_iterator m = matches.begin(); m != matches.end(); ++m)
+                    {
+                        const int len = m->second->word.size();
+                        const bool loneAndUncovered = (matches.size() == 1 && coveredEnd <= pos);
+                        if (len >= 2 || loneAndUncovered)
+                        {
+                            res.push_back(m->second->word);
+                        }
+                        const int wordEnd = pos + len;
+                        if (wordEnd > coveredEnd)
+                        {
+                            coveredEnd = wordEnd;
+                        }
+                    }
+                    matches.clear();
+                }
+
+                return true;
+            }
+
+            /**
+             * Same as the Unicode overload, but appends the encoded form of
+             * each word; words that fail to encode are logged and dropped.
+             */
+            bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
+            {
+                assert(_dictTrie);
+                if (begin >= end)
+                {
+                    LogError("begin >= end");
+                    return false;
+                }
+
+                vector<Unicode> pieces;
+                if (!cut(begin, end, pieces))
+                {
+                    LogError("get unicode cut result error.");
+                    return false;
+                }
+
+                string encoded;
+                for (size_t i = 0; i < pieces.size(); ++i)
+                {
+                    if (!TransCode::encode(pieces[i], encoded))
+                    {
+                        LogError("encode failed.");
+                        continue;
+                    }
+                    res.push_back(encoded);
+                }
+
+                return true;
+            }
+    };
+}
+
+#endif

+ 394 - 0
Jieba/Classes/CppJieba/HMMSegment.hpp

@@ -0,0 +1,394 @@
+#ifndef CPPJIBEA_HMMSEGMENT_H
+#define CPPJIBEA_HMMSEGMENT_H
+
+#include <iostream>
+#include <fstream>
+#include <memory.h>
+#include <cassert>
+#include "Limonp/StringUtil.hpp"
+#include "Limonp/Logger.hpp"
+#include "TransCode.hpp"
+#include "ISegment.hpp"
+#include "SegmentBase.hpp"
+#include "DictTrie.hpp"
+
+namespace CppJieba
+{
+    using namespace Limonp;
+    typedef unordered_map<uint16_t, double> EmitProbMap;
+    /**
+     * Segmenter driven by a four-state (B, E, M, S) hidden Markov model.
+     *
+     * The model (start scores, transition scores and one emission table per
+     * state) is read from a text file by init(). Code units below 0x80 are
+     * split off by simple letter / number rules; every other run is labelled
+     * with the Viterbi algorithm and cut after each E or S state.
+     *
+     * NOTE(review): scores are only ever added together, so the model file
+     * presumably stores log probabilities - confirm against the model data.
+     * NOTE(review): _emitProbVec stores addresses of this object's own maps,
+     * so a compiler-generated copy would keep pointing into the source
+     * object - confirm instances are never copied.
+     */
+    class HMMSegment: public SegmentBase
+    {
+        public:
+            /*
+             * STATUS:
+             * 0:B, 1:E, 2:M, 3:S
+             * */
+            enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
+        private:
+            // State letters, indexed by state; filled in by init().
+            char _statMap[STATUS_SUM];
+            // Score of starting in each state.
+            double _startProb[STATUS_SUM];
+            // _transProb[from][to]: score of moving between two states.
+            double _transProb[STATUS_SUM][STATUS_SUM];
+            // Per-state emission scores, keyed by code unit.
+            EmitProbMap _emitProbB;
+            EmitProbMap _emitProbE;
+            EmitProbMap _emitProbM;
+            EmitProbMap _emitProbS;
+            // Pointers to the four maps above, indexed by state (filled by init()).
+            vector<EmitProbMap* > _emitProbVec;
+
+        public:
+            // Creates an unloaded segmenter; init() must be called before cut().
+            HMMSegment(){}
+            // Creates the segmenter and loads the model through LIMONP_CHECK(init(...)).
+            explicit HMMSegment(const string& filePath)
+            {
+                LIMONP_CHECK(init(filePath));
+            }
+            virtual ~HMMSegment(){}
+        public:
+            // Resets the score tables, registers the emission maps in state
+            // order and loads the model file. Returns true on success.
+            bool init(const string& filePath)
+            {
+                memset(_startProb, 0, sizeof(_startProb));
+                memset(_transProb, 0, sizeof(_transProb));
+                _statMap[0] = 'B';
+                _statMap[1] = 'E';
+                _statMap[2] = 'M';
+                _statMap[3] = 'S';
+                // Order must match the B/E/M/S enum values, since the vector
+                // is indexed by state.
+                _emitProbVec.push_back(&_emitProbB);
+                _emitProbVec.push_back(&_emitProbE);
+                _emitProbVec.push_back(&_emitProbM);
+                _emitProbVec.push_back(&_emitProbS);
+                LIMONP_CHECK(_loadModel(filePath.c_str()));
+                LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
+                return true;
+            }
+        public:
+            using SegmentBase::cut;
+        public:
+            // Cuts [begin, end) and appends the words to res.
+            // Runs of code units >= 0x80 go through the HMM (_cut); each code
+            // unit < 0x80 starts a letter run, a number run or a one-unit token.
+            // Returns false when the HMM step fails.
+            bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const 
+            {
+                Unicode::const_iterator left = begin;
+                Unicode::const_iterator right = begin;
+                while(right != end)
+                {
+                    if(*right < 0x80) 
+                    {
+                        // Flush the pending non-ASCII run [left, right) first.
+                        if(left != right && !_cut(left, right, res))
+                        {
+                            return false;
+                        }
+                        left = right;
+                        // do/while(false) is used as a breakable block: the
+                        // first rule that consumes something wins.
+                        do {
+                            right = _sequentialLetterRule(left, end);
+                            if(right != left)
+                            {
+                                break;
+                            }
+                            right = _numbersRule(left, end);
+                            if(right != left)
+                            {
+                                break;
+                            }
+                            // Neither rule matched: take a single code unit.
+                            right ++;
+                        } while(false);
+                        res.push_back(Unicode(left, right));
+                        left = right;
+                    }
+                    else
+                    {
+                        right++;
+                    }
+                }
+                // Flush the trailing non-ASCII run, if any.
+                if(left != right && !_cut(left, right, res))
+                {
+                    return false;
+                }
+                return true;
+            }
+        private:
+            // sequential letters rule
+            // Returns the end of the longest run of [a-zA-Z] starting at begin
+            // (begin itself when the first code unit is not a letter).
+            Unicode::const_iterator _sequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const
+            {
+                Unicode::value_type x;
+                while(begin != end)
+                {
+                    x = *begin;
+                    if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z'))
+                    {
+                        begin ++;
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+                return begin;
+            }
+            // numbers rule
+            // Returns the end of a run that starts with a digit and continues
+            // with digits or '.', or begin when the first code unit is not a
+            // digit. Dereferences begin without checking it against end, so
+            // the caller must pass a non-empty range.
+            Unicode::const_iterator _numbersRule(Unicode::const_iterator begin, Unicode::const_iterator end) const
+            {
+                Unicode::value_type x = *begin;
+                if('0' <= x && x <= '9')
+                {
+                    begin ++;
+                }
+                else
+                {
+                    return begin;
+                }
+                while(begin != end)
+                {
+                    x = *begin;
+                    if( ('0' <= x && x <= '9') || x == '.')
+                    {
+                        begin++;
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+                return begin;
+            }
+            // Labels [begin, end) with _viterbi and appends one word to res
+            // for every position whose state is E or S.
+            bool _cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const 
+            {
+                vector<size_t> status; 
+                if(!_viterbi(begin, end, status))
+                {
+                    LogError("_viterbi failed.");
+                    return false;
+                }
+
+                Unicode::const_iterator left = begin;
+                Unicode::const_iterator right;
+                for(size_t i = 0; i < status.size(); i++)
+                {
+                    // E == 1 and S == 3 are the odd states, i.e. word ends.
+                    if(status[i] % 2) //if(E == status[i] || S == status[i])
+                    {
+                        right = begin + i + 1;
+                        res.push_back(Unicode(left, right));
+                        left = right;
+                    }
+                }
+                return true;
+            }
+        public:
+            // Cuts [begin, end) and appends the encoded words to res.
+            // Returns false for an empty range or when the Unicode cut fails;
+            // a word that fails to encode is logged and left as the empty
+            // string created by resize().
+            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
+            {
+                if(begin == end)
+                {
+                    return false;
+                }
+                vector<Unicode> words;
+                words.reserve(end - begin);
+                if(!cut(begin, end, words))
+                {
+                    return false;
+                }
+                // Grow res once and encode straight into the new slots.
+                size_t offset = res.size();
+                res.resize(res.size() + words.size());
+                for(size_t i = 0; i < words.size(); i++)
+                {
+                    if(!TransCode::encode(words[i], res[offset + i]))
+                    {
+                        LogError("encode failed.");
+                    }
+                }
+                return true;
+            }
+
+        private:
+            // Viterbi decoding: fills status with one state per code unit of
+            // [begin, end). Returns false for an empty range.
+            //
+            // weight and path are X-by-Y tables stored flat; the cell for
+            // position x and state y lives at index x + y * X. weight holds
+            // the best score of a path ending there, path the previous state
+            // on that best path (-1 in the first column).
+            bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector<size_t>& status)const
+            {
+                if(begin == end)
+                {
+                    return false;
+                }
+
+                size_t Y = STATUS_SUM;
+                size_t X = end - begin;
+
+                size_t XYSize = X * Y;
+                size_t now, old, stat;
+                double tmp, endE, endS;
+
+                vector<int> path(XYSize);
+                vector<double> weight(XYSize);
+
+                //start
+                for(size_t y = 0; y < Y; y++)
+                {
+                    weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], *begin, MIN_DOUBLE);
+                    path[0 + y * X] = -1;
+                }
+
+
+                double emitProb;
+
+                for(size_t x = 1; x < X; x++)
+                {
+                    for(size_t y = 0; y < Y; y++)
+                    {
+                        now = x + y*X;
+                        weight[now] = MIN_DOUBLE;
+                        // Fallback predecessor, kept only if no candidate
+                        // below scores above MIN_DOUBLE.
+                        path[now] = E; // warning
+                        emitProb = _getEmitProb(_emitProbVec[y], *(begin+x), MIN_DOUBLE);
+                        for(size_t preY = 0; preY < Y; preY++)
+                        {
+                            old = x - 1 + preY * X;
+                            tmp = weight[old] + _transProb[preY][y] + emitProb;
+                            if(tmp > weight[now])
+                            {
+                                weight[now] = tmp;
+                                path[now] = preY;
+                            }
+                        }
+                    }
+                }
+
+                // Only E and S are considered as final states; E wins ties.
+                endE = weight[X-1+E*X];
+                endS = weight[X-1+S*X];
+                stat = 0;
+                if(endE >= endS)
+                {
+                    stat = E;
+                }
+                else
+                {
+                    stat = S;
+                }
+
+                // Walk the predecessor table backwards from the last position.
+                // After x == 0, stat receives the -1 marker but is not read again.
+                status.resize(X);
+                for(int x = X -1 ; x >= 0; x--)
+                {
+                    status[x] = stat;
+                    stat = path[x + stat*X];
+                }
+
+                return true;
+            }
+            // Reads the model file. Significant lines (see _getLine) must
+            // appear in this order: one line of STATUS_SUM start scores,
+            // STATUS_SUM lines of STATUS_SUM transition scores, then one
+            // emission line each for B, E, M and S. Returns false on any
+            // missing or malformed line.
+            bool _loadModel(const char* const filePath)
+            {
+                LogDebug("loadModel [%s] start ...", filePath);
+                ifstream ifile(filePath);
+                string line;
+                vector<string> tmp;
+                vector<string> tmp2;
+                //load _startProb
+                if(!_getLine(ifile, line))
+                {
+                    return false;
+                }
+                split(line, tmp, " ");
+                if(tmp.size() != STATUS_SUM)
+                {
+                    LogError("start_p illegal");
+                    return false;
+                }
+                for(size_t j = 0; j< tmp.size(); j++)
+                {
+                    _startProb[j] = atof(tmp[j].c_str());
+                }
+
+                //load _transProb
+                for(size_t i = 0; i < STATUS_SUM; i++)
+                {
+                    if(!_getLine(ifile, line))
+                    {
+                        return false;
+                    }
+                    split(line, tmp, " ");
+                    if(tmp.size() != STATUS_SUM)
+                    {
+                        LogError("trans_p illegal");
+                        return false;
+                    }
+                    for(size_t j =0; j < STATUS_SUM; j++)
+                    {
+                        _transProb[i][j] = atof(tmp[j].c_str());
+                    }
+                }
+
+                //load _emitProbB
+                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbB))
+                {
+                    return false;
+                }
+
+                //load _emitProbE
+                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbE))
+                {
+                    return false;
+                }
+
+                //load _emitProbM
+                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbM))
+                {
+                    return false;
+                }
+
+                //load _emitProbS
+                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbS))
+                {
+                    return false;
+                }
+
+                LogDebug("loadModel [%s] end.", filePath);
+
+                return true;
+            }
+            // Reads the next significant line into line: lines are trimmed,
+            // and empty lines and lines starting with '#' are skipped.
+            // Returns false when the stream is exhausted.
+            bool _getLine(ifstream& ifile, string& line)
+            {
+                while(getline(ifile, line))
+                {
+                    trim(line);
+                    if(line.empty())
+                    {
+                        continue;
+                    }
+                    if(startsWith(line, "#"))
+                    {
+                        continue;
+                    }
+                    return true;
+                }
+                return false;
+            }
+            // Parses one emission line of comma-separated "key:value" pairs
+            // into mp. Every key must decode to exactly one code unit.
+            // Returns false on an empty line or a malformed pair.
+            bool _loadEmitProb(const string& line, EmitProbMap& mp)
+            {
+                if(line.empty())
+                {
+                    return false;
+                }
+                vector<string> tmp, tmp2;
+                Unicode unicode;
+                split(line, tmp, ",");
+                for(size_t i = 0; i < tmp.size(); i++)
+                {
+                    split(tmp[i], tmp2, ":");
+                    if(2 != tmp2.size())
+                    {
+                        LogError("_emitProb illegal.");
+                        return false;
+                    }
+                    if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1)
+                    {
+                        LogError("TransCode failed.");
+                        return false;
+                    }
+                    mp[unicode[0]] = atof(tmp2[1].c_str());
+                }
+                return true;
+            }
+            // Returns the emission score stored for key in *ptMp, or defVal
+            // when the key is absent.
+            double _getEmitProb(const EmitProbMap* ptMp, uint16_t key, double defVal)const 
+            {
+                EmitProbMap::const_iterator cit = ptMp->find(key);
+                if(cit == ptMp->end())
+                {
+                    return defVal;
+                }
+                return cit->second;
+
+            }
+
+
+    };
+}
+
+#endif

+ 17 - 0
Jieba/Classes/CppJieba/ISegment.hpp

@@ -0,0 +1,17 @@
+#ifndef CPPJIEBA_SEGMENTINTERFACE_H
+#define CPPJIEBA_SEGMENTINTERFACE_H
+
+
+namespace CppJieba
+{
+    // Abstract interface implemented by the segmenters.
+    // This header includes nothing itself, so Unicode, vector and string must
+    // already be declared by whichever header includes it.
+    class ISegment
+    {
+        public:
+            virtual ~ISegment(){};
+        public:
+            // Cuts the range [begin, end) and stores the resulting words in res.
+            virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<string>& res) const = 0;
+            // Cuts the string str and stores the resulting words in res.
+            virtual bool cut(const string& str, vector<string>& res) const = 0;
+    };
+}
+
+#endif

+ 173 - 0
Jieba/Classes/CppJieba/KeywordExtractor.hpp

@@ -0,0 +1,173 @@
+#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
+#define CPPJIEBA_KEYWORD_EXTRACTOR_H
+
+#include "MixSegment.hpp"
+#include <cmath>
+#include <set>
+
+namespace CppJieba
+{
+    using namespace Limonp;
+
+    /*utf8*/
+    /**
+     * Keyword extractor: segments a sentence with MixSegment, counts every
+     * word longer than one character, drops stop words, multiplies each count
+     * by the word's IDF value (or by the average IDF for unknown words) and
+     * returns the topN highest scoring words.
+     */
+    class KeywordExtractor
+    {
+        private:
+            MixSegment _segment;
+        private:
+            unordered_map<string, double> _idfMap;
+            // Mean IDF of the loaded entries; used for words missing from _idfMap.
+            double _idfAverage;
+
+            unordered_set<string> _stopWords;
+        public:
+            /** Creates an unloaded extractor; init() must be called before extract(). */
+            KeywordExtractor()
+                : _idfAverage(0.0)
+            {
+            }
+            /** Creates the extractor and loads all resources through LIMONP_CHECK(init(...)). */
+            KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath)
+                : _idfAverage(0.0)
+            {
+                LIMONP_CHECK(init(dictPath, hmmFilePath, idfPath, stopWordPath));
+            }
+            ~KeywordExtractor(){}
+
+        public:
+            /**
+             * Loads the IDF table, the stop word list and the segmenter models.
+             * @return true on success; failures go through assert / LIMONP_CHECK.
+             */
+            bool init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath)
+            {
+                _loadIdfDict(idfPath);
+                _loadStopWordDict(stopWordPath);
+                LIMONP_CHECK(_segment.init(dictPath, hmmFilePath));
+                return true;
+            }
+        public:
+
+            /**
+             * Appends the topN keywords of `str` to `keywords` (existing
+             * content is kept). Returns false when segmentation fails.
+             */
+            bool extract(const string& str, vector<string>& keywords, size_t topN) const
+            {
+                vector<pair<string, double> > topWords;
+                if(!extract(str, topWords, topN))
+                {
+                    return false;
+                }
+                for(size_t i = 0; i < topWords.size(); i++)
+                {
+                    keywords.push_back(topWords[i].first);
+                }
+                return true;
+            }
+
+            /**
+             * Replaces `keywords` with at most topN (word, score) pairs,
+             * ordered by descending score. Returns false when segmentation fails.
+             */
+            bool extract(const string& str, vector<pair<string, double> >& keywords, size_t topN) const
+            {
+                vector<string> words;
+                if(!_segment.cut(str, words))
+                {
+                    LogError("segment cut(%s) failed.", str.c_str());
+                    return false;
+                }
+
+                // Term frequency of every multi-character word.
+                map<string, double> wordmap;
+                for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++)
+                {
+                    if(_isSingleWord(*iter))
+                    {
+                        continue;
+                    }
+                    wordmap[*iter] += 1.0;
+                }
+
+                for(map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end(); )
+                {
+                    if(_stopWords.end() != _stopWords.find(itr->first))
+                    {
+                        // Post-increment keeps the iterator valid across erase.
+                        wordmap.erase(itr++);
+                        continue;
+                    }
+
+                    unordered_map<string, double>::const_iterator cit = _idfMap.find(itr->first);
+                    if(cit != _idfMap.end())
+                    {
+                        itr->second *= cit->second;
+                    }
+                    else
+                    {
+                        itr->second *= _idfAverage;
+                    }
+                    itr ++;
+                }
+
+                keywords.clear();
+                std::copy(wordmap.begin(), wordmap.end(), std::inserter(keywords, keywords.begin()));
+                topN = min(topN, keywords.size());
+                partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), _cmp);
+                keywords.resize(topN);
+                return true;
+            }
+        private:
+            /**
+             * Loads `word idf` lines into _idfMap and computes _idfAverage.
+             *
+             * The average is taken over the entries that were actually
+             * loaded; empty or malformed lines are skipped and no longer
+             * dilute it.
+             */
+            void _loadIdfDict(const string& idfPath)
+            {
+                ifstream ifs(idfPath.c_str());
+                if(!ifs)
+                {
+                    LogError("open %s failed.", idfPath.c_str());
+                    assert(false);
+                }
+                string line ;
+                vector<string> buf;
+                double idf = 0.0;
+                double idfSum = 0.0;
+                size_t lineno = 0;
+                size_t loaded = 0;
+                for(;getline(ifs, line); lineno++)
+                {
+                    buf.clear();
+                    if(line.empty())
+                    {
+                        LogError("line[%d] empty. skipped.", static_cast<int>(lineno));
+                        continue;
+                    }
+                    if(!split(line, buf, " ") || buf.size() != 2)
+                    {
+                        LogError("line %d [%s] illegal. skipped.", static_cast<int>(lineno), line.c_str());
+                        continue;
+                    }
+                    idf = atof(buf[1].c_str());
+                    _idfMap[buf[0]] = idf;
+                    idfSum += idf;
+                    loaded++;
+
+                } 
+
+                assert(loaded);
+                if(loaded == 0)
+                {
+                    // With assertions compiled out, keep the previous average
+                    // instead of dividing by zero.
+                    return;
+                }
+                _idfAverage = idfSum / loaded;
+                assert(_idfAverage > 0.0);
+            }
+            /** Loads one stop word per line into _stopWords. */
+            void _loadStopWordDict(const string& filePath)
+            {
+                ifstream ifs(filePath.c_str());
+                if(!ifs)
+                {
+                    LogError("open %s failed.", filePath.c_str());
+                    assert(false);
+                }
+                string line ;
+                while(getline(ifs, line))
+                {
+                    _stopWords.insert(line);
+                }
+                assert(_stopWords.size());
+            }
+        private:
+            /** True when `str` decodes to exactly one code unit. */
+            bool _isSingleWord(const string& str) const
+            {
+                Unicode unicode;
+                TransCode::decode(str, unicode);
+                return unicode.size() == 1;
+            }
+
+        private:
+            /** Orders (word, score) pairs by descending score. */
+            static bool _cmp(const pair<string, double>& lhs, const pair<string, double>& rhs)
+            {
+                return lhs.second > rhs.second;
+            }
+            
+    };
+}
+
+#endif
+
+

+ 84 - 0
Jieba/Classes/CppJieba/Limonp/ArgvContext.hpp

@@ -0,0 +1,84 @@
+/************************************
+ * file enc : ascii
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+
+#ifndef LIMONP_ARGV_FUNCTS_H
+#define LIMONP_ARGV_FUNCTS_H
+
+#include <set>
+#include <sstream>
+#include "StringUtil.hpp"
+
+namespace Limonp
+{
+    using namespace std;
+    /*
+     * ArgvContext sorts the entries of an argv array into three containers:
+     *   - args_ : entries that do not start with "-" (in order of appearance)
+     *   - mpss_ : "-key value" pairs, where value is the next entry and does
+     *             not itself start with "-"
+     *   - sset_ : entries starting with "-" that have no such value
+     */
+    class ArgvContext
+    {
+        public:
+            ArgvContext(int argc, const char* const * argv)
+            {
+                int pos = 0;
+                while(pos < argc)
+                {
+                    const char* const token = argv[pos];
+                    ++pos;
+                    if(!startsWith(token, "-"))
+                    {
+                        // plain positional argument
+                        args_.push_back(token);
+                        continue;
+                    }
+                    // pos now indexes the entry following the option
+                    const bool hasValue = pos < argc && !startsWith(argv[pos], "-");
+                    if(hasValue)
+                    {
+                        mpss_[token] = argv[pos];
+                        ++pos; // the value has been consumed
+                    }
+                    else
+                    {
+                        sset_.insert(token);
+                    }
+                }
+            }
+            ~ArgvContext() {}
+        public:
+            friend ostream& operator << (ostream& os, const ArgvContext& args);
+            // Positional argument i, or "" when i is out of range.
+            string operator [](size_t i) const
+            {
+                if(i >= args_.size())
+                {
+                    return "";
+                }
+                return args_[i];
+            }
+            // Value stored for option key, or "" when key has no value.
+            string operator [](const string& key) const
+            {
+                const map<string, string>::const_iterator found = mpss_.find(key);
+                if(found == mpss_.end())
+                {
+                    return "";
+                }
+                return found->second;
+            }
+        public:
+            // True when key was seen either with a value or as a bare flag.
+            bool hasKey(const string& key) const
+            {
+                return mpss_.count(key) != 0 || sset_.count(key) != 0;
+            }
+        private:
+            vector<string> args_;
+            map<string, string> mpss_;
+            set<string> sset_;
+
+    };
+
+    // Streams the positional arguments, the key/value options and the bare
+    // flags of args, in that order, using the container stream operators
+    // that are in scope.
+    inline ostream& operator << (ostream& os, const ArgvContext& args)
+    {
+        return ((os << args.args_) << args.mpss_) << args.sset_;
+    }
+}
+
+#endif

+ 128 - 0
Jieba/Classes/CppJieba/Limonp/BlockingQueue.hpp

@@ -0,0 +1,128 @@
+/*
+https://github.com/chenshuo/muduo/blob/master/muduo/base/BlockingQueue.h
+*/
+
+#ifndef LIMONP_BLOCKINGQUEUE_HPP
+#define LIMONP_BLOCKINGQUEUE_HPP
+
+#include <queue>
+#include "BoundedQueue.hpp"
+#include "Condition.hpp"
+
+namespace Limonp
+{
+    /*
+     * Unbounded FIFO queue with a blocking pop(). push(), pop() and size()
+     * each take a MutexLockGuard on mutex_ before touching queue_.
+     */
+    template<class T>
+        class BlockingQueue: NonCopyable
+        {
+            public:
+                BlockingQueue()
+                    : mutex_(), notEmpty_(mutex_), queue_()
+                {
+                }
+
+                // Enqueues a copy of x, then signals notEmpty_ while the
+                // guard is still held.
+                void push(const T& x)
+                {
+                    MutexLockGuard guard(mutex_);
+                    queue_.push(x);
+                    notEmpty_.notify(); // wait morphing saves us
+                }
+
+                // Waits on notEmpty_ until the queue has an element, then
+                // removes and returns the front element.
+                T pop()
+                {
+                    MutexLockGuard guard(mutex_);
+                    // the predicate is re-checked after every wake-up
+                    // because wake-ups may be spurious
+                    while (queue_.empty())
+                    {
+                        notEmpty_.wait();
+                    }
+                    assert(!queue_.empty());
+                    T head(queue_.front());
+                    queue_.pop();
+                    return head;
+                }
+
+                bool empty() const
+                {
+                    return 0 == size();
+                }
+
+                size_t size() const
+                {
+                    MutexLockGuard guard(mutex_);
+                    return queue_.size();
+                }
+
+            private:
+                mutable MutexLock mutex_;
+                Condition         notEmpty_;
+                std::queue<T>     queue_;
+        };
+
+    /*
+     * Fixed-capacity FIFO queue: push() waits on notFull_ while the queue is
+     * full, pop() waits on notEmpty_ while it is empty. Every accessor except
+     * capacity() takes a MutexLockGuard on mutex_.
+     */
+    template<typename T>
+        class BoundedBlockingQueue : NonCopyable
+        {
+            public:
+                explicit BoundedBlockingQueue(size_t maxSize)
+                    : mutex_(),
+                    notEmpty_(mutex_),
+                    notFull_(mutex_),
+                    queue_(maxSize)
+                {}
+
+                // Waits for a free slot, stores a copy of x, then signals notEmpty_.
+                void push(const T& x)
+                {
+                    MutexLockGuard guard(mutex_);
+                    while (queue_.full())
+                    {
+                        notFull_.wait();
+                    }
+                    assert(!queue_.full());
+                    queue_.push(x);
+                    notEmpty_.notify();
+                }
+
+                // Waits for an element, removes it, then signals notFull_.
+                T pop()
+                {
+                    MutexLockGuard guard(mutex_);
+                    while (queue_.empty())
+                    {
+                        notEmpty_.wait();
+                    }
+                    assert(!queue_.empty());
+                    T taken = queue_.pop();
+                    notFull_.notify();
+                    return taken;
+                }
+
+                size_t size() const
+                {
+                    MutexLockGuard guard(mutex_);
+                    return queue_.size();
+                }
+
+                bool empty() const
+                {
+                    MutexLockGuard guard(mutex_);
+                    return queue_.empty();
+                }
+
+                bool full() const
+                {
+                    MutexLockGuard guard(mutex_);
+                    return queue_.full();
+                }
+
+                // Read without taking the lock.
+                size_t capacity() const
+                {
+                    return queue_.capacity();
+                }
+
+            private:
+                mutable MutexLock          mutex_;
+                Condition                  notEmpty_;
+                Condition                  notFull_;
+                BoundedQueue<T>  queue_;
+        };
+
+}
+
+#endif

+ 73 - 0
Jieba/Classes/CppJieba/Limonp/BoundedQueue.hpp

@@ -0,0 +1,73 @@
+#ifndef LIMONP_BOUNDED_QUEUE_HPP
+#define LIMONP_BOUNDED_QUEUE_HPP
+
+#include <vector>
+#include <fstream>
+#include <cassert>
+
+namespace Limonp
+{
+    using namespace std;
+    /*
+     * Fixed-capacity FIFO ring buffer over a vector<T> sized at construction.
+     * push() on a full queue and pop() on an empty queue are guarded only by
+     * assert.
+     */
+    template<class T>
+        class BoundedQueue
+        {
+            private:
+                size_t head_;
+                size_t tail_;
+                size_t size_;
+                const size_t capacity_;
+                vector<T> circular__buffer;
+            public:
+                explicit BoundedQueue(size_t capacity)
+                    : head_(0), tail_(0), size_(0),
+                    capacity_(capacity), circular__buffer(capacity)
+                {
+                    assert(capacity_);
+                }
+                ~BoundedQueue(){}
+            public:
+                // Resets the indices; stored elements are not destroyed.
+                void clear()
+                {
+                    head_ = tail_ = size_ = 0;
+                }
+                bool empty() const
+                {
+                    return 0 == size_;
+                }
+                bool full() const
+                {
+                    return size_ == capacity_;
+                }
+                size_t size() const
+                {
+                    return size_;
+                }
+                size_t capacity() const
+                {
+                    return capacity_;
+                }
+
+                // Stores a copy of t at the tail slot and advances the tail.
+                void push(const T& t)
+                {
+                    assert(!full());
+                    const size_t slot = tail_;
+                    circular__buffer[slot] = t;
+                    tail_ = (slot + 1) % capacity_;
+                    ++size_;
+                }
+
+                // Advances the head and returns a copy of the element it pointed at.
+                T pop()
+                {
+                    assert(!empty());
+                    const size_t slot = head_;
+                    head_ = (slot + 1) % capacity_;
+                    --size_;
+                    return circular__buffer[slot];
+                }
+
+        };
+}
+
+#endif

+ 90 - 0
Jieba/Classes/CppJieba/Limonp/CastFloat.hpp

@@ -0,0 +1,90 @@
+#ifndef LIMONP_CAST_FUNCTS_H
+#define LIMONP_CAST_FUNCTS_H
+
+namespace Limonp
+{
+    namespace CastFloat
+    {
+        //logical and or (bit masks applied with & and |)
+        static const int sign_32 = 0xC0000000; // bits 31-30 of the 32-bit pattern
+        static const int exponent_32 = 0x07800000; // bits 26-23
+        static const int mantissa_32 = 0x007FE000; // bits 22-13
+        static const int sign_exponent_32 = 0x40000000; // bit 30
+        static const int loss_32 = 0x38000000; // bits 29-27, which the 16-bit code has no room for
+
+        static const short sign_16 = (short)0xC000; // bits 15-14 of the 16-bit code
+        static const short exponent_16 = (short)0x3C00; // bits 13-10
+        static const short mantissa_16 = (short)0x03FF; // bits 9-0
+        static const short sign_exponent_16 = (short)0x4000; // bit 14
+        static const int exponent_fill_32 = 0x38000000; // bits 29-27, OR-ed back in by shortBitsToFloat
+
+        //infinite
+        static const short infinite_16 = (short) 0x7FFF; // OR-ed into the code by floatToShortBits for too-large values
+        static const short infinitesmall_16 = (short) 0x0000; // returned by floatToShortBits for too-small values
+
+        inline float intBitsToFloat(unsigned int x) // Returns the float whose storage holds the bit pattern x.
+        {
+            union
+            {
+                float f;
+                int i;
+            }u; // float and int overlaid on the same storage
+            u.i = x; // write the pattern through the int member ...
+            return u.f; // ... and read it back through the float member
+        }
+
+        inline int floatToIntBits(float f) // Returns the bit pattern stored for f as an int.
+        {
+            union
+            {
+                float f;
+                int i ;
+            }u; // float and int overlaid on the same storage
+            u.f = f; // write through the float member ...
+            return u.i; // ... and read the same storage through the int member
+        }
+
+        inline short floatToShortBits(float f) // Packs the bit pattern of f into a 16-bit code (lossy).
+        {
+            int fi = floatToIntBits(f);
+
+            // Extract the key fields
+            short sign = (short) ((unsigned int)(fi & sign_32) >> 16);
+            short exponent = (short) ((unsigned int)(fi & exponent_32) >> 13);
+            short mantissa = (short) ((unsigned int)(fi & mantissa_32) >> 13);
+            // Assemble the encoded result
+            short code = (short) (sign | exponent | mantissa);
+            // Handle infinitely large and infinitely small magnitudes
+            if ((fi & loss_32) > 0 && (fi & sign_exponent_32) > 0) {
+                // Exponent sign bit is 1 (positive power) and at least one loss_32 bit is set: return the infinitely large value
+                return (short) (code | infinite_16);
+            }
+            if (((fi & loss_32) ^ loss_32) > 0 && (fi & sign_exponent_32) == 0) {
+                // Exponent sign bit is 0 (negative power) and the loss_32 bits are not all 1 (XOR with 111 is > 0): return the infinitely small value
+                return infinitesmall_16;
+            }
+
+            return code;
+        }
+
+        inline float shortBitsToFloat(short s) // Expands a 16-bit code from floatToShortBits back into a float.
+        {
+            /*
+             * The exponent has 3 spare bits: if the exponent sign bit is 1 they are filled with 0;
+             * if it is 0 they are filled with 1. The mantissa is padded with 13 trailing zero bits.
+             */
+            int sign = ((int) (s & sign_16)) << 16;
+            int exponent = ((int) (s & exponent_16)) << 13;
+            // Exponent sign bit is 0 (and the code is not zero): fill the three spare bits with 1
+            if ((s & sign_exponent_16) == 0 && s != 0) {
+                exponent |= exponent_fill_32;
+            }
+            int mantissa = ((int) (s & mantissa_16)) << 13;
+            // Assemble the decoded result
+            int code = sign | exponent | mantissa;
+            return intBitsToFloat(code);
+
+        }
+    }
+}
+
+#endif

+ 48 - 0
Jieba/Classes/CppJieba/Limonp/Condition.hpp

@@ -0,0 +1,48 @@
+/*
+ * https://github.com/chenshuo/muduo/blob/master/muduo/base/Condition.h
+ */
+
+#ifndef LIMONP_CONDITION_HPP
+#define LIMONP_CONDITION_HPP
+
+#include "MutexLock.hpp"
+
+namespace Limonp
+{
+    class Condition : NonCopyable // Wraps a pthread_cond_t that is always waited on with one MutexLock.
+    {
+        public:
+            explicit Condition(MutexLock& mutex) // Keeps a reference to mutex; does not take ownership of it.
+                : mutex_(mutex)
+            {
+                LIMONP_CHECK(!pthread_cond_init(&pcond_, NULL)); // NULL selects default condition attributes
+            }
+
+            ~Condition()
+            {
+                LIMONP_CHECK(!pthread_cond_destroy(&pcond_));
+            }
+
+            void wait() // Calls pthread_cond_wait on pcond_ with the pthread mutex of mutex_.
+            {
+                LIMONP_CHECK(!pthread_cond_wait(&pcond_, mutex_.getPthreadMutex()));
+            }
+
+            void notify() // Calls pthread_cond_signal on pcond_.
+            {
+                LIMONP_CHECK(!pthread_cond_signal(&pcond_));
+            }
+
+            void notifyAll() // Calls pthread_cond_broadcast on pcond_.
+            {
+                LIMONP_CHECK(!pthread_cond_broadcast(&pcond_));
+            }
+
+        private:
+            MutexLock& mutex_; // mutex passed to pthread_cond_wait
+            pthread_cond_t pcond_;
+    };
+
+}
+
+#endif

+ 118 - 0
Jieba/Classes/CppJieba/Limonp/Config.hpp

@@ -0,0 +1,118 @@
+/************************************
+ * file enc : utf8
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_CONFIG_H
+#define LIMONP_CONFIG_H
+
+
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <assert.h>
+#include "StringUtil.hpp"
+
+namespace Limonp
+{
+    using namespace std;
+    /*
+     * Config loads "key=value" lines from a text file into a string map.
+     * Empty lines and lines starting with "#" (after trimming) are ignored.
+     * A line that does not split into exactly two fields on "=", or a key
+     * that is already present, is reported on stderr and trips an assert.
+     */
+    class Config
+    {
+        public:
+            explicit Config(const string& filePath)
+            {
+                loadFile_(filePath);
+            }
+        public:
+            // True when at least one key was loaded.
+            operator bool ()
+            {
+                return !map_.empty();
+            }
+        private:
+            void loadFile_(const string& filePath)
+            {
+                ifstream ifs(filePath.c_str());
+                assert(ifs);
+                vector<string> fields;
+                for(string line; getline(ifs, line); )
+                {
+                    trim(line);
+                    const bool skippable = line.empty() || startsWith(line, "#");
+                    if(skippable)
+                    {
+                        continue;
+                    }
+                    fields.clear();
+                    const bool wellFormed = split(line, fields, "=") && fields.size() == 2;
+                    if(!wellFormed)
+                    {
+                        fprintf(stderr, "line[%s] illegal.\n", line.c_str());
+                        assert(false);
+                        continue;
+                    }
+                    trim(fields[0]);
+                    trim(fields[1]);
+                    const bool inserted = map_.insert(make_pair(fields[0], fields[1])).second;
+                    if(!inserted)
+                    {
+                        fprintf(stderr, "key[%s] already exits.\n", fields[0].c_str());
+                        assert(false);
+                        continue;
+                    }
+                }
+                ifs.close();
+            }
+        public:
+            // Copies the value stored for key into value; false when key is absent.
+            bool get(const string& key, string& value) const
+            {
+                const map<string, string>::const_iterator found = map_.find(key);
+                if(found == map_.end())
+                {
+                    return false;
+                }
+                value = found->second;
+                return true;
+            }
+            // Same lookup, with the stored text converted by atoi.
+            bool get(const string& key, int & value) const
+            {
+                string text;
+                if(get(key, text))
+                {
+                    value = atoi(text.c_str());
+                    return true;
+                }
+                return false;
+            }
+            // Returns the stored C string for key, or NULL when key is NULL
+            // or absent. The pointer refers to the string held in the map.
+            const char* operator [] (const char* key) const
+            {
+                if(NULL == key)
+                {
+                    return NULL;
+                }
+                const map<string, string>::const_iterator found = map_.find(key);
+                return found == map_.end() ? NULL : found->second.c_str();
+            }
+        public:
+            // Renders the whole configuration through the stream operator.
+            string getConfigInfo() const
+            {
+                string info;
+                info << *this;
+                return info;
+            }
+        private:
+            map<string, string> map_;
+        private:
+            friend ostream& operator << (ostream& os, const Config& config);
+    };
+    
+    // Streams the key/value map of config using the map stream operator
+    // that is in scope.
+    inline ostream& operator << (ostream& os, const Config& config)
+    {
+        const map<string, string>& entries = config.map_;
+        return os << entries;
+    }
+}
+
+#endif

+ 31 - 0
Jieba/Classes/CppJieba/Limonp/HandyMacro.hpp

@@ -0,0 +1,31 @@
+#ifndef LIMONP_HANDY_MACRO_HPP
+#define LIMONP_HANDY_MACRO_HPP
+
+#include <cstdio>
+#include <cstdlib>
+
+// LIMONP_CHECK(exp): evaluates exp once. When exp is false, prints the file,
+// the line and the expression text to stderr and calls abort().
+// The message used to say the expression "is true", which is the opposite of
+// the condition that triggers it.
+#define LIMONP_CHECK(exp) \
+    if(!(exp)){fprintf(stderr, "File:%s, Line:%d Exp:[" #exp "] is false, abort.\n", __FILE__, __LINE__); abort();}
+
+#define print(x) cout<< #x": " << x <<endl
+/*
+#define XX_GET_SET(varType, varName, funName)\
+private: varType varName;\
+public: inline varType get##funName(void) const {return varName;}\
+public: inline void set##funName(varType var) {varName = var;}
+
+#define XX_GET(varType, varName, funName)\
+private: varType varName;\
+public: inline varType get##funName(void) const {return varName;}
+
+#define XX_SET(varType, varName, funName)\
+private: varType varName;\
+public: inline void set##funName(varType var) {varName = var;}
+
+#define XX_GET_SET_BY_REF(varType, varName, funName)\
+private: varType varName;\
+public: inline const varType& get##funName(void) const {return varName;}\
+public: inline void set##funName(const varType& var){varName = var;}
+*/
+
+#endif

+ 21 - 0
Jieba/Classes/CppJieba/Limonp/InitOnOff.hpp

@@ -0,0 +1,21 @@
+#ifndef LIMONP_INITONOFF_H
+#define LIMONP_INITONOFF_H
+
+namespace Limonp
+{
+    /*
+     * Base class holding a single "initialized" flag. The flag starts as
+     * false, derived classes change it through setInitFlag_, and the object
+     * converts to bool with the flag's current value.
+     */
+    class InitOnOff
+    {
+        public:
+            InitOnOff() : isInited_(false) {}
+            ~InitOnOff() {}
+        protected:
+            bool isInited_;
+            bool getInitFlag_() const
+            {
+                return isInited_;
+            }
+            // Stores flag and returns the stored value.
+            bool setInitFlag_(bool flag)
+            {
+                isInited_ = flag;
+                return isInited_;
+            }
+        public:
+            operator bool() const
+            {
+                return getInitFlag_();
+            }
+
+    };
+}
+
+#endif

+ 171 - 0
Jieba/Classes/CppJieba/Limonp/LocalVector.hpp

@@ -0,0 +1,171 @@
+#ifndef LIMONP_LOCAL_VECTOR_HPP
+#define LIMONP_LOCAL_VECTOR_HPP
+
+#include <iostream>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+namespace Limonp
+{
+    using namespace std;
+    /*
+     * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
+     * LocalVector<T> is simple and not well-tested. 
+     */
+    // Number of elements held in the in-object buffer before LocalVector
+    // switches to malloc'ed storage.
+    const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
+    /*
+     * Small vector that keeps up to LOCAL_VECTOR_BUFFER_SIZE elements inside
+     * the object and moves to heap storage beyond that. Elements are copied
+     * with memcpy and never destroyed individually.
+     */
+    template <class T>
+        class LocalVector
+        {
+            public:
+                typedef const T* const_iterator ;
+                typedef T value_type;
+                typedef size_t size_type;
+            private:
+                T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
+                T * ptr_;        // buffer_ or a malloc'ed block
+                size_t size_;
+                size_t capacity_;
+            public:
+                LocalVector()
+                {
+                    init_();
+                };
+                LocalVector(const LocalVector<T>& vec)
+                {
+                    init_();
+                    *this = vec;
+                }
+                LocalVector(const_iterator  begin, const_iterator end) // TODO: make it faster
+                {
+                    init_();
+                    while(begin != end)
+                    {
+                        push_back(*begin++);
+                    }
+                }
+                LocalVector(size_t size, const T& t) // TODO: make it faster
+                {
+                    init_();
+                    while(size--)
+                    {
+                        push_back(t);
+                    }
+                }
+                ~LocalVector()
+                {
+                    if(ptr_ != buffer_)
+                    {
+                        free(ptr_);
+                    }
+                };
+            public:
+                // Replaces the contents with a copy of vec.
+                LocalVector<T>& operator = (const LocalVector<T>& vec)
+                {
+                    // Self-assignment must be a no-op: clear() below would
+                    // otherwise release the very storage we are about to copy.
+                    if(this == &vec)
+                    {
+                        return *this;
+                    }
+                    clear();
+                    size_ = vec.size();
+                    capacity_ = vec.capacity();
+                    if(vec.buffer_ == vec.ptr_)
+                    {
+                        memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
+                        ptr_ = buffer_;
+                    }
+                    else
+                    {
+                        ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
+                        assert(ptr_);
+                        memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
+                    }
+                    return *this;
+                }
+            private:
+                // Points ptr_ back at the in-object buffer with size 0.
+                void init_()
+                {
+                    ptr_ = buffer_;
+                    size_ = 0;
+                    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
+                }
+            public:
+                // Unchecked element access.
+                T& operator [] (size_t i)
+                {
+                    return ptr_[i];
+                }
+                const T& operator [] (size_t i) const
+                {
+                    return ptr_[i];
+                }
+                // Appends t, doubling the capacity when the storage is full.
+                void push_back(const T& t)
+                {
+                    if(size_ == capacity_)
+                    {
+                        assert(capacity_);
+                        reserve(capacity_ * 2);
+                    }
+                    ptr_[size_ ++ ] = t;
+                }
+                // Grows the storage to hold at least size elements; never shrinks.
+                void reserve(size_t size)
+                {
+                    if(size <= capacity_)
+                    {
+                        return;
+                    }
+                    T * next =  (T*)malloc(sizeof(T) * size);
+                    assert(next);
+                    T * old = ptr_;
+                    ptr_ = next;
+                    memcpy(ptr_, old, sizeof(T) * capacity_);
+                    capacity_ = size;
+                    if(old != buffer_)
+                    {
+                        free(old);
+                    }
+                }
+                bool empty() const
+                {
+                    return 0 == size();
+                }
+                size_t size() const
+                {
+                    return size_;
+                }
+                size_t capacity() const
+                {
+                    return capacity_;
+                }
+                const_iterator begin() const
+                {
+                    return ptr_;
+                }
+                const_iterator end() const
+                {
+                    return ptr_ + size_;
+                }
+                // Releases heap storage, if any, and returns to the empty in-object state.
+                void clear()
+                {
+                    if(ptr_ != buffer_)
+                    {
+                        free(ptr_);
+                    }
+                    init_();
+                }
+        };
+
+    // Prints vec as ["a", "b", ...]; an empty vector prints as [].
+    template <class T>
+        ostream & operator << (ostream& os, const LocalVector<T>& vec)
+        {
+            const size_t count = vec.size();
+            if(0 == count)
+            {
+                os << "[]";
+                return os;
+            }
+            os << "[\"" << vec[0];
+            size_t idx = 1;
+            while(idx < count)
+            {
+                os << "\", \"" << vec[idx];
+                ++idx;
+            }
+            return os << "\"]";
+        }
+
+}
+
+#endif

+ 74 - 0
Jieba/Classes/CppJieba/Limonp/Logger.hpp

@@ -0,0 +1,74 @@
+/************************************
+ * file enc : utf8
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_LOGGER_H
+#define LIMONP_LOGGER_H
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <cstring>
+#include <stdio.h>
+#include <cstdlib>
+#include <stdarg.h>
+#include <time.h>
+#include <cassert>
+
+// FILE_BASENAME: the part of __FILE__ after the last '/', or __FILE__ itself
+// when it contains no '/'. The expansion is wrapped in parentheses so the
+// conditional expression cannot regroup with operators at the use site.
+#define FILE_BASENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+
+// Printf-style logging macros, one per level; each forwards the call site's
+// file basename and line number to Limonp::Logger::LoggingF.
+#define LogDebug(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_DEBUG, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
+#define LogInfo(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_INFO, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
+#define LogWarn(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_WARN, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
+#define LogError(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_ERROR, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
+#define LogFatal(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_FATAL, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
+
+namespace Limonp
+{
+    using namespace std;
+    // Log levels in increasing severity, plus two sizes used by Logger.
+    enum {LL_DEBUG = 0, LL_INFO = 1, LL_WARN = 2, LL_ERROR = 3, LL_FATAL = 4, LEVEL_ARRAY_SIZE = 5, CSTR_BUFFER_SIZE = 32};
+    static const char * LOG_LEVEL_ARRAY[LEVEL_ARRAY_SIZE]= {"DEBUG","INFO","WARN","ERROR","FATAL"};
+    // timestamp, file:line, level name, message
+    static const char * LOG_FORMAT = "%s %s:%d %s %s\n";
+    static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
+
+    /*
+     * Logger writes one formatted line per call to stderr.
+     */
+    class Logger
+    {
+        public:
+            // Writes "<local time> <fileName>:<lineno> <LEVEL> <msg>" to stderr.
+            // level must be one of LL_DEBUG..LL_FATAL (checked by assert only).
+            static void Logging(size_t level, const string& msg, const char* fileName, int lineno)
+            {
+                assert(level <= LL_FATAL);
+                char buf[CSTR_BUFFER_SIZE];
+                time_t timeNow;
+                time(&timeNow);
+                strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&timeNow));
+                fprintf(stderr, LOG_FORMAT, buf, fileName, lineno,LOG_LEVEL_ARRAY[level], msg.c_str());
+            }
+            // printf-style front end for Logging. When LOGGER_LEVEL is
+            // defined, calls below that level return without output.
+            static void LoggingF(size_t level, const char* fileName, int lineno, const char* const fmt, ...)
+            {
+#ifdef LOGGER_LEVEL
+                if(level < LOGGER_LEVEL) return;
+#endif
+                // Upper bound for blind buffer doubling when vsnprintf keeps
+                // reporting an error instead of a required length.
+                const int maxBlindSize = 1 << 20;
+                int size = 256;
+                string msg;
+                va_list ap;
+                while (1) {
+                    msg.resize(size);
+                    va_start(ap, fmt);
+                    // Write through &msg[0]: the buffer returned by c_str()
+                    // is const and must not be modified.
+                    int n = vsnprintf(&msg[0], size, fmt, ap);
+                    va_end(ap);
+                    if (n > -1 && n < size) {
+                        msg.resize(n);
+                        break;
+                    }
+                    if (n > -1) {
+                        // vsnprintf reported the exact length it needs
+                        size = n + 1;
+                    }
+                    else if (size >= maxBlindSize) {
+                        // Persistent negative return: treat it as a formatting
+                        // error and log the raw format string rather than
+                        // growing the buffer forever.
+                        msg.assign(fmt);
+                        break;
+                    }
+                    else {
+                        size *= 2;
+                    }
+                }
+                Logging(level, msg, fileName, lineno);
+            }
+    };
+}
+
+#endif

+ 432 - 0
Jieba/Classes/CppJieba/Limonp/Md5.hpp

@@ -0,0 +1,432 @@
+#ifndef __MD5_H__
+#define __MD5_H__
+
+// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+// rights reserved.
+
+// License to copy and use this software is granted provided that it
+// is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+// Algorithm" in all material mentioning or referencing this software
+// or this function.
+//
+// License is also granted to make and use derivative works provided
+// that such works are identified as "derived from the RSA Data
+// Security, Inc. MD5 Message-Digest Algorithm" in all material
+// mentioning or referencing the derived work.
+//
+// RSA Data Security, Inc. makes no representations concerning either
+// the merchantability of this software or the suitability of this
+// software for any particular purpose. It is provided "as is"
+// without express or implied warranty of any kind.
+//
+// These notices must be retained in any copies of any part of this
+// documentation and/or software.
+
+
+
+// The original md5 implementation avoids external libraries.
+// This version has dependency on stdio.h for file input and
+// string.h for memcpy.
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+
+namespace Limonp 
+{
+
+//#pragma region MD5 defines
+// Constants for MD5Transform routine.
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+
+// F, G, H and I are basic MD5 functions.
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~z)))
+
+// ROTATE_LEFT rotates x left n bits.
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+// Rotation is separate from addition to prevent recomputation.
+#define FF(a, b, c, d, x, s, ac) { \
+  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) { \
+  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) { \
+  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) { \
+  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+//#pragma endregion
+
+
+typedef unsigned char BYTE ;
+
+// POINTER defines a generic pointer type
+typedef unsigned char *POINTER;
+
+// UINT2 defines a two byte word
+typedef unsigned short int UINT2;
+
+// UINT4 defines a four byte word
+typedef unsigned int UINT4;
+
+static unsigned char PADDING[64] = {
+  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+// convenient object that wraps
+// the C-functions for use in C++ only
+class MD5
+{
+private:
+  struct __context_t {
+    UINT4 state[4];                                   /* state (ABCD) */
+    UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
+    unsigned char buffer[64];                         /* input buffer */
+  } context ;
+
+  //#pragma region static helper functions
+  // The core of the MD5 algorithm is here.
+  // MD5 basic transformation. Transforms state based on block.
+  //   state - the four 32-bit chaining words (A, B, C, D); updated in place
+  //   block - exactly 64 input bytes, decoded below into sixteen 32-bit words
+  static void MD5Transform( UINT4 state[4], unsigned char block[64] )
+  {
+    UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+    // Unpack the 64 bytes into x[0..15] (least significant byte first, see Decode).
+    Decode (x, block, 64);
+
+    /* Round 1 */
+    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
+    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
+    FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
+    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
+    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
+    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
+    FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
+    FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
+    FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
+    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
+    FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
+    FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
+    FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
+    FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
+    FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
+    FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
+
+    /* Round 2 */
+    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
+    GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
+    GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
+    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
+    GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
+    GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
+    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
+    GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
+    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
+    GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
+    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
+    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+    /* Round 3 */
+    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
+    HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
+    HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+    HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
+    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
+    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
+    HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+    HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
+    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
+    HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
+    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
+    HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
+
+    /* Round 4 */
+    II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
+    II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
+    II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+    II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
+    II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
+    II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+    II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
+    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
+    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+    II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
+    II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+    II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
+    II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
+    II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
+
+    // Fold this block's result into the running chaining state.
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+
+    // Zeroize sensitive information.
+    // NOTE(review): a compiler may drop a memset of a local that is never read
+    // again; confirm whether the wipe has to be guaranteed.
+    memset((POINTER)x, 0, sizeof (x));
+  }
+
+  // Serialises 32-bit words from `input` into `output`, four bytes per word
+  // with the least significant byte first. `len` is the number of output bytes
+  // and is assumed to be a multiple of 4.
+  static void Encode( unsigned char *output, UINT4 *input, unsigned int len )
+  {
+    for (unsigned int byteIdx = 0; byteIdx < len; byteIdx += 4) {
+      const UINT4 word = input[byteIdx / 4];
+      // Byte k of the group carries bits 8k..8k+7 of the word.
+      for (unsigned int k = 0; k < 4; ++k)
+        output[byteIdx + k] = (unsigned char)((word >> (8 * k)) & 0xff);
+    }
+  }
+
+  // Packs bytes from `input` into 32-bit words in `output`, four bytes per
+  // word with the first byte as the least significant one. `len` is the number
+  // of input bytes and is assumed to be a multiple of 4.
+  static void Decode( UINT4 *output, unsigned char *input, unsigned int len )
+  {
+    for (unsigned int wordIdx = 0; wordIdx * 4 < len; ++wordIdx) {
+      const unsigned char *src = input + wordIdx * 4;
+      UINT4 word = 0;
+      // Walk the group from its last byte so the first byte ends up lowest.
+      for (int k = 3; k >= 0; --k)
+        word = (word << 8) | (UINT4)src[k];
+      output[wordIdx] = word;
+    }
+  }
+  //#pragma endregion
+
+
+public:
+  // MAIN FUNCTIONS
+  // Creates a hasher ready for Update(). The public digest buffers are zeroed
+  // so that reading digestRaw / digestChars before any digest has been
+  // computed yields zeros / an empty C string rather than indeterminate bytes.
+  MD5()
+  {
+    memset( digestRaw, 0, sizeof (digestRaw) );
+    memset( digestChars, 0, sizeof (digestChars) );
+    Init() ;
+  }
+
+  // Starts a fresh MD5 computation: resets the processed-bit counter and
+  // reloads the four chaining words with the algorithm's initial constants.
+  void Init()
+  {
+    static const UINT4 initialState[4] = {
+      0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+    };
+
+    for (int w = 0; w < 4; ++w)
+      context.state[w] = initialState[w];
+
+    context.count[0] = 0;
+    context.count[1] = 0;
+  }
+
+  // MD5 block update operation. Continues an MD5 message-digest
+  // operation, processing another message block, and updating the
+  // context.
+  // Bytes that do not complete a 64-byte block are kept in context.buffer
+  // until a later call (or Final) supplies the rest.
+  void Update(
+    unsigned char *input,   // input block
+    unsigned int inputLen ) // length of input block
+  {
+    unsigned int i, index, partLen;
+
+    // Compute number of bytes mod 64
+    // (count[0] holds bits, so >> 3 converts to bytes already buffered)
+    index = (unsigned int)((context.count[0] >> 3) & 0x3F);
+
+    // Update number of bits
+    // count[0]/count[1] form a 64-bit bit counter; a wrapped low word carries
+    // into the high word, and the top 3 bits of inputLen go there directly.
+    if ((context.count[0] += ((UINT4)inputLen << 3))
+      < ((UINT4)inputLen << 3))
+      context.count[1]++;
+    context.count[1] += ((UINT4)inputLen >> 29);
+
+    // Bytes still needed to complete the partially filled buffer.
+    partLen = 64 - index;
+
+    // Transform as many times as possible.
+    if (inputLen >= partLen) {
+      // Complete and process the buffered block first ...
+      memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
+      MD5Transform (context.state, context.buffer);
+
+      // ... then process whole 64-byte blocks straight from the input.
+      for (i = partLen; i + 63 < inputLen; i += 64)
+        MD5Transform (context.state, &input[i]);
+
+      index = 0;
+    }
+    else
+      i = 0;
+
+    /* Buffer remaining input */
+    memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen-i);
+  }
+
+  // MD5 finalization. Ends an MD5 message-digest operation, writing
+  // the message digest and zeroizing the context.
+  // Writes to digestRaw, then renders digestChars via writeToString().
+  // Init() must be called before the object is fed new data, because the
+  // context is wiped here.
+  void Final()
+  {
+    unsigned char bits[8];
+    unsigned int index, padLen;
+
+    // Save number of bits
+    // (captured before padding, because Update() below changes the counter)
+    Encode( bits, context.count, 8 );
+
+    // Pad out to 56 mod 64.
+    index = (unsigned int)((context.count[0] >> 3) & 0x3f);
+    padLen = (index < 56) ? (56 - index) : (120 - index);
+    Update( PADDING, padLen );
+
+    // Append length (before padding)
+    Update( bits, 8 );
+
+    // Store state in digest
+    Encode( digestRaw, context.state, 16);
+
+    // Zeroize sensitive information.
+    memset((POINTER)&context, 0, sizeof (context));
+
+    writeToString() ;
+  }
+
+  /// Renders the 16 bytes of digestRaw into digestChars as 32 lowercase hex
+  /// digits followed by a terminating NUL (digestChars holds 32+1 chars).
+  /// A lookup table is used instead of sprintf(): the output is identical to
+  /// "%02x" per byte, but no unbounded formatted write is involved.
+  void writeToString()
+  {
+    static const char hexDigits[] = "0123456789abcdef";
+
+    for( int pos = 0 ; pos < 16 ; pos++ )
+    {
+      digestChars[pos*2]     = hexDigits[(digestRaw[pos] >> 4) & 0x0f] ;
+      digestChars[pos*2 + 1] = hexDigits[digestRaw[pos] & 0x0f] ;
+    }
+    digestChars[32] = '\0' ;
+  }
+
+
+public:
+  // an MD5 digest is a 16-byte number (32 hex digits)
+  BYTE digestRaw[ 16 ] ;
+
+  // This version of the digest is actually
+  // a "printf'd" version of the digest.
+  char digestChars[ 33 ] ;
+
+  /// Reads a file from disk in binary mode and digests its contents.
+  /// Returns digestChars (32 hex digits, NUL-terminated) on success, or NULL
+  /// when filename is NULL/empty, the file cannot be opened, or a read error
+  /// occurs part-way through (a digest of partial data is never returned).
+  const char* digestFile( const char *filename )
+  {
+    if (NULL == filename || strcmp(filename, "") == 0)
+        return NULL;
+
+    FILE *file = fopen (filename, "rb");
+    if (NULL == file)
+    {
+      return NULL;
+    }
+
+    Init() ;
+
+    unsigned char buffer[1024] ;
+    // fread() returns size_t; it is at most sizeof(buffer) here, so the
+    // narrowing for Update() is safe.
+    size_t len;
+    while( (len = fread( buffer, 1, sizeof (buffer), file )) > 0 )
+      Update( buffer, (unsigned int)len ) ;
+
+    // fread() returning 0 means end-of-file or an error; tell them apart.
+    const bool readFailed = (ferror( file ) != 0);
+    fclose( file );
+    if (readFailed)
+      return NULL;
+
+    Final();
+
+    return digestChars ;
+  }
+
+  /// Digests a byte-array already in memory.
+  ///   memchunk - first byte of the data
+  ///   len      - number of bytes to digest; must not be negative
+  /// Returns digestChars, or NULL when memchunk is NULL or len is negative
+  /// (a negative len would otherwise be converted to a huge unsigned length).
+  const char* digestMemory( BYTE *memchunk, int len )
+  {
+    if (NULL == memchunk || len < 0)
+        return NULL;
+
+    Init() ;
+    Update( memchunk, (unsigned int)len ) ;
+    Final() ;
+
+    return digestChars ;
+  }
+
+  // Digests a NUL-terminated C string (the terminator itself is not hashed)
+  // and returns digestChars; returns NULL when the argument is NULL.
+  const char* digestString(const char *string )
+  {
+    if (NULL == string)
+        return NULL;
+
+    unsigned char *bytes = (unsigned char*)string;
+
+    Init() ;
+    Update( bytes, strlen(string) ) ;
+    Final() ;
+
+    return digestChars ;
+  }
+};
+
+// Computes the MD5 of the C string `str` and stores the 32-digit hex form in
+// `res`. Returns false, with `res` set to "", when `str` is NULL or the digest
+// could not be produced.
+inline bool md5String(const char* str, std::string& res)
+{
+    MD5 hasher;
+    const char * const hex = (NULL == str) ? NULL : hasher.digestString(str);
+    const bool ok = (NULL != hex);
+
+    // `res` is only written after hashing has finished, so `str` may point
+    // into `res` itself.
+    if (ok)
+        res = hex;
+    else
+        res = "";
+    return ok;
+}
+
+// Computes the MD5 of the file at `filepath` and stores the 32-digit hex form
+// in `res`. Returns false, with `res` set to "", when the path is NULL or
+// empty, or when MD5::digestFile reports failure by returning NULL.
+inline bool md5File(const char* filepath, std::string& res)
+{
+    const bool pathUsable = (NULL != filepath) && (0 != strcmp(filepath, ""));
+
+    MD5 hasher;
+    const char * const hex = pathUsable ? hasher.digestFile(filepath) : NULL;
+    const bool ok = (NULL != hex);
+
+    if (ok)
+        res = hex;
+    else
+        res = "";
+    return ok;
+}
+}
+#endif

+ 57 - 0
Jieba/Classes/CppJieba/Limonp/MutexLock.hpp

@@ -0,0 +1,57 @@
+#ifndef LIMONP_MUTEX_LOCK_HPP
+#define LIMONP_MUTEX_LOCK_HPP
+
+#include <pthread.h>
+#include "NonCopyable.hpp"
+#include "HandyMacro.hpp"
+
+namespace Limonp
+{
+    // Owns one pthread mutex: initialised in the constructor, destroyed in the
+    // destructor. lock()/unlock() are private; only MutexLockGuard (a friend)
+    // may call them.
+    // NOTE(review): every pthread call is wrapped in LIMONP_CHECK, which is
+    // defined in HandyMacro.hpp (not visible here) - confirm that the macro
+    // still evaluates its argument in release builds.
+    class MutexLock: NonCopyable
+    {
+        public:
+            MutexLock()
+            {
+                LIMONP_CHECK(!pthread_mutex_init(&mutex_, NULL));
+            }
+            ~MutexLock()
+            {
+                LIMONP_CHECK(!pthread_mutex_destroy(&mutex_));
+            }
+            // Exposes the underlying handle to code that needs the raw mutex.
+            pthread_mutex_t* getPthreadMutex()
+            {
+                return &mutex_;
+            }
+        private:
+            friend class MutexLockGuard;
+            void lock()
+            {
+                LIMONP_CHECK(!pthread_mutex_lock(&mutex_));
+            }
+            void unlock()
+            {
+                LIMONP_CHECK(!pthread_mutex_unlock(&mutex_));
+            }
+            pthread_mutex_t mutex_;
+    };
+    // Scoped lock: locks the given MutexLock on construction and unlocks it on
+    // destruction. The guard stores a reference, so the MutexLock has to
+    // outlive it.
+    class MutexLockGuard: NonCopyable
+    {
+        private:
+            MutexLock & mutex_;
+        public:
+            explicit MutexLockGuard(MutexLock & mutex): mutex_(mutex)
+            {
+                mutex_.lock();
+            }
+            ~MutexLockGuard()
+            {
+                mutex_.unlock();
+            }
+    };
+#define MutexLockGuard(x) assert(false);
+}
+
+#endif

+ 125 - 0
Jieba/Classes/CppJieba/Limonp/MysqlClient.hpp

@@ -0,0 +1,125 @@
+#ifndef LIMONP_MYSQLCLIENT_H
+#define LIMONP_MYSQLCLIENT_H
+
+#include <mysql.h>
+#include <iostream>
+#include <vector>
+#include <string>
+#include "Logger.hpp"
+#include "InitOnOff.hpp"
+
+namespace Limonp
+{
+    using namespace std;
+    // Thin wrapper around one MySQL C-API connection.
+    // The constructor connects immediately and records success or failure via
+    // InitOnOff::setInitFlag_(); every operation returns a failure value when
+    // no usable connection exists.
+    class MysqlClient: public InitOnOff
+    {
+        public:
+            // Result set: one vector<string> per row, one string per column.
+            typedef vector< vector<string> > RowsType;
+
+            MysqlClient(const string& host, size_t port, const string& user, const string& passwd, const string& db, const string& charset = "utf8"): host_(host), port_(port), user_(user), passwd_(passwd), db_(db), charset_(charset), conn_(NULL)
+            {
+                setInitFlag_(init_());
+            }
+            ~MysqlClient()
+            {
+                closeConn_();
+            }
+
+            // Runs one SQL statement. Returns false (and logs) when the client
+            // was not initialised successfully or when mysql_query() fails.
+            bool executeSql(const string& sql)
+            {
+                assert(getInitFlag_());
+                // assert() is compiled out under NDEBUG; never hand a missing or
+                // half-initialised connection to the C API.
+                if(!getInitFlag_() || NULL == conn_)
+                {
+                    LogError("executeSql called without an initialised connection. [%s]", sql.c_str());
+                    return false;
+                }
+                if(mysql_query(conn_, sql.c_str())) 
+                {
+                    LogError("mysql_query failed.  %s", mysql_error(conn_));
+                    return false;
+                }
+                return true;
+            }
+            // Executes one "insert into <tableName> (<keys>) values <vals[i]>"
+            // statement per element of vals and returns how many succeeded.
+            // NOTE(review): the statement is assembled by plain concatenation, so
+            // tableName, keys and vals must come from trusted code, never from
+            // unescaped external input.
+            size_t insert(const string& tableName, const string& keys, const vector<string>& vals)
+            {
+                size_t retn = 0;
+                string sql;
+                for(size_t i = 0; i < vals.size(); i ++)
+                {
+                    // Built with std::string concatenation: no string_format overload
+                    // visible in StringUtil.hpp takes an output string as first argument.
+                    sql = "insert into " + tableName + " (" + keys + ") values " + vals[i];
+                    if(executeSql(sql))
+                    {
+                        retn ++;
+                    }
+                }
+                return retn;
+            }
+            // Executes a query and appends every returned row to `rows`
+            // (existing content of `rows` is kept). SQL NULL columns are stored
+            // as the literal text "NULL". Returns false on any failure.
+            bool select(const string& sql, RowsType& rows)
+            {
+                if(!executeSql(sql))
+                {
+                    LogError("executeSql failed. [%s]", sql.c_str());
+                    return false;
+                }
+                MYSQL_RES * result = mysql_store_result(conn_);
+                if(!result)
+                {
+                    // mysql_error() returns a C string, so it must be printed with %s.
+                    LogError("mysql_store_result failed.[%s]", mysql_error(conn_));
+                    return false;
+                }
+                size_t num_fields = mysql_num_fields(result);
+                MYSQL_ROW row;
+                while((row = mysql_fetch_row(result)))
+                {
+                    vector<string> vec;
+                    vec.reserve(num_fields);
+                    for(size_t i = 0; i < num_fields; i ++)
+                    {
+                        vec.push_back(row[i] ? row[i] : "NULL");
+                    }
+                    rows.push_back(vec);
+                }
+                mysql_free_result(result);
+                return true;
+            }
+
+        private:
+            // Creates the handle, connects, selects the character set and turns
+            // on automatic reconnect. On any failure the handle is released and
+            // conn_ is left NULL.
+            bool init_()
+            {
+                //cout<<mysql_get_client_info()<<endl;
+                if(NULL == (conn_ = mysql_init(NULL)))
+                {
+                    LogError("mysql_init faield. %s", mysql_error(conn_));
+                    return false;
+                }
+
+                if (mysql_real_connect(conn_, host_.c_str(), user_.c_str(), passwd_.c_str(), db_.c_str(), port_, NULL, 0) == NULL)
+                {
+                    LogError("mysql_real_connect failed. %s", mysql_error(conn_));
+                    closeConn_();
+                    return false;
+                }  
+
+                if(mysql_set_character_set(conn_, charset_.c_str()))
+                {
+                    LogError("mysql_set_character_set [%s] failed.", charset_.c_str());
+                    // Same cleanup as the connect-failure path above.
+                    closeConn_();
+                    return false;
+                }
+
+                // enable automatic reconnect
+                char value = 1;
+                mysql_options(conn_, MYSQL_OPT_RECONNECT, &value);
+
+                LogInfo("MysqlClient {host: %s, database:%s, charset:%s}", host_.c_str(), db_.c_str(), charset_.c_str());
+                return true;
+            }
+            // Closes the connection if one is open; safe to call repeatedly.
+            void closeConn_()
+            {
+                if(conn_)
+                {
+                    mysql_close(conn_);
+                    conn_ = NULL;
+                }
+            }
+
+        private:
+            const string host_;
+            const size_t port_;
+            const string user_;
+            const string passwd_;
+            const string db_;
+            const string charset_;
+            MYSQL * conn_;
+
+    };
+}
+
+#endif

+ 22 - 0
Jieba/Classes/CppJieba/Limonp/NonCopyable.hpp

@@ -0,0 +1,22 @@
+/************************************
+ ************************************/
+#ifndef LIMONP_NONCOPYABLE_H
+#define LIMONP_NONCOPYABLE_H
+
+#include <iostream>
+#include <string>
+
+namespace Limonp
+{
+    // Base class that makes derived classes non-copyable: the copy
+    // constructor and copy assignment operator are declared private and never
+    // defined. Construction and destruction are protected, so the class can
+    // only be used as a base.
+    class NonCopyable
+    {
+        private:
+            NonCopyable(const NonCopyable& );
+            const NonCopyable& operator=(const NonCopyable& );
+        protected:
+            NonCopyable() {}
+            ~NonCopyable() {}
+    };
+}
+
+#endif

+ 139 - 0
Jieba/Classes/CppJieba/Limonp/StdExtension.hpp

@@ -0,0 +1,139 @@
+#ifndef LIMONP_STD_EXTEMSION_HPP
+#define LIMONP_STD_EXTEMSION_HPP
+
+#include <map>
+
+// Pick the hash-container headers. Any compiler reporting C++11 or newer
+// (C++14, C++17, ... report larger __cplusplus values) has the standard
+// <unordered_map>/<unordered_set>; only older language modes fall back to the
+// TR1 headers and re-export the TR1 containers under the std:: names used by
+// the rest of the library.
+#if (__cplusplus >= 201103L)
+#include <unordered_map>
+#include <unordered_set>
+#else
+#include <tr1/unordered_map>
+#include <tr1/unordered_set>
+namespace std
+{
+    using std::tr1::unordered_map;
+    using std::tr1::unordered_set;
+}
+
+#endif
+
+#include <set>
+#include <vector>
+#include <fstream>
+#include <sstream>
+
+
+// Stream helpers for standard containers.
+// NOTE(review): these overloads are declared inside namespace std so that
+// argument-dependent lookup finds them; the C++ standard generally does not
+// permit adding declarations there, so confirm before relying on this with
+// new toolchains.
+namespace std
+{
+    // Prints a vector as ["a", "b", "c"]; an empty vector prints as [].
+    template<typename T>
+        ostream& operator << (ostream& os, const vector<T>& vec)
+        {
+            if(vec.empty())
+            {
+                os << "[]";
+                return os;
+            }
+            os << "[\"";
+            for(typename vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
+            {
+                if(it != vec.begin())
+                {
+                    os << "\", \"";
+                }
+                os << *it;
+            }
+            return os << "\"]";
+        }
+
+    // Prints a pair as first:second.
+    template<class T1, class T2>
+        ostream& operator << (ostream& os, const pair<T1, T2>& pr)
+        {
+            return os << pr.first << ":" << pr.second;
+        }
+
+
+    // Replaces (does not append to) str with the streamed text form of obj.
+    template<class T>
+        string& operator << (string& str, const T& obj)
+        {
+            stringstream buffer;
+            buffer << obj; // uses the ostream overload for T
+            str = buffer.str();
+            return str;
+        }
+
+    // Prints a map as {k1:v1, k2:v2}; an empty map prints as {}.
+    template<class T1, class T2>
+        ostream& operator << (ostream& os, const map<T1, T2>& mp)
+        {
+            if(mp.empty())
+            {
+                return os << "{}";
+            }
+            os << '{';
+            for(typename map<T1, T2>::const_iterator it = mp.begin(); it != mp.end(); ++it)
+            {
+                if(it != mp.begin())
+                {
+                    os << ", ";
+                }
+                os << *it;
+            }
+            return os << '}';
+        }
+
+    // Same format as the map overload; elements appear in the container's
+    // own iteration order.
+    template<class T1, class T2>
+        ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp)
+        {
+            if(mp.empty())
+            {
+                os << "{}";
+                return os;
+            }
+            os << '{';
+            for(typename std::unordered_map<T1, T2>::const_iterator it = mp.begin(); it != mp.end(); ++it)
+            {
+                if(it != mp.begin())
+                {
+                    os << ", ";
+                }
+                os << *it;
+            }
+            os << '}';
+            return os;
+        }
+
+    // Prints a set as {a, b, c}; an empty set prints as {}.
+    template<class T>
+        ostream& operator << (ostream& os, const set<T>& st)
+        {
+            if(st.empty())
+            {
+                return os << "{}";
+            }
+            os << '{';
+            for(typename set<T>::const_iterator it = st.begin(); it != st.end(); ++it)
+            {
+                if(it != st.begin())
+                {
+                    os << ", ";
+                }
+                os << *it;
+            }
+            return os << '}';
+        }
+
+    // True when contain.find(key) locates an element.
+    template<class KeyType, class ContainType>
+        bool isIn(const ContainType& contain, const KeyType& key)
+        {
+            return contain.find(key) != contain.end();
+        }
+
+    // Replaces s with everything that remains readable from ifs.
+    template<class T>
+        basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs)
+        {
+            const istreambuf_iterator<T> first(ifs);
+            const istreambuf_iterator<T> last;
+            s.assign(first, last);
+            return s;
+        }
+
+    // Writes the characters of s to ofs through its stream buffer.
+    template<class T>
+        ofstream & operator << (ofstream & ofs, const basic_string<T>& s)
+        {
+            copy(s.begin(), s.end(), ostreambuf_iterator<T>(ofs));
+            return ofs;
+        }
+}
+
+#endif

+ 349 - 0
Jieba/Classes/CppJieba/Limonp/StringUtil.hpp

@@ -0,0 +1,349 @@
+/************************************
+ * file enc : ascii
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_STR_FUNCTS_H
+#define LIMONP_STR_FUNCTS_H
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <map>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <functional> 
+#include <locale>
+#include <sstream>
+#include <sys/types.h>
+#include <iterator>
+#include <algorithm>
+#include "StdExtension.hpp"
+
+namespace Limonp
+{
+    using namespace std;
+    // printf-style formatting into a std::string.
+    //   fmt - printf format string, followed by its arguments
+    // Returns the formatted text. If vsnprintf() keeps reporting an error even
+    // after the buffer has grown to 1 MiB, an empty string is returned instead
+    // of retrying forever.
+    inline string string_format(const char* fmt, ...) 
+    {
+        const int maxSize = 1 << 20;
+        int size = 256;
+        std::string str;
+        va_list ap;
+        for (;;) {
+            str.resize(size);
+            va_start(ap, fmt);
+            // Write through &str[0]; the pointer returned by c_str() is const.
+            int n = vsnprintf(&str[0], size, fmt, ap);
+            va_end(ap);
+            if (n > -1 && n < size) {
+                str.resize(n);
+                return str;
+            }
+            if (n > -1) {
+                size = n + 1;          // exact size needed is known
+            } else if (size >= maxSize) {
+                return std::string();  // persistent failure: give up
+            } else {
+                size *= 2;             // size unknown, grow and retry
+            }
+        }
+    }
+
+    // Concatenates the elements of [begin, end) into res, separated by
+    // connector. Elements are converted with operator<<. res is always
+    // overwritten: an empty range yields an empty string rather than leaving
+    // whatever res held before the call.
+    template<class T>
+        void join(T begin, T end, string& res, const string& connector)
+        {
+            res.clear();
+            if(begin == end)
+            {
+                return;
+            }
+            stringstream ss;
+            ss<<*begin;
+            ++begin;
+            for(; begin != end; ++begin)
+            {
+                ss << connector << *begin;
+            }
+            res = ss.str();
+        }
+
+    // Convenience overload returning the joined string by value.
+    template<class T>
+        string join(T begin, T end, const string& connector)
+        {
+            string res;
+            join(begin ,end, res, connector);
+            return res;
+        }
+
+
+
+    // Splits src at every character contained in pattern (each character of
+    // pattern is a delimiter on its own, see find_first_of).
+    //   res    - receives the fields; cleared first unless src is empty
+    //   offset - number of leading fields to skip
+    //   len    - maximum number of fields to store
+    // Returns false only when src is empty (res is left untouched then). A
+    // delimiter at the very end of src does not produce a trailing empty field.
+    inline bool split(const string& src, vector<string>& res, const string& pattern, size_t offset = 0, size_t len = string::npos)
+    {
+        if(src.empty())
+        {
+            return false;
+        }
+        res.clear();
+
+        size_t fieldIdx = 0;
+        for(size_t pos = 0; pos < src.size() && res.size() < len; ++fieldIdx)
+        {
+            const size_t hit = src.find_first_of(pattern, pos);
+            const bool wanted = (fieldIdx >= offset);
+            if(string::npos == hit)
+            {
+                // Last field: everything up to the end of src.
+                if(wanted)
+                {
+                    res.push_back(src.substr(pos));
+                }
+                break;
+            }
+            if(wanted)
+            {
+                res.push_back(src.substr(pos, hit - pos));
+            }
+            pos = hit + 1;
+        }
+        return true;
+    }
+
+    // Upper-cases str in place using toupper() and returns it.
+    // Each char goes through unsigned char first: passing a negative char
+    // (any byte >= 0x80, e.g. inside UTF-8 text) straight to toupper() is
+    // undefined behaviour.
+    inline string& upper(string& str)
+    {
+        for(string::size_type i = 0; i < str.size(); ++i)
+        {
+            str[i] = static_cast<char>(toupper(static_cast<unsigned char>(str[i])));
+        }
+        return str;
+    }
+
+    // Lower-cases str in place using tolower() and returns it.
+    // Each char goes through unsigned char first: passing a negative char
+    // (any byte >= 0x80, e.g. inside UTF-8 text) straight to tolower() is
+    // undefined behaviour.
+    inline string& lower(string& str)
+    {
+        for(string::size_type i = 0; i < str.size(); ++i)
+        {
+            str[i] = static_cast<char>(tolower(static_cast<unsigned char>(str[i])));
+        }
+        return str;
+    }
+
+    // Removes leading whitespace (as classified by isspace) from s in place.
+    // Written as a plain loop: std::ptr_fun and std::not1 are no longer part
+    // of the standard library in current C++ versions, and isspace() must not
+    // be given a negative char.
+    inline std::string &ltrim(std::string &s) 
+    {
+        std::string::size_type kept = 0;
+        while(kept < s.size() && std::isspace(static_cast<unsigned char>(s[kept])))
+        {
+            ++kept;
+        }
+        s.erase(0, kept);
+        return s;
+    }
+
+    // Removes trailing whitespace (as classified by isspace) from s in place.
+    inline std::string &rtrim(std::string &s) 
+    {
+        std::string::size_type last = s.size();
+        while(last > 0 && std::isspace(static_cast<unsigned char>(s[last - 1])))
+        {
+            --last;
+        }
+        s.erase(last);
+        return s;
+    }
+
+    // Removes whitespace from both ends of s in place.
+    inline std::string &trim(std::string &s) 
+    {
+        return ltrim(rtrim(s));
+    }
+
+    // Removes every leading occurrence of the character x from s in place.
+    // Written as a plain loop: std::bind2nd and std::not1 are no longer part
+    // of the standard library in current C++ versions.
+    inline std::string & ltrim(std::string & s, char x)
+    {
+        std::string::size_type kept = 0;
+        while(kept < s.size() && s[kept] == x)
+        {
+            ++kept;
+        }
+        s.erase(0, kept);
+        return s;
+    }
+
+    // Removes every trailing occurrence of the character x from s in place.
+    inline std::string & rtrim(std::string & s, char x)
+    {
+        std::string::size_type last = s.size();
+        while(last > 0 && s[last - 1] == x)
+        {
+            --last;
+        }
+        s.erase(last);
+        return s;
+    }
+
+    // Removes the character x from both ends of s in place.
+    inline std::string &trim(std::string &s, char x)
+    {
+        return ltrim(rtrim(s, x), x);
+    }
+
+    // True when str begins with prefix; an empty prefix always matches.
+    inline bool startsWith(const string& str, const string& prefix)
+    {
+        const size_t n = prefix.length();
+        return n <= str.length() && str.compare(0, n, prefix) == 0;
+    }
+
+    // True when str ends with suffix; an empty suffix always matches.
+    inline bool endsWith(const string& str, const string& suffix)
+    {
+        const size_t n = suffix.length();
+        return n <= str.length() && str.compare(str.length() - n, n, suffix) == 0;
+    }
+
+    // True when the character ch occurs anywhere in str.
+    inline bool isInStr(const string& str, char ch)
+    {
+        return std::find(str.begin(), str.end(), ch) != str.end();
+    }
+
+    // Combines two bytes into one 16-bit value: `high` becomes bits 15..8 and
+    // `low` becomes bits 7..0. Each char is treated as its unsigned byte value.
+    inline uint16_t twocharToUint16(char high, char low)
+    {
+        const uint16_t hi = static_cast<uint8_t>(high);
+        const uint16_t lo = static_cast<uint8_t>(low);
+        return static_cast<uint16_t>((hi << 8) | lo);
+    }
+
+    // Decodes UTF-8 bytes into 16-bit code units, one unit per decoded
+    // sequence. Only 1-, 2- and 3-byte sequences are handled; a lead byte
+    // above 0xef or a sequence cut off by `len` makes the function return
+    // false (vec then holds whatever was decoded up to that point).
+    // Returns false without touching vec when str is NULL.
+    // NOTE(review): lead bytes 0x80..0xbf are taken as 2-byte leads and
+    // continuation bytes are not checked for the 10xxxxxx form, so malformed
+    // input decodes to arbitrary units instead of failing - confirm whether
+    // callers depend on that leniency.
+    template <class Uint16Container>
+    bool utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec)
+    {
+        if(NULL == str)
+        {
+            return false;
+        }
+        vec.clear();
+        size_t pos = 0;
+        while(pos < len)
+        {
+            const uint8_t lead = static_cast<uint8_t>(str[pos]);
+            const size_t left = len - pos; // bytes available, including the lead
+            if(lead < 0x80) // 0xxxxxxx
+            {
+                vec.push_back(str[pos]);
+                pos += 1;
+            }
+            else if(lead <= 0xdf && left > 1) // 110xxxxx 10xxxxxx
+            {
+                const uint8_t b1 = static_cast<uint8_t>(str[pos + 1]);
+                const uint16_t unit = static_cast<uint16_t>(((lead & 0x1f) << 6) | (b1 & 0x3f));
+                vec.push_back(unit);
+                pos += 2;
+            }
+            else if(lead <= 0xef && left > 2) // 1110xxxx 10xxxxxx 10xxxxxx
+            {
+                const uint8_t b1 = static_cast<uint8_t>(str[pos + 1]);
+                const uint8_t b2 = static_cast<uint8_t>(str[pos + 2]);
+                const uint16_t unit = static_cast<uint16_t>(((lead & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f));
+                vec.push_back(unit);
+                pos += 3;
+            }
+            else
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+    // Convenience overload decoding all bytes of a std::string.
+    template <class Uint16Container>
+    bool utf8ToUnicode(const string& str, Uint16Container& vec)
+    {
+        return utf8ToUnicode(str.c_str(), str.size(), vec);
+    }
+
+    // Encodes the 16-bit units in [begin, end) as UTF-8 into res, using 1, 2
+    // or 3 bytes per unit. Returns false, leaving res untouched, when the
+    // range is empty (begin >= end); the iterators must support that
+    // comparison.
+    template <class Uint16ContainerConIter>
+    bool unicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res)
+    {
+        if(begin >= end)
+        {
+            return false;
+        }
+        res.clear();
+        for(; begin != end; begin++)
+        {
+            const uint16_t unit = *begin;
+            if(unit <= 0x7f) // 0xxxxxxx
+            {
+                res += char(unit);
+                continue;
+            }
+            if(unit <= 0x7ff) // 110xxxxx 10xxxxxx
+            {
+                res += char(0xc0 | ((unit >> 6) & 0x1f));
+                res += char(0x80 | (unit & 0x3f));
+                continue;
+            }
+            // 1110xxxx 10xxxxxx 10xxxxxx
+            res += char(0xe0 | ((unit >> 12) & 0x0f));
+            res += char(0x80 | ((unit >> 6) & 0x3f));
+            res += char(0x80 | (unit & 0x3f));
+        }
+        return true;
+    }
+
+    
+    // Converts a double-byte encoded buffer into 16-bit units: a byte with the
+    // high bit clear becomes one unit; any other byte is combined with the
+    // byte after it (first byte in bits 15..8). vec is cleared first. Returns
+    // false when str is NULL or when a high-bit byte is the last byte.
+    template <class Uint16Container>
+    bool gbkTrans(const char* const str, size_t len, Uint16Container& vec)
+    {
+        vec.clear();
+        if(NULL == str)
+        {
+            return false;
+        }
+        for(size_t pos = 0; pos < len; )
+        {
+            if(0 == (str[pos] & 0x80))
+            {
+                vec.push_back(uint16_t(str[pos]));
+                pos += 1;
+                continue;
+            }
+            if(pos + 1 >= len)
+            {
+                return false; // lead byte without its trailing byte
+            }
+            const uint16_t hi = uint16_t(str[pos]) & 0x00ff;
+            const uint16_t lo = uint16_t(str[pos + 1]) & 0x00ff;
+            const uint16_t unit = (hi << 8) | lo;
+            vec.push_back(unit);
+            pos += 2;
+        }
+        return true;
+    }
+
+    // Convenience overload converting all bytes of a std::string.
+    template <class Uint16Container>
+    bool gbkTrans(const string& str, Uint16Container& vec)
+    {
+        return gbkTrans(str.c_str(), str.size(), vec);
+    }
+
+    // Inverse of the byte-to-unit gbkTrans: writes each 16-bit unit of
+    // [begin, end) to res as two bytes (high byte first) when the high byte
+    // has its top bit set, otherwise as the low byte only. Returns false,
+    // leaving res untouched, when the range is empty (begin >= end).
+    template <class Uint16ContainerConIter>
+    bool gbkTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res)
+    {
+        if(begin >= end)
+        {
+            return false;
+        }
+        res.clear();
+        for(; begin != end; begin++)
+        {
+            const char hi = ((*begin)>>8) & 0x00ff;
+            const char lo = (*begin) & 0x00ff;
+            if(hi & 0x80)
+            {
+                res += hi;
+            }
+            res += lo;
+        }
+        return true;
+    }
+
+    /*
+     * Stores the current local time, formatted with strftime(), in timeStr.
+     * format example: "%Y-%m-%d %H:%M:%S"
+     * timeStr is set to the empty string when the local time is unavailable
+     * or the formatted text does not fit into the 64-byte work buffer.
+     */
+    inline void getTime(const string& format, string&  timeStr)
+    {
+        time_t timeNow;
+        time(&timeNow);
+        // Format into a local buffer instead of writing through the const
+        // pointer returned by timeStr.c_str().
+        char buf[64];
+        const struct tm * tmNow = localtime(&timeNow);
+        const size_t len = (NULL == tmNow) ? 0 : strftime(buf, sizeof(buf), format.c_str(), tmNow);
+        timeStr.assign(buf, len);
+    }
+}
+#endif

+ 50 - 0
Jieba/Classes/CppJieba/Limonp/Thread.hpp

@@ -0,0 +1,50 @@
+#ifndef LIMONP_THREAD_HPP
+#define LIMONP_THREAD_HPP
+
+#include "HandyMacro.hpp"
+#include "NonCopyable.hpp"
+
+namespace Limonp
+{
+    /*
+     * Minimal pthread wrapper. Derive from it, implement run(), then call
+     * start() and, optionally, join(). A thread that was started but never
+     * joined is detached when the object is destroyed.
+     */
+    class IThread: NonCopyable
+    {
+        private:
+            pthread_t thread_;
+            bool isStarted;
+            bool isJoined;
+        public:
+            // thread_ is value-initialized so it never holds an indeterminate value.
+            IThread(): thread_(), isStarted(false), isJoined(false)
+            {
+            }
+            virtual ~IThread()
+            {
+                if(isStarted && !isJoined)
+                {
+                    LIMONP_CHECK(!pthread_detach(thread_));
+                }
+            };
+        public:
+            // Body executed on the new thread.
+            virtual void run() = 0;
+            // Creates the thread; must be called at most once.
+            void start()
+            {
+                assert(!isStarted);
+                LIMONP_CHECK(!pthread_create(&thread_, NULL, worker_, this));
+                isStarted = true;
+            }
+            // Waits for the thread to finish; must be called at most once.
+            // Does nothing when the thread was never started, because
+            // pthread_join on an unset pthread_t is not valid.
+            void join()
+            {
+                assert(!isJoined);
+                if(!isStarted)
+                {
+                    return;
+                }
+                LIMONP_CHECK(!pthread_join(thread_, NULL));
+                isJoined = true;
+            }
+        private:
+            // pthread entry point: data is the IThread passed to pthread_create.
+            static void * worker_(void * data)
+            {
+                IThread * ptr = (IThread* ) data;
+                ptr->run();
+                return NULL;
+            }
+    };
+}
+
+#endif

+ 105 - 0
Jieba/Classes/CppJieba/Limonp/ThreadPool.hpp

@@ -0,0 +1,105 @@
+#ifndef LIMONP_THREAD_POOL_HPP
+#define LIMONP_THREAD_POOL_HPP
+
+#include "Thread.hpp"
+#include "BlockingQueue.hpp"
+
+namespace Limonp
+{
+    /*
+     * Interface for a unit of work: implementations put their work in run().
+     * The virtual destructor lets a task be deleted through an ITask pointer.
+     */
+    class ITask
+    {
+        public:
+            virtual ~ITask()
+            {
+            }
+            // Performs the task's work.
+            virtual void run() = 0;
+    };
+
+    // Heap-allocates a TaskType constructed from one argument and returns it
+    // as an ITask pointer.
+    template <class TaskType, class ArgType>
+        ITask* CreateTask(ArgType arg) 
+        {
+            ITask* task = new TaskType(arg);
+            return task;
+        }
+    // Heap-allocates a TaskType constructed from two arguments and returns it
+    // as an ITask pointer.
+    template <class TaskType, class ArgType0, class ArgType1>
+        ITask* CreateTask(ArgType0 arg0, ArgType1 arg1) 
+        {
+            ITask* task = new TaskType(arg0, arg1);
+            return task;
+        }
+
+    //class ThreadPool;
+    /*
+     * Fixed-size pool of worker threads fed from a bounded blocking queue.
+     * Usage: construct, call start(), then add() heap-allocated tasks.
+     * A worker deletes each task after running it. The destructor stops the
+     * workers by queueing one NULL sentinel per worker and joining them.
+     */
+    class ThreadPool: NonCopyable
+    {
+        private:
+            class Worker: public IThread
+            {
+                private:
+                    ThreadPool * ptThreadPool_;
+                public:
+                    Worker(ThreadPool* pool): ptThreadPool_(pool)
+                    {
+                        assert(ptThreadPool_);
+                    }
+                    virtual ~Worker()
+                    {
+                    }
+                public:
+                    // Pops tasks until a NULL sentinel arrives; runs and deletes each task.
+                    virtual void run()
+                    {
+                        while(true)
+                        {
+                            ITask * task = ptThreadPool_->queue_.pop();
+                            if(task == NULL) 
+                            {
+                                break;
+                            }
+                            task->run();
+                            delete task;
+                        }
+                    }
+            };
+        private:
+            friend class Worker;
+        private:
+            vector<IThread*> threads_;
+            BoundedBlockingQueue<ITask*> queue_;
+            // true once start() has launched the workers
+            bool isStarted_;
+            //mutable MutexLock mutex_;
+            //Condition isEmpty__;
+        public:
+            // threadNum: number of workers; queueMaxSize: capacity passed to the task queue.
+            ThreadPool(size_t threadNum, size_t queueMaxSize): threads_(threadNum), queue_(queueMaxSize), isStarted_(false)//, mutex_(), isEmpty__(mutex_)
+            {
+                assert(threadNum);
+                assert(queueMaxSize);
+                for(size_t i = 0; i < threads_.size(); i ++)
+                {
+                    threads_[i] = new Worker(this);
+                }
+            }
+            ~ThreadPool()
+            {
+                // Only signal and join workers that are actually running.
+                // Without this guard, destroying a pool that was never started
+                // would join threads that do not exist, and could block forever
+                // pushing sentinels into a queue nobody drains.
+                if(isStarted_)
+                {
+                    for(size_t i = 0; i < threads_.size(); i ++)
+                    {
+                        queue_.push(NULL);
+                    }
+                    for(size_t i = 0; i < threads_.size(); i ++)
+                    {
+                        threads_[i]->join();
+                    }
+                }
+                // NOTE(review): tasks still queued in a pool that was never
+                // started are not deleted here -- confirm whether callers rely on that.
+                for(size_t i = 0; i < threads_.size(); i ++)
+                {
+                    delete threads_[i];
+                }
+            }
+            
+        public:
+            // Launches every worker thread; call once.
+            void start()
+            {
+                for(size_t i = 0; i < threads_.size(); i++)
+                {
+                    threads_[i]->start();
+                }
+                isStarted_ = true;
+            }
+
+            // Queues a non-NULL task; NULL is reserved as the shutdown sentinel.
+            void add(ITask* task)
+            {
+                assert(task);
+                queue_.push(task);
+            }
+    };
+}
+
+#endif

+ 148 - 0
Jieba/Classes/CppJieba/MPSegment.hpp

@@ -0,0 +1,148 @@
+#ifndef CPPJIEBA_MPSEGMENT_H
+#define CPPJIEBA_MPSEGMENT_H
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "Limonp/Logger.hpp"
+#include "DictTrie.hpp"
+#include "ISegment.hpp"
+#include "SegmentBase.hpp"
+
+namespace CppJieba
+{
+
+    /*
+     * Dictionary-based segmenter. For a range of code units it asks the
+     * DictTrie for the candidate words starting at each position, selects for
+     * every position the candidate with the highest accumulated weight
+     * (_calcDP), and then walks the selections from left to right (_cut).
+     */
+    class MPSegment: public SegmentBase
+    {
+        private:
+            DictTrie _dictTrie;
+
+        public:
+            // Leaves the dictionary unloaded; call init() before cutting.
+            MPSegment()
+            {
+            }
+            MPSegment(const string& dictPath, const string& userDictPath = "")
+            {
+                LIMONP_CHECK(init(dictPath, userDictPath));
+            }
+            virtual ~MPSegment()
+            {
+            }
+        public:
+            // Loads the dictionary (and optional user dictionary) into the trie.
+            bool init(const string& dictPath, const string& userDictPath = "")
+            {
+                LIMONP_CHECK(_dictTrie.init(dictPath, userDictPath));
+                LogInfo("MPSegment init(%s) ok", dictPath.c_str());
+                return true;
+            }
+            bool isUserDictSingleChineseWord(const Unicode::value_type & value) const
+            {
+                const bool found = _dictTrie.isUserDictSingleChineseWord(value);
+                return found;
+            }
+        public:
+            using SegmentBase::cut;
+            // Segments [begin, end) and appends the encoded words to res.
+            // Returns false for an empty range or when the Unicode cut fails.
+            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
+            {
+                if(begin == end)
+                {
+                    return false;
+                }
+
+                vector<Unicode> pieces;
+                pieces.reserve(end - begin);
+                if(!cut(begin, end, pieces))
+                {
+                    return false;
+                }
+                const size_t base = res.size();
+                res.resize(base + pieces.size());
+                for(size_t k = 0; k < pieces.size(); k++)
+                {
+                    string& slot = res[base + k];
+                    if(!TransCode::encode(pieces[k], slot))
+                    {
+                        // keep an empty placeholder for a word that cannot be encoded
+                        LogError("encode failed.");
+                        slot.clear();
+                    }
+                }
+                return true;
+            }
+
+            // Segments [begin, end) and appends the words, as Unicode, to res.
+            bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const
+            {
+                if(begin == end)
+                {
+                    return false;
+                }
+                vector<SegmentChar> chars;
+                _dictTrie.find(begin, end, chars);  // candidate words per position
+                _calcDP(chars);                     // best candidate per position
+                _cut(chars, res);                   // emit the chosen words
+                return true;
+            }
+            const DictTrie* getDictTrie() const 
+            {
+                return &_dictTrie;
+            }
+
+        private:
+            // Right-to-left pass: for each position store the candidate whose
+            // own weight plus the best weight of the position following the
+            // candidate is largest. Candidates without a dictionary entry
+            // score the trie's minimum weight.
+            void _calcDP(vector<SegmentChar>& segmentChars) const
+            {
+                const size_t total = segmentChars.size();
+                for(ssize_t i = total - 1; i >= 0; i--)
+                {
+                    SegmentChar& cur = segmentChars[i];
+                    cur.pInfo = NULL;
+                    cur.weight = MIN_DOUBLE;
+                    assert(!cur.dag.empty());
+                    for(DagType::const_iterator edge = cur.dag.begin(); edge != cur.dag.end(); edge++)
+                    {
+                        const size_t lastPos = edge->first;
+                        const DictUnit* unit = edge->second;
+                        double score = 0.0;
+                        if(lastPos + 1 < total)
+                        {
+                            score += segmentChars[lastPos + 1].weight;
+                        }
+
+                        if(unit)
+                        {
+                            score += unit->weight; 
+                        }
+                        else
+                        {
+                            score += _dictTrie.getMinWeight();
+                        }
+                        if(score > cur.weight)
+                        {
+                            cur.pInfo = unit;
+                            cur.weight = score;
+                        }
+                    }
+                }
+            }
+            // Left-to-right pass: emit the word chosen at the current position
+            // and skip over it, or emit a single code unit when none was chosen.
+            void _cut(const vector<SegmentChar>& segmentChars, vector<Unicode>& res) const
+            {
+                for(size_t i = 0; i < segmentChars.size(); )
+                {
+                    const DictUnit* unit = segmentChars[i].pInfo;
+                    if(unit == NULL)
+                    {
+                        //single chinese word
+                        res.push_back(Unicode(1, segmentChars[i].uniCh));
+                        i++;
+                        continue;
+                    }
+                    res.push_back(unit->word);
+                    i += unit->word.size();
+                }
+            }
+
+
+    };
+}
+
+#endif

+ 121 - 0
Jieba/Classes/CppJieba/MixSegment.hpp

@@ -0,0 +1,121 @@
+#ifndef CPPJIEBA_MIXSEGMENT_H
+#define CPPJIEBA_MIXSEGMENT_H
+
+#include <cassert>
+#include "MPSegment.hpp"
+#include "HMMSegment.hpp"
+#include "Limonp/StringUtil.hpp"
+
+namespace CppJieba
+{
+    /*
+     * Two-stage segmenter: cuts with MPSegment first, then re-cuts every run
+     * of consecutive single-unit words that are not user-dictionary
+     * single-character words with HMMSegment.
+     */
+    class MixSegment: public SegmentBase
+    {
+        private:
+            MPSegment _mpSeg;
+            HMMSegment _hmmSeg;
+        public:
+            // Leaves both segmenters unloaded; call init() before cutting.
+            MixSegment()
+            {
+            }
+            MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "")
+            {
+                LIMONP_CHECK(init(mpSegDict, hmmSegDict, userDict));
+            }
+            virtual ~MixSegment()
+            {
+            }
+        public:
+            // Initializes the MP segmenter (dictionary + user dictionary) and the HMM segmenter.
+            bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "")
+            {
+                LIMONP_CHECK(_mpSeg.init(mpSegDict, userDict));
+                LIMONP_CHECK(_hmmSeg.init(hmmSegDict));
+                LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str());
+                return true;
+            }
+        public:
+            using SegmentBase::cut;
+        public:
+            // Segments [begin, end) and appends the words, as Unicode, to res.
+            // Returns false when either underlying segmenter reports failure.
+            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
+            {
+                vector<Unicode> mpWords;
+                mpWords.reserve(end - begin);
+                if(!_mpSeg.cut(begin, end, mpWords))
+                {
+                    LogError("mpSeg cutDAG failed.");
+                    return false;
+                }
+
+                vector<Unicode> hmmWords;
+                hmmWords.reserve(end - begin);
+                Unicode run;
+                run.reserve(end - begin);
+                const size_t total = mpWords.size();
+                size_t i = 0;
+                while (i < total)
+                {
+                    const Unicode& word = mpWords[i];
+                    // multi-unit words and user-dictionary single words are kept as they are
+                    if (1 != word.size() || _mpSeg.isUserDictSingleChineseWord(word[0]))
+                    {
+                        res.push_back(word);
+                        i++;
+                        continue;
+                    }
+
+                    // gather the run of remaining single units starting at i
+                    size_t j = i;
+                    for (; j < total && 1 == mpWords[j].size() && !_mpSeg.isUserDictSingleChineseWord(mpWords[j][0]); j++)
+                    {
+                        run.push_back(mpWords[j][0]);
+                    }
+
+                    // let the HMM segmenter decide how the run is split
+                    if (!_hmmSeg.cut(run.begin(), run.end(), hmmWords))
+                    {
+                        LogError("_hmmSeg cut failed.");
+                        return false;
+                    }
+                    res.insert(res.end(), hmmWords.begin(), hmmWords.end());
+
+                    // reset the scratch buffers and continue after the run
+                    run.clear();
+                    hmmWords.clear();
+                    i = j;
+                }
+                return true;
+            }
+
+            // Segments [begin, end) and appends the encoded words to res.
+            // A word that fails to encode is logged; its slot is left as encode() left it.
+            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
+            {
+                if(begin == end)
+                {
+                    return false;
+                }
+
+                vector<Unicode> uniWords;
+                uniWords.reserve(end - begin);
+                if (!cut(begin, end, uniWords))
+                {
+                    return false;
+                }
+
+                const size_t base = res.size();
+                res.resize(base + uniWords.size());
+                for(size_t k = 0; k < uniWords.size(); k++)
+                {
+                    if(!TransCode::encode(uniWords[k], res[base + k]))
+                    {
+                        LogError("encode failed.");
+                    }
+                }
+                return true;
+            }
+
+            const DictTrie* getDictTrie() const 
+            {
+                return _mpSeg.getDictTrie();
+            }
+    };
+}
+
+#endif

+ 109 - 0
Jieba/Classes/CppJieba/PosTagger.hpp

@@ -0,0 +1,109 @@
+#ifndef CPPJIEBA_POS_TAGGING_H
+#define CPPJIEBA_POS_TAGGING_H
+
+#include "MixSegment.hpp"
+#include "Limonp/StringUtil.hpp"
+#include "DictTrie.hpp"
+
+namespace CppJieba
+{
+    using namespace Limonp;
+
+    static const char* const POS_M = "m";
+    static const char* const POS_ENG = "eng";
+    static const char* const POS_X = "x";
+
+    /*
+     * Part-of-speech tagger: segments a sentence with MixSegment and looks
+     * each word up in the segmenter's DictTrie to obtain its tag. Words that
+     * are missing from the dictionary, or have an empty tag, are tagged by
+     * _specialRule().
+     */
+    class PosTagger
+    {
+        private:
+            MixSegment _segment;
+            // Dictionary owned by _segment; NULL until init() has run.
+            const DictTrie * _dictTrie;
+
+        public:
+            // Leaves the tagger uninitialized; call init() before tag().
+            PosTagger(): _dictTrie(NULL)
+            {}
+            PosTagger(
+                const string& dictPath, 
+                const string& hmmFilePath,
+                const string& userDictPath = ""
+            ): _dictTrie(NULL)
+            {
+                init(dictPath, hmmFilePath, userDictPath);
+            };
+            ~PosTagger(){};
+        public:
+            // Initializes the segmenter and caches its dictionary pointer.
+            void init(
+                const string& dictPath, 
+                const string& hmmFilePath,
+                const string& userDictPath = ""
+            )
+            {
+                LIMONP_CHECK(_segment.init(dictPath, hmmFilePath, userDictPath));
+                _dictTrie = _segment.getDictTrie();
+                LIMONP_CHECK(_dictTrie);
+            };
+            
+
+            // Appends (word, tag) pairs for src to res.
+            // Returns false when the tagger is not initialized, when
+            // segmentation or decoding fails, or when res is still empty.
+            bool tag(const string& src, vector<pair<string, string> >& res) const
+            {
+                // A default-constructed tagger has no dictionary yet.
+                if (_dictTrie == NULL)
+                {
+                    LogError("PosTagger is not initialized.");
+                    return false;
+                }
+
+                vector<string> cutRes;
+                if (!_segment.cut(src, cutRes))
+                {
+                    LogError("_mixSegment cut failed");
+                    return false;
+                }
+
+                const DictUnit *tmp = NULL;
+                Unicode unico;
+                for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
+                {
+                    if (!TransCode::decode(*itr, unico))
+                    {
+                        LogError("decode failed.");
+                        return false;
+                    }
+                    tmp = _dictTrie->find(unico.begin(), unico.end());
+                    if(tmp == NULL || tmp->tag.empty())
+                    {
+                        res.push_back(make_pair(*itr, _specialRule(unico)));
+                    }
+                    else
+                    {
+                        res.push_back(make_pair(*itr, tmp->tag));
+                    }
+                }
+                return !res.empty();
+            }
+        private:
+            // Fallback tag for words without a dictionary tag:
+            // POS_X when no ASCII unit was counted, POS_M when every counted
+            // ASCII unit is a digit, POS_ENG otherwise.
+            // Counting stops once the ASCII count reaches half the word length.
+            // NOTE(review): for a one-unit word size()/2 is 0, so the loop never
+            // runs and the result is always POS_X -- confirm this is intended.
+            const char* _specialRule(const Unicode& unicode) const
+            {
+                size_t m = 0;
+                size_t eng = 0;
+                for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) 
+                {
+                    if(unicode[i] < 0x80)
+                    {
+                        eng ++;
+                        if('0' <= unicode[i] && unicode[i] <= '9')
+                        {
+                            m++;
+                        }
+                    }
+                }
+                // ascii char is not found
+                if(eng == 0)
+                {
+                    return POS_X;
+                }
+                // all the ascii is number char
+                if(m == eng)
+                {
+                    return POS_M;
+                }
+                // the ascii chars contain english letter
+                return POS_ENG;
+            }
+    };
+}
+
+#endif

+ 123 - 0
Jieba/Classes/CppJieba/QuerySegment.hpp

@@ -0,0 +1,123 @@
+#ifndef CPPJIEBA_QUERYSEGMENT_H
+#define CPPJIEBA_QUERYSEGMENT_H
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "Limonp/Logger.hpp"
+#include "DictTrie.hpp"
+#include "ISegment.hpp"
+#include "SegmentBase.hpp"
+#include "FullSegment.hpp"
+#include "MixSegment.hpp"
+#include "TransCode.hpp"
+#include "DictTrie.hpp"
+
+namespace CppJieba
+{
+    /*
+     * Query-oriented segmenter: cuts with MixSegment, then re-cuts every word
+     * longer than _maxWordLen with FullSegment.
+     */
+    class QuerySegment: public SegmentBase
+    {
+    private:
+        MixSegment _mixSeg;
+        FullSegment _fullSeg;
+        // Words with more code units than this are re-cut with _fullSeg.
+        size_t _maxWordLen;
+
+    public:
+        // Leaves the segmenters unloaded; call init() before cutting.
+        // _maxWordLen starts at 0 so it never holds an indeterminate value.
+        QuerySegment(): _maxWordLen(0) {};
+        QuerySegment(const string& dict, const string& model, size_t maxWordLen): _maxWordLen(0)
+        {
+            init(dict, model, maxWordLen);
+        };
+        virtual ~QuerySegment(){};
+    public:
+        // Initializes the mix segmenter, shares its dictionary with the full
+        // segmenter, and stores the (non-zero) word-length threshold.
+        bool init(const string& dict, const string& model, size_t maxWordLen)
+        {
+            LIMONP_CHECK(_mixSeg.init(dict, model));
+            LIMONP_CHECK(_fullSeg.init(_mixSeg.getDictTrie()));
+            assert(maxWordLen);
+            _maxWordLen = maxWordLen;
+            return true;
+        }
+
+    public:
+        using SegmentBase::cut;
+
+    public:
+        // Segments [begin, end) and appends the words, as Unicode, to res.
+        // Returns false for an empty range or when the mix cut fails.
+        bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
+        {
+            if (begin >= end)
+            {
+                LogError("begin >= end");
+                return false;
+            }
+
+            //use mix cut first
+            vector<Unicode> mixRes;
+            if (!_mixSeg.cut(begin, end, mixRes))
+            {
+                LogError("_mixSeg cut failed.");
+                return false;
+            }
+
+            vector<Unicode> fullRes;
+            for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++)
+            {
+                // short enough: just use the mix result
+                if (mixResItr->size() <= _maxWordLen)
+                {
+                    res.push_back(*mixResItr);
+                    continue;
+                }
+
+                // too long: cut with _fullSeg and put its pieces in res
+                if (_fullSeg.cut(mixResItr->begin(), mixResItr->end(), fullRes))
+                {
+                    for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++)
+                    {
+                        res.push_back(*fullResItr);
+                    }
+                }
+                else
+                {
+                    // Keep the word when the full cut fails; dropping it
+                    // would silently lose part of the input.
+                    LogError("_fullSeg cut failed.");
+                    res.push_back(*mixResItr);
+                }
+
+                //clear tmp res
+                fullRes.clear();
+            }
+
+            return true;
+        }
+
+
+        // Segments [begin, end) and appends the encoded words to res.
+        // A word that fails to encode is logged and skipped.
+        bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
+        {
+            if (begin >= end)
+            {
+                LogError("begin >= end");
+                return false;
+            }
+
+            vector<Unicode> uRes;
+            if (!cut(begin, end, uRes))
+            {
+                LogError("get unicode cut result error.");
+                return false;
+            }
+
+            string tmp;
+            for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
+            {
+                if (TransCode::encode(*uItr, tmp))
+                {
+                    res.push_back(tmp);
+                }
+                else
+                {
+                    LogError("encode failed.");
+                }
+            }
+
+            return true;
+        }
+    };
+}
+
+#endif

+ 78 - 0
Jieba/Classes/CppJieba/SegmentBase.hpp

@@ -0,0 +1,78 @@
+#ifndef CPPJIEBA_SEGMENTBASE_H
+#define CPPJIEBA_SEGMENTBASE_H
+
+#include "TransCode.hpp"
+#include "Limonp/Logger.hpp"
+#include "Limonp/NonCopyable.hpp"
+#include "Limonp/HandyMacro.hpp"
+#include "ISegment.hpp"
+#include <cassert>
+
+
+namespace CppJieba
+{
+    using namespace Limonp;
+
+    //const char* const SPECIAL_CHARS = " \t\n";
+#ifndef CPPJIEBA_GBK
+    const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u};  
+#else
+    const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u};  
+#endif
+
+    /*
+     * Common base of the segmenters. It implements the string-level cut():
+     * the input is decoded, split at the symbols listed in SPECIAL_SYMBOL, and
+     * every stretch between symbols is handed to the iterator-based cut() that
+     * subclasses implement. Each special symbol becomes an output word of its own.
+     */
+    class SegmentBase: public ISegment, public NonCopyable
+    {
+        public:
+            SegmentBase(){_loadSpecialSymbols();};
+            virtual ~SegmentBase(){};
+        private:
+            unordered_set<UnicodeValueType> _specialSymbols;
+        private:
+            // Copies SPECIAL_SYMBOL into the lookup set.
+            void _loadSpecialSymbols()
+            {
+                size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
+                for(size_t i = 0; i < size; i ++)
+                {
+                    _specialSymbols.insert(SPECIAL_SYMBOL[i]);
+                }
+                assert(_specialSymbols.size());
+            }
+
+        public:
+            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const = 0;
+            // Segments str into res (res is cleared first).
+            // Returns true for an empty string (res stays empty) and false
+            // when str cannot be decoded.
+            virtual bool cut(const string& str, vector<string>& res) const
+            {
+                res.clear();
+                if(str.empty())
+                {
+                    return true;
+                }
+
+                Unicode unicode;
+                unicode.reserve(str.size());
+
+                // Do not segment a partial decode of malformed input and
+                // report it as success.
+                if(!TransCode::decode(str, unicode))
+                {
+                    LogError("decode failed.");
+                    return false;
+                }
+                
+                Unicode::const_iterator left = unicode.begin();
+                Unicode::const_iterator right;
+                
+                for(right = unicode.begin(); right != unicode.end(); right++)
+                {
+                    if(isIn(_specialSymbols, *right))
+                    {
+                        // flush the text that precedes the symbol
+                        if(left != right)
+                        {
+                            cut(left, right, res);
+                        }
+                        // the symbol itself is emitted as a separate word
+                        res.resize(res.size() + 1);
+                        TransCode::encode(right, right + 1, res.back());
+                        left = right + 1;
+                    }
+                }
+                // trailing text after the last symbol
+                if(left != right)
+                {
+                    cut(left, right, res);
+                }
+                
+                return true;
+            }
+    };
+}
+
+#endif

+ 63 - 0
Jieba/Classes/CppJieba/TransCode.hpp

@@ -0,0 +1,63 @@
+/************************************
+ * file enc : utf-8
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef CPPJIEBA_TRANSCODE_H
+#define CPPJIEBA_TRANSCODE_H
+
+
+#include "Limonp/StringUtil.hpp"
+#include "Limonp/LocalVector.hpp"
+
+namespace CppJieba
+{
+
+    using namespace Limonp;
+    typedef uint16_t UnicodeValueType;
+    typedef Limonp::LocalVector<UnicodeValueType> Unicode;
+    namespace TransCode
+    {
+        // Decodes str into res: UTF-8 by default, GBK when CPPJIEBA_GBK is defined.
+        // Returns the result of the underlying conversion routine.
+        inline bool decode(const string& str, Unicode& res)
+        {
+#ifndef CPPJIEBA_GBK
+            return utf8ToUnicode(str, res);
+#else
+            return gbkTrans(str, res);
+#endif
+        }
+
+        // Encodes [begin, end) into res: UTF-8 by default, GBK when
+        // CPPJIEBA_GBK is defined. Returns the result of the underlying
+        // conversion routine.
+        inline bool encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
+        {
+#ifndef CPPJIEBA_GBK
+            return unicodeToUtf8(begin, end, res);
+#else
+            return gbkTrans(begin, end, res);
+#endif
+        }
+        
+        // Encodes a whole Unicode sequence into res via the iterator overload.
+        inline bool encode(const Unicode& uni, string& res)
+        {
+            Unicode::const_iterator first = uni.begin();
+            Unicode::const_iterator last = uni.end();
+            return encode(first, last, res);
+        }
+
+        // Value-returning variant of encode(); the success flag of the
+        // underlying encode is discarded.
+        // The compiler is expected to elide the copy of the returned string.
+        inline string encode(Unicode::const_iterator begin, Unicode::const_iterator end) 
+        {
+            string encoded;
+            encoded.reserve(end - begin);
+            encode(begin, end, encoded);
+            return encoded;
+        }
+
+        // Value-returning variant of decode(); the success flag of the
+        // underlying decode is discarded.
+        // The compiler is expected to elide the copy of the returned sequence.
+        inline Unicode decode(const string& str)
+        {
+            Unicode decoded;
+            decoded.reserve(str.size());
+            decode(str, decoded);
+            return decoded;
+        }
+    }
+}
+
+#endif

+ 297 - 0
Jieba/Classes/CppJieba/Trie.hpp

@@ -0,0 +1,297 @@
+#ifndef CPPJIEBA_TRIE_HPP
+#define CPPJIEBA_TRIE_HPP
+
+#include "Limonp/StdExtension.hpp"
+#include <vector>
+#include <queue>
+
+namespace CppJieba
+{
+    using namespace std;
+
+    // One dictionary entry, referenced by pointer from trie nodes.
+    struct DictUnit
+    {
+        // the entry's word as a sequence of code units
+        Unicode word;
+        // score of the word; MPSegment::_calcDP sums these to compare candidates
+        double weight; 
+        // tag string returned by PosTagger::tag when it is not empty
+        string tag;
+    };
+
+    // for debugging: writes "<word> <tag> <weight>" with the weight printed
+    // to three decimals
+    inline ostream & operator << (ostream& os, const DictUnit& unit)
+    {
+        string wordText;
+        wordText << unit.word;
+        os << string_format("%s %s %.3lf", wordText.c_str(), unit.tag.c_str(), unit.weight);
+        return os;
+    }
+
+    typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
+
+    // Per-position working record used while segmenting a sentence.
+    struct SegmentChar 
+    {
+        // the code unit at this position
+        uint16_t uniCh;
+        // candidate words starting here: (index of last unit, dictionary entry or NULL)
+        DagType dag;
+        // candidate selected for this position by MPSegment::_calcDP (NULL if none)
+        const DictUnit * pInfo;
+        // accumulated weight of the selected candidate
+        double weight;
+        // initialized to 0; not read by the code visible in this file section
+        size_t nextPos;
+        SegmentChar():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0)
+        {}
+        ~SegmentChar() 
+        {}
+    };
+
+    typedef Unicode::value_type TrieKey;
+
+    /*
+     * Node of the dictionary trie. next maps a code unit to a child node
+     * (NULL when the node has no children), fail is the failure link set by
+     * Trie::_build, and ptValue is the dictionary entry ending at this node
+     * (NULL when no word ends here).
+     */
+    class TrieNode
+    {
+        public:
+            typedef unordered_map<TrieKey,  TrieNode*> NextMap;
+        public:
+            TrieNode * fail;
+            NextMap * next;
+            const DictUnit * ptValue;
+        public:
+            TrieNode(): fail(NULL), next(NULL), ptValue(NULL) 
+            {}
+            // Returns the child reached through key, or NULL when there is none.
+            const TrieNode * findNext(TrieKey key) const
+            {
+                if(next != NULL)
+                {
+                    NextMap::const_iterator hit = next->find(key);
+                    if(hit != next->end()) 
+                    {
+                        return hit->second;
+                    }
+                }
+                return NULL;
+            }
+    };
+
+    /*
+     * Trie over Unicode keys whose nodes carry failure links, so that one
+     * left-to-right scan of a sentence reports every dictionary word it
+     * contains. The trie stores pointers to DictUnit values; it does not own them.
+     * NOTE(review): the class owns its nodes through a raw pointer but does
+     * not disable copying -- copying a Trie would delete the nodes twice.
+     */
+    class Trie
+    {
+        private:
+            TrieNode* _root;
+        public:
+            // keys[i] is inserted with value valuePointers[i]; both vectors
+            // must have the same length. An empty key list yields an empty trie.
+            Trie(const vector<Unicode>& keys, const vector<const DictUnit*> & valuePointers)
+            {
+                _root = new TrieNode;
+                _createTrie(keys, valuePointers);
+                _build();// build automation
+            }
+            ~Trie()
+            {
+                if(_root)
+                {
+                    _deleteNode(_root);
+                }
+            }
+        public:
+            // Exact lookup: returns the entry stored for [begin, end), or NULL
+            // when that key is not in the trie.
+            const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const
+            {
+                TrieNode::NextMap::const_iterator citer;
+                const TrieNode* ptNode = _root;
+                for(Unicode::const_iterator it = begin; it != end; it++)
+                {
+                    assert(ptNode);
+                    if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*it)))
+                    {
+                        return NULL;
+                    }
+                    ptNode = citer->second;
+                }
+                return ptNode->ptValue;
+            }
+            // aho-corasick-automation 
+            // Fills res with one SegmentChar per input unit. res[i].dag always
+            // starts with the single-unit candidate (i, NULL); every dictionary
+            // word found in the input adds (index of its last unit, entry) to
+            // the dag of the position where the word starts.
+            void find(
+                        Unicode::const_iterator begin, 
+                        Unicode::const_iterator end, 
+                        vector<struct SegmentChar>& res
+                        ) const
+            {
+                res.resize(end - begin);
+                const TrieNode * now = _root;
+                const TrieNode* node = NULL;
+                // compiler will complain warnings if only "i < end - begin" .
+                for (size_t i = 0; i < size_t(end - begin); i++) 
+                {
+                    Unicode::value_type ch = *(begin + i);
+                    res[i].uniCh = ch;
+                    assert(res[i].dag.empty());
+                    res[i].dag.push_back(pair<vector<Unicode >::size_type, const DictUnit* >(i, NULL));
+                    bool flag = false;
+
+                    // rollback: follow failure links until a state accepts ch
+                    while( now != _root )
+                    {
+                        node = now->findNext(ch);
+                        if (node != NULL) 
+                        {
+                            flag = true;
+                            break;
+                        }
+                        else 
+                        {
+                            now = now->fail;
+                        }
+                    }
+
+                    // reached the root without a match: try the root itself
+                    if(!flag)
+                    {
+                        node = now->findNext(ch);
+                    }
+                    if(node == NULL) 
+                    {
+                        now = _root;
+                    } 
+                    else 
+                    {
+                        now = node;
+                        // report every word ending at position i by walking
+                        // the failure chain of the current state
+                        const TrieNode * temp = now;
+                        while(temp != _root) 
+                        {
+                            if (temp->ptValue) 
+                            {
+                                size_t pos = i - temp->ptValue->word.size() + 1;
+                                res[pos].dag.push_back(pair<vector<Unicode >::size_type, const DictUnit* >(i, temp->ptValue));
+                                // a one-unit word also fills in the default candidate
+                                if(pos == i) 
+                                {
+                                    res[pos].dag[0].second = temp->ptValue;
+                                }
+                            }
+                            temp = temp->fail;
+                            assert(temp);
+                        }
+                    }
+                }
+            }
+            // Prefix lookup: appends to res every dictionary word that is a
+            // prefix of [begin, end), as (index of its last unit + offset, entry).
+            // When res already holds exactly one element and the first unit is
+            // itself a word, that element's entry is filled in instead.
+            // Returns whether res is non-empty afterwards.
+            bool find(
+                        Unicode::const_iterator begin, 
+                        Unicode::const_iterator end, 
+                        DagType & res,
+                        size_t offset = 0) const
+            {
+                const TrieNode * ptNode = _root;
+                TrieNode::NextMap::const_iterator citer;
+                for(Unicode::const_iterator itr = begin; itr != end ; itr++)
+                {
+                    assert(ptNode);
+                    if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*itr)))
+                    {
+                        break;
+                    }
+                    ptNode = citer->second;
+                    if(ptNode->ptValue)
+                    {
+                        if(itr == begin && res.size() == 1) // first singleword
+                        {
+                            res[0].second = ptNode->ptValue;
+                        }
+                        else
+                        {
+                            res.push_back(pair<vector<Unicode >::size_type, const DictUnit* >(itr - begin + offset, ptNode->ptValue));
+                        }
+                    }
+                }
+                return !res.empty();
+            }
+        private:
+            // Computes the failure links breadth-first. Children of the root
+            // fail to the root; every other node fails to the node reached by
+            // the same key from the nearest ancestor failure state that has
+            // such a child, or to the root when there is none.
+            void _build()
+            {
+                queue<TrieNode*> que;
+                assert(_root->ptValue == NULL);
+                _root->fail = NULL;
+                // An empty trie has no child map and therefore no links to compute.
+                if(_root->next == NULL)
+                {
+                    return;
+                }
+                for(TrieNode::NextMap::iterator iter = _root->next->begin(); iter != _root->next->end(); iter++) {
+                    iter->second->fail = _root;
+                    que.push(iter->second);
+                }
+                TrieNode* back = NULL;
+                TrieNode::NextMap::iterator backiter;
+                while(!que.empty()) {
+                    TrieNode * now = que.front();
+                    que.pop();
+                    if(now->next == NULL) {
+                        continue;
+                    }
+                    for(TrieNode::NextMap::iterator iter = now->next->begin(); iter != now->next->end(); iter++) {
+                        back = now->fail;
+                        while(back != NULL) {
+                            if(back->next && (backiter = back->next->find(iter->first)) != back->next->end()) 
+                            {
+                                iter->second->fail = backiter->second;
+                                break;
+                            }
+                            back = back->fail;
+                        }
+                        if(back == NULL) {
+                            iter->second->fail = _root;
+                        }
+                        que.push(iter->second);
+                    }
+                }
+            }
+        private:
+            // Inserts every (key, value) pair; does nothing when either list is empty.
+            void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*> & valuePointers)
+            {
+                if(valuePointers.empty() || keys.empty())
+                {
+                    return;
+                }
+                assert(keys.size() == valuePointers.size());
+
+                for(size_t i = 0; i < keys.size(); i++)
+                {
+                    _insertNode(keys[i], valuePointers[i]);
+                }
+            }
+        private:
+            // Walks or creates the path for key and stores ptValue at its last
+            // node, replacing any value already stored there.
+            void _insertNode(const Unicode& key, const DictUnit* ptValue)
+            {
+                TrieNode* ptNode  = _root;
+
+                TrieNode::NextMap::const_iterator kmIter;
+
+                for(Unicode::const_iterator citer = key.begin(); citer != key.end(); citer++)
+                {
+                    // child maps are allocated lazily
+                    if(NULL == ptNode->next)
+                    {
+                        ptNode->next = new TrieNode::NextMap;
+                    }
+                    kmIter = ptNode->next->find(*citer);
+                    if(ptNode->next->end() == kmIter)
+                    {
+                        TrieNode * nextNode = new TrieNode;
+                        nextNode->next = NULL;
+                        nextNode->ptValue = NULL;
+
+                        (*ptNode->next)[*citer] = nextNode;
+                        ptNode = nextNode;
+                    }
+                    else
+                    {
+                        ptNode = kmIter->second;
+                    }
+                }
+                ptNode->ptValue = ptValue;
+            }
+            // Recursively frees node, its child map and all descendants.
+            void _deleteNode(TrieNode* node)
+            {
+                if(!node)
+                {
+                    return;
+                }
+                if(node->next)
+                {
+                     TrieNode::NextMap::iterator it;
+                    for(it = node->next->begin(); it != node->next->end(); it++)
+                    {
+                        _deleteNode(it->second);
+                    }
+                    delete node->next;
+                }
+                delete node;
+            }
+    };
+}
+
+#endif

+ 30 - 0
Jieba/Classes/Segmentor.cpp

@@ -0,0 +1,30 @@
+//
+//  Segmentor.cpp
+//  iosjieba
+//
+//  Created by yanyiwu on 14/12/24.
+//  Copyright (c) 2014年 yanyiwu. All rights reserved.
+//
+
+#include "Segmentor.h"
+#include <iostream>
+
+using namespace CppJieba;
+
+// Process-wide segmentor instance; stays NULL until JiebaInit() constructs it.
+CppJieba::MixSegment* globalSegmentor = NULL;
+
+// Creates the process-wide MixSegment (globalSegmentor) on first use.
+//
+// dictPath, hmmPath and userDictPath are forwarded, in that order, to the
+// MixSegment constructor.
+//
+// Only a call made while globalSegmentor is still NULL constructs anything;
+// every later call leaves the existing instance in place and ignores its
+// arguments.
+//
+// The function writes nothing to stdout: the former __FILE__/__LINE__ trace
+// was debugging residue and printed the build-machine source path on every
+// call.
+void JiebaInit(const string& dictPath, const string& hmmPath, const string& userDictPath)
+{
+    if(globalSegmentor == NULL) {
+        globalSegmentor = new MixSegment(dictPath, hmmPath, userDictPath);
+    }
+}
+
+// Segments `sentence` by calling cut(sentence, words) on the global segmentor.
+//
+// JiebaInit() must have been called beforehand. If it was not:
+//   - builds with assertions enabled stop at the assert below;
+//   - builds where assert is compiled out (NDEBUG) return immediately and
+//     leave `words` untouched, instead of dereferencing a NULL pointer.
+//
+// The function writes nothing to stdout: the former traces were debugging
+// residue and dumped the source path and the whole result on every call.
+void JiebaCut(const string& sentence, vector<string>& words)
+{
+    assert(globalSegmentor);
+    if(globalSegmentor == NULL) {
+        // Reached only when assert is disabled; see the header comment.
+        return;
+    }
+    globalSegmentor->cut(sentence, words);
+}

+ 25 - 0
Jieba/Classes/Segmentor.h

@@ -0,0 +1,25 @@
+//
+//  Segmentor.h
+//  iosjieba
+//
+//  Created by yanyiwu on 14/12/24.
+//  Copyright (c) 2014年 yanyiwu. All rights reserved.
+//
+
+#ifndef __iosjieba__Segmentor__
+#define __iosjieba__Segmentor__
+
+#include <stdio.h>
+
+#include "CppJieba/MixSegment.hpp"
+#include <string>
+#include <vector>
+
+extern CppJieba::MixSegment * globalSegmentor; // Shared segmentor instance; defined in Segmentor.cpp.
+// JiebaInit: constructs the shared MixSegment from the three paths if it does not exist yet; later calls keep the existing instance.
+void JiebaInit(const std::string& dictPath, const std::string& hmmPath, const std::string& userDictPath);
+// JiebaCut: calls globalSegmentor->cut(sentence, words); JiebaInit must have been called first.
+void JiebaCut(const std::string& sentence, std::vector<std::string>& words);
+
+
+#endif /* defined(__iosjieba__Segmentor__) */

+ 19 - 0
LICENSE

@@ -0,0 +1,19 @@
+Copyright (c) 2023 xcbosa-mini <xcbosa@forgetive.org>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

+ 29 - 0
README.md

@@ -0,0 +1,29 @@
+# Jieba
+
+[![CI Status](https://img.shields.io/travis/xcbosa-mini/Jieba.svg?style=flat)](https://travis-ci.org/xcbosa-mini/Jieba)
+[![Version](https://img.shields.io/cocoapods/v/Jieba.svg?style=flat)](https://cocoapods.org/pods/Jieba)
+[![License](https://img.shields.io/cocoapods/l/Jieba.svg?style=flat)](https://cocoapods.org/pods/Jieba)
+[![Platform](https://img.shields.io/cocoapods/p/Jieba.svg?style=flat)](https://cocoapods.org/pods/Jieba)
+
+## Example
+
+To run the example project, clone the repo and run `pod install` from the Example directory first.
+
+## Requirements
+
+## Installation
+
+Jieba is available through [CocoaPods](https://cocoapods.org). To install
+it, simply add the following line to your Podfile:
+
+```ruby
+pod 'Jieba'
+```
+
+## Author
+
+xcbosa-mini, xcbosa@forgetive.org
+
+## License
+
+Jieba is available under the MIT license. See the LICENSE file for more info.

+ 1 - 0
_Pods.xcodeproj

@@ -0,0 +1 @@
+Example/Pods/Pods.xcodeproj