flow like the river
This commit is contained in:
commit
013fe673f3
42435 changed files with 5764238 additions and 0 deletions
149
BACK_BACK/node_modules/grapheme-breaker/src/GraphemeBreaker.coffee
generated
vendored
Executable file
149
BACK_BACK/node_modules/grapheme-breaker/src/GraphemeBreaker.coffee
generated
vendored
Executable file
|
|
@ -0,0 +1,149 @@
|
|||
{CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT} = require './classes.json'
|
||||
UnicodeTrie = require 'unicode-trie'
|
||||
fs = require 'fs'
|
||||
classTrie = new UnicodeTrie fs.readFileSync __dirname + '/classes.trie'
|
||||
|
||||
# Gets the Unicode code point at UTF-16 index `idx` of `str`,
# handling surrogate pairs appropriately.
#
# str - the String to read from
# idx - UTF-16 code unit index; any falsy value is treated as 0
#
# If `idx` lands on either half of a well-formed surrogate pair, the code
# point of the whole pair is returned. An unpaired surrogate is returned
# unchanged. An out-of-range `idx` yields NaN, because that is what
# `charCodeAt` returns and NaN fails both range checks below.
codePointAt = (str, idx) ->
  idx = idx or 0
  code = str.charCodeAt(idx)

  # High surrogate: combine with the low surrogate that follows, if any
  if 0xD800 <= code <= 0xDBFF
    hi = code
    low = str.charCodeAt(idx + 1)
    if 0xDC00 <= low <= 0xDFFF
      return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000

    return hi

  # Low surrogate: combine with the high surrogate that precedes, if any
  if 0xDC00 <= code <= 0xDFFF
    hi = str.charCodeAt(idx - 1)
    low = code
    if 0xD800 <= hi <= 0xDBFF
      return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000

    return low

  return code
|
||||
|
||||
# Returns whether a break is allowed between the two given grapheme
# breaking classes.
#
# previous - class id of the code point before the candidate boundary
# current  - class id of the code point after the candidate boundary
#
# Class ids are the values destructured from classes.json at the top of
# this file. The rules are checked in order and the first match decides.
# Returns true when a break is allowed, false when the pair must stay together.
shouldBreak = (previous, current) ->
  # GB3. CR X LF
  if previous is CR and current is LF
    return false

  # GB4. (Control|CR|LF) ÷
  else if previous in [Control, CR, LF]
    return true

  # GB5. ÷ (Control|CR|LF)
  else if current in [Control, CR, LF]
    return true

  # GB6. L X (L|V|LV|LVT)
  else if previous is L and current in [L, V, LV, LVT]
    return false

  # GB7. (LV|V) X (V|T)
  else if previous in [LV, V] and current in [V, T]
    return false

  # GB8. (LVT|T) X (T)
  else if previous in [LVT, T] and current is T
    return false

  # GB8a. Regional_Indicator X Regional_Indicator
  else if previous is Regional_Indicator and current is Regional_Indicator
    return false

  # GB9. X Extend
  else if current is Extend
    return false

  # GB9a. X SpacingMark
  else if current is SpacingMark
    return false

  # GB9b. Prepend X (there are currently no characters with this class)
  # else if previous is Prepend
  #   return false

  # GB10. Any ÷ Any
  return true
|
||||
|
||||
# Returns the next grapheme break in the string after the given index.
#
# string - the String to scan
# index  - UTF-16 index to start from (defaults to 0)
#
# Returns 0 for a negative index, and string.length when the index is at or
# past the last code unit or when no break is found before the end.
exports.nextBreak = (string, index = 0) ->
  if index < 0
    return 0

  if index >= string.length - 1
    return string.length

  prev = classTrie.get codePointAt(string, index)
  for i in [index + 1...string.length] by 1
    # check for already processed low surrogates: the pair was classified
    # as a whole when its high surrogate was visited
    continue if (0xd800 <= string.charCodeAt(i - 1) <= 0xdbff) and (0xdc00 <= string.charCodeAt(i) <= 0xdfff)

    next = classTrie.get codePointAt(string, i)
    if shouldBreak prev, next
      return i

    prev = next

  return string.length
|
||||
|
||||
# Returns the closest grapheme break in the string before the given index.
#
# string - the String to scan
# index  - UTF-16 index to search backwards from (defaults to string.length)
#
# Returns string.length when the index is past the end of the string, and 0
# when the index is 1 or less or when no break is found before the start.
exports.previousBreak = (string, index = string.length) ->
  if index > string.length
    return string.length

  if index <= 1
    return 0

  # step onto the code unit just before the starting position
  index--
  next = classTrie.get codePointAt(string, index)
  for i in [index - 1..0] by -1
    # check for already processed high surrogates: the pair was classified
    # as a whole when its low surrogate was visited
    continue if (0xd800 <= string.charCodeAt(i) <= 0xdbff) and (0xdc00 <= string.charCodeAt(i + 1) <= 0xdfff)

    prev = classTrie.get codePointAt(string, i)
    if shouldBreak prev, next
      return i + 1

    next = prev

  return 0
|
||||
|
||||
# Breaks the given string into an array of grapheme cluster strings.
#
# str - the String to split
#
# Returns an Array of substrings of `str`, in order; empty for an empty string.
exports.break = (str) ->
  res = []
  index = 0

  # nextBreak returns str.length once the final cluster is reached
  while (brk = exports.nextBreak(str, index)) < str.length
    res.push str.slice(index, brk)
    index = brk

  # the loop stops before emitting the final cluster, so add it here
  if index < str.length
    res.push str.slice(index)

  return res
|
||||
|
||||
# Returns the number of grapheme clusters there are in the given string.
#
# str - the String to measure
#
# Walks the string with nextBreak without building the substrings.
# Returns 0 for an empty string.
exports.countBreaks = (str) ->
  count = 0
  index = 0

  while (brk = exports.nextBreak(str, index)) < str.length
    index = brk
    count++

  # account for the final cluster, which the loop above does not count
  if index < str.length
    count++

  return count
|
||||
138
BACK_BACK/node_modules/grapheme-breaker/src/GraphemeBreaker.js
generated
vendored
Executable file
138
BACK_BACK/node_modules/grapheme-breaker/src/GraphemeBreaker.js
generated
vendored
Executable file
|
|
@ -0,0 +1,138 @@
|
|||
// Generated by CoffeeScript 1.8.0
|
||||
(function() {
|
||||
var CR, Control, Extend, L, LF, LV, LVT, Regional_Indicator, SpacingMark, T, UnicodeTrie, V, classTrie, codePointAt, fs, shouldBreak, _ref;
|
||||
|
||||
_ref = require('./classes.json'), CR = _ref.CR, LF = _ref.LF, Control = _ref.Control, Extend = _ref.Extend, Regional_Indicator = _ref.Regional_Indicator, SpacingMark = _ref.SpacingMark, L = _ref.L, V = _ref.V, T = _ref.T, LV = _ref.LV, LVT = _ref.LVT;
|
||||
|
||||
UnicodeTrie = require('unicode-trie');
|
||||
|
||||
fs = require('fs');
|
||||
|
||||
classTrie = new UnicodeTrie(fs.readFileSync(__dirname + '/classes.trie'));
|
||||
|
||||
/**
 * Gets the Unicode code point at UTF-16 index `idx` of `str`, combining
 * surrogate pairs into a single code point.
 *
 * If `idx` lands on either half of a well-formed surrogate pair, the code
 * point of the whole pair is returned. An unpaired surrogate is returned
 * unchanged. An out-of-range `idx` yields NaN, because that is what
 * `charCodeAt` returns and NaN fails both range checks.
 *
 * @param {string} str - String to read from.
 * @param {number} [idx] - UTF-16 code unit index; any falsy value means 0.
 * @returns {number} The code point, a lone surrogate value, or NaN.
 */
function codePointAt(str, idx) {
  var code, hi, low;
  idx = idx || 0;
  code = str.charCodeAt(idx);

  // High surrogate: combine with the low surrogate that follows, if any.
  if (0xD800 <= code && code <= 0xDBFF) {
    hi = code;
    low = str.charCodeAt(idx + 1);
    if (0xDC00 <= low && low <= 0xDFFF) {
      return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
    }
    return hi;
  }

  // Low surrogate: combine with the high surrogate that precedes, if any.
  if (0xDC00 <= code && code <= 0xDFFF) {
    hi = str.charCodeAt(idx - 1);
    low = code;
    if (0xD800 <= hi && hi <= 0xDBFF) {
      return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
    }
    return low;
  }

  return code;
}
|
||||
|
||||
/**
 * Returns whether a break is allowed between the two given grapheme
 * breaking classes. Class ids are the values read from classes.json at the
 * top of this module. Rules are checked in order; the first match decides.
 *
 * @param {number} previous - Class id of the code point before the boundary.
 * @param {number} current - Class id of the code point after the boundary.
 * @returns {boolean} true when a break is allowed, false otherwise.
 */
function shouldBreak(previous, current) {
  // GB3. CR X LF
  if (previous === CR && current === LF) {
    return false;
  }
  // GB4. (Control|CR|LF) ÷
  if (previous === Control || previous === CR || previous === LF) {
    return true;
  }
  // GB5. ÷ (Control|CR|LF)
  if (current === Control || current === CR || current === LF) {
    return true;
  }
  // GB6. L X (L|V|LV|LVT)
  if (previous === L && (current === L || current === V || current === LV || current === LVT)) {
    return false;
  }
  // GB7. (LV|V) X (V|T)
  if ((previous === LV || previous === V) && (current === V || current === T)) {
    return false;
  }
  // GB8. (LVT|T) X (T)
  if ((previous === LVT || previous === T) && current === T) {
    return false;
  }
  // GB8a. Regional_Indicator X Regional_Indicator
  if (previous === Regional_Indicator && current === Regional_Indicator) {
    return false;
  }
  // GB9. X Extend
  if (current === Extend) {
    return false;
  }
  // GB9a. X SpacingMark
  if (current === SpacingMark) {
    return false;
  }
  // GB10. Any ÷ Any
  return true;
}
|
||||
|
||||
exports.nextBreak = function(string, index) {
|
||||
var i, next, prev, _i, _ref1, _ref2, _ref3, _ref4;
|
||||
if (index == null) {
|
||||
index = 0;
|
||||
}
|
||||
if (index < 0) {
|
||||
return 0;
|
||||
}
|
||||
if (index >= string.length - 1) {
|
||||
return string.length;
|
||||
}
|
||||
prev = classTrie.get(codePointAt(string, index));
|
||||
for (i = _i = _ref1 = index + 1, _ref2 = string.length; _i < _ref2; i = _i += 1) {
|
||||
if ((0xd800 <= (_ref3 = string.charCodeAt(i - 1)) && _ref3 <= 0xdbff) && (0xdc00 <= (_ref4 = string.charCodeAt(i)) && _ref4 <= 0xdfff)) {
|
||||
continue;
|
||||
}
|
||||
next = classTrie.get(codePointAt(string, i));
|
||||
if (shouldBreak(prev, next)) {
|
||||
return i;
|
||||
}
|
||||
prev = next;
|
||||
}
|
||||
return string.length;
|
||||
};
|
||||
|
||||
exports.previousBreak = function(string, index) {
|
||||
var i, next, prev, _i, _ref1, _ref2, _ref3;
|
||||
if (index == null) {
|
||||
index = string.length;
|
||||
}
|
||||
if (index > string.length) {
|
||||
return string.length;
|
||||
}
|
||||
if (index <= 1) {
|
||||
return 0;
|
||||
}
|
||||
index--;
|
||||
next = classTrie.get(codePointAt(string, index));
|
||||
for (i = _i = _ref1 = index - 1; _i >= 0; i = _i += -1) {
|
||||
if ((0xd800 <= (_ref2 = string.charCodeAt(i)) && _ref2 <= 0xdbff) && (0xdc00 <= (_ref3 = string.charCodeAt(i + 1)) && _ref3 <= 0xdfff)) {
|
||||
continue;
|
||||
}
|
||||
prev = classTrie.get(codePointAt(string, i));
|
||||
if (shouldBreak(prev, next)) {
|
||||
return i + 1;
|
||||
}
|
||||
next = prev;
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
/**
 * Breaks the given string into an array of grapheme cluster strings.
 *
 * @param {string} str - String to split.
 * @returns {string[]} Substrings of `str`, in order; empty for an empty string.
 */
exports["break"] = function(str) {
  var brk, index, res;
  res = [];
  index = 0;
  // nextBreak returns str.length once the final cluster is reached.
  while ((brk = exports.nextBreak(str, index)) < str.length) {
    res.push(str.slice(index, brk));
    index = brk;
  }
  // The loop stops before emitting the final cluster, so add it here.
  if (index < str.length) {
    res.push(str.slice(index));
  }
  return res;
};
|
||||
|
||||
exports.countBreaks = function(str) {
|
||||
var brk, count, index;
|
||||
count = 0;
|
||||
index = 0;
|
||||
while ((brk = exports.nextBreak(str, index)) < str.length) {
|
||||
index = brk;
|
||||
count++;
|
||||
}
|
||||
if (index < str.length) {
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
};
|
||||
|
||||
}).call(this);
|
||||
1
BACK_BACK/node_modules/grapheme-breaker/src/classes.json
generated
vendored
Executable file
1
BACK_BACK/node_modules/grapheme-breaker/src/classes.json
generated
vendored
Executable file
|
|
@ -0,0 +1 @@
|
|||
{"Other":0,"CR":1,"LF":2,"Control":3,"Extend":4,"Regional_Indicator":5,"SpacingMark":6,"L":7,"V":8,"T":9,"LV":10,"LVT":11}
|
||||
BIN
BACK_BACK/node_modules/grapheme-breaker/src/classes.trie
generated
vendored
Executable file
BIN
BACK_BACK/node_modules/grapheme-breaker/src/classes.trie
generated
vendored
Executable file
Binary file not shown.
33
BACK_BACK/node_modules/grapheme-breaker/src/generate_data.coffee
generated
vendored
Executable file
33
BACK_BACK/node_modules/grapheme-breaker/src/generate_data.coffee
generated
vendored
Executable file
|
|
@ -0,0 +1,33 @@
|
|||
request = require 'request'
fs = require 'fs'
UnicodeTrieBuilder = require 'unicode-trie/builder'

UNICODE_VERSION = '8.0.0'
BASE_URL = "http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd"

# Downloads GraphemeBreakProperty.txt for UNICODE_VERSION, assigns a numeric
# id to every grapheme break class it mentions, and writes two files next to
# this script: classes.trie (code point -> class id) and classes.json
# (class name -> class id).
request "#{BASE_URL}/auxiliary/GraphemeBreakProperty.txt", (err, res, data) ->
  # Fail loudly: parsing an error page would match nothing and silently
  # write a trie in which every code point has class Other.
  throw err if err?
  unless res.statusCode is 200
    throw new Error "Unexpected HTTP status #{res.statusCode} for GraphemeBreakProperty.txt"

  # matches "XXXX ; Class" and "XXXX..YYYY ; Class" lines
  re = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm
  nextClass = 1
  classes =
    Other: 0

  trie = new UnicodeTrieBuilder classes.Other

  # record every entry in the table as a range in the trie;
  # a single code point is a range whose end equals its start
  while match = re.exec(data)
    start = match[1]
    end = match[2] ? start
    type = match[3]
    unless classes[type]?
      classes[type] = nextClass++

    trie.setRange parseInt(start, 16), parseInt(end, 16), classes[type]

  # write the trie to a file (synchronously: fs.writeFile without a callback
  # throws on current Node versions and would otherwise hide write errors)
  fs.writeFileSync __dirname + '/classes.trie', trie.toBuffer()

  # write classes to a file
  fs.writeFileSync __dirname + '/classes.json', JSON.stringify classes
  return
|
||||
36
BACK_BACK/node_modules/grapheme-breaker/src/generate_data.js
generated
vendored
Executable file
36
BACK_BACK/node_modules/grapheme-breaker/src/generate_data.js
generated
vendored
Executable file
|
|
@ -0,0 +1,36 @@
|
|||
// Generated by CoffeeScript 1.8.0
// NOTE(review): hand-corrected below; keep in sync with generate_data.coffee.
(function() {
  var BASE_URL, UNICODE_VERSION, UnicodeTrieBuilder, fs, request;

  request = require('request');

  fs = require('fs');

  UnicodeTrieBuilder = require('unicode-trie/builder');

  UNICODE_VERSION = '8.0.0';

  BASE_URL = "http://www.unicode.org/Public/" + UNICODE_VERSION + "/ucd";

  // Downloads GraphemeBreakProperty.txt for UNICODE_VERSION, assigns a
  // numeric id to every grapheme break class it mentions, and writes two
  // files next to this script: classes.trie (code point -> class id) and
  // classes.json (class name -> class id).
  request("" + BASE_URL + "/auxiliary/GraphemeBreakProperty.txt", function(err, res, data) {
    var classes, end, match, nextClass, re, start, trie, type, _ref;

    // Fail loudly: parsing an error page would match nothing and silently
    // write a trie in which every code point has class Other.
    if (err != null) {
      throw err;
    }
    if (res.statusCode !== 200) {
      throw new Error("Unexpected HTTP status " + res.statusCode + " for GraphemeBreakProperty.txt");
    }

    // Matches "XXXX ; Class" and "XXXX..YYYY ; Class" lines.
    re = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm;
    nextClass = 1;
    classes = {
      Other: 0
    };
    trie = new UnicodeTrieBuilder(classes.Other);

    // Record every entry as a range in the trie; a single code point is a
    // range whose end equals its start.
    while (match = re.exec(data)) {
      start = match[1];
      end = (_ref = match[2]) != null ? _ref : start;
      type = match[3];
      if (classes[type] == null) {
        classes[type] = nextClass++;
      }
      trie.setRange(parseInt(start, 16), parseInt(end, 16), classes[type]);
    }

    // Written synchronously: fs.writeFile without a callback throws on
    // current Node versions and would otherwise hide write errors.
    fs.writeFileSync(__dirname + '/classes.trie', trie.toBuffer());
    fs.writeFileSync(__dirname + '/classes.json', JSON.stringify(classes));
  });

}).call(this);
|
||||
Loading…
Add table
Add a link
Reference in a new issue