flow like the river

This commit is contained in:
root 2025-11-07 00:06:12 +01:00
commit 013fe673f3
42435 changed files with 5764238 additions and 0 deletions

View file

@ -0,0 +1,149 @@
# Numeric ids of the grapheme break classes that shouldBreak compares against,
# pulled out of classes.json by name.
{CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT} = require './classes.json'
UnicodeTrie = require 'unicode-trie'
fs = require 'fs'
# Lookup structure queried with a code point (classTrie.get) to obtain that
# code point's class id. The data file is read synchronously when this module loads.
classTrie = new UnicodeTrie fs.readFileSync __dirname + '/classes.trie'
# Resolves the Unicode code point that the UTF-16 code unit at `idx` belongs to.
# When `idx` addresses either half of a well-formed surrogate pair the combined
# supplementary code point is returned; a surrogate without its partner, or any
# ordinary code unit, is returned unchanged. A falsy `idx` is treated as 0.
codePointAt = (str, idx) ->
  isLead = (unit) -> 0xD800 <= unit <= 0xDBFF
  isTrail = (unit) -> 0xDC00 <= unit <= 0xDFFF
  join = (lead, trail) -> ((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000

  idx = idx or 0
  unit = str.charCodeAt(idx)

  # `idx` is on the first half of a pair: the partner is the next unit
  if isLead unit
    neighbour = str.charCodeAt(idx + 1)
    return join(unit, neighbour) if isTrail neighbour
    return unit

  # `idx` is on the second half of a pair: the partner is the previous unit
  if isTrail unit
    neighbour = str.charCodeAt(idx - 1)
    return join(neighbour, unit) if isLead neighbour
    return unit

  unit
# Decides whether a grapheme cluster boundary is permitted between two adjacent
# code points, given the grapheme break class of each. The rules are tested in
# order and the first one that applies determines the answer.
shouldBreak = (previous, current) ->
  hardBreakers = [Control, CR, LF]

  # GB3. CR X LF
  return false if previous is CR and current is LF

  # GB4. (Control|CR|LF) ÷   and   GB5. ÷ (Control|CR|LF)
  return true if previous in hardBreakers
  return true if current in hardBreakers

  # GB6. L X (L|V|LV|LVT)
  return false if previous is L and current in [L, V, LV, LVT]

  # GB7. (LV|V) X (V|T)
  return false if previous in [LV, V] and current in [V, T]

  # GB8. (LVT|T) X (T)
  return false if previous in [LVT, T] and current is T

  # GB8a. Regional_Indicator X Regional_Indicator
  return false if previous is Regional_Indicator and current is Regional_Indicator

  # GB9. X Extend   and   GB9a. X SpacingMark
  return false if current is Extend
  return false if current is SpacingMark

  # GB9b. Prepend X is intentionally not checked
  # (there are currently no characters with this class)

  # GB10. Any ÷ Any
  true
# Finds the offset of the first grapheme cluster boundary after `index`
# (default 0). Returns 0 for a negative index and `string.length` when no
# further boundary exists.
exports.nextBreak = (string, index = 0) ->
  return 0 if index < 0

  total = string.length
  return total if index >= total - 1

  leftClass = classTrie.get codePointAt(string, index)
  pos = index + 1
  while pos < total
    # A low surrogate that directly follows its high surrogate was already
    # consumed together with it, so it is not a candidate position.
    insidePair = 0xd800 <= string.charCodeAt(pos - 1) <= 0xdbff and
                 0xdc00 <= string.charCodeAt(pos) <= 0xdfff
    unless insidePair
      rightClass = classTrie.get codePointAt(string, pos)
      return pos if shouldBreak leftClass, rightClass
      leftClass = rightClass
    pos += 1

  total
# Finds the offset of the nearest grapheme cluster boundary before `index`
# (default: the end of the string). Returns `string.length` when `index` lies
# past the end and 0 when no earlier boundary exists.
exports.previousBreak = (string, index = string.length) ->
  return string.length if index > string.length
  return 0 if index <= 1

  # Start from the code unit immediately left of `index` and walk backwards.
  index--
  rightClass = classTrie.get codePointAt(string, index)
  pos = index - 1
  while pos >= 0
    # A high surrogate that is directly followed by its low surrogate was
    # already consumed together with it, so it is not a candidate position.
    leadsPair = 0xd800 <= string.charCodeAt(pos) <= 0xdbff and
                0xdc00 <= string.charCodeAt(pos + 1) <= 0xdfff
    unless leadsPair
      leftClass = classTrie.get codePointAt(string, pos)
      return pos + 1 if shouldBreak leftClass, rightClass
      rightClass = leftClass
    pos -= 1

  0
# Splits `str` into an array with one string per grapheme cluster, in order.
# An empty string yields an empty array.
exports.break = (str) ->
  clusters = []
  start = 0
  loop
    stop = exports.nextBreak str, start
    break unless stop < str.length
    clusters.push str.slice(start, stop)
    start = stop

  # Whatever remains after the last interior boundary is the final cluster.
  clusters.push str.slice(start) if start < str.length
  clusters
# Counts the grapheme clusters in `str` without materialising them.
exports.countBreaks = (str) ->
  total = 0
  cursor = 0
  loop
    boundary = exports.nextBreak str, cursor
    break unless boundary < str.length
    cursor = boundary
    total += 1

  # The text after the last interior boundary is one more cluster.
  total += 1 if cursor < str.length
  total

View file

@ -0,0 +1,138 @@
// Generated by CoffeeScript 1.8.0
(function() {
// Module-level bindings: the grapheme break class ids, the required modules,
// and the two helpers (codePointAt, shouldBreak) assigned further down.
var CR, Control, Extend, L, LF, LV, LVT, Regional_Indicator, SpacingMark, T, UnicodeTrie, V, classTrie, codePointAt, fs, shouldBreak, _ref;
// Copy each class id out of classes.json by name; shouldBreak compares against these.
_ref = require('./classes.json'), CR = _ref.CR, LF = _ref.LF, Control = _ref.Control, Extend = _ref.Extend, Regional_Indicator = _ref.Regional_Indicator, SpacingMark = _ref.SpacingMark, L = _ref.L, V = _ref.V, T = _ref.T, LV = _ref.LV, LVT = _ref.LVT;
UnicodeTrie = require('unicode-trie');
fs = require('fs');
// Lookup structure queried with a code point (classTrie.get) to obtain its class id.
// The data file is read synchronously when this module loads.
classTrie = new UnicodeTrie(fs.readFileSync(__dirname + '/classes.trie'));
/**
 * Resolves the Unicode code point that the UTF-16 code unit at `idx` belongs to.
 *
 * When `idx` addresses either half of a well-formed surrogate pair, the combined
 * supplementary code point is returned. A surrogate without its partner, or any
 * ordinary code unit, is returned unchanged.
 *
 * @param {string} str - String to read from.
 * @param {number} [idx] - Code unit offset; a falsy value is treated as 0.
 * @returns {number} The code point (NaN when the offset is outside the string).
 */
codePointAt = function(str, idx) {
  var isLead = function(unit) {
    return unit >= 0xD800 && unit <= 0xDBFF;
  };
  var isTrail = function(unit) {
    return unit >= 0xDC00 && unit <= 0xDFFF;
  };
  var join = function(lead, trail) {
    return ((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000;
  };
  var at = idx || 0;
  var unit = str.charCodeAt(at);
  var neighbour;

  // `at` is on the first half of a pair: the partner is the next unit.
  if (isLead(unit)) {
    neighbour = str.charCodeAt(at + 1);
    return isTrail(neighbour) ? join(unit, neighbour) : unit;
  }

  // `at` is on the second half of a pair: the partner is the previous unit.
  if (isTrail(unit)) {
    neighbour = str.charCodeAt(at - 1);
    return isLead(neighbour) ? join(neighbour, unit) : unit;
  }

  return unit;
};
/**
 * Decides whether a grapheme cluster boundary is permitted between two adjacent
 * code points, given the grapheme break class id of each. Rules are tested in
 * order and the first one that applies determines the answer.
 *
 * @param {number} previous - Class id of the code point on the left.
 * @param {number} current - Class id of the code point on the right.
 * @returns {boolean} true when a break is allowed, false when the pair must stay together.
 */
shouldBreak = function(previous, current) {
  var forcesBreak = function(cls) {
    return cls === Control || cls === CR || cls === LF;
  };

  // CR followed by LF is never split.
  if (previous === CR && current === LF) {
    return false;
  }

  // Otherwise always break around Control, CR and LF.
  if (forcesBreak(previous) || forcesBreak(current)) {
    return true;
  }

  // Hangul syllable sequences stay together.
  switch (previous) {
    case L:
      if (current === L || current === V || current === LV || current === LVT) {
        return false;
      }
      break;
    case LV:
    case V:
      if (current === V || current === T) {
        return false;
      }
      break;
    case LVT:
    case T:
      if (current === T) {
        return false;
      }
      break;
  }

  // Consecutive regional indicators stay together.
  if (previous === Regional_Indicator && current === Regional_Indicator) {
    return false;
  }

  // Never break before Extend or SpacingMark; everything else may break.
  return !(current === Extend || current === SpacingMark);
};
exports.nextBreak = function(string, index) {
var i, next, prev, _i, _ref1, _ref2, _ref3, _ref4;
if (index == null) {
index = 0;
}
if (index < 0) {
return 0;
}
if (index >= string.length - 1) {
return string.length;
}
prev = classTrie.get(codePointAt(string, index));
for (i = _i = _ref1 = index + 1, _ref2 = string.length; _i < _ref2; i = _i += 1) {
if ((0xd800 <= (_ref3 = string.charCodeAt(i - 1)) && _ref3 <= 0xdbff) && (0xdc00 <= (_ref4 = string.charCodeAt(i)) && _ref4 <= 0xdfff)) {
continue;
}
next = classTrie.get(codePointAt(string, i));
if (shouldBreak(prev, next)) {
return i;
}
prev = next;
}
return string.length;
};
exports.previousBreak = function(string, index) {
var i, next, prev, _i, _ref1, _ref2, _ref3;
if (index == null) {
index = string.length;
}
if (index > string.length) {
return string.length;
}
if (index <= 1) {
return 0;
}
index--;
next = classTrie.get(codePointAt(string, index));
for (i = _i = _ref1 = index - 1; _i >= 0; i = _i += -1) {
if ((0xd800 <= (_ref2 = string.charCodeAt(i)) && _ref2 <= 0xdbff) && (0xdc00 <= (_ref3 = string.charCodeAt(i + 1)) && _ref3 <= 0xdfff)) {
continue;
}
prev = classTrie.get(codePointAt(string, i));
if (shouldBreak(prev, next)) {
return i + 1;
}
next = prev;
}
return 0;
};
/**
 * Splits a string into its grapheme clusters.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} One entry per grapheme cluster, in order; empty for an empty string.
 */
exports["break"] = function(str) {
  var clusters = [];
  var start = 0;
  var stop = exports.nextBreak(str, start);

  while (stop < str.length) {
    clusters.push(str.slice(start, stop));
    start = stop;
    stop = exports.nextBreak(str, start);
  }

  // Whatever remains after the last interior boundary is the final cluster.
  if (start < str.length) {
    clusters.push(str.slice(start));
  }
  return clusters;
};
exports.countBreaks = function(str) {
var brk, count, index;
count = 0;
index = 0;
while ((brk = exports.nextBreak(str, index)) < str.length) {
index = brk;
count++;
}
if (index < str.length) {
count++;
}
return count;
};
}).call(this);

View file

@ -0,0 +1 @@
{"Other":0,"CR":1,"LF":2,"Control":3,"Extend":4,"Regional_Indicator":5,"SpacingMark":6,"L":7,"V":8,"T":9,"LV":10,"LVT":11}

BIN
VISUALIZACION/node_modules/grapheme-breaker/src/classes.trie generated vendored Executable file

Binary file not shown.

View file

@ -0,0 +1,33 @@
request = require 'request'
fs = require 'fs'
UnicodeTrieBuilder = require 'unicode-trie/builder'
UNICODE_VERSION = '8.0.0'
BASE_URL = "http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd"

# Downloads GraphemeBreakProperty.txt for the Unicode version above, parses it,
# and writes two data files next to this script:
#   classes.trie - code point -> class id lookup table
#   classes.json - class name -> class id mapping
request "#{BASE_URL}/auxiliary/GraphemeBreakProperty.txt", (err, res, data) ->
  # Abort before touching the data files: parsing a failed download would
  # match nothing and overwrite both files with an empty table.
  throw err if err?
  unless res.statusCode is 200
    throw new Error "Unexpected HTTP status #{res.statusCode} fetching GraphemeBreakProperty.txt"

  # One entry per line: "XXXX ; Class" or "XXXX..YYYY ; Class"
  re = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm
  nextClass = 1
  classes =
    Other: 0

  # Code points not listed in the file keep the default class, Other.
  trie = new UnicodeTrieBuilder classes.Other

  while match = re.exec(data)
    start = match[1]
    end = match[2] ? start
    type = match[3]

    # Class ids are handed out in order of first appearance in the file.
    unless classes[type]?
      classes[type] = nextClass++

    trie.setRange parseInt(start, 16), parseInt(end, 16), classes[type]

  # Synchronous writes: fs.writeFile without a callback throws on current Node
  # releases, and a failed write should stop the script with an error.
  fs.writeFileSync __dirname + '/classes.trie', trie.toBuffer()
  fs.writeFileSync __dirname + '/classes.json', JSON.stringify classes

View file

@ -0,0 +1,36 @@
// Generated by CoffeeScript 1.8.0
(function() {
  var BASE_URL, UNICODE_VERSION, UnicodeTrieBuilder, fs, request;

  request = require('request');

  fs = require('fs');

  UnicodeTrieBuilder = require('unicode-trie/builder');

  UNICODE_VERSION = '8.0.0';

  BASE_URL = "http://www.unicode.org/Public/" + UNICODE_VERSION + "/ucd";

  // Downloads GraphemeBreakProperty.txt for the Unicode version above, parses it,
  // and writes two data files next to this script:
  //   classes.trie - code point -> class id lookup table
  //   classes.json - class name -> class id mapping
  request("" + BASE_URL + "/auxiliary/GraphemeBreakProperty.txt", function(err, res, data) {
    var classes, end, match, nextClass, re, start, trie, type, _ref;

    // Abort before touching the data files: parsing a failed download would
    // match nothing and overwrite both files with an empty table.
    if (err != null) {
      throw err;
    }
    if (res.statusCode !== 200) {
      throw new Error("Unexpected HTTP status " + res.statusCode + " fetching GraphemeBreakProperty.txt");
    }

    // One entry per line: "XXXX ; Class" or "XXXX..YYYY ; Class"
    re = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm;
    nextClass = 1;
    classes = {
      Other: 0
    };

    // Code points not listed in the file keep the default class, Other.
    trie = new UnicodeTrieBuilder(classes.Other);
    while (match = re.exec(data)) {
      start = match[1];
      end = (_ref = match[2]) != null ? _ref : start;
      type = match[3];
      // Class ids are handed out in order of first appearance in the file.
      if (classes[type] == null) {
        classes[type] = nextClass++;
      }
      trie.setRange(parseInt(start, 16), parseInt(end, 16), classes[type]);
    }

    // Synchronous writes: fs.writeFile without a callback throws on current Node
    // releases, and a failed write should stop the script with an error.
    fs.writeFileSync(__dirname + '/classes.trie', trie.toBuffer());
    fs.writeFileSync(__dirname + '/classes.json', JSON.stringify(classes));
  });

}).call(this);