Java源码示例:com.ibm.icu.text.RuleBasedCollator

示例1
/**
 * Configures the strength and case level of {@code collator} from the
 * case/accent sensitivity flags of {@code specifier}.
 *
 * <ul>
 * <li>case + accent sensitive: tertiary strength, no case level</li>
 * <li>case sensitive only: primary strength with the case level enabled</li>
 * <li>accent sensitive only: secondary strength, no case level</li>
 * <li>neither: primary strength, no case level</li>
 * </ul>
 *
 * @param collator the collator to configure (modified in place)
 * @param specifier source of the case/accent sensitivity flags
 */
private static void setCollatorStrength(RuleBasedCollator collator, CollationSpecifier specifier) {
    final boolean caseMatters = specifier.caseSensitive();
    final boolean accentMatters = specifier.accentSensitive();

    final int strength;
    if (accentMatters) {
        strength = caseMatters ? Collator.TERTIARY : Collator.SECONDARY;
    } else {
        strength = Collator.PRIMARY;
    }
    collator.setStrength(strength);

    // The case level is only switched on for "case-sensitive but accent-insensitive".
    collator.setCaseLevel(caseMatters && !accentMatters);
}
 
示例2
/**
 * Builds a {@link RuleBasedCollator} from custom rules stored in a resource.
 * The entire resource content (decoded as UTF-8) is handed over as the rule
 * string, so the file cannot carry comments: '#' might be part of the rules.
 *
 * @param fileName name of the resource holding the rules
 * @param loader   loader used to open the resource
 * @return a collator built from the rules read
 * @throws RuntimeException wrapping any failure to read the resource or to
 *         build a collator from its content
 */
static Collator createFromRules(String fileName, ResourceLoader loader) {
  InputStream rulesStream = null;
  try {
    rulesStream = loader.openResource(fileName);
    final String ruleText = IOUtils.toString(rulesStream, "UTF-8");
    final Collator fromRules = new RuleBasedCollator(ruleText);
    return fromRules;
  } catch (Exception cause) {
    // either an I/O problem or a rule string that was rejected
    throw new RuntimeException(cause);
  } finally {
    // the stream may still be null here if opening the resource failed
    IOUtils.closeQuietly(rulesStream);
  }
}
 
示例3
/**
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 * <p>
 * Creates a temporary Solr home with a {@code collection1/data} and a
 * {@code collection1/conf} directory, copies the ICU collation solrconfig and
 * schema into {@code conf}, writes German rules tailored per DIN 5007-2 to
 * {@code conf/customrules.dat}, and finally checks that a collator read back
 * through {@code ICUCollationField.createFromRules} equals the one the rules
 * were taken from.
 *
 * @return absolute path of the temporary Solr home directory
 * @throws Exception if a file cannot be copied or written, or the rules are rejected
 */
public static String setupSolrHome() throws Exception {
  String tmpFile = createTempDir().toFile().getAbsolutePath();
  // make data and conf dirs
  new File(tmpFile  + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // Each umlaut is tailored next to its two-letter expansion, for both cases.
  // The last rule must use the uppercase U + diaeresis: repeating the lowercase
  // form there would leave the uppercase umlaut untailored.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , U\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  final String osFileName = "customrules.dat";
  // try-with-resources so the stream is closed even when the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  // read the rules back, randomly via an in-memory or a filesystem loader
  final ResourceLoader loader;
  if (random().nextBoolean()) {
    loader = new StringMockResourceLoader(tailoredRules);
  } else {
    loader = new FilesystemResourceLoader(confDir.toPath());
  }
  final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
  assertEquals(tailoredCollator, readCollator);

  return tmpFile;
}
 
示例4
/**
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 * <p>
 * Creates a temporary Solr home with a {@code collection1/data} and a
 * {@code collection1/conf} directory, copies the ICU collation solrconfig and
 * the {@code schema-icucollate-dv.xml} schema into {@code conf}, and writes
 * German rules tailored per DIN 5007-2 to {@code conf/customrules.dat}.
 *
 * @return absolute path of the temporary Solr home directory
 * @throws Exception if a file cannot be copied or written, or the rules are rejected
 */
public static String setupSolrHome() throws Exception {
  File tmpFile = createTempDir().toFile();
  
  // make data and conf dirs
  new File(tmpFile + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // Each umlaut is tailored next to its two-letter expansion, for both cases.
  // The last rule must use the uppercase U + diaeresis: repeating the lowercase
  // form there would leave the uppercase umlaut untailored.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , U\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  // try-with-resources so the stream is closed even when the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  return tmpFile.getAbsolutePath();
}
 
示例5
/**
 * Asserts that {@code col} looks like the expected Arabic collator: pinned UCA
 * and collator versions, a reordering that lists only the Arabic script, a few
 * sample comparisons between Latin and Arabic text, and the exact rule string.
 *
 * @param col collator under test; it is cast to {@link RuleBasedCollator} for the rule check
 */
private void requireThatArabicHasCorrectRules(Collator col) {
    final int[] expectedReordering = {UScript.ARABIC};

    // version pins
    assertEquals("6.2.0.0", col.getUCAVersion().toString());
    assertEquals("58.0.0.6", col.getVersion().toString());

    // script reordering, compared through the printed form of the arrays
    assertEquals(Arrays.toString(expectedReordering), Arrays.toString(col.getReorderCodes()));

    // sample comparisons
    assertTrue(col.compare("a", "b") < 0);
    assertTrue(col.compare("a", "aس") < 0);
    assertFalse(col.compare("س", "a") < 0);

    // exact tailoring rules
    final RuleBasedCollator ruleBased = (RuleBasedCollator) col;
    assertEquals(" [reorder Arab]&ت<<ة<<<ﺔ<<<ﺓ&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ", ruleBased.getRules());

    // same comparison as above, repeated after the rules were read
    assertFalse(col.compare("س", "a") < 0);
}
 
示例6
/**
 * Asserts that {@code col} looks like the expected Chinese collator: pinned UCA
 * and collator versions, a reordering that lists only the Han script, and a
 * non-empty rule string.
 *
 * @param col collator under test; it is cast to {@link RuleBasedCollator} for the rule check
 */
private void requireThatChineseHasCorrectRules(Collator col) {
    final int[] expectedReordering = {UScript.HAN};

    // version pins
    assertEquals("8.0.0.0", col.getUCAVersion().toString());
    assertEquals("153.64.29.0", col.getVersion().toString());

    // script reordering, compared through the printed form of the arrays
    assertEquals(Arrays.toString(expectedReordering), Arrays.toString(col.getReorderCodes()));

    // the rules themselves are not pinned, they only must not be empty
    final RuleBasedCollator ruleBased = (RuleBasedCollator) col;
    assertNotEquals("", ruleBased.getRules());
}
 
示例7
/**
 * Creates a collator for this specifier's locale and keywords.
 * The locale is checked first, the keywords are applied to it, and the
 * keywords are checked again against the valid locale reported by the
 * created collator. Strength is only configured when
 * {@code shouldSetStrength()} says so.
 *
 * @return the newly obtained collator
 */
public RuleBasedCollator createCollator() {
    final ULocale requested = new ULocale(locale);
    checkLocale(requested, scheme);
    final ULocale withKeywords = setKeywords(requested, keywordsToValues);

    final RuleBasedCollator result = (RuleBasedCollator) RuleBasedCollator.getInstance(withKeywords);
    final ULocale validLocale = result.getLocale(ULocale.VALID_LOCALE);
    checkKeywords(validLocale, keywordsToValues, scheme);

    if (!shouldSetStrength()) {
        return result;
    }
    setCollatorStrength(result, this);
    return result;
}
 
示例8
/**
 * Returns an ICU collator for the given collation specifier.
 * A collator is created once per distinct {@code specifier.toString()} value
 * and kept in {@code sourceMap}; callers always receive a thawed clone of
 * that stored instance. The result must be used in a thread-private manner.
 *
 * @param specifier the collation specifier to build (or look up) a collator for
 * @return a thawed clone of the collator stored for the specifier
 */
static synchronized Collator forScheme(final CollationSpecifier specifier) {
    final Object hit = sourceMap.get(specifier.toString());

    final RuleBasedCollator template;
    if (hit != null) {
        template = (RuleBasedCollator) hit;
    } else {
        // first request for this specifier: build and remember the collator
        template = specifier.createCollator();
        sourceMap.put(specifier.toString(), template);
    }

    // never hand out the stored instance itself
    return template.cloneAsThawed();
}
 
示例9
/**
 * Checks that an {@code icu_collation} analyzer configured with custom rules
 * (German rules plus DIN 5007-2 tailorings) at primary strength produces the
 * same key bytes for an umlaut and for its two-letter expansion.
 *
 * @throws Exception if the rules are rejected or the analysis setup fails
 */
public void testCustomRules() throws Exception {
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
    // Each umlaut is tailored next to its two-letter expansion, for both cases.
    // The last rule must use the uppercase U + diaeresis: repeating the lowercase
    // form there would leave the uppercase umlaut untailored.
    String DIN5007_2_tailorings =
            "& ae , a\u0308 & AE , A\u0308& oe , o\u0308 & OE , O\u0308& ue , u\u0308 & UE , U\u0308";

    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();

    Settings settings = Settings.builder()
            .put("index.analysis.analyzer.myAnalyzer.type", "icu_collation")
            .put("index.analysis.analyzer.myAnalyzer.rules", tailoredRules)
            .put("index.analysis.analyzer.myAnalyzer.strength", "primary")
            .build();
    ESTestCase.TestAnalysis analysis = ESTestCase.createTestAnalysis(new Index("test", "_na_"),
            settings,
            new BundlePlugin(Settings.EMPTY));
    Analyzer analyzer = analysis.indexAnalyzers.get("myAnalyzer");

    // key for the umlaut spelling
    String germanUmlaut = "Töne";
    TokenStream tsUmlaut = analyzer.tokenStream(null, germanUmlaut);
    BytesRef b1 = bytesFromTokenStream(tsUmlaut);

    // key for the expanded spelling
    String germanExpandedUmlaut = "Toene";
    TokenStream tsExpanded = analyzer.tokenStream(null, germanExpandedUmlaut);
    BytesRef b2 = bytesFromTokenStream(tsExpanded);

    // both spellings must collate identically under the tailored rules
    assertTrue(compare(b1.bytes, b2.bytes) == 0);
}
 
示例10
/**
 * Creates a scanner around the given collator. The reference is stored as-is
 * in the {@code collator} field; no copy is made here.
 *
 * @param rbc the collator to store
 */
private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
    this.collator = rbc;
}
 
示例11
/**
 * Sets up the field according to the provided parameters.
 * <p>
 * The recognised options are removed from {@code args}. Exactly one of
 * {@code custom} (a rules resource) or {@code locale} must be given; the
 * remaining options ({@code strength}, {@code decomposition},
 * {@code alternate}, {@code caseLevel}, {@code caseFirst}, {@code numeric},
 * {@code variableTop}) are optional and only applied when present. The
 * configured collator is finally wrapped in an {@code ICUCollationKeyAnalyzer}
 * and stored in {@code analyzer}.
 *
 * @param loader used to open the rules resource when {@code custom} is given
 * @param args   field-type arguments; recognised keys are removed
 * @throws SolrException on a missing/conflicting collator source or an invalid
 *         {@code strength}, {@code decomposition}, {@code alternate} or
 *         {@code caseFirst} value
 */
private void setup(ResourceLoader loader, Map<String,String> args) {
  // pull every recognised option out of the argument map
  final String custom = args.remove("custom");
  final String localeID = args.remove("locale");
  final String strength = args.remove("strength");
  final String decomposition = args.remove("decomposition");

  final String alternate = args.remove("alternate");
  final String caseLevel = args.remove("caseLevel");
  final String caseFirst = args.remove("caseFirst");
  final String numeric = args.remove("numeric");
  final String variableTop = args.remove("variableTop");

  final boolean hasCustom = custom != null;
  final boolean hasLocale = localeID != null;

  if (!hasCustom && !hasLocale) {
    throw new SolrException(ErrorCode.SERVER_ERROR, "Either custom or locale is required.");
  }
  if (hasCustom && hasLocale) {
    throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot specify both locale and custom. "
        + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. "
        + "Then save the entire customized ruleset to a file, and use with the custom parameter");
  }

  // either a system collator for the locale, or one built from a custom ruleset
  final Collator collator = hasLocale
      ? createFromLocale(localeID)
      : createFromRules(custom, loader);

  // strength: left at the collator's default unless configured
  if (strength != null) {
    final int level;
    if (strength.equalsIgnoreCase("primary")) {
      level = Collator.PRIMARY;
    } else if (strength.equalsIgnoreCase("secondary")) {
      level = Collator.SECONDARY;
    } else if (strength.equalsIgnoreCase("tertiary")) {
      level = Collator.TERTIARY;
    } else if (strength.equalsIgnoreCase("quaternary")) {
      level = Collator.QUATERNARY;
    } else if (strength.equalsIgnoreCase("identical")) {
      level = Collator.IDENTICAL;
    } else {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid strength: " + strength);
    }
    collator.setStrength(level);
  }

  // decomposition: left at the collator's default unless configured
  if (decomposition != null) {
    final int mode;
    if (decomposition.equalsIgnoreCase("no")) {
      mode = Collator.NO_DECOMPOSITION;
    } else if (decomposition.equalsIgnoreCase("canonical")) {
      mode = Collator.CANONICAL_DECOMPOSITION;
    } else {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition);
    }
    collator.setDecomposition(mode);
  }

  // expert options: concrete subclasses are always a RuleBasedCollator
  final RuleBasedCollator rbc = (RuleBasedCollator) collator;

  if (alternate != null) {
    final boolean shifted = alternate.equalsIgnoreCase("shifted");
    if (!shifted && !alternate.equalsIgnoreCase("non-ignorable")) {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid alternate: " + alternate);
    }
    rbc.setAlternateHandlingShifted(shifted);
  }

  if (caseLevel != null) {
    rbc.setCaseLevel(Boolean.parseBoolean(caseLevel));
  }

  if (caseFirst != null) {
    if (caseFirst.equalsIgnoreCase("lower")) {
      rbc.setLowerCaseFirst(true);
    } else if (caseFirst.equalsIgnoreCase("upper")) {
      rbc.setUpperCaseFirst(true);
    } else {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid caseFirst: " + caseFirst);
    }
  }

  if (numeric != null) {
    rbc.setNumericCollation(Boolean.parseBoolean(numeric));
  }

  if (variableTop != null) {
    rbc.setVariableTop(variableTop);
  }

  analyzer = new ICUCollationKeyAnalyzer(collator);
}
 
示例12
/**
 * Unit test for ICU generation of Unicode sort keys. The expected key depends
 * on the runtime ICU version; only 3.6 and 4.8 are known, any other version
 * fails the test.
 * <pre>
 * Input   : "__globalRowStore"
 * 
 * Expected (ICU 3.6): [7, -124, 7, -124, 53, 63, 69, 43, 41, 63, 75, 69, 85, 77, 79, 69, 75, 49, 1, 20, 1, 126, -113, -124, -113, 8]
 * </pre>
 */
public void test_ICU_Unicode_SortKey() {

    final String input = "__globalRowStore";

    // Collator for en_US with explicit strength and decomposition mode.
    final RuleBasedCollator collator = (RuleBasedCollator) Collator
            .getInstance(new Locale("en", "US"));
    collator.setStrength(Collator.TERTIARY);
    collator.setDecomposition(Collator.NO_DECOMPOSITION);

    // Buffer into which the sort key is generated.
    final RawCollationKey raw = new RawCollationKey(128);
    collator.getRawCollationKey(input, raw);

    // Take the key bytes out of the buffer, leaving off the trailing nul byte.
    final byte[] actual = Arrays.copyOf(raw.bytes, raw.size - 1);

    if (log.isInfoEnabled()) {
        log.info("Actual  : " + Arrays.toString(actual));
    }

    // Pick the expected key for the ICU version found at runtime.
    final int major = VersionInfo.ICU_VERSION.getMajor();
    final int minor = VersionInfo.ICU_VERSION.getMinor();

    final byte[] expected;
    if (major == 3 && minor == 6) {
        // bigdata was initially deployed against v3.6.
        expected = new byte[] { 7, -124, 7, -124, 53, 63, 69, 43, 41, 63,
                75, 69, 85, 77, 79, 69, 75, 49, 1, 20, 1, 126, -113, -124,
                -113, 8 };
    } else if (major == 4 && minor == 8) {
        // The next bundled version was 4.8.
        expected = new byte[] { 6, 12, 6, 12, 51, 61, 67, 41, 39, 61, 73,
                67, 83, 75, 77, 67, 73, 47, 1, 20, 1, 126, -113, -124,
                -113, 8 };
    } else {
        throw new AssertionFailedError("Not an expected ICU version: "
                + VersionInfo.ICU_VERSION);
    }

    if (log.isInfoEnabled()) {
        log.info("Expected: " + Arrays.toString(expected));
    }

    final boolean sameKey = Arrays.equals(expected, actual);
    if (!sameKey) {
        fail("Expected: " + Arrays.toString(expected) + ", " + //
                "Actual: " + Arrays.toString(actual));
    }

}
 
示例13
/**
 * Creates an empty descriptor list bound to the given application and store.
 * Sets up the comparators for sorting by name, creation time and last access,
 * selects descending time order as the initial sort, prepares a root-locale
 * collator (primary strength, alternate handling shifted) for filtering, and
 * creates a handler on the main looper.
 *
 * @param app     stored in the {@code app} field
 * @param store   stored in the {@code store} field
 * @param maxSize stored in the {@code maxSize} field
 */
BlobDescriptorList(Application app, DescriptorStore<BlobDescriptor> store, int maxSize) {
    this.app = app;
    this.store = store;
    this.maxSize = maxSize;
    this.list = new ArrayList<BlobDescriptor>();
    this.filteredList = new ArrayList<BlobDescriptor>();
    this.dataSetObservable = new DataSetObservable();
    this.filter = "";
    this.keyComparator = Slob.Strength.QUATERNARY.comparator;

    // by key, using the key comparator chosen above
    this.nameComparatorAsc = new Comparator<BlobDescriptor>() {
        @Override
        public int compare(BlobDescriptor left, BlobDescriptor right) {
            return keyComparator.compare(left.key, right.key);
        }
    };
    this.nameComparatorDesc = Collections.reverseOrder(nameComparatorAsc);

    // by creation time
    this.timeComparatorAsc = new Comparator<BlobDescriptor>() {
        @Override
        public int compare(BlobDescriptor left, BlobDescriptor right) {
            return Util.compare(left.createdAt, right.createdAt);
        }
    };
    this.timeComparatorDesc = Collections.reverseOrder(timeComparatorAsc);

    // by last access; note the swapped operands
    this.lastAccessComparator = new Comparator<BlobDescriptor>() {
        @Override
        public int compare(BlobDescriptor left, BlobDescriptor right) {
            return Util.compare(right.lastAccess, left.lastAccess);
        }
    };

    // initial sort: by time, descending
    this.order = SortOrder.TIME;
    this.ascending = false;
    setSort(SortOrder.TIME, false);

    // work on a private clone of the root collator before changing its settings
    final Collator rootCollator = Collator.getInstance(Locale.ROOT);
    try {
        this.filterCollator = (RuleBasedCollator) rootCollator.clone();
    } catch (CloneNotSupportedException cloneFailure) {
        throw new RuntimeException(cloneFailure);
    }
    this.filterCollator.setStrength(Collator.PRIMARY);
    this.filterCollator.setAlternateHandlingShifted(true);

    this.handler = new Handler(Looper.getMainLooper());
}
 
示例14
/**
 * Builds an ICU collator from this object's {@code locale}, {@code usage},
 * {@code numeric}, {@code caseFirst}, {@code sensitivity} and
 * {@code ignorePunctuation} settings.
 *
 * @return the configured collator
 * @throws AssertionError if {@code caseFirst} or {@code sensitivity} holds a
 *         value not handled below
 */
private Collator createCollator() {
    ULocale target = ULocale.forLanguageTag(this.locale);
    if ("search".equals(usage)) {
        // "search" usage cannot be set through unicode extensions (u-co-search), handle here:
        target = target.setKeywordValue("collation", "search");
    }

    final RuleBasedCollator rbc = (RuleBasedCollator) Collator.getInstance(target);
    rbc.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    rbc.setNumericCollation(numeric);

    // case ordering
    switch (caseFirst) {
    case "upper":
        rbc.setUpperCaseFirst(true);
        break;
    case "lower":
        rbc.setLowerCaseFirst(true);
        break;
    case "false":
        // clear whichever case-first flag is currently set
        if (rbc.isLowerCaseFirst()) {
            rbc.setLowerCaseFirst(false);
        }
        if (rbc.isUpperCaseFirst()) {
            rbc.setUpperCaseFirst(false);
        }
        break;
    default:
        throw new AssertionError();
    }

    // sensitivity maps to a strength, plus the case level for "case"
    int strength;
    boolean enableCaseLevel;
    switch (sensitivity) {
    case "base":
        strength = Collator.PRIMARY;
        enableCaseLevel = false;
        break;
    case "accent":
        strength = Collator.SECONDARY;
        enableCaseLevel = false;
        break;
    case "case":
        strength = Collator.PRIMARY;
        enableCaseLevel = true;
        break;
    case "variant":
        strength = Collator.TERTIARY;
        enableCaseLevel = false;
        break;
    default:
        throw new AssertionError();
    }
    rbc.setStrength(strength);
    if (enableCaseLevel) {
        rbc.setCaseLevel(true);
    }

    rbc.setAlternateHandlingShifted(ignorePunctuation);
    return rbc;
}