forked from gunnarmorling/1brc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
hundredwatt - 2nd submission (gunnarmorling#338)
* check full hash before comparing * implement merykitty suggestions to simplify temperature masking; required refactoring to little-endian * standalone script for offline Perfect Hash seed searching * stop using an oversized hash table --------- Co-authored-by: Jason Nochlin <hundredwatt@users.noreply.github.com>
- Loading branch information
1 parent
80c5c2e
commit 6181996
Showing
2 changed files
with
195 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
170 changes: 170 additions & 0 deletions
170
src/main/java/dev/morling/onebrc/PerfectHashSearch_hundredwatt.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
/* | ||
* Copyright 2023 The original authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package dev.morling.onebrc; | ||
|
||
import java.io.File; | ||
import java.io.FileWriter; | ||
import java.io.IOException; | ||
import java.nio.ByteBuffer; | ||
import java.security.NoSuchAlgorithmException; | ||
import java.security.SecureRandom; | ||
import java.util.BitSet; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.concurrent.ExecutorService; | ||
import java.util.concurrent.Executors; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.atomic.AtomicLong; | ||
import java.util.random.RandomGenerator; | ||
import java.util.random.RandomGeneratorFactory; | ||
|
||
/** Offline script used to find the perfect hash seed for CalculateAverage_hundredwatt. */ | ||
public class PerfectHashSearch_hundredwatt { | ||
public static final int DESIRED_SLOTS = 5003; | ||
public static final int N_THREADS = Runtime.getRuntime().availableProcessors() - 1; | ||
|
||
public static void main(String[] args) throws IOException, InterruptedException { | ||
AtomicLong magicSeed = new AtomicLong(0); | ||
AtomicLong totalAttempts = new AtomicLong(0); | ||
AtomicLong maxCardinality = new AtomicLong(0); | ||
|
||
long start = System.currentTimeMillis(); | ||
|
||
System.out.println("Searching for perfect hash seed for " + DESIRED_SLOTS + " slots"); | ||
|
||
// Figure out encoding for all possible temperature values (1999 total) | ||
Map<Long, Short> decodeTemperatureMap = new HashMap<>(); | ||
for (short i = -999; i <= 999; i++) { | ||
long word = 0; | ||
int shift = 0; | ||
if (i < 0) { | ||
word |= ((long) '-') << shift; | ||
shift += 8; | ||
} | ||
if (Math.abs(i) >= 100) { | ||
int hh = Math.abs(i) / 100; | ||
int tt = (Math.abs(i) - hh * 100) / 10; | ||
|
||
word |= ((long) (hh + '0')) << shift; | ||
shift += 8; | ||
word |= ((long) (tt + '0')) << shift; | ||
} | ||
else { | ||
int tt = Math.abs(i) / 10; | ||
// convert to ascii | ||
word |= ((long) (tt + '0')) << shift; | ||
} | ||
shift += 8; | ||
word |= ((long) '.') << shift; | ||
shift += 8; | ||
int uu = Math.abs(i) % 10; | ||
word |= ((long) (uu + '0')) << shift; | ||
|
||
// 31302e3000000000 | ||
decodeTemperatureMap.put(word, i); | ||
} | ||
|
||
ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); | ||
|
||
RandomGeneratorFactory factory = RandomGeneratorFactory.of("L64X256MixRandom"); | ||
|
||
Runnable search = () -> { | ||
// Brute force to find seed: | ||
// generate a cryptographically secure random seed | ||
RandomGenerator rand; | ||
try { | ||
byte[] seed = new byte[16]; | ||
SecureRandom.getInstanceStrong().nextBytes(seed); | ||
rand = factory.create(ByteBuffer.wrap(seed).getLong()); | ||
System.out.println(Thread.currentThread().getName() + " | Using seed: " + rand.nextLong()); | ||
} | ||
catch (NoSuchAlgorithmException e) { | ||
throw new RuntimeException(e); | ||
} | ||
|
||
int max = 0; | ||
int attempts = 0; | ||
while (true) { | ||
BitSet bs = new BitSet(DESIRED_SLOTS); | ||
var seed = rand.nextLong(); | ||
seed |= 0b1; // make sure it's odd | ||
for (var word : decodeTemperatureMap.keySet()) { | ||
var h = (word * seed) & ~(1L << 63); | ||
var pos = (int) (h % DESIRED_SLOTS); | ||
bs.set(pos); | ||
} | ||
var c = bs.cardinality(); | ||
if (c == decodeTemperatureMap.size()) { | ||
System.out.println("FOUND seed: " + seed + " cardinality: " + c + " max cardinality: " + max); | ||
magicSeed.set(seed); | ||
return; | ||
} | ||
max = Math.max(max, c); | ||
if (attempts % 100_000 == 0) { | ||
if (magicSeed.get() != 0) | ||
return; | ||
int finalMax = max; | ||
long currentMaxCardinality = maxCardinality.updateAndGet(currentMax -> Math.max(currentMax, finalMax)); | ||
long currentTotalAttempts = totalAttempts.addAndGet(100_000); | ||
|
||
if (Thread.currentThread().getName().endsWith("-1")) | ||
System.out.println(Thread.currentThread().getName() + " | max cardinality: " + currentMaxCardinality + " attempts: " | ||
+ String.format("%,d", currentTotalAttempts)); | ||
} | ||
attempts++; | ||
} | ||
}; | ||
|
||
for (int i = 0; i < Runtime.getRuntime().availableProcessors(); i++) { | ||
executor.submit(search); | ||
} | ||
|
||
// Wait for the search to complete | ||
executor.shutdown(); | ||
executor.awaitTermination(1, TimeUnit.DAYS); | ||
|
||
short[] TEMPERATURES = new short[DESIRED_SLOTS]; | ||
long seed = magicSeed.get(); | ||
|
||
decodeTemperatureMap.entrySet().stream().forEach(e -> { | ||
var word = e.getKey(); | ||
var h = (word * seed) & ~(1L << 63); | ||
var pos = (int) (h % DESIRED_SLOTS); | ||
if (TEMPERATURES[pos] != 0) | ||
throw new RuntimeException("collision at " + pos); | ||
TEMPERATURES[pos] = e.getValue(); | ||
}); | ||
System.out.println("SUCCESS seed: " + seed + " total attempts: " + totalAttempts.get()); | ||
|
||
try { | ||
File file = new File("seeds.txt"); | ||
file.delete(); | ||
file.createNewFile(); | ||
|
||
// Write the seed to seeds.txt | ||
FileWriter myWriter = new FileWriter("seeds.txt"); | ||
myWriter.write(Long.toString(seed)); | ||
myWriter.write("\n"); | ||
myWriter.close(); | ||
|
||
} | ||
catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
|
||
System.out.println("Search took " + ((System.currentTimeMillis() - start) / 1000) + "s"); | ||
} | ||
} |