Hive 的 distribute by 如何对 long 型的数据进行分区（partition）？Hive 计算 key 哈希值的代码如下：
public static int hashCode(Object o, ObjectInspector objIns) { if (o == null) { return 0; } switch (objIns.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objIns); switch (poi.getPrimitiveCategory()) { case VOID: return 0; case BOOLEAN: return ((BooleanObjectInspector) poi).get(o) ? 1 : 0; case BYTE: return ((ByteObjectInspector) poi).get(o); case SHORT: return ((ShortObjectInspector) poi).get(o); case INT: return ((IntObjectInspector) poi).get(o); case LONG: { long a = ((LongObjectInspector) poi).get(o); return (int) ((a >>> 32) ^ a); } case FLOAT: return Float.floatToIntBits(((FloatObjectInspector) poi).get(o)); case DOUBLE: { // This hash function returns the same result as Double.hashCode() // while DoubleWritable.hashCode returns a different result. long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o)); return (int) ((a >>> 32) ^ a); } case STRING: { // This hash function returns the same result as String.hashCode() when // all characters are ASCII, while Text.hashCode() always returns a // different result. Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o); int r = 0; for (int i = 0; i < t.getLength(); i++) { r = r * 31 + t.getBytes()[i]; } return r; } case TIMESTAMP: TimestampWritable t = ((TimestampObjectInspector) poi) .getPrimitiveWritableObject(o); return t.hashCode(); default: { throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory()); } } } case STRUCT: case LIST: case MAP: case UNION: default: throw new RuntimeException( "Hash code on complex types not supported yet."); } }
Hive 的 Partitioner 是 DefaultHivePartitioner：
/**
 * Selects a bucket for {@code key} based on its {@link Object#hashCode()}.
 *
 * @param key the key whose hash code determines the bucket
 * @param value not read by this implementation
 * @param numBuckets the divisor applied to the masked hash code
 * @return the masked hash code of {@code key} modulo {@code numBuckets}
 */
public int getBucket(K2 key, V2 value, int numBuckets) {
  // Masking with Integer.MAX_VALUE clears the sign bit, so the dividend is
  // never negative.
  final int nonNegativeHash = key.hashCode() & Integer.MAX_VALUE;
  return nonNegativeHash % numBuckets;
}
写了个 Java 程序测试一下，发现 3591111568 这个 id 的数据确实被分到了 reduce 0：
// Reproduce the bucket computation for the id 3591111568 with a divisor of 20.
final long id = 3591111568L;
// XOR the upper 32 bits into the lower 32 bits, then truncate to int.
final long folded = (id >>> 32) ^ id;
final int hash = (int) folded;
// Clear the sign bit before taking the remainder so the result is non-negative.
final int nonNegativeHash = hash & Integer.MAX_VALUE;
System.out.println(nonNegativeHash % 20);
所以如果要达到用户的目的，那么需要改成 distribute by sellerId % 20。