Programming Languages
OpenSubtitles.org uses a special hash function to match subtitle files against movie files. The hash does not depend on the file name of the movie file. Read about the basics of hashing functions.
The hash code is based on Media Player Classic. In natural language it calculates: size + 64-bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). On opensubtitles.org the movie file size is limited to 9000000000 > $moviebytesize > 131072 bytes; if there is any reason to change these limits, let us know. The licence of the hashing source code is GPL. The source code was tested on little-endian machines - DEC, Intel and compatible.
Important: there might be cases when your calculated hash is shorter than 16 characters, so make sure you pad it with leading zeros - some of the source codes don't implement this ('101eae5380a4769' => '0101eae5380a4769').
Feel free to edit/add source code if you have a faster/better implementation. Also don't forget to check that the hash is right for the test files. Please test these 2 files to ensure your algorithm is completely OK (otherwise you can poison the database, and nobody wants that):
- AVI file (12 909 756 bytes)
- hash: 8e245d9679d31e12
- DUMMY RAR file (2 565 922 bytes, 4 295 033 890 after RAR unpacking, test on UNPACKED file)
- hash: 61f7751fc2a72bfb (for UNPACKED file)
C
/*
 * OpenSubtitles / Media Player Classic movie hash:
 *   hash = file size + sum of the first 64 KiB + sum of the last 64 KiB,
 * where each 64 KiB chunk is summed as 8192 64-bit words and all arithmetic
 * wraps modulo 2^64.
 *
 * The words are read in host byte order, so this only yields the correct
 * hash on little-endian machines.
 */
#define _FILE_OFFSET_BITS 64 /* 64-bit off_t on 32-bit POSIX systems */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>

/* 64-bit capable seek/tell: plain fseek/ftell take a long, which is only
 * 32 bits wide on Windows and therefore breaks on files larger than 2 GiB. */
#ifdef _WIN32
typedef __int64 hash_off_t;
#define hash_fseek _fseeki64
#define hash_ftell _ftelli64
#else
#include <sys/types.h>
typedef off_t hash_off_t;
#define hash_fseek fseeko
#define hash_ftell ftello
#endif

#define HASH_CHUNK_SIZE 65536

/* Adds up to 64 KiB of 64-bit words, read from the current position, to *hash.
 * Stops early at end of file; an incomplete trailing word is ignored. */
static void add_chunk(FILE *handle, uint64_t *hash)
{
    uint64_t tmp = 0;
    size_t i;

    for (i = 0; i < HASH_CHUNK_SIZE / sizeof(tmp)
                && fread(&tmp, sizeof(tmp), 1, handle) == 1; i++)
        *hash += tmp;
}

/*
 * Computes the movie hash of an already opened (binary mode) file.
 * The file position is changed by this call.
 */
uint64_t compute_hash(FILE *handle)
{
    uint64_t hash, fsize;
    hash_off_t end;

    hash_fseek(handle, 0, SEEK_END);
    end = hash_ftell(handle);
    fsize = end > 0 ? (uint64_t)end : 0;

    hash = fsize;

    hash_fseek(handle, 0, SEEK_SET);
    add_chunk(handle, &hash);

    /* fsize is unsigned: "fsize - 65536" would wrap around for small files,
     * so compare before subtracting instead of using MAX(0, ...). */
    hash_fseek(handle,
               fsize > HASH_CHUNK_SIZE ? (hash_off_t)(fsize - HASH_CHUNK_SIZE) : 0,
               SEEK_SET);
    add_chunk(handle, &hash);

    return hash;
}

/* Hashes the file named on the command line (default: breakdance.avi). */
int main(int argc, char *argv[])
{
    const char *path = argc > 1 ? argv[1] : "breakdance.avi";
    FILE *handle;
    uint64_t myhash;

    handle = fopen(path, "rb");

    if (!handle) {
        printf("Error openning file!");
        return 1;
    }

    myhash = compute_hash(handle);
    /* always 16 hex digits, zero padded */
    printf("%016" PRIx64 "\n", myhash);

    fclose(handle);
    return 0;
}
C - Public Domain License
#define _FILE_OFFSET_BITS 64 /* 64-bit off_t on 32-bit POSIX systems */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* 64-bit capable seek/tell (long is only 32 bits wide on Windows). */
#ifdef _WIN32
typedef __int64 oshash_off_t;
#define oshash_fseek _fseeki64
#define oshash_ftell _ftelli64
#else
#include <sys/types.h>
typedef off_t oshash_off_t;
#define oshash_fseek fseeko
#define oshash_ftell ftello
#endif

unsigned long long analizefileOSHahs(char *fileName)
{
    /*
     * Public Domain implementation by Kamil Dziobek. turbos11(at)gmail.com
     * This code implements Gibest hash algorithm first use in Media Player Classics
     * For more implementation(various languages and authors) see:
     * http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
     *
     * -works only on little-endian procesor DEC, Intel and compatible
     * -sizeof(unsigned long long) must be 8
     *
     * Returns the hash, or 0 when the file cannot be opened.
     */
    FILE *file;
    int i;
    oshash_off_t size;
    unsigned long long t1 = 0;
    /* static: keeps 128 KiB off the stack */
    static unsigned long long buffer1[8192 * 2];

    file = fopen(fileName, "rb");
    if (file == NULL)
        return 0;

    /* Zero the buffer so that short reads (small files) add nothing. */
    memset(buffer1, 0, sizeof(buffer1));

    oshash_fseek(file, 0, SEEK_END);
    size = oshash_ftell(file);
    if (size < 0)
        size = 0;

    /* first 64 KiB */
    oshash_fseek(file, 0, SEEK_SET);
    fread(buffer1, 8, 8192, file);

    /* last 64 KiB; start at 0 when the file is shorter than that */
    oshash_fseek(file, size > 65536 ? size - 65536 : 0, SEEK_SET);
    fread(&buffer1[8192], 8, 8192, file);

    for (i = 0; i < 8192 * 2; i++)
        t1 += buffer1[i];

    t1 += (unsigned long long)size; /* add filesize */
    fclose(file);
    return t1;
}

int main(int argc, char *argv[])
{
    unsigned long long myhash = analizefileOSHahs("C://tomaszkokowskizoofiliamovies.avi");
    (void)argc;
    (void)argv;
    /* zero padded to the full 16 hex digits */
    printf("hash is %016llx", myhash);
    return 0;
}
C++
#include <iostream>
#include <fstream>
#include <iomanip>
#include <stdint.h>

using namespace std;

// Kept for source compatibility. It is no longer used for file offsets,
// because its int parameters truncate 64-bit values.
int MAX(int x, int y)
{
    if ((x) > (y))
        return x;
    else
        return y;
}

// Computes the OpenSubtitles movie hash of an open binary stream:
// file size + sum of the 64-bit words of the first 64 KiB + sum of the
// 64-bit words of the last 64 KiB, wrapping modulo 2^64.
// Words are read in host byte order (correct on little-endian machines only).
uint64_t compute_hash(ifstream& f)
{
    const uint64_t chunk = 65536;
    uint64_t hash, fsize;
    uint64_t tmp = 0;
    size_t i;

    f.clear();
    f.seekg(0, ios::end);
    streamoff end = f.tellg();
    fsize = end > 0 ? static_cast<uint64_t>(end) : 0;
    f.seekg(0, ios::beg);

    hash = fsize;
    for (i = 0; i < chunk / sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++)
        hash += tmp;

    // A file shorter than 64 KiB hits EOF above; reset the state so the
    // following seek is not ignored.
    f.clear();
    // fsize is unsigned, so compare before subtracting to avoid wrap-around.
    f.seekg(fsize > chunk ? static_cast<streamoff>(fsize - chunk) : 0, ios::beg);
    for (tmp = 0, i = 0; i < chunk / sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++)
        hash += tmp;

    return hash;
}

int main(int argc, char *argv[])
{
    ifstream f;
    uint64_t myhash;

    (void)argc;
    (void)argv;

    f.open("c:\\test.avi", ios::in | ios::binary | ios::ate);
    if (!f.is_open()) {
        cerr << "Error opening file" << endl;
        return 1;
    }

    myhash = compute_hash(f);
    cout << setw(16) << setfill('0') << hex << myhash;

    f.close();
    return 0;
}
About C and C++ implementation
These implementations only work on little-endian processors: DEC, Intel and compatible
Java
/** * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit * checksum of the first and last 64k (even if they overlap because the file is smaller than * 128k). */ public class OpenSubtitlesHasher { /** * Size of the chunks that will be hashed in bytes (64 KB) */ private static final int HASH_CHUNK_SIZE = 64 * 1024; public static String computeHash(File file) throws IOException { long size = file.length(); long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size); FileChannel fileChannel = new FileInputStream(file).getChannel(); try { long head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile)); long tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile)); return String.format("%016x", size + head + tail); } finally { fileChannel.close(); } } public static String computeHash(InputStream stream, long length) throws IOException { int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length); // buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)]; DataInputStream in = new DataInputStream(stream); // first chunk in.readFully(chunkBytes, 0, chunkSizeForFile); long position = chunkSizeForFile; long tailChunkPosition = length - chunkSizeForFile; // seek to position of the tail chunk, or not at all if length is smaller than two chunks while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0); // second chunk, or the rest of the data if length is smaller than two chunks in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile); long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile)); long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile)); return String.format("%016x", 
length + head + tail); } private static long computeHashForChunk(ByteBuffer buffer) { LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer(); long hash = 0; while (longBuffer.hasRemaining()) { hash += longBuffer.get(); } return hash; } }
C#
You can use GetHash.dll.
http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll
Use Example:
// FileOk handler: hashes the file picked in the dialog with GetHash.dll and
// shows the hexadecimal hash in label1.
private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
{
    string selectedFile = openFileDialog1.FileName;
    label1.Text = GetHash.Main.ToHexadecimal(GetHash.Main.ComputeHash(selectedFile));
}
or without using GetHash.dll:
using System;
using System.Text;
using System.IO;

namespace MovieHasher
{
    class Program
    {
        // Size of the head and tail chunks that are summed into the hash.
        private const int ChunkSize = 65536;

        // Opens the file and returns its movie hash as 8 bytes, most significant byte first.
        private static byte[] ComputeMovieHash(string filename)
        {
            byte[] result;
            using (Stream input = File.OpenRead(filename))
            {
                result = ComputeMovieHash(input);
            }
            return result;
        }

        // Computes size + sum of the first 64 KiB + sum of the last 64 KiB (as 64-bit words,
        // wrapping on overflow). The stream must be seekable; it is closed before returning.
        // BitConverter uses host byte order, so the result is only correct on little-endian hosts.
        private static byte[] ComputeMovieHash(Stream input)
        {
            long streamsize = input.Length;
            long lhash = streamsize;
            byte[] buffer = new byte[sizeof(long)];

            lhash = AddChunk(input, buffer, lhash);

            input.Position = Math.Max(0, streamsize - ChunkSize);
            lhash = AddChunk(input, buffer, lhash);

            input.Close();
            byte[] result = BitConverter.GetBytes(lhash);
            Array.Reverse(result);
            return result;
        }

        // Adds up to 64 KiB of 64-bit words from the current position to hash.
        private static long AddChunk(Stream input, byte[] buffer, long hash)
        {
            for (int i = 0; i < ChunkSize / sizeof(long) && ReadFully(input, buffer); i++)
            {
                // unchecked: the sum is meant to wrap, even when the project is compiled
                // with arithmetic overflow checking enabled.
                hash = unchecked(hash + BitConverter.ToInt64(buffer, 0));
            }
            return hash;
        }

        // Fills the whole buffer. Stream.Read may return fewer bytes than requested, so keep
        // reading; returns false when the stream ends first (a partial word is ignored).
        private static bool ReadFully(Stream input, byte[] buffer)
        {
            int filled = 0;
            while (filled < buffer.Length)
            {
                int n = input.Read(buffer, filled, buffer.Length - filled);
                if (n <= 0)
                {
                    return false;
                }
                filled += n;
            }
            return true;
        }

        // Formats the bytes as lower-case hex, two digits per byte.
        private static string ToHexadecimal(byte[] bytes)
        {
            StringBuilder hexBuilder = new StringBuilder();
            for (int i = 0; i < bytes.Length; i++)
            {
                hexBuilder.Append(bytes[i].ToString("x2"));
            }
            return hexBuilder.ToString();
        }

        static void Main(string[] args)
        {
            byte[] moviehash = ComputeMovieHash(@"C:\test.avi");
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash));
        }
    }
}
If you get overflow error read this.
VB.Net
Imports System
Imports System.Text
Imports System.IO

' Note: the additions below are written so that they never overflow, so the
' project does not need integer overflow checking removed.
Namespace MovieHasher
    Class Program
        ' Size of the head and tail chunks that are summed into the hash.
        Private Const ChunkSize As Integer = 65536
        ' Size in bytes of one 64-bit word.
        Private Const WordSize As Integer = 8

        ' Opens the file and returns its movie hash as 8 bytes, most significant byte first.
        Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte()
            Dim result As Byte()
            Using input As Stream = File.OpenRead(filename)
                result = ComputeMovieHash(input)
            End Using
            Return result
        End Function

        ' Computes size + sum of the first 64 KiB + sum of the last 64 KiB (as 64-bit words,
        ' wrapping modulo 2^64). The stream must be seekable; it is closed before returning.
        ' Shared, because it is called from the Shared overload above.
        Private Shared Function ComputeMovieHash(ByVal input As Stream) As Byte()
            Dim streamsize As Long = input.Length
            Dim lhash As ULong = CULng(streamsize)
            Dim buffer As Byte() = New Byte(WordSize - 1) {}

            lhash = AddChunk(input, buffer, lhash)

            input.Position = Math.Max(0L, streamsize - ChunkSize)
            lhash = AddChunk(input, buffer, lhash)

            input.Close()
            Dim result As Byte() = BitConverter.GetBytes(lhash)
            Array.Reverse(result)
            Return result
        End Function

        ' Adds up to 64 KiB of 64-bit words from the current position to hash.
        Private Shared Function AddChunk(ByVal input As Stream, ByVal buffer As Byte(), ByVal hash As ULong) As ULong
            Dim i As Integer = 0
            While i < ChunkSize \ WordSize AndAlso ReadFully(input, buffer)
                i += 1
                hash = AddWrapping(hash, BitConverter.ToUInt64(buffer, 0))
            End While
            Return hash
        End Function

        ' Fills the whole buffer; returns False when the stream ends first.
        Private Shared Function ReadFully(ByVal input As Stream, ByVal buffer As Byte()) As Boolean
            Dim filled As Integer = 0
            While filled < buffer.Length
                Dim n As Integer = input.Read(buffer, filled, buffer.Length - filled)
                If n <= 0 Then
                    Return False
                End If
                filled += n
            End While
            Return True
        End Function

        ' 64-bit addition modulo 2^64, done on 32-bit halves so that no intermediate
        ' value can overflow even with overflow checking enabled.
        Private Shared Function AddWrapping(ByVal a As ULong, ByVal b As ULong) As ULong
            Dim lo As ULong = (a And &HFFFFFFFFUL) + (b And &HFFFFFFFFUL)
            Dim hi As ULong = ((a >> 32) + (b >> 32) + (lo >> 32)) And &HFFFFFFFFUL
            Return (hi << 32) Or (lo And &HFFFFFFFFUL)
        End Function

        ' Formats the bytes as lower-case hex, two digits per byte.
        Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String
            Dim hexBuilder As New StringBuilder()
            For i As Integer = 0 To bytes.Length - 1
                hexBuilder.Append(bytes(i).ToString("x2"))
            Next
            Return hexBuilder.ToString()
        End Function

        Private Shared Sub Main(ByVal args As String())
            Dim moviehash As Byte() = ComputeMovieHash("C:\test.avi")
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash))
        End Sub
    End Class
End Namespace
Python
import os
import struct


def hashFile(name):
    """Return the OpenSubtitles movie hash of the file ``name``.

    The hash is the file size plus the sum of the little-endian 64-bit words
    of the first and the last 64 KiB of the file, modulo 2**64, formatted as
    16 zero-padded lower-case hex digits.

    Returns the string "SizeError" when the file is smaller than 128 KiB and
    "IOError" when the file cannot be opened or read.
    """
    chunk_size = 65536
    word_size = struct.calcsize('<Q')  # 8 bytes, little-endian unsigned
    words_per_chunk = chunk_size // word_size  # integer division (Python 3 safe)
    chunk_format = '<%dQ' % words_per_chunk

    try:
        with open(name, "rb") as f:
            filesize = os.path.getsize(name)
            if filesize < chunk_size * 2:
                return "SizeError"

            hash_value = filesize
            # head chunk, then tail chunk
            for offset in (0, max(0, filesize - chunk_size)):
                f.seek(offset, 0)
                words = struct.unpack(chunk_format, f.read(chunk_size))
                # mask to remain a 64-bit number
                hash_value = (hash_value + sum(words)) & 0xFFFFFFFFFFFFFFFF

        return "%016x" % hash_value
    except (IOError, OSError):
        return "IOError"
Delphi
This is just a quick conversion of Gabest's original C code. Anyone who can come up with a cleaner code, please feel free to do so and post here.
{$IFOPT Q+}{$DEFINE GABEST_OVERFLOW_WAS_ON}{$Q-}{$ENDIF}
// Returns the OpenSubtitles/Gabest hash of the file as 16 hex digits, or an
// empty string when the file does not exist.
// The hash is: file size + sum of the 64-bit words of the first 64 KiB
// + sum of the 64-bit words of the last 64 KiB, wrapping modulo 2^64
// (overflow checking is switched off around this function for that reason).
function CalcGabestHash(const fname: string): string;
const
  ChunkSize = 65536;
  WordsPerChunk = ChunkSize div SizeOf(Int64);
var
  i       : integer;
  tmp     : Int64;   // read directly as Int64: "array of char" is 16 bytes on Unicode Delphi
  hash    : Int64;
  size    : Int64;
  aStream : TFileStream;
begin
  result := '';
  if not FileExists(fname) then Exit;

  aStream := TFileStream.Create(fName, fmOpenRead or fmShareDenyNone);
  try
    size := aStream.Size;
    hash := size;

    // first 64 KiB; stop at end of file, ignoring an incomplete trailing word
    i := 0;
    while (i < WordsPerChunk) and (aStream.Read(tmp, SizeOf(tmp)) = SizeOf(tmp)) do
    begin
      hash := hash + tmp;
      i := i + 1;
    end;

    // last 64 KiB; start at the beginning when the file is shorter than that
    if size > ChunkSize then
      aStream.Position := size - ChunkSize
    else
      aStream.Position := 0;

    i := 0;
    while (i < WordsPerChunk) and (aStream.Read(tmp, SizeOf(tmp)) = SizeOf(tmp)) do
    begin
      hash := hash + tmp;
      i := i + 1;
    end;
  finally
    aStream.Free;
  end;

  result := Format('%.16x',[hash]);
end;
{$IFDEF GABEST_OVERFLOW_WAS_ON}{$Q+}{$UNDEF GABEST_OVERFLOW_WAS_ON}{$ENDIF}
alternate version by TRP
// Returns the Gabest/OpenSubtitles hash of a seekable stream:
// stream size + sum of the Int64 words of the first 64 KiB + sum of the
// Int64 words of the last 64 KiB. Returns 0 (an invalid hash) when the
// stream is shorter than 64 KiB.
// Use "IntToHex(result, 16);" to get a string and "StrToInt64('$' + hash);"
// to get your Int64 back.
function CalcGabestHash(const Stream: TStream): Int64; overload;
const HashPartSize = 1 shl 16; // 64 KiB

  // Reads exactly 64 KiB from the current position and adds every Int64 word to Hash.
  procedure UpdateHashFromStream(const Stream: TStream; var Hash: Int64); inline;
  var buffer: Array[0..HashPartSize div SizeOf(Int64) - 1] of Int64;
      i     : integer;
  begin
    Stream.ReadBuffer(buffer[0], SizeOf(buffer));
    for i := Low(buffer) to High(buffer) do
      Inc(Hash, buffer[i]);
  end;

begin
  result:= Stream.Size;

  if result < HashPartSize then
  begin
    // stream too small return invalid hash
    result:= 0;
    exit;
  end;

  // first 64 KiB
  Stream.Position:= 0;
  UpdateHashFromStream(Stream, result);

  // last 64 KiB
  Stream.Seek(-HashPartSize, soEnd);
  UpdateHashFromStream(Stream, result);
end;
// File-name overload: opens the file read-only (other writers denied), hashes it
// with the stream overload and always releases the stream again.
function CalcGabestHash(const FileName: TFileName): Int64; overload;
var
  source: TStream;
begin
  source := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    Result := CalcGabestHash(source);
  finally
    source.Free;
  end;
end;
Lua
-- will produce a correct hash regardless of architecture (big vs little endian)
--
-- movieHash(fileName) returns two values: the hash as 16 hex digits and the
-- file size. The 64-bit sum is kept as two 32-bit halves (hi, lo) so that it
-- also works where Lua numbers are doubles. Raises an error (with the reason
-- reported by io.open) when the file cannot be opened.
local function movieHash(fileName)
  local fil = assert(io.open(fileName, "rb"))
  local lo, hi = 0, 0

  -- carries overflow from lo into hi and wraps hi at 2^32
  local function normalize()
    while lo>=4294967296 do
      lo = lo-4294967296
      hi = hi+1
    end
    while hi>=4294967296 do
      hi = hi-4294967296
    end
  end

  -- reads one little-endian 32-bit value, or nil at (or just before) end of file
  local function readUInt32()
    local bytes = fil:read(4)
    if not bytes or #bytes < 4 then
      return nil
    end
    local a,b,c,d = bytes:byte(1,4)
    return a + b*256 + c*65536 + d*16777216
  end

  -- adds up to 64 KiB of 64-bit words from the current position
  local function addChunk()
    for i=1,8192 do
      local low = readUInt32()
      local high = low and readUInt32()
      if not high then
        break -- end of file; an incomplete word is ignored
      end
      lo = lo + low
      hi = hi + high
      normalize()
    end
  end

  local size = fil:seek("end")

  fil:seek("set", 0)
  addChunk()

  -- last 64 KiB; start at 0 when the file is shorter than that
  fil:seek("set", math.max(0, size - 65536))
  addChunk()

  lo = lo + size
  normalize()

  fil:close()
  return string.format("%08x%08x", hi,lo), size
end

print("breakdance.avi:")
print(movieHash("breakdance.avi"))
print("8e245d9679d31e12 <- should be")
print("")
print("dummy.rar:")
print(movieHash("dummy.rar"))
print("61f7751fc2a72bfb <- should be according to wiki")
print("2a527d74d45f5b1b <- what other hash tools actually report")
RealBasic/Xojo
Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles
// Computes "myhash" for the FolderItem f:
//  - routine = "video":    OpenSubtitles hash (size + sum of the little-endian
//                          64-bit words of the first and last 64 KiB), only for
//                          files of at least 65536 bytes
//  - routine = "subtitle": plain MD5 of the whole file as lower-case hex
// f, routine, bytesizestr and myhash are declared outside this fragment.
dim b as BinaryStream
dim mb as MemoryBlock
dim hash,bytesize as UINT64
dim i, x, chunksize, filelen, difference as integer
hash = 0 //Reset Hash
difference = 0
if f <> nil and f.Exists then
  b= f.OpenAsBinaryFile
  hash = b.Length
  bytesize = b.Length
  bytesizestr = str(bytesize)
  if bytesize >= 65536 and routine = "video" then
    chunksize = 65536
    // first 64 KiB
    mb = b.Read(65536)
    mb.LittleEndian = True
    for i= 0 to chunksize -1 step 8
      hash = hash+ mb.UINT64Value(i)
    next
    // last 64 KiB
    b.Position = max(b.Length-chunksize, 0)
    mb= b.Read(chunksize)
    mb.LittleEndian = True
    for i= 0 to chunksize -1 step 8
      hash = hash+ mb.UINT64Value(i)
    next
    // always 16 hex digits: pad with leading zeros
    myhash = Lowercase(right("0000000000000000" + hex(hash), 16))
  elseif routine = "subtitle" then
    dim c,result as string
    mb = md5(b.Read(b.Length))
    mb.LittleEndian = True
    for i = 0 to mb.size-1
      x = mb.byte( i )
      c = right( "00"+hex( x ), 2 )
      result = result + c
    next
    result = lowercase( result )
    myhash = result
  end if
  // release the file handle
  b.Close
end if
PHP 4/5
/**
 * Computes the OpenSubtitles movie hash of a file.
 *
 * The 64-bit hash is kept as four 16-bit words (index 0 = least significant)
 * so that it also works where PHP integers are 32 bits wide.
 *
 * @param string $file path of the movie file
 * @return string|false 16 lower-case hex digits, or false if the file cannot be opened
 */
function OpenSubtitlesHash($file)
{
    $handle = fopen($file, "rb");
    if ($handle === false)
    {
        return false;
    }
    $fsize = filesize($file);

    // Start with the file size. The two upper words are only available (and
    // only non-zero) with 64-bit integers; without them files larger than
    // 4 GiB would get a wrong hash.
    $hash = array(3 => PHP_INT_SIZE > 4 ? ($fsize >> 48) & 0xFFFF : 0,
                  2 => PHP_INT_SIZE > 4 ? ($fsize >> 32) & 0xFFFF : 0,
                  1 => ($fsize >> 16) & 0xFFFF,
                  0 => $fsize & 0xFFFF);

    // first 64 KiB
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }

    // last 64 KiB
    $offset = $fsize - 65536;
    fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET);

    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }

    fclose($handle);
    return UINT64FormatHex($hash);
}
/**
 * Reads one little-endian 64-bit value from $handle.
 *
 * @param resource $handle open file handle
 * @return array four 16-bit words, index 0 = least significant; all zero when
 *               fewer than 8 bytes are left (an incomplete word is ignored)
 */
function ReadUINT64($handle)
{
    $bytes = fread($handle, 8);
    if ($bytes === false || strlen($bytes) < 8)
    {
        // unpack() would fail on a short read; contribute nothing instead
        return array(0 => 0, 1 => 0, 2 => 0, 3 => 0);
    }
    $u = unpack("va/vb/vc/vd", $bytes);
    return array(0 => $u["a"], 1 => $u["b"], 2 => $u["c"], 3 => $u["d"]);
}
/**
 * Adds two 64-bit values given as four 16-bit words each (index 0 = least
 * significant). The carry out of the top word is dropped, so the sum wraps.
 *
 * @param array $a first operand
 * @param array $b second operand
 * @return array the sum, in the same four-word layout
 */
function AddUINT64($a, $b)
{
    $sum = array(0, 0, 0, 0);
    $carry = 0;

    foreach (array(0, 1, 2, 3) as $i)
    {
        $word = $a[$i] + $b[$i] + $carry;
        $overflowed = $word > 0xffff;

        $sum[$i] = $overflowed ? ($word & 0xffff) : $word;
        $carry = $overflowed ? 1 : 0;
    }

    return $sum;
}
/**
 * Formats a four-word 64-bit value (index 0 = least significant word) as
 * 16 lower-case hex digits, most significant word first.
 */
function UINT64FormatHex($n)
{
    $hex = '';
    for ($i = 3; $i >= 0; $i--)
    {
        $hex .= sprintf("%04x", $n[$i]);
    }
    return $hex;
}
Perl
#!/usr/bin/perl
use strict;
use warnings;

print OpenSubtitlesHash('breakdance.avi');

# Returns the OpenSubtitles movie hash of a file as 16 lower-case hex digits.
# The 64-bit hash is kept as four 16-bit words (index 0 = least significant)
# so that it also works on perls without 64-bit integers.
sub OpenSubtitlesHash {
    my $filename = shift or die("Need video filename");

    open my $handle, "<", $filename or die $!;
    binmode $handle;

    my $fsize = -s $filename;

    # Start with the file size. The upper two words are taken by division, which
    # also works without 64-bit integers; leaving them at 0 would give a wrong
    # hash for files larger than 4 GiB.
    my $hash = [$fsize & 0xFFFF,
                ($fsize >> 16) & 0xFFFF,
                int($fsize / 4294967296) & 0xFFFF,
                int($fsize / 281474976710656) & 0xFFFF];

    # first 64 KiB
    $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

    # last 64 KiB
    my $offset = $fsize - 65536;
    seek($handle, $offset > 0 ? $offset : 0, 0) or die $!;

    $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

    close $handle or die $!;
    return UINT64FormatHex($hash);
}

# Reads one little-endian 64-bit value as four 16-bit words; returns zeros when
# fewer than 8 bytes are left (an incomplete word is ignored).
sub ReadUINT64 {
    my $got = read($_[0], my $u, 8);
    return [0, 0, 0, 0] unless defined $got && $got == 8;
    return [unpack("vvvv", $u)];
}

# Adds two four-word values; the carry out of the top word is dropped.
sub AddUINT64 {
    my $o = [0,0,0,0];
    my $carry = 0;
    for my $i (0..3) {
        if (($_[0]->[$i] + $_[1]->[$i] + $carry) > 0xffff ) {
            $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry) & 0xffff;
            $carry = 1;
        } else {
            $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry);
            $carry = 0;
        }
    }
    return $o;
}

# Formats a four-word value as 16 hex digits, most significant word first.
sub UINT64FormatHex {
    return sprintf("%04x%04x%04x%04x", $_[0]->[3], $_[0]->[2], $_[0]->[1], $_[0]->[0]);
}
Ruby
This is a quick translation/transliteration of the Perl script.
class Hasher def open_subtitles_hash(filename) raise "Need video filename" unless filename fh = File.open(filename) fsize = File.size(filename) hash = [fsize & 0xffff, (fsize >> 16) & 0xffff, 0, 0] 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } offset = fsize - 65536 fh.seek([0,offset].max, 0) 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } fh.close return uint_64_format_hex(hash) end def read_uint_64(stream) stream.read(8).unpack("vvvv") end def add_unit_64(hash, input) res = [0,0,0,0] carry = 0 hash.zip(input).each_with_index do |(h,i),n| sum = h + i + carry if sum > 0xffff res[n] += sum & 0xffff carry = 1 else res[n] += sum carry = 0 end end return res end def uint_64_format_hex(hash) sprintf("%04x%04x%04x%04x", *hash.reverse) end end if __FILE__ == $0 require 'test/unit' class HashTester < Test::Unit::TestCase def setup @h = Hasher.new end def test_test_file_hash assert_equal("8e245d9679d31e12", @h.open_subtitles_hash('breakdance.avi')) end end end
Another more "rubyesque" implementation.
module MovieHasher CHUNK_SIZE = 64 * 1024 # in bytes def self.compute_hash(filename) filesize = File.size(filename) hash = filesize # Read 64 kbytes, divide up into 64 bits and add each # to hash. Do for beginning and end of file. File.open(filename, 'rb') do |f| # Q = unsigned long long = 64 bit f.read(CHUNK_SIZE).unpack("Q*").each do |n| hash = hash + n & 0xffffffffffffffff # to remain as 64 bit number end f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET) # And again for the end of the file f.read(CHUNK_SIZE).unpack("Q*").each do |n| hash = hash + n & 0xffffffffffffffff end end sprintf("%016x", hash) end end if __FILE__ == $0 require 'test/unit' class MovieHasherTest < Test::Unit::TestCase def test_compute_hash assert_equal("8e245d9679d31e12", MovieHasher::compute_hash('breakdance.avi')) end def test_compute_hash_large_file assert_equal("61f7751fc2a72bfb", MovieHasher::compute_hash('dummy.bin')) end end end
Haskell
import IO(bracket)
import System.Environment(getArgs)
import System.IO(openBinaryFile,hClose,hFileSize,hSeek,IOMode(ReadMode),SeekMode(AbsoluteSeek,SeekFromEnd))
import qualified Data.ByteString.Lazy as L(hGet,unpack)
import Data.Binary.Get(runGet,getWord64le)
import Data.Binary.Put(runPut,putWord64le)
import Data.Word(Word64)
import Control.Monad(foldM)
import Data.Bits.Utils(w82s)
import Data.Hex(hex)

-- | Computes the movie hash of a file: the file size plus the sum of the
-- little-endian 64-bit words of the first and of the last 0x10000 bytes.
-- Word64 addition wraps, which gives the required modulo 2^64 arithmetic.
-- The handle is closed by 'bracket' even if reading fails.
shortsum :: FilePath -> IO Word64
shortsum filename = bracket (openBinaryFile filename ReadMode) hClose $ \h -> do
  fs <- hFileSize h
  -- head chunk
  hSeek h AbsoluteSeek 0 ; begin <- L.hGet h chunksize
  -- tail chunk, addressed relative to the end of the file
  hSeek h SeekFromEnd (-(toInteger chunksize)) ; end <- L.hGet h chunksize
  -- start from the file size, fold in the words of `end`, then those of `begin`
  return $ (flip runGet $ begin) $ chunksum $ (flip runGet $ end) (chunksum . fromInteger $ fs)
  where
    chunksize = 0x10000
    -- a Get action that adds chunksize/8 little-endian words to the start value n
    chunksum n = foldM (\a _ -> getWord64le >>= return . (+a)) n [1..(chunksize`div`8)]

-- | Prints the hash of the file named by the first command line argument.
-- The value is serialised little-endian and the bytes reversed, so the hex
-- digits are printed most significant byte first.
main :: IO ()
main = do
  args <- getArgs
  let fn = head $ args
  p <- shortsum fn
  putStrLn $ "The hash of file " ++ fn ++ ": " ++ (hex $ w82s $ reverse (L.unpack $ runPut $ putWord64le p))
AutoIT
#cs
Hash code is based on Media Player Classic. It calculates: size + 64bit
checksum of the first and last 64k (even if they overlap because the file is smaller than 128k).
Authors: Authenticity & Emanuel "Datenshi" Lindgren @ AutoIT Forums.
AutoIT v3.3.2.0
#ce
; Computes the OpenSubtitles hash of $sFileName.
; Success: returns the hash string produced by _HEX() and sets @error = 0.
; Failure: returns 0 and sets @error to
;          1 - the file could not be opened
;          2 - the file is smaller than two 64 KiB chunks
Func _Compute_Hash($sFileName)
Local $hFile, $tRet, $tTmp, $iFileSize, $iRead, $iChunk, $iI
; mode 16 = binary read
$hFile = FileOpen($sFileName, 16)
; FileOpen() reports failure with -1, which "Not $hFile" does not detect
If $hFile = -1 Then Return SetError(1, 0, 0)
$iFileSize = FileGetSize($sFileName)
$iChunk = 65536
If $iFileSize < $iChunk * 2 Then
FileClose($hFile)
Return SetError(2, 0, 0)
EndIf
; $tRet accumulates the hash, $tTmp receives one 8-byte word at a time
$tRet = DllStructCreate("uint64")
$tTmp = DllStructCreate("uint64")
DllStructSetData($tRet, 1, $iFileSize)
; first 64 KiB
For $iI = 0 To ($iChunk / 8) - 1
DllStructSetData($tTmp, 1, FileRead($hFile, 8))
DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
Next
; last 64 KiB
FileSetPos($hFile, $iFileSize - $iChunk, 0)
For $iI = 0 To ($iChunk / 8) - 1
DllStructSetData($tTmp, 1, FileRead($hFile, 8))
DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
Next
FileClose($hFile)
Return SetError(0, 0, _HEX(DllStructGetData($tRet, 1)))
EndFunc
; Formats $iValue as two 8-digit hex groups: first the value divided by
; 4294967296 (2^32, i.e. the upper 32 bits), then the value itself.
; NOTE(review): the "#" flag in "%#.8x" normally adds a "0x" prefix, and the
; second "%.8x" relies on the value being reduced to its low 32 bits -
; confirm the output against the reference hashes.
Func _HEX($iValue)
Return StringFormat("%#.8x%.8x", $iValue / 4294967296, $iValue)
EndFunc
FoxPro
* Main program: computes the movie hash of the file passed in cfile
* (default 'breakdance.avi') and returns it as a hex string.
* The running hash is kept in cret as an 8-character string, most significant
* byte first; every 8-byte word read from the file is reversed before it is
* added with adint64(). When SET TALK is ON, intermediate values are printed.
PARAMETERS cfile
PRIVATE ALL
*******
* environment setup
*******
cret=''
glTalk=(SET("TALK")="ON")
* fall back to the test file when no file name was passed
IF vartype(cfile)<>'C'
cfile='breakdance.avi'
ENDIF
IF glTalk
? cfile
? cfile=''
? LEN(cfile)
endif
nfile=FOPEN(cfile)
* seeking to the end (mode 2) returns the file size
nsize=FSEEK(nfile,0,2)
IF gltalk
? cfile
? 'size?>'
?? nsize
endif
* back to the start of the file (mode 0)
FSEEK(nfile,0,0)
******
* length reencode to 64 uint
*****
chash=hashsize(nsize)
cempty=chr(0)
cret=''
* left-pad the size with zero bytes to a full 8 bytes
IF LEN(chash)<8
FOR i=1 TO 8-LEN(chash)
cret=cret+cempty
ENDFOR
ENDIF
cret=cret+chash
* nSum counts the bytes read (only reported in TALK mode)
nSum=0
*******
* first 64kb
******
FOR i=1 TO 8192
cpom=FREAD(nfile,8)
* reverse the 8 bytes so they match the byte order of cret
cpom=reverse(cpom)
nSum=nSum+LEN(cpom)
IF gltalk
do buildhex WITH cret
?? '+'
DO buildhex WITH cpom
? '='
ENDIF
cret=adint64(cret,cpom)
ENDFOR
*******
* last 64kb
*******
* position 65536 bytes before the end of the file
FSEEK(nfile,-65536,2)
FOR i=1 TO 8192
cpom=FREAD(nfile,8)
cpom=reverse(cpom)
cret=adint64(cret,cpom)
nSum=nSum+LEN(cpom)
ENDFOR
FCLOSE(nfile)
****
* build hexa
****
IF gltalk
DO buildhex WITH cret
?
? 'Spocital som'
?? nSum
ENDIF
RETURN buildhex(cret)
FUNCTION reverse
PARAMETERS cstring
PRIVATE ALL
* Returns cstring with its characters in reverse order.
cout=''
FOR npos=LEN(cstring) TO 1 STEP -1
	cout=cout+SUBSTR(cstring,npos,1)
ENDFOR
RETURN cout
FUNCTION buildhex
PARAMETERS cstring,lkam
PRIVATE ALL
* Returns the bytes of cstring as lower-case hex, two digits per byte.
* When SET TALK is ON each byte is also printed, followed by ':'.
* lkam is accepted but not used.
lecho=(SET("TALK")="ON")
chex=''
FOR npos=1 TO LEN(cstring)
	cbyte=dec2basx(ASC(SUBSTR(cstring,npos,1)),16)
	* single-digit values get a leading zero
	cbyte=IIF(LEN(cbyte)<2,'0'+cbyte,cbyte)
	chex=chex+cbyte
	IF lecho
		?? cbyte
		?? ':'
	ENDIF
ENDFOR
RETURN chex
FUNCTION adint64
PARAMETERS cstring1,cstring2
PRIVATE ALL
* Adds two 64-bit numbers given as 8-character strings (most significant
* byte first) and returns the sum in the same form. The carry out of the
* first byte is dropped, so the result wraps at 2^64.
ncarry=0
csum=''
* walk from the least significant byte (position 8) to the most significant
FOR npos=8 TO 1 STEP -1
	ntotal=ncarry+ASC(SUBSTR(cstring1,npos,1))+ASC(SUBSTR(cstring2,npos,1))
	ncarry=INT(ntotal/256)
	csum=CHR(ntotal-ncarry*256)+csum
ENDFOR
RETURN csum
FUNCTION hashsize
PARAMETERS ncislo
PRIVATE ALL
* Encodes ncislo as a string of bytes, most significant byte first, using as
* few bytes as needed (0 gives a single zero byte).
cbytes=''
DO WHILE .t.
	nquot=INT(ncislo/256)
	* the remainder is the next more significant byte, so it goes in front
	cbytes=CHR(ncislo-nquot*256)+cbytes
	ncislo=nquot
	IF ncislo=0
		EXIT
	ENDIF
ENDDO
RETURN cbytes
*..............................................................................
* Function: DEC2BASX
* Purpose: Convert a whole number (0 or greater) to a string in base 2-16
*
* Parameters: nTempNum - number to convert (0-9007199254740992)
*             nNewBase - base to convert to, e.g. 2, 4, 8, 16
* Returns: string; digits above 9 are the lower-case letters a-f
* Usage: cresult=Dec2BasX(nParm1, nParm2)
*        STORE Dec2BasX(255, 16) TO cMyString &&... cMyString contains 'ff'
*..............................................................................
FUNCTION dec2basx
PARAMETERS nTempNum, nNewBase
STORE 0 TO nWorkVal, remainder, dividend, nextnum, digit
nWorkVal = nTempNum
ret_str = ''
* peel off the least significant digit until nothing is left
DO WHILE .T.
	digit = MOD(nWorkVal, nNewBase)
	dividend = nWorkVal / nNewBase
	nWorkVal = INT(dividend)
	IF digit >= 10 AND digit <= 15 AND digit = INT(digit)
		* 10..15 map to 'a'..'f'
		ret_str = SUBSTR('abcdef', digit - 9, 1) + ret_str
	ELSE
		ret_str = LTRIM(STR(digit)) + ret_str
	ENDIF
	IF nWorkVal = 0
		EXIT
	ENDIF
ENDDO
RETURN ret_str
Powershell 2.0
You can use GetHash.dll.
http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll
Use Example:
# Load the GetHash assembly, then hash $filename with it.
Add-Type -Path "GetHash.dll"
# Returns the hexadecimal movie hash of the file at $path, computed by GetHash.dll.
function MovieHash([string]$path) {
    $bytes = [GetHash.Main]::ComputeHash($path)
    [GetHash.Main]::ToHexadecimal($bytes)
}
MovieHash $filename
or without using GetHash.dll:
# Number of bytes hashed at the start and at the end of the file.
$dataLength = 65536
# Adds two UInt64 values modulo 2^64. The sum is formed as a Decimal, which
# is wide enough to hold it, and then reduced.
function LongSum([UInt64]$a, [UInt64]$b) {
    $modulus = [Decimal]([UInt64]::MaxValue) + 1
    $total = [Decimal]$a + $b
    [UInt64]($total % $modulus)
}
# Sums up to $dataLength bytes of the stream, read from its current position
# as 8-byte words, using LongSum for wrapping addition. Stops early when the
# stream has no more data.
function StreamHash([IO.Stream]$stream) {
    $wordSize = 8
    $wordCount = $dataLength / $wordSize
    [UInt64]$sum = 0
    [byte[]]$word = New-Object byte[] $wordSize
    for ($n = 0; $n -lt $wordCount; $n++) {
        if ($stream.Read($word, 0, $wordSize) -le 0) { break }
        $sum = LongSum $sum ([BitConverter]::ToUInt64($word, 0))
    }
    $sum
}
# Returns the movie hash of the file at $path as 16 zero-padded hex digits:
# file size + sum of the first 64 KiB + sum of the last 64 KiB, modulo 2^64.
function MovieHash([string]$path) {
    # Local, so that 'finally' never touches a $stream from an outer scope
    # when opening the file fails.
    $stream = $null
    try {
        $stream = [IO.File]::OpenRead($path)
        [UInt64]$lhash = $stream.Length
        $lhash = LongSum $lhash (StreamHash $stream)
        $stream.Position = [Math]::Max(0L, $stream.Length - $dataLength)
        $lhash = LongSum $lhash (StreamHash $stream)
        # X16: always 16 digits, padded with leading zeros
        "{0:X16}" -f $lhash
    }
    finally {
        if ($stream -ne $null) { $stream.Close() }
    }
}
MovieHash $filename
MASM
; Calc_Hash - computes the movie hash of a file.
;   pFile : pointer to the zero-terminated file name
;   pBuf  : buffer that receives the hash, formatted with HashFormat
; Returns eax = INVALID_HANDLE_VALUE if the file cannot be opened,
; otherwise eax = the file size (low 32 bits).
; The 64-bit sum is kept in ebx:edx (high:low).
Calc_Hash proc uses esi ebx edx pFile:dword, pBuf:dword
    LOCAL hFile:dword, fSize:dword, NBR:dword, pMem:dword

    ; read access only, and allow other readers: GENERIC_ALL with no sharing
    ; fails on read-only files and on files that are open elsewhere
    invoke CreateFile,pFile,GENERIC_READ,FILE_SHARE_READ,0,OPEN_EXISTING,0,0
    mov hFile,eax
    cmp eax,INVALID_HANDLE_VALUE
    jz @Error

    ; seeking to the end returns the file size
    invoke SetFilePointer,hFile,0,NULL,FILE_END
    mov fSize,eax
    push eax                        ; keep the size for later

    ; GPTR returns zero-initialised memory for both 64 KiB chunks
    invoke GlobalAlloc,GPTR,131072
    mov pMem,eax

    ; first 64 KiB
    invoke SetFilePointer,hFile,0,NULL,FILE_BEGIN
    invoke ReadFile,hFile,pMem,65536,addr NBR,NULL

    ; last 64 KiB, stored directly behind the first chunk
    sub fSize,65536
    add pMem,65536
    invoke SetFilePointer,hFile,fSize,NULL,FILE_BEGIN
    invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
    sub pMem,65536

    mov esi,pMem
    mov ecx,131072
    pop eax
    mov edx,eax                     ; low dword of the hash starts as the size
    xor ebx,ebx                     ; high dword must start at zero
    push eax
@@:
    add edx,[esi]                   ; add low dword
    adc ebx,[esi+4]                 ; add high dword plus carry
    add esi,8
    sub ecx,8
    jnz @B

    ; extra wsprintf arguments: high dword first, then low dword
    push edx
    push ebx
    invoke wsprintf,pBuf,addr HashFormat
    pop eax                         ; drop the two extra arguments
    pop eax

    invoke CloseHandle,hFile
    invoke GlobalFree,pMem
    pop eax                         ; return the file size, as documented
@Error:
    ; If error eax returns (INVALID_HANDLE_VALUE)
    ; Hash value is copied to pBuf
    ; eax returns Movie Filesize
    ret
Calc_Hash endp
Objective-C
This is implementation of hash for Objective-C for Mac by subsmarine.com
OSHashAlgorithm.m
#import "OSHashAlgorithm.h"

@implementation OSHashAlgorithm

// Formats a hash as 16 zero-padded lower-case hex digits.
+(NSString*)stringForHash:(uint64_t)hash
{
    // stringWithFormat: already returns an autoreleased object; sending it
    // another autorelease would over-release it.
    return [NSString stringWithFormat:@"%016llx", (unsigned long long)hash];
}

// Hashes the file at the given path. Returns a zeroed VideoHash on failure.
+(VideoHash)hashForPath:(NSString*)path
{
    VideoHash hash;
    hash.fileHash =0;
    hash.fileSize =0;

    NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
    hash = [OSHashAlgorithm hashForFile:readFile];
    [readFile closeFile];
    return hash;
}

// Hashes the file at the given URL. Returns a zeroed VideoHash on failure.
+(VideoHash)hashForURL:(NSURL*)url
{
    VideoHash hash;
    hash.fileHash =0;
    hash.fileSize =0;

    NSFileHandle *readfile = [NSFileHandle fileHandleForReadingFromURL:url error:NULL];
    hash = [OSHashAlgorithm hashForFile:readfile];
    [readfile closeFile]; // was left open before
    return hash;
}

// Computes file size + sum of the 64-bit words of the first 64 KiB + sum of
// the 64-bit words of the last 64 KiB. Returns a zeroed VideoHash when the
// handle is nil or the file is shorter than 64 KiB.
// The words are read in host byte order (little-endian hosts only).
+(VideoHash)hashForFile:(NSFileHandle*)handle
{
    VideoHash retHash;
    retHash.fileHash =0;
    retHash.fileSize =0;

    if( handle == nil )
        return retHash;

    const NSUInteger CHUNK_SIZE=65536;
    NSData *fileDataBegin, *fileDataEnd;
    uint64_t hash=0;

    fileDataBegin = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE];
    [handle seekToEndOfFile];
    unsigned long long fileSize = [handle offsetInFile];
    if(fileSize < CHUNK_SIZE )
        return retHash;

    // fileSize >= CHUNK_SIZE here, so the unsigned subtraction cannot wrap
    [handle seekToFileOffset:fileSize-CHUNK_SIZE ];
    fileDataEnd = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE];

    // never read past the end of a short NSData
    if( [fileDataBegin length] < CHUNK_SIZE || [fileDataEnd length] < CHUNK_SIZE )
        return retHash;

    //
    // Calculate hash
    //

    // 1st. File size
    hash += fileSize;

    // 2nd. Begining data block
    uint64_t * data_bytes= (uint64_t*)[fileDataBegin bytes];
    for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ )
        hash+=data_bytes[i];

    // 3rd. Ending data block
    data_bytes= (uint64_t*)[fileDataEnd bytes];
    for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ )
        hash+= data_bytes[i];

    retHash.fileHash = hash;
    retHash.fileSize = fileSize;

    return retHash;
}

@end
OSHashAlgorithm.h
#import <Cocoa/Cocoa.h>

// Result of hashing one file: the 64-bit hash and the file size in bytes.
typedef struct
{
    uint64_t fileHash;
    uint64_t fileSize;
} VideoHash;

// Class-method-only helper that computes movie hashes.
@interface OSHashAlgorithm : NSObject {
}

// Hash of the file at a file-system path.
+(VideoHash)hashForPath:(NSString*)path;
// Hash of the file at a URL.
+(VideoHash)hashForURL:(NSURL*)url;
// Hash of an already opened file handle.
+(VideoHash)hashForFile:(NSFileHandle*)handle;
// Hexadecimal string form of a hash value.
+(NSString*)stringForHash:(uint64_t)hash;

@end
Vala
// Returns the movie hash of the file: its size plus the sum of the
// little-endian 64-bit words of its first and last 64kB. Any error is
// reported through error().
public uint64 hash(File file) {
	try {
		uint64 sum;
		int words = (int) (65536 / sizeof(uint64));

		// the hash starts out as the file size
		var info = file.query_info("*", FileQueryInfoFlags.NONE);
		sum = info.get_size();

		// first 64kB
		var head = new DataInputStream(file.read());
		head.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		int left = words;
		while (left > 0) {
			sum += head.read_uint64();
			left--;
		}

		// last 64kB: reopen the file and skip everything before it
		var tail = new DataInputStream(file.read());
		tail.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		tail.skip((size_t)(info.get_size() - 65536));
		left = words;
		while (left > 0) {
			sum += tail.read_uint64();
			left--;
		}

		return sum;
	} catch (Error e) {
		error("%s", e.message);
	}
}
// Prints the hash of each test file as 16 hex digits; stops with exit code 1
// at the first file that does not exist.
int main () {
	string[] paths = { "breakdance.avi", "dummy.bin" };

	foreach (unowned string path in paths) {
		var file = File.new_for_path (path);
		if (!file.query_exists ()) {
			stderr.printf ("File '%s' doesn't exist.\n", file.get_path ());
			return 1;
		}
		stdout.printf("%016llx\n", hash(file));
	}

	return 0;
}
Build with: valac --pkg gio-2.0 hash.vala
AutoHotKey
; Demo script: shows the OpenSubtitles hash of Breakdance.avi in a message box and exits.
#NoEnv
; -1 = never sleep between script lines (run at full speed)
SetBatchLines, -1
; http://www.opensubti.../breakdance.avi
; OpenSubtitles Hash = 8E245D9679D31E12
FilePath := "Breakdance.avi"
; the message shows the file path and, on a new line, its hash
MsgBox, 0, OpenSubtitlesHash, % Filepath . ":`r`n" . GetOpenSubtitlesHash(FilePath)
ExitApp
; ==================================================================================================
; Returns the OpenSubtitles hash of FilePath as 16 upper-case hex digits, or an empty string when
; the file size is outside the accepted range or the file cannot be opened.
GetOpenSubtitlesHash(FilePath) {
   ; http://trac.opensubt...HashSourceCodes
   Static X := { 0: "0",  1: "1",  2: "2",  3: "3",  4: "4",  5: "5",  6: "6",  7: "7"
               , 8: "8",  9: "9", 10: "A", 11: "B", 12: "C", 13: "D", 14: "E", 15: "F"}
   ; Check the file size ---------------------------------------------------------------------------
   ; 9000000000 > $moviebytesize >= 131072 bytes (changed > to >= for the lower limit)
   FileGetSize, FileSize, %FilePath%
   If (FileSize < 131072) || (FileSize >= 9000000000)
      Return ""
   ; Read the first and last 64 KB -----------------------------------------------------------------
   VarSetCapacity(FileParts, 131072)         ; allocate sufficient memory
   File := FileOpen(FilePath, "r")           ; open the file
   If !IsObject(File)                        ; FileOpen() failed (e.g. the file is locked)
      Return ""
   File.Seek(0, 0)                           ; set the file pointer (just for balance)
   File.RawRead(FileParts, 65536)            ; read the first 64 KB
   File.Seek(-65536, 2)                      ; set the file pointer for the last 64 KB
   File.RawRead(&FileParts + 65536, 65536)   ; read the last 64 KB
   File.Close()                              ; got all we need, so the file can be closed
   ; Now calculate the hash using two UINTs for the low- and high-order parts of an UINT64 ---------
   LoUINT := FileSize & 0xFFFFFFFF           ; store low-order UINT of file size
   HiUINT := FileSize >> 32                  ; store high-order UINT of file size
   Offset := -4                              ; to allow adding 4 on first iteration
   Loop, 16384 {                             ; 131072 / 8
      LoUINT += NumGet(FileParts, Offset += 4, "UInt") ; add first UINT value to low-order UINT
      HiUINT += NumGet(FileParts, Offset += 4, "UInt") ; add second UINT value to high-order UINT
   }
   ; Adjust the probable overflow of the low-order UINT
   HiUINT += LoUINT >> 32                    ; add the overflow to the high-order UINT
   LoUINT &= 0xFFFFFFFF                      ; remove the overflow from the low-order UINT
   ; Now get the hex string, i.e. the hash ---------------------------------------------------------
   Hash := ""
   VarSetCapacity(UINT64, 8, 0)
   NumPut((HiUINT << 32) | LoUINT, UINT64, 0, "UInt64")
   ; emit the bytes from most to least significant, two hex digits each
   Loop, 8
      Hash .= X[(Byte := NumGet(UINT64, 8 - A_Index, "UChar")) >> 4] . X[Byte & 0x0F]
   Return Hash
}
; ==================================================================================================
Lisp
; opensubtitle hash, common lisp, sbcl
; sean langton 2013
(defun get-lvalue (stream)
  "Read eight bytes from the binary STREAM and return them as one unsigned
integer, the first byte being the least significant (little-endian)."
  (loop for shift from 0 below 64 by 8
        sum (ash (read-byte stream) shift)))
(defun hashfile (path)
  "Print the OpenSubtitles hash of the file PATH as 16 zero-padded hex digits
and return that string. The hash is the file length plus the sum of the
little-endian 64-bit words of the first and last 64 KiB, modulo 2^64.
Prints a message and returns NIL when the file is smaller than 128 KiB."
  (with-open-file (in path :element-type '(unsigned-byte 8))
    (let* ((len (file-length in))
           (hash len))
      (when (< len (* 2 65536))
        (print "file too small to hash")
        (return-from hashfile nil))
      (flet ((add-chunk ()
               ;; 8192 words of 8 bytes = 64 KiB, masked to stay a 64-bit value
               (loop repeat 8192
                     do (setf hash (logand (+ hash (get-lvalue in))
                                           #xFFFFFFFFFFFFFFFF)))))
        ;; first 64 KiB
        (add-chunk)
        ;; last 64 KiB
        (file-position in (- len 65536))
        (add-chunk))
      (format t "~&~16,'0x" hash)
      ;; also hand the hash back to the caller instead of returning NIL
      (format nil "~16,'0x" hash))))
; (hashfile #p"~/Downloads/breakdance.avi")
; (hashfile #p"~/Downloads/dummy/dummy.bin")
Pascal
{$push}{$Q-}{$R-}
// Computes the movie hash of a seekable stream.
//   Size : receives the stream size in bytes
//   Hash : receives the hash as 16 lower-case, zero-padded hex digits
// The hash is: size + sum of the 64-bit words of the first 64 KiB + sum of
// the 64-bit words of the last 64 KiB, wrapping modulo 2^64 (overflow and
// range checks are switched off around this procedure for that reason).
procedure ComputeHash(const Stream : TStream;
                      out Size     : qword;
                      out Hash     : string);
const
  ChunkSize     = 65536;
  WordsPerChunk = ChunkSize div SizeOf(qword);
var
  hashQ     : qword;
  chunkWord : qword;
  i         : integer;
begin
  Size  := Stream.Size;
  hashQ := Size;

  // first 64 KiB; stop at end of stream, ignoring an incomplete trailing word
  Stream.Position := 0;
  i := 0;
  while (i < WordsPerChunk)
    and (Stream.Read(chunkWord, SizeOf(chunkWord)) = SizeOf(chunkWord)) do
  begin
    hashQ := hashQ + chunkWord;
    i := i + 1;
  end;

  // last 64 KiB; start at the beginning when the stream is shorter than that
  if Size > ChunkSize then
    Stream.Position := Size - ChunkSize
  else
    Stream.Position := 0;

  i := 0;
  while (i < WordsPerChunk)
    and (Stream.Read(chunkWord, SizeOf(chunkWord)) = SizeOf(chunkWord)) do
  begin
    hashQ := hashQ + chunkWord;
    i := i + 1;
  end;

  Hash := lowercase(Format('%.16x',[hashQ]));
end;
{$pop}
Scala
import java.io.{FileInputStream, File}
import java.nio.{LongBuffer, ByteOrder, ByteBuffer}
import java.nio.channels.FileChannel.MapMode
import scala.math._
/** Computes the OpenSubtitles hash: file size plus the 64-bit sums of the
  * first and last 64 KiB of the file, read as little-endian longs.
  */
class OpenSubtitlesHasher {
  private val hashChunkSize = 64L * 1024L

  /** Returns the hash of `file` as 16 zero-padded lowercase hex digits.
    * For files shorter than one chunk, both chunks cover the whole file.
    */
  def computeHash(file: File): String = {
    val fileSize = file.length
    val span = min(fileSize, hashChunkSize)
    val tailStart = max(fileSize - hashChunkSize, 0)
    val channel = new FileInputStream(file).getChannel
    try {
      val chunkSums = Seq(0L, tailStart).map { offset =>
        computeHashForChunk(channel.map(MapMode.READ_ONLY, offset, span))
      }
      "%016x".format(fileSize + chunkSums.sum)
    } finally {
      channel.close()
    }
  }

  /** Sums every complete little-endian 64-bit word in `buffer`; Long
    * arithmetic wraps on overflow, as the hash requires.
    */
  private def computeHashForChunk(buffer: ByteBuffer): Long = {
    val words = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer()
    var total = 0L
    while (words.hasRemaining) {
      total += words.get
    }
    total
  }
}
Javascript
There are some WRONG implementations floating around, so please always verify your hash codes against the test files listed at the start of this document. This implementation works fine, credits go to Rasmus - THANKS!
function(file, callback) {
var HASH_CHUNK_SIZE = 65536, //64 * 1024
longs = [],
temp = file.size;
function read(start, end, callback) {
var reader = new FileReader();
reader.onload = function(e) {
callback.call(reader, process(e.target.result));
};
if (end === undefined) {
reader.readAsBinaryString(file.slice(start));
} else {
reader.readAsBinaryString(file.slice(start, end));
}
}
function process(chunk) {
for (var i = 0; i < chunk.length; i++) {
longs[(i + 8) % 8] += chunk.charCodeAt(i);
}
}
function binl2hex(a) {
var b = 255,
d = '0123456789abcdef',
e = '',
c = 7;
a[1] += a[0] >> 8;
a[0] = a[0] & b;
a[2] += a[1] >> 8;
a[1] = a[1] & b;
a[3] += a[2] >> 8;
a[2] = a[2] & b;
a[4] += a[3] >> 8;
a[3] = a[3] & b;
a[5] += a[4] >> 8;
a[4] = a[4] & b;
a[6] += a[5] >> 8;
a[5] = a[5] & b;
a[7] += a[6] >> 8;
a[6] = a[6] & b;
a[7] = a[7] & b;
for (d, e, c; c > -1; c--) {
e += d.charAt(a[c] >> 4 & 15) + d.charAt(a[c] & 15);
}
return e;
}
for (var i = 0; i < 8; i++) {
longs[i] = temp & 255;
temp = temp >> 8;
}
read(0, HASH_CHUNK_SIZE, function() {
read(file.size - HASH_CHUNK_SIZE, undefined, function() {
callback.call(null, file, binl2hex(longs));
});
});
}
Groovy
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.nio.channels.FileChannel
import java.nio.channels.FileChannel.MapMode
/**
 * OpenSubtitles hash: size plus the 64-bit sums of the first and last 64 KiB
 * of the data, read as little-endian longs, formatted as 16 hex digits.
 */
class OpenSubtitlesHasher {
    def static HASH_CHUNK_SIZE = 64 * 1024

    /**
     * Hashes a file via memory mapping.
     *
     * @param file the java.io.File to hash
     * @return the hash as a 16-character zero-padded lowercase hex string
     */
    def static computeHash(file) {
        def size = file.length()
        def chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size)
        def fileChannel = new FileInputStream(file).getChannel()
        try {
            def head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile))
            def tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile))
            return String.format("%016x", size + head + tail)
        } finally {
            fileChannel.close()
        }
    }

    /**
     * Hashes data that is only available as a forward-only stream.
     *
     * @param stream the input stream, positioned at the start of the data
     * @param length the total number of bytes the stream will deliver
     * @return the hash as a 16-character zero-padded lowercase hex string
     * @throws EOFException if the stream ends before `length` bytes
     */
    def static computeHash(stream, length) {
        def chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length)
        // Head and tail together; when they overlap (length < 128 KiB) the
        // buffer simply holds the whole input.
        def chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)]
        def dis = new DataInputStream(stream)
        dis.readFully(chunkBytes, 0, chunkSizeForFile)

        // Skip ahead to where the tail chunk starts. The loop needs an
        // explicit body: without one, Groovy takes the following statement
        // as the body, so the tail was never read for short inputs.
        def position = chunkSizeForFile
        def tailChunkPosition = length - chunkSizeForFile
        while (position < tailChunkPosition) {
            def skipped = dis.skip(tailChunkPosition - position)
            if (skipped <= 0) {
                // skip() may return 0 without being at EOF; consume one byte
                // instead, which throws EOFException on a truncated stream
                // rather than spinning forever.
                dis.readByte()
                skipped = 1
            }
            position += skipped
        }
        dis.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile)

        def head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile))
        def tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile))
        return String.format("%016x", length + head + tail)
    }

    /**
     * Sums all complete little-endian 64-bit words between the buffer's
     * position and limit; long arithmetic wraps on overflow.
     */
    def static computeHashForChunk(buffer) {
        def longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer()
        def hash = 0L
        while (longBuffer.hasRemaining()) {
            hash += longBuffer.get()
        }
        return hash
    }
}
Bash
#!/bin/bash
# Copyright (C)
# 2014 - Tomasz Wisniewski dagon666
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Normalises the global 64-bit accumulator, which is kept as two 32-bit
# halves: whatever g_lo holds above 32 bits is carried into g_hi, and g_hi
# itself is wrapped to 32 bits (i.e. the total is taken modulo 2^64).
correct_64bit() {
    local mask=$(( (1 << 32) - 1 ))
    g_hi=$(( (g_hi + (g_lo >> 32)) & mask ))
    g_lo=$(( g_lo & mask ))
}
# Adds the first 64 KiB of the file "$1", taken as little-endian 64-bit
# words, to the global accumulator g_hi:g_lo. The file is dumped with od in
# 2048-byte slices; each word contributes its low four bytes to g_lo and its
# high four bytes to g_hi. Bytes missing from a short file count as zero.
hash_part() {
    local file="$1"
    local block_bytes=$(( 8192 * 8 ))
    local slice=2048
    local words_per_slice=$(( slice / 8 ))
    local pos word base
    declare -a num=()

    for (( pos = 0; pos < block_bytes; pos += slice )); do
        # one decimal number per byte of this slice
        num=( $(od -t u1 -An -N "$slice" -w$slice -j "$pos" "$file") )

        for (( word = 0; word < words_per_slice; word++ )); do
            base=$(( word * 8 ))

            g_lo=$(( g_lo
                + num[base]
                + (num[base + 1] << 8)
                + (num[base + 2] << 16)
                + (num[base + 3] << 24) ))

            g_hi=$(( g_hi
                + num[base + 4]
                + (num[base + 5] << 8)
                + (num[base + 6] << 16)
                + (num[base + 7] << 24) ))

            correct_64bit
        done
    done
}
# Prints the OpenSubtitles hash of the file "$1" as 16 lowercase hex digits.
# The first and the last 64 KiB of the file are copied to temporary files
# (created in the current directory), summed via hash_part, and the file
# size is added on top. Uses the globals g_hi / g_lo as the accumulator.
hash_file() {
    g_lo=0
    g_hi=0

    local file="$1"
    local size part1 part2
    size=$(stat -c%s "$file")
    part1=$(mktemp part1.XXXXXXXX)
    part2=$(mktemp part2.XXXXXXXX)

    # head chunk: the first 8 * 8192 = 65536 bytes
    dd if="$file" bs=8192 count=8 of="$part1" 2> /dev/null
    # tail chunk: tail -c copies the last 64 KiB in one go (dd with bs=1
    # needs a read/write pair per byte) and yields the whole file when it is
    # shorter than 64 KiB instead of failing on a negative offset.
    tail -c 65536 "$file" > "$part2"

    hash_part "$part1"
    hash_part "$part2"

    g_lo=$(( g_lo + size ))
    correct_64bit

    rm -f "$part1" "$part2"

    printf "%08x%08x\n" "$g_hi" "$g_lo"
}
# Self-check: hash the two reference files and print the expected value
# under each result for visual comparison.
for sample in "breakdance.avi:8e245d9679d31e12" "dummy.bin:61f7751fc2a72bfb"; do
    hash_file "${sample%%:*}"
    echo "${sample##*:} <- should be"
done
GO
https://github.com/oz/osdb/blob/6a89d7f831a6a3874260fe4677e546d551cad79d/osdb.go#L42
import (
"bytes"
"encoding/binary"
"fmt"
"os"
)
const (
ChunkSize = 65536 // 64k
)
// HashFile computes the OSDB hash of an open file: the file size plus the
// sum of the little-endian 64-bit words found in the first and in the last
// ChunkSize bytes, with wrap-around on overflow. Files smaller than
// ChunkSize are rejected with an error.
func HashFile(file *os.File) (hash uint64, err error) {
	info, err := file.Stat()
	if err != nil {
		return 0, err
	}
	size := info.Size()
	if size < ChunkSize {
		return 0, fmt.Errorf("File is too small")
	}

	// Head block goes into the first half of the buffer, tail block into
	// the second half.
	blocks := make([]byte, 2*ChunkSize)
	for i, offset := range []int64{0, size - ChunkSize} {
		if err = readChunk(file, offset, blocks[i*ChunkSize:(i+1)*ChunkSize]); err != nil {
			return 0, err
		}
	}

	// Decode both blocks as little-endian words in one pass.
	words := make([]uint64, len(blocks)/8)
	if err = binary.Read(bytes.NewReader(blocks), binary.LittleEndian, words); err != nil {
		return 0, err
	}

	hash = uint64(size)
	for _, w := range words {
		hash += w
	}
	return hash, nil
}
// readChunk fills buf with the bytes of file starting at offset. It fails
// when the read itself fails or when the number of bytes read is not
// exactly ChunkSize.
func readChunk(file *os.File, offset int64, buf []byte) (err error) {
	switch n, readErr := file.ReadAt(buf, offset); {
	case readErr != nil:
		return readErr
	case n != ChunkSize:
		return fmt.Errorf("Invalid read %v", n)
	}
	return nil
}
SWIFT 2
// OSHash.swift
// Originally implemented from Objective-C version for Swift by omerucel 18/04/2015
// http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes#Objective-C
// Updated for Swift 2 by eduo on 15/06/15.
// Copyright © 2015 Eduardo Gutierrez. All rights reserved.
//
import Foundation
/// Computes the OpenSubtitles hash of a file: the file size plus the sum of
/// the 64-bit words of its first and last 64 KiB, with wrap-around.
class OSHashAlgorithm: NSObject {
    /// Size in bytes of the head and tail blocks that are summed.
    let chunkSize: Int = 65536;

    struct VideoHash {
        /// 16-character zero-padded lowercase hex hash ("" if not computed).
        var fileHash: String
        /// File size in bytes (0 if the hash was not computed).
        var fileSize: UInt64
    }

    /// Hashes the file at `path`.
    ///
    /// Returns an empty hash and a size of 0 when the file is smaller than
    /// one chunk. Traps if the file cannot be opened for reading.
    func hashForPath (path: String) -> VideoHash {
        var fileHash = VideoHash(fileHash: "", fileSize: 0)
        let fileHandler = NSFileHandle(forReadingAtPath: path)!
        // Close the handle on every exit path, including the early return.
        defer { fileHandler.closeFile() }

        let fileDataBegin: NSData = fileHandler.readDataOfLength(chunkSize)
        fileHandler.seekToEndOfFile()

        let fileSize: UInt64 = fileHandler.offsetInFile
        if (UInt64(chunkSize) > fileSize) {
            return fileHash
        }

        fileHandler.seekToFileOffset(max(0, fileSize - UInt64(chunkSize)))
        let fileDataEnd: NSData = fileHandler.readDataOfLength(chunkSize)

        // The hash is seeded with the file size; &+ adds with wrap-around.
        var hash: UInt64 = fileSize

        var data_bytes = UnsafeBufferPointer<UInt64>(
            start: UnsafePointer(fileDataBegin.bytes),
            count: fileDataBegin.length/sizeof(UInt64)
        )
        hash = data_bytes.reduce(hash,combine: &+)

        data_bytes = UnsafeBufferPointer<UInt64>(
            start: UnsafePointer(fileDataEnd.bytes),
            count: fileDataEnd.length/sizeof(UInt64)
        )
        hash = data_bytes.reduce(hash,combine: &+)

        // Always emit 16 hex digits: hashes with leading zeros must keep them.
        fileHash.fileHash = String(format:"%016qx", arguments: [hash])
        fileHash.fileSize = fileSize

        return fileHash
    }
}
///var osha = OSHashAlgorithm()
///var result = osha.hashForPath(fileName)
///println(result.fileHash)
///println(result.fileSize)
SWIFT 3
Source codes: https://github.com/niklasberglund/OpenSubtitlesHash.swift
//
// This Swift 3 version is based on Swift 2 version by eduo:
// https://gist.github.com/eduo/7188bb0029f3bcbf03d4
//
// Created by Niklas Berglund on 2017-01-01.
//
import Foundation
/// Computes the OpenSubtitles hash of a file: the file size plus the sum of
/// the 64-bit words of its first and last 64 KiB, with wrap-around.
class OpenSubtitlesHash: NSObject {
    /// Size in bytes of the head and tail blocks that are summed.
    static let chunkSize: Int = 65536

    struct VideoHash {
        /// 16-character zero-padded lowercase hex hash ("" if not computed).
        var fileHash: String
        /// File size in bytes (0 if the hash was not computed).
        var fileSize: UInt64
    }

    /// Hashes the file the URL points to; see `hashFor(_ path:)`.
    public class func hashFor(_ url: URL) -> VideoHash {
        return self.hashFor(url.path)
    }

    /// Hashes the file at `path`.
    ///
    /// Returns an empty hash and a size of 0 when the file is smaller than
    /// one chunk. Traps if the file cannot be opened for reading.
    public class func hashFor(_ path: String) -> VideoHash {
        let handle = FileHandle(forReadingAtPath: path)!
        let headBlock = handle.readData(ofLength: chunkSize) as NSData

        handle.seekToEndOfFile()
        let size: UInt64 = handle.offsetInFile
        let blockLength = UInt64(chunkSize)
        guard size >= blockLength else {
            return VideoHash(fileHash: "", fileSize: 0)
        }

        handle.seek(toFileOffset: size - blockLength)
        let tailBlock = handle.readData(ofLength: chunkSize) as NSData

        // Seed with the file size, then fold in every complete 64-bit word
        // of both blocks; &+ adds with wrap-around.
        let sum = [headBlock, tailBlock].reduce(size) { (partial: UInt64, block: NSData) -> UInt64 in
            let words = UnsafeBufferPointer<UInt64>(
                start: block.bytes.assumingMemoryBound(to: UInt64.self),
                count: block.length / MemoryLayout<UInt64>.size
            )
            return words.reduce(partial, &+)
        }

        handle.closeFile()
        return VideoHash(fileHash: String(format: "%016qx", arguments: [sum]), fileSize: size)
    }
}
// Usage example:
// let videoUrl = Bundle.main.url(forResource: "dummy5", withExtension: "rar")
// let videoHash = OpenSubtitlesHash.hashFor(videoUrl!)
// debugPrint("File hash: \(videoHash.fileHash)\nFile size: \(videoHash.fileSize)")
RUST
use std::fs;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, BufReader};
use std::mem;
const HASH_BLK_SIZE: u64 = 65536;

/// Computes the OpenSubtitles hash of `file`.
///
/// The hash is `fsize` plus the sum of the little-endian 64-bit words in the
/// first and in the last `HASH_BLK_SIZE` bytes of the file, with wrap-around
/// on overflow. For files shorter than one block both blocks start at offset
/// 0 and only complete 8-byte words are summed.
///
/// `fsize` must be the length of `file` in bytes. The result is always 16
/// zero-padded lowercase hex digits.
///
/// # Errors
///
/// Returns any I/O error raised while reading or seeking, other than running
/// into the end of the file.
fn create_hash(file: File, fsize: u64) -> Result<String, std::io::Error> {
    /// Adds up to one block of 8-byte words, read from the reader's current
    /// position, to `seed`. Stops at the first incomplete word.
    fn add_block<R: Read>(reader: &mut R, seed: u64) -> Result<u64, std::io::Error> {
        let mut word = [0u8; 8];
        let mut sum = seed;
        for _ in 0..HASH_BLK_SIZE / 8 {
            // `read` may legally return fewer than 8 bytes and leave stale
            // data in the buffer; `read_exact` either fills it or fails.
            match reader.read_exact(&mut word) {
                Ok(()) => sum = sum.wrapping_add(u64::from_le_bytes(word)),
                Err(ref e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
                Err(e) => return Err(e),
            }
        }
        Ok(sum)
    }

    let mut reader = BufReader::with_capacity(HASH_BLK_SIZE as usize, file);

    // Seed the hash with the file size, then fold in the head block.
    let head_sum = add_block(&mut reader, fsize)?;

    // The tail block starts one block before the end, clamped to the start
    // of the file so short files cannot underflow the offset.
    reader.seek(SeekFrom::Start(fsize.saturating_sub(HASH_BLK_SIZE)))?;
    let hash_val = add_block(&mut reader, head_sum)?;

    Ok(format!("{:016x}", hash_val))
}
/// Hashes the sample file `breakdance.avi` from the current directory and
/// prints the result. Panics if the file cannot be inspected, opened or read.
fn main() {
    let fname = "breakdance.avi";
    let fsize = fs::metadata(fname).unwrap().len();

    // Files that are not larger than one hash block are skipped silently.
    if fsize <= HASH_BLK_SIZE {
        return;
    }

    let fhash = create_hash(File::open(fname).unwrap(), fsize).unwrap();
    println!("Hash for {} is {}", fname, fhash);
}
添付ファイル (1)
-
GetHash.dll
(4.5 KB) -
guest が8年前に追加。
GetHash?.dll
すべての添付ファイルをダウンロード: .zip