// Copyright 1997-1998 Omni Development, Inc.  All rights reserved.
//
// This software may only be used and reproduced according to the
// terms in the file OmniSourceLicense.html, which should be
// distributed with this project and can also be found at
// http://www.omnigroup.com/DeveloperResources/OmniSourceLicense.html.

#import "OWUnknownDataStreamProcessor.h"

#import <Foundation/Foundation.h>
#import <OmniBase/OmniBase.h>
#import <OmniFoundation/OmniFoundation.h>

#import "OWDataStream.h"
#import "OWDataStreamCursor.h"
#import "OWPipeline.h"
#import "OWContentType.h"

RCS_ID("$Header: /Network/Developer/Source/CVS/OmniGroup/OWF/Processors.subproj/OWUnknownDataStreamProcessor.m,v 1.7 1998/12/08 04:05:59 kc Exp $")

// Private category on OWUnknownDataStreamProcessor; it currently declares no methods.
@interface OWUnknownDataStreamProcessor (Private)
@end

// Content types looked up once in +initialize and shared by every instance.
static OWContentType *unknownContentType;                   // "www/unknown"
static OWContentType *unknownDecodedContentType;            // "OWDataStream/UnknownDecodedContent"
static OWContentType *textPlainContentType;                 // "text/plain"
static OWContentType *applicationOctetStreamContentType;    // "application/octet-stream"

// Guess tables filled in by +registerGuessesDictionary:.  Each maps a guess string to the
// OWContentType reported when the data starts with that string (prefix table) or contains
// it (anywhere table).
static NSMutableDictionary *guessPrefixDictionary;
static NSMutableDictionary *guessAnywhereDictionary;

@implementation OWUnknownDataStreamProcessor

// Creates the two (initially empty) guess tables and caches the content types this
// class works with.  A static flag ensures the setup body runs only once even if
// +initialize is sent again.
+ (void)initialize;
{
    static BOOL alreadyInitialized = NO;

    [super initialize];
    if (!alreadyInitialized) {
        alreadyInitialized = YES;

        // Guess tables, populated later by +registerGuessesDictionary:.
        guessPrefixDictionary = [[NSMutableDictionary alloc] initWithCapacity:32];
        guessAnywhereDictionary = [[NSMutableDictionary alloc] initWithCapacity:32];

        // Content types used as processor sources and as fallback guesses.
        unknownContentType = [OWContentType contentTypeForString:@"www/unknown"];
        unknownDecodedContentType = [OWContentType contentTypeForString:@"OWDataStream/UnknownDecodedContent"];
        textPlainContentType = [OWContentType contentTypeForString:@"text/plain"];
        applicationOctetStreamContentType = [OWContentType contentTypeForString:@"application/octet-stream"];
    }
}

// Registers this class as a processor that converts both "unknown" source content
// types into the wildcard content type, each at a cost of 1.0.
// NOTE(review): presumably invoked by the bundle-loading machinery after the class is loaded -- confirm.
+ (void)didLoad;
{
    OWContentType *sourceTypes[2];
    unsigned int sourceIndex;

    sourceTypes[0] = unknownContentType;
    sourceTypes[1] = unknownDecodedContentType;

    for (sourceIndex = 0; sourceIndex < 2; sourceIndex++)
        [self registerProcessorClass:self fromContentType:sourceTypes[sourceIndex] toContentType:[OWContentType wildcardContentType] cost:1.0];
}

// Returns the cached content type that +initialize looked up for the string "www/unknown".
+ (OWContentType *)unknownContentType;
{
    return unknownContentType;
}

// Records contentType in dictionary under every guess string supplied by guessObject.
// guessObject may be a single NSString (used directly as the key) or an NSArray (each
// element is used as a key).  Any other value, including nil, is silently ignored.
static inline void
readGuessesIntoDictionary(NSMutableDictionary *dictionary, id guessObject, OWContentType *contentType)
{
    NSEnumerator *keyEnumerator;
    NSString *key;

    // Single guess string.
    if ([guessObject isKindOfClass:[NSString class]]) {
        [dictionary setObject:contentType forKey:guessObject];
        return;
    }

    // Anything that is not a list of guesses is not understood.
    if (![guessObject isKindOfClass:[NSArray class]])
        return;

    keyEnumerator = [(NSArray *)guessObject objectEnumerator];
    while ((key = [keyEnumerator nextObject]) != nil)
        [dictionary setObject:contentType forKey:key];
}

// Merges a table of content-type guesses into the class-wide guess tables.
//
// guessesDictionary maps a content type string to a dictionary that may contain:
//   "prefix"   -- a string or array of strings; data starting with one of them is
//                 guessed to be that content type
//   "anywhere" -- a string or array of strings; data containing one of them is
//                 guessed to be that content type
//
// Each value is fetched by its key rather than by advancing a second enumerator in
// lockstep, so the pairing does not depend on two enumerators happening to agree on
// order.  Values that are not dictionaries are skipped, matching the way
// readGuessesIntoDictionary() ignores guess values of an unsupported class.
+ (void)registerGuessesDictionary:(NSDictionary *)guessesDictionary;
{
    NSEnumerator *contentTypeEnumerator;
    NSString *contentTypeString;

    contentTypeEnumerator = [guessesDictionary keyEnumerator];
    while ((contentTypeString = [contentTypeEnumerator nextObject])) {
        OWContentType *contentType;
        id guessDictionary;

        contentType = [OWContentType contentTypeForString:contentTypeString];
        guessDictionary = [guessesDictionary objectForKey:contentTypeString];

        // A malformed entry would otherwise be sent -objectForKey: and raise.
        if (![guessDictionary isKindOfClass:[NSDictionary class]])
            continue;

        readGuessesIntoDictionary(guessPrefixDictionary, [guessDictionary objectForKey:@"prefix"], contentType);
        readGuessesIntoDictionary(guessAnywhereDictionary, [guessDictionary objectForKey:@"anywhere"], contentType);
    }
}


//

// Guesses a content type for the given bytes.
//
// The data is first decoded to a string and checked against the registered guess
// tables: a match at the start of the string (prefix table) is tried first, then a match
// anywhere in the string (anywhere table).  The first matching key found while
// enumerating a table wins.  If neither table matches, the bytes are classified as
// line feeds, text, control characters or high (>= 128) bytes, and the ratio between
// those counts chooses between text/plain and application/octet-stream.
- (OWContentType *)contentTypeGuessForData:(NSData *)data;
{
    const unsigned char *bytes;
    int byteCount;
    int byteIndex;
    NSEnumerator *keyEnumerator;
    NSString *candidate;
    NSString *decodedString;
    OWContentType *matchedType;
    BOOL matched;
    int textCount, linefeedCount, controlCount, highCount;

    bytes = [data bytes];
    byteCount = [data length];

#warning TODO: Make this match the encoding used in reading the property list
    // The encoding is hardcoded to NSNEXTSTEPStringEncoding because that is what DR2 currently uses, even when +defaultCStringEncoding is something else.
    decodedString = [[NSString alloc] initWithData:data encoding:NSNEXTSTEPStringEncoding];
    matchedType = nil;
    matched = NO;

    // Pass 1: guess strings that must appear at the very start of the data.
    keyEnumerator = [guessPrefixDictionary keyEnumerator];
    while (!matched && (candidate = [keyEnumerator nextObject]) != nil) {
        if ([decodedString hasPrefix:candidate]) {
            matchedType = [guessPrefixDictionary objectForKey:candidate];
            matched = YES;
        }
    }

    // Pass 2: guess strings that may appear anywhere in the data.
    if (!matched) {
        keyEnumerator = [guessAnywhereDictionary keyEnumerator];
        while (!matched && (candidate = [keyEnumerator nextObject]) != nil) {
            if ([decodedString containsString:candidate]) {
                matchedType = [guessAnywhereDictionary objectForKey:candidate];
                matched = YES;
            }
        }
    }

    // The decoded string is only needed for the table lookups above.
    [decodedString release];
    if (matched)
        return matchedType;

    // Fall back to a heuristic based on the ratio of text to line feeds (and no control characters).
    textCount = 0;
    linefeedCount = 0;
    controlCount = 0;
    highCount = 0;
    for (byteIndex = 0; byteIndex < byteCount; byteIndex++) {
        unsigned char byte;

        byte = bytes[byteIndex];
        if (byte == '\n')
            linefeedCount++;
        else if (byte == '\r' || byte == '\f')
            continue; // carriage returns and form feeds are not counted at all
        else if (byte == '\t' || (byte >= 32 && byte < 128))
            textCount++;
        else if (byte < 32)
            controlCount++;
        else
            highCount++;
    }

    // This is the same questionable heuristic that the CERN library uses: the data is
    // binary only when it has control characters and text plus line feeds fall short
    // of 16 times the control and high bytes.
    if (controlCount != 0 && (textCount + linefeedCount < 16 * (controlCount + highCount)))
        return applicationOctetStreamContentType;
    return textPlainContentType;
}


// Guesses the content type of the data stream behind dataCursor, stamps the guess on
// the stream, and hands the stream back to the pipeline for further processing.
//
// Up to 1024 bytes are read for the guess; if the cursor raises "Underflow" (fewer
// bytes available), whatever data exists is used instead.  Any other exception is
// re-raised.
//
// Raises "AlreadyTyped" if this pipeline has already recorded the same data stream as
// typed by this processor.  That check happens before the stream or the pipeline is
// touched, so a stream that comes around a second time is neither re-typed nor
// re-added to the pipeline before the failure is reported.
- (void)process;
{
    OWContentType *contentType;
    OWDataStream *dataStream;
    NSData *headerData;

    [self setStatusString:@"Taking a guess at content type"];
    NS_DURING {
        headerData = [dataCursor readBytes:1024];
    } NS_HANDLER {
        if (![[localException name] isEqualToString:@"Underflow"]) {
            [localException raise];
            headerData = nil; // keep compiler happy
        } else
            headerData = [dataCursor readAllData];
    } NS_ENDHANDLER;

    dataStream = [dataCursor dataStream];

    // Refuse to type the same stream twice before causing any side effects.
    if ([pipeline contextObjectForKey:@"OWUnknownDataStreamProcessorContent"] == dataStream)
        [NSException raise:@"AlreadyTyped" format:@"Data stream has already been typed"];

    contentType = [self contentTypeGuessForData:headerData];
    if ([contentType isEncoding]) {
        // The guess names an encoding rather than a final type: mark the stream as
        // still-unknown decoded content and record the encoding on it.
        [dataStream setContentType:unknownDecodedContentType];
        [dataStream setContentEncoding:contentType];
    } else
        [dataStream setContentType:contentType];

    [pipeline addContent:dataStream];
    // Remember the stream so a later pass over it is caught by the check above.
    [pipeline setContextObject:dataStream forKey:@"OWUnknownDataStreamProcessorContent"];
    [pipeline startProcessingContent];
}


@end
