Skip to content
Snippets Groups Projects
Commit 6d89b39d authored by Christoph Cullmann's avatar Christoph Cullmann 🍨
Browse files

Removed for KF6, the 'kf5' branch contains the last maintained state.

parent 0c0f3136
Branches
1 merge request!11Removed for KF6, the 'kf5' branch contains the last maintained state.
Showing with 1 addition and 7364 deletions
# Ignore the following files
# Backup, object/archive (.o, .a), patch and editor swap files
*~
*.[oa]
*.diff
*.kate-swp
# KDevelop project data
*.kdev4
.kdev_include_paths
*.kdevelop.pcs
# Generated moc output, merge leftovers and per-user settings
*.moc
*.moc.cpp
*.orig
*.user
.*.swp
.swp.*
Doxyfile
Makefile
avail
random_seed
# Build directories (anchored at the repository root) and CMake user files
/build*/
CMakeLists.txt.user*
*.unc-backup*
.cmake/
# Tooling files and IDE/cache directories
/.clang-format
/compile_commands.json
.clangd
.idea
/cmake-build*
.cache
# SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
# SPDX-License-Identifier: CC0-1.0
# CI pipeline definition: everything is pulled in from the shared templates in
# the sysadmin/ci-utilities repository, one template each for Linux, FreeBSD
# and Windows.
include:
- https://invent.kde.org/sysadmin/ci-utilities/raw/master/gitlab-templates/linux.yml
- https://invent.kde.org/sysadmin/ci-utilities/raw/master/gitlab-templates/freebsd.yml
- https://invent.kde.org/sysadmin/ci-utilities/raw/master/gitlab-templates/windows.yml
# Dependency metadata: each list entry names the platforms it applies to
# ('on') and the projects needed there ('require').
# NOTE(review): '@same' / '@stable' presumably select the branch of the
# dependency to build against - confirm against the CI tooling documentation.
Dependencies:
# Needed on all listed platforms
- 'on': ['Linux', 'FreeBSD', 'Windows', 'macOS']
'require':
'frameworks/extra-cmake-modules': '@same'
'frameworks/karchive' : '@same'
'frameworks/ki18n' : '@same'
'frameworks/kcoreaddons' : '@same'
'frameworks/kguiaddons' : '@same'
'frameworks/kdbusaddons' : '@same'
'frameworks/kservice' : '@same'
'frameworks/kwindowsystem' : '@same'
'frameworks/kcrash' : '@same'
'frameworks/kinit' : '@same'
'frameworks/kjs' : '@same'
'frameworks/kconfigwidgets' : '@same'
'frameworks/kitemviews' : '@same'
'frameworks/kiconthemes' : '@same'
'frameworks/knotifications' : '@same'
'frameworks/kcompletion' : '@same'
'frameworks/ktextwidgets' : '@same'
'frameworks/kxmlgui' : '@same'
'frameworks/kparts' : '@same'
'frameworks/kio' : '@same'
'frameworks/kwallet' : '@same'
'libraries/phonon' : '@stable'
# KGlobalAccel is only required on Linux and FreeBSD
- 'on': ['Linux', 'FreeBSD']
'require':
'frameworks/kglobalaccel' : '@same'
Options:
test-before-installing: True
# NOTE(review): this appears to be the code-checker configuration for the
# project (EXCLUDE disables checkers, SKIP excludes files by pattern) - confirm.
#regular chars are needed in khtml
EXCLUDE doublequote_chars
#SadEagle recommends excluding these checkers because khtml uses pimpl-patterned classes
EXCLUDE dpointer,explicit
# Files matching this pattern are not checked at all
SKIP test_regression.*\|testkhtml.cpp\|testemca.cpp
# Top-level build script of the KHtml framework: project declaration, ECM and
# Qt discovery, and the KDE-wide build settings.
cmake_minimum_required(VERSION 3.16)
set(KF_VERSION "5.102.0") # handled by release scripts
set(KF_DEP_VERSION "5.102.0") # handled by release scripts
project(KHtml VERSION ${KF_VERSION})
# ECM has to be located before any of its modules can be included. It is
# searched without REQUIRED and marked as required afterwards, so that the
# feature_summary() call below is what stops configuration when it is missing.
include(FeatureSummary)
find_package(ECM 5.102.0 NO_MODULE)
set_package_properties(ECM PROPERTIES TYPE REQUIRED DESCRIPTION "Extra CMake Modules." URL "https://commits.kde.org/extra-cmake-modules")
feature_summary(WHAT REQUIRED_PACKAGES_NOT_FOUND FATAL_ON_MISSING_REQUIRED_PACKAGES)
# Make the ECM modules included below findable.
set(CMAKE_MODULE_PATH ${ECM_MODULE_PATH})
include(GenerateExportHeader)
include(CMakePackageConfigHelpers)
include(ECMSetupVersion)
include(ECMGenerateHeaders)
include(ECMQtDeclareLoggingCategory)
# Minimum Qt version; also substituted into KF5KHtmlConfig.cmake.in.
set(REQUIRED_QT_VERSION 5.15.2)
find_package(Qt5 "${REQUIRED_QT_VERSION}" CONFIG REQUIRED Widgets Network DBus PrintSupport Xml)
# KDE install directories, compiler settings and CMake defaults.
include(KDEInstallDirs)
include(KDEFrameworkCompilerSettings NO_POLICY_SCOPE)
include(KDECMakeSettings)
# Most of KJS doesn't even require Qt, thus we can't use override
string(REPLACE "-Wsuggest-override" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# KDE Frameworks packages needed on every platform. Each one is looked up as
# KF5<Component>, is mandatory, and must be at least KF_DEP_VERSION.
foreach(_khtml_kf5_component IN ITEMS
    Archive
    Codecs
    I18n
    IconThemes
    KIO
    JS
    Notifications
    Parts
    Sonnet
    TextWidgets
    Wallet
    WidgetsAddons
    WindowSystem
    XmlGui
)
    find_package(KF5${_khtml_kf5_component} ${KF_DEP_VERSION} REQUIRED)
endforeach()

# KGlobalAccel is only searched for when neither WIN32 nor APPLE is set.
# HAVE_KGLOBALACCEL mirrors the lookup result and stays empty elsewhere.
if(NOT (WIN32 OR APPLE))
    find_package(KF5GlobalAccel ${KF_DEP_VERSION} REQUIRED)
endif()
set(HAVE_KGLOBALACCEL ${KF5GlobalAccel_FOUND})
# Take the version from project() and publish it with the KHTML prefix, as a
# generated version header and as the package version file installed later on.
ecm_setup_version(PROJECT
    VARIABLE_PREFIX KHTML
    VERSION_HEADER "${CMAKE_CURRENT_BINARY_DIR}/khtml_version.h"
    PACKAGE_VERSION_FILE "${CMAKE_CURRENT_BINARY_DIR}/KF5KHtmlConfigVersion.cmake"
    SOVERSION 5
)
# Third-party dependencies. Most are searched without REQUIRED and then marked
# TYPE REQUIRED, so a missing package is reported (with its purpose) by the
# final feature_summary() instead of aborting at the find_package() call.

find_package(Perl)
set_package_properties(Perl PROPERTIES
    URL "https://www.perl.org/"
    TYPE REQUIRED
    PURPOSE "Required for generating JS bindings for elements"
)

# Optional X11 integration, offered only when neither WIN32 nor APPLE is set.
if(NOT (WIN32 OR APPLE))
    option(WITH_X11 "Build with X11 integration" ON)
    if(WITH_X11)
        find_package(X11 REQUIRED)
        set(HAVE_X11 1)
        find_package(Qt5 ${REQUIRED_QT_VERSION} CONFIG REQUIRED X11Extras)
    endif()
endif()

find_package(Phonon4Qt5 4.6.60)
set_package_properties(Phonon4Qt5 PROPERTIES
    DESCRIPTION "Qt Multimedia Library"
    URL "https://phonon.kde.org/"
    TYPE REQUIRED
    PURPOSE "Required for HTML5 multimedia elements"
)

# Image decoders
find_package(JPEG)
set_package_properties(JPEG PROPERTIES
    DESCRIPTION "JPEG decoding library"
    URL "https://www.ijg.org"
    TYPE REQUIRED
    PURPOSE "Required for decoding and displaying JPEG images"
)

find_package(GIF)
set_package_properties(GIF PROPERTIES
    DESCRIPTION "GIF decoding library"
    URL "https://sourceforge.net/projects/giflib"
    TYPE REQUIRED
    PURPOSE "Required for decoding and displaying GIF images"
)

find_package(PNG)
set_package_properties(PNG PROPERTIES
    DESCRIPTION "PNG decoding library"
    URL "http://www.libpng.org/pub/png"
    TYPE REQUIRED
    PURPOSE "Required for decoding and displaying PNG images"
)

find_package(Gperf REQUIRED)
# remove definitions set by KDEFrameworkCompilerSettings which we fail to meet
remove_definitions(
    -DQT_NO_KEYWORDS
    -DQT_NO_FOREACH
    -DQT_NO_CAST_FROM_ASCII
    -DQT_NO_CAST_FROM_BYTEARRAY
)

# ideally we would support QT_NO_KEYWORDS, but at least we can do this
add_definitions(
    -DQT_NO_SIGNALS_SLOTS_KEYWORDS
    -DQT_DISABLE_DEPRECATED_BEFORE=0
)

# Feature and platform switches for the sources built below this directory
add_definitions(
    -DENABLE_SVG
    -DENABLE_SVG_FONTS
    -DWTF_PLATFORM_QT
)
# avoid warning for "this use of "defined" may not be portable" for all the #if PLATFORM(Foo)
#
# check_cxx_compiler_flag() is provided by the CheckCXXCompilerFlag module.
# Include it here explicitly rather than relying on one of the modules
# included earlier to pull it in as a side effect.
include(CheckCXXCompilerFlag)
set(_flag "-Wno-expansion-to-defined")
# The result is stored in the cache, so give it a project-specific name that
# cannot clash with the result of some other compiler check.
check_cxx_compiler_flag("${_flag}" KHTML_COMPILER_HAS_WNO_EXPANSION_TO_DEFINED)
if(KHTML_COMPILER_HAS_WNO_EXPANSION_TO_DEFINED)
    string(APPEND CMAKE_CXX_FLAGS " ${_flag}")
endif()
# Source and build locations of the library, consumed by the subdirectories.
set(KHTML_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
set(KHTML_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/src")

# Translation domain compiled into the sources, and installation of the
# translations found in po/.
add_definitions(-DTRANSLATION_DOMAIN=\"khtml5\")
ki18n_install(po)

add_subdirectory(src)

# Test programs and autotests are built only when testing is enabled.
if(BUILD_TESTING)
    add_subdirectory(tests)
    add_subdirectory(autotests)
endif()
# Generate KF5KHtmlConfig.cmake from its template and install it together
# with the version file, the exported targets and the version header.
set(CMAKECONFIG_INSTALL_DIR "${KDE_INSTALL_CMAKEPACKAGEDIR}/KF5KHtml")

include(CMakePackageConfigHelpers)
configure_package_config_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/KF5KHtmlConfig.cmake.in"
    "${CMAKE_CURRENT_BINARY_DIR}/KF5KHtmlConfig.cmake"
    INSTALL_DESTINATION "${CMAKECONFIG_INSTALL_DIR}"
)

install(
    FILES
        "${CMAKE_CURRENT_BINARY_DIR}/KF5KHtmlConfig.cmake"
        "${CMAKE_CURRENT_BINARY_DIR}/KF5KHtmlConfigVersion.cmake"
    DESTINATION "${CMAKECONFIG_INSTALL_DIR}"
    COMPONENT Devel
)

# Exported targets are placed in the KF5:: namespace.
install(
    EXPORT KF5KHtmlTargets
    DESTINATION "${CMAKECONFIG_INSTALL_DIR}"
    FILE KF5KHtmlTargets.cmake
    NAMESPACE KF5::
)

install(
    FILES "${CMAKE_CURRENT_BINARY_DIR}/khtml_version.h"
    DESTINATION "${KDE_INSTALL_INCLUDEDIR_KF5}/KHtml"
    COMPONENT Devel
)

# Print the complete dependency summary; fails if a package marked as
# required was not found.
feature_summary(WHAT ALL FATAL_ON_MISSING_REQUIRED_PACKAGES)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
@PACKAGE_INIT@
# Package configuration template for KF5KHtml. The @...@ placeholders are
# filled in when the file is configured by the top-level CMakeLists.txt.
include(CMakeFindDependencyMacro)
# Dependencies that a project using KF5KHtml needs to have located as well.
find_dependency(Qt5Gui @REQUIRED_QT_VERSION@)
find_dependency(KF5Codecs "@KF_DEP_VERSION@")
find_dependency(KF5I18n "@KF_DEP_VERSION@")
find_dependency(KF5JS "@KF_DEP_VERSION@")
find_dependency(KF5KIO "@KF_DEP_VERSION@")
find_dependency(KF5Parts "@KF_DEP_VERSION@")
find_dependency(KF5TextWidgets "@KF_DEP_VERSION@")
# Imported targets, loaded from the file installed next to this one.
include("${CMAKE_CURRENT_LIST_DIR}/KF5KHtmlTargets.cmake")
......@@ -2,37 +2,4 @@
HTML rendering engine
## Introduction
KHTML is a web rendering engine, based on the KParts technology and using KJS for JavaScript support.
## Usage
If you are using CMake, you need to have
find_package(KF5KHtml NO_MODULE)
(or similar) in your CMakeLists.txt file, and you need to link to KF5::KHtml.
To use KHTML in your application, create an instance of KHTMLPart, embed it in
your application like any other KPart, and call methods to control what it
displays:
QUrl url("https://www.kde.org");
KHTMLPart *w = new KHTMLPart();
w->openUrl(url);
w->view()->resize(500, 400);
w->show();
## Alternatives
Note that using KHTMLPart may introduce security vulnerabilities and unnecessary
bloat to your application. Qt's text widgets are rich-text capable, and will
interpret a limited subset of HTML.
Another option is to use KDEWebKit; WebKit is a fork of KHTML with substantial
industry support.
Removed for KF6, the 'kf5' branch contains the last maintained state.
# Build setup shared by the autotests in this directory.
include(ECMMarkAsTest)
include(ECMAddTests)

find_package(Qt5Test "${REQUIRED_QT_VERSION}" CONFIG REQUIRED)
set_package_properties(Qt5Test PROPERTIES
    PURPOSE "Required for autotests"
)

# The tests include headers from inside the library source tree, so the
# relevant source directories are added to the include path.
include_directories(
    "${KHTML_SOURCE_DIR}/"
    "${KHTML_SOURCE_DIR}/misc"
    "${KHTML_SOURCE_DIR}/dom"
    "${KHTML_SOURCE_DIR}/xml"
    "${KHTML_SOURCE_DIR}/html"
    "${KHTML_SOURCE_DIR}/rendering"
    "${KHTML_SOURCE_DIR}/ecma"
    "${KHTML_SOURCE_DIR}/imload"
    "${KHTML_SOURCE_DIR}/imload/decoders"
    "${KHTML_SOURCE_DIR}/svg"
    "${KHTML_SOURCE_DIR}/svg/graphics"
    "${KHTML_SOURCE_DIR}/platform/graphics"
    "${KHTML_SOURCE_DIR}/platform/text"
    "${KHTML_SOURCE_DIR}/platform"
    "${KHTML_SOURCE_DIR}/compat"
    "${KHTML_SOURCE_DIR}/css"
)
# KHTMLPart / KHTMLView construction test.
ecm_add_test(
    khtmlparttest.cpp
    LINK_LIBRARIES
        Qt5::Test
        KF5::KHtml
        Qt5::Widgets
        KF5::XmlGui
        KF5::TextWidgets
        KF5::Parts
    TEST_NAME parttest
    NAME_PREFIX "khtml-"
    GUI
)

# The encoding detector test compiles the detector sources straight from the
# library source tree into the test executable.
set(kencodingdetector_SRCS
    "${KHTML_SOURCE_DIR}/misc/kencodingdetector.cpp"
    "${KHTML_SOURCE_DIR}/misc/guess_ja.cpp"
)
ecm_add_test(
    kencodingdetectortest.cpp
    ${kencodingdetector_SRCS}
    LINK_LIBRARIES
        KF5::Codecs
        KF5::I18n
        KF5::KHtml
        Qt5::Test
    TEST_NAME kencodingdetectortest
    NAME_PREFIX "khtml-"
    GUI
)
/* This file is part of the KDE libraries
Copyright (c) 2009 Germain Garand <germain@ebooksfrance.org>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "kencodingdetectortest.h"
#include <QTest>
#include <kencodingdetector.h>
// Plain ASCII input; the test expects it to decode without invalid characters.
static const char data1[] = "this should decode correctly";
// Input containing the raw bytes 0xBF and 0xBE, which the test expects to be
// reported as invalid when decoded as UTF-8.
static const char data2[] = "this is an invalid utf-8 byte: \xBF and another one: \xBE";
// Detector shared by all test functions; created in initTestCase().
static KEncodingDetector *ed = nullptr;
void KEncodingDetectorTest::initTestCase()
{
ed = new KEncodingDetector();
}
// Both encoding names must be accepted when requested as the user's choice.
// The detector is left set to UTF-8 afterwards.
void KEncodingDetectorTest::testSetEncoding()
{
    const bool latin1Accepted = ed->setEncoding("iso8859-1", KEncodingDetector::UserChosenEncoding);
    QCOMPARE(latin1Accepted, true);

    const bool utf8Accepted = ed->setEncoding("utf-8", KEncodingDetector::UserChosenEncoding);
    QCOMPARE(utf8Accepted, true);
}
void KEncodingDetectorTest::testDecode()
{
QString s = ed->decode(data1, sizeof(data1) - 1);
QCOMPARE(ed->decodedInvalidCharacters(), false);
QString s2 = ed->decode(data2, sizeof(data2) - 1);
QCOMPARE(ed->decodedInvalidCharacters(), true);
QCOMPARE(s, QString::fromLatin1(data1));
ed->resetDecoder();
QVERIFY(!ed->decodedInvalidCharacters());
// set to automatic detection
ed->setEncoding("", KEncodingDetector::DefaultEncoding);
// decodeWithBuffering should just accumulate the buffer here,
// waiting for some HTML/XML encoding tags
s = ed->decodeWithBuffering(data2, sizeof data2 - 1);
// shouldn't even decode anything yet, so:
QCOMPARE(s.isEmpty(), true);
QCOMPARE(ed->decodedInvalidCharacters(), false);
// force encoding, as the high bytes must have switched the encoding
// to anything *but* utf-8
QCOMPARE(QString::fromLatin1("utf-8").startsWith(QString::fromLatin1(ed->encoding()), Qt::CaseInsensitive), false);
ed->setEncoding("utf-8", KEncodingDetector::UserChosenEncoding);
QCOMPARE(QString::fromLatin1("utf-8").startsWith(QString::fromLatin1(ed->encoding()), Qt::CaseInsensitive), true);
// force decoding now
s = ed->flush();
QCOMPARE(s.isEmpty(), false);
QCOMPARE(ed->decodedInvalidCharacters(), true);
// now check that resetDecoder() empties the buffer
s2 = ed->decodeWithBuffering(data1, sizeof data1 - 1);
ed->resetDecoder();
s2 = ed->flush();
QCOMPARE(s2.isEmpty(), true);
// check that buffered decoding with non-overridable specified codec decodes right away
ed->setEncoding("utf-8", KEncodingDetector::EncodingFromHTTPHeader);
s = ed->decodeWithBuffering(data2, sizeof data2 - 1);
QCOMPARE(s.isEmpty(), false);
QCOMPARE(ed->decodedInvalidCharacters(), true);
}
QTEST_MAIN(KEncodingDetectorTest)
/* This file is part of the KDE libraries
Copyright (c) 2009 Germain Garand <germain@ebooksfrance.org>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef KENCODINGDETECTORTEST_H
#define KENCODINGDETECTORTEST_H
#include <QObject>
/**
 * Qt Test fixture for KEncodingDetector.
 * The private slots are the test entry points picked up by QTEST_MAIN.
 */
class KEncodingDetectorTest : public QObject
{
Q_OBJECT
private Q_SLOTS:
// Creates the detector instance used by the test functions.
void initTestCase();
// Checks that known encoding names are accepted.
void testSetEncoding();
// Checks direct and buffered decoding, including invalid input.
void testDecode();
};
#endif // KENCODINGDETECTORTEST_H
/* This file is part of the KDE project
*
* Copyright (C) 2007 Germain Garand <germain@ebooksfrance.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#define QT_GUI_LIB 1
#define QT_WIDGETS_LIB 1
#include <khtml_part.h>
#include <QTest>
#include <khtmlview.h>
#include "khtmlparttest.h"
#include <csignal>
#include <cstdlib>
QTEST_MAIN(KHTMLPartTest)
// Signal handler installed by initTestCase(): records a test failure when the
// code under test raises SIGABRT or SIGSEGV.
//
// The default dispositions are restored first, so a further fault is handled
// by the system instead of re-entering this handler.
//
// The previous name, __abort, contained a double underscore; such identifiers
// are reserved for the implementation. The handler is also file-local now, as
// nothing outside this file refers to it.
//
// NOTE(review): QVERIFY is probably not async-signal-safe; this is kept
// unchanged as a best-effort way of reporting the crash.
static void khtmlPartTestCrashHandler(int)
{
    std::signal(SIGABRT, SIG_DFL);
    std::signal(SIGSEGV, SIG_DFL);
    QVERIFY(false);
}

// Routes SIGABRT and SIGSEGV to the crash handler for the whole test run.
void KHTMLPartTest::initTestCase()
{
    std::signal(SIGABRT, &khtmlPartTestCrashHandler);
    std::signal(SIGSEGV, &khtmlPartTestCrashHandler);
}
// KHTMLPart subclass that creates its KHTMLView inside the member-initializer
// list, passing the not yet fully constructed part ('this') to the view.
class MyKHTMLPart : public KHTMLPart
{
public:
MyKHTMLPart() : KHTMLPart(new KHTMLView(this, nullptr)) {}
};
void KHTMLPartTest::testConstructKHTMLViewFromInitList()
{
// test that KHTMLView can be built from a derived KHTMLPart's initialization list
KHTMLPart *aPart = new MyKHTMLPart();
QVERIFY(true);
QVERIFY(aPart->view()->part() == aPart);
delete aPart;
}
void KHTMLPartTest::testConstructKHTMLViewBeforePart()
{
// test that a KHTMLView can be constructed before a KHTMLPart
KHTMLView *view = new KHTMLView(nullptr, nullptr);
KHTMLPart *part = new KHTMLPart(view);
QVERIFY(true);
QVERIFY(view->part() == part);
delete part;
}
/* This file is part of the KDE project
*
* Copyright (C) 2007 Germain Garand <germain@ebooksfrance.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
// Include guard, matching the one used by kencodingdetectortest.h, so that the
// class is not redefined if the header is included more than once.
#ifndef KHTMLPARTTEST_H
#define KHTMLPARTTEST_H

#include <QObject>

class KHTMLPart;
class KHTMLView;

/**
 * Qt Test fixture checking the supported ways of constructing a KHTMLPart
 * together with its KHTMLView.
 * The private slots are the test entry points picked up by QTEST_MAIN.
 */
class KHTMLPartTest : public QObject
{
    Q_OBJECT
private Q_SLOTS:
    // Installs the signal handlers that report crashes as test failures.
    void initTestCase();
    // View created from a derived part's initialization list.
    void testConstructKHTMLViewFromInitList();
    // View created before the part it is handed to.
    void testConstructKHTMLViewBeforePart();
};

#endif // KHTMLPARTTEST_H
<html>
<head>
<title>Internal design of khtml</title>
<style>
dt { font-weight: bold; }
</style>
</head>
<body bgcolor=white>
<h1>Internal design of khtml</h1>
<p>
This document tries to give a short overview about the internal design of the khtml
library. I've written this, because the lib has gotten quite big, and it is hard at first to find your
way in the source code. This doesn't mean that you'll understand khtml after reading this
document, but it'll hopefully make it easier for you to read the source code.
</p>
<p>
The library is built up out of several different parts. Basically, when you use the lib, you
create an instance of a KHTMLPart, and feed data to it. That's more or less all you need to
know if you want to use khtml for another application. If you want to start hacking khtml,
here's a sketch of the objects that will get constructed, when eg. running testkhtml with
a url argument.
</p>
<p>
In the following I'll assume that you're familiar with all the buzzwords used in current web
technology. In case you aren't, here's a more or less complete list of references:
</p>
<blockquote>
<p>
<b>Document Object model (DOM):</b><br>
<a href="https://www.w3.org/DOM/">DOM Level1 and 2</a><br>
We support DOM Level2 except for the events model at the moment.
</p>
<p>
<b>HTML:</b><br>
<a href="https://www.w3.org/TR/html4/">HTML4 specs</a><br>
<a href="https://www.w3.org/TR/xhtml1/">xhtml specs</a><br>
We support almost all of HTML4 and xhtml.
</p>
<p>
<b>Cascading style sheets (CSS):</b><br>
<a href="https://www.w3.org/TR/CSS2/">CSS2 specs</a><br>
We support almost all of CSS1, and most parts of CSS2.
</p>
<p>
<b>Javascript:</b><br>
<a href="http://msdn.microsoft.com/workshop/author/dhtml/reference/objects.asp">Microsoft javascript bindings</a><br>
<a href="http://docs.sun.com/source/816-6408-10/index.html">Netscape javascript reference</a><br>
Netscapes javascript bindings are outdated. We shouldn't follow them. Let's focus on getting the bindings
compatible to IE.
<a href="https://developer.mozilla.org/docs/Web">Mozilla JS/DOM reference</a>
</p>
</blockquote>
<p>
<a href="khtml_part.h">KHTMLPart</a> creates one instance of a
<a href="khtmlview.h">KHTMLView</a> (derived from QScrollView),
the widget showing the whole thing. At the same time a DOM tree
is built up from the HTML or XML found in the specified file.
<p>
Let me describe this with an example.
<p>
khtml makes use of the document object model (DOM) for storing the document
in a tree like structure. Imagine some html like
<pre>
&lt;html&gt;
&lt;head&gt;
&lt;style&gt;
h1 { color: red; }
&lt;/style&gt;
&lt;/head&gt;
&lt;body&gt;
&lt;H1&gt;
some red text
&lt;/h1&gt;
more text
&lt;p&gt;
a paragraph with an
&lt;img src="foo.png"&gt;
embedded image.
&lt;/p&gt;
&lt;/body&gt;
&lt;/html&gt;
</pre>
In the following I'll show how this input will be processed step by step to generate the visible output
you will finally see on your screen. I'm describing the things as if they happen one after the other,
to make the principle more clear. In reality, to get visible output on the screen as soon as possible,
all these things (from tokenization to the build up and layouting of the rendering tree) happen
more or less in parallel.
<h2>Tokenizer and parser</h2>
<p>
The first thing that happens when you start parsing a new document is that a
DocumentImpl* (for XML documents) or an HTMLDocumentImpl* object will get
created by the Part (in khtml_part.cpp::begin()). A Tokenizer*
object is created as soon as DocumentImpl::open() is called by the part, also
in begin() (can be either an XMLTokenizer or an HTMLTokenizer).
<p>
The XMLTokenizer uses the QXML classes in Qt to parse the document, and its SAX interface
to parse the stuff into khtml's DOM.
<p>
For HTML, the tokenizer is located in khtmltokenizer.cpp. The tokenizer uses the contents
of a HTML-file as input and breaks this contents up in a linked list of
tokens. The tokenizer recognizes HTML-entities and HTML-tags. Text between
begin- and end-tags is handled distinctly for several tags. The distinctions
are in the way how spaces, linefeeds, HTML-entities and other tags are
handled.
<p>
The tokenizer is completely state-driven on a character by character basis.
All text passed over to the tokenizer is directly tokenized. A complete
HTML-file can be passed to the tokenizer as a whole, character by character
(not very efficient) or in blocks of any (variable) size.
<p>
The HTMLTokenizer creates an HTMLParser which
interprets the stream of tokens provided by the tokenizer
and constructs the tree of Nodes representing the document according
to the Document Object Model.
<p>
<h2>The DOM in khtml</h2>
<p>
Parsing the document given above gives the following DOM tree:
<pre>
HTMLDocumentElement
|--> HTMLHeadElement
| \--> HTMLStyleElement
| \--> CSSStyleSheet
\--> HTMLBodyElement
|--> HTMLHeadingElement
| \--> Text
|--> Text
\--> HTMLParagraphElement
|--> Text
|--> HTMLImageElement
\--> Text
</pre>
<p>
Actually, the classes mentioned above are the interfaces for accessing the
DOM. The actual data is stored in *Impl classes, providing the implementation
for all of the above mentioned elements. So internally we have a tree
looking like:
<pre>
HTMLDocumentElementImpl*
|--> HTMLHeadElementImpl*
| \--> HTMLStyleElementImpl*
| \--> CSSStyleSheetImpl*
\--> HTMLBodyElementImpl*
|--> HTMLHeadingElementImpl*
| \--> TextImpl*
|--> TextImpl*
\--> HTMLParagraphElementImpl*
|--> TextImpl*
|--> HTMLImageElementImpl*
\--> TextImpl*
</pre>
<p>
We use a refcounting scheme to assure that all the objects get deleted, in
case the root element gets deleted (as long as there's no interface class
holding a pointer to the Implementation).
<p>
The interface classes (the ones without the Impl) are defined in the <code>dom/</code>
subdirectory, and are not used by khtml itself at all. The only place they are used is in the
javascript bindings, which use them to access the DOM tree. The big advantage of having this
separation between interface classes and implementation classes is that we can have several
interface objects pointing to the same implementation. This implements the requirement of
explicit sharing of the DOM specs.
<p>
Another advantage is, that (as the implementation classes are not exported) it gives us a lot
more freedom to make changes in the implementation without breaking binary compatibility.
<p>
You will find almost a one to one correspondence between the interface classes and the implementation
classes. In the implementation classes we have added a few more intermediate classes, that can
not be seen from the outside for various reasons (make implementation of shared features easier
or to reduce memory consumption).
<p>
In C++, you can access the whole DOM tree from outside KHTML by using the interface classes.
For a description see the <a href="http://developer.kde.org/documentation/library/kdeqt/kde3arch/khtml/index.html">introduction to khtml</a> on <a href="http://developer.kde.org/">developer.kde.org</a>.
One thing that has been omitted in the discussion above is the style sheet defined inside the
<code>&lt;style&gt;</code> element (as an example of a style sheet) and the image element
(as an example of an external resource that needs to be loaded). This will be done in the following
two sections.
<h2>CSS</h2> The contents of the <code>&lt;style&gt;</code> element (in this
case the <code>h1 { color: red; }</code> rule) will get passed to the
<a href="html/html_headimpl.h">HTMLStyleElementImpl object</a>. This object creates an
<a href="css/cssstylesheetimpl.h">CSSStyleSheetImpl object</a> and passes the
data to it. The <a href="css/cssparser.h">CSS parser</a> will take
the data, and parse it into a DOM structure for CSS (similar to the one for
HTML, see also the DOM level 2 specs). This will be later on used to define the
look of the HTML elements in the DOM tree.
<p>
Actually "later on" is relative, as we will see later, that this happens partly in parallel to
the build up of the DOM tree.
<h2>Loading external objects</h2>
<p>
Some HTML elements (as <code>&lt;img&gt;, &lt;link&gt;, &lt;object&gt;, etc.</code>) contain
references to external objects, that have to be loaded. This is done by the
Loader and related classes (misc/loader.*). Objects that might need to load external objects
inherit from <a href="misc/loader_client.h">CachedObjectClient</a>, and can ask
the <a href="misc/loader.h">loader</a> (that also acts as a memory cache) to
download the object they need for them from the web.
<p>
Once the <a href="misc/loader.h">loader</a> has the requested object ready, it will notify the
<a href="misc/loader_client.h">CachedObjectClient</a> of this, and the client can
then process the received data.
<h2>Making it visible</h2>
Now once we have the DOM tree, and the associated style sheets and external objects, how
do we get the stuff actually displayed on the screen?
<p>
For this we have a rendering engine, that is completely based on CSS. The first
thing that is done is to collect all style sheets that apply to the document
and create a nice list of style rules that need to be applied to the
elements. This is done in the <a href="css/cssstyleselector.h">CSSStyleSelector</a> class.
It takes the <a href="css/html4.css">default HTML style sheet</a> (defined in css/html4.css),
an optional user defined style sheet, and all style sheets from the document,
and combines them to a nice list of parsed style rules (optimised for fast
lookup). The exact rules of how these style sheets should get applied to HTML
or XML documents can be found in the CSS2 specs.
<p>
Once we have this list, we can get a <a
href="rendering/render_style.h">RenderStyle object</a>
for every DOM element from the <a
href="css/cssstyleselector.h">CSSStyleSelector</a> by calling
"styleForElement(DOM::ElementImpl *)".
The style object describes in a compact form all the
<a href="css/css_properties.in">CSS properties</a>
that should get applied to the Node.
<p>
After that, a rendering tree gets built up. Using the style object, the
<a href="xml/dom_nodeimpl.h">DOM Node</a> creates an appropriate render object
(all these are defined in the rendering subdirectory) and adds it to the
rendering tree. This will give another tree like structure, that resembles in
its general structure the DOM tree, but might have some significant
differences too. First of all, so called
<a href="https://www.w3.org/TR/CSS2/visuren.html#anonymous-block-level">anonymous boxes</a> - (see
<a href="https://www.w3.org/TR/CSS2/">CSS specs</a>) that
have no DOM counterpart might get inserted into the rendering tree to satisfy
DOM requirements. Second, the display property of the style affects which type
of rendering object is chosen to represent the current DOM object.
<p>
In the above example we would get the following rendering tree:
<pre>
RenderRoot*
\--> RenderBody*
|--> RenderFlow* (&lt;H1&gt;)
| \--> RenderText* ("some red text")
|--> RenderFlow* (anonymous box)
| \--> RenderText* ("more text")
\--> RenderFlow* (&lt;P&gt;)
|--> RenderText* ("a paragraph with an")
|--> RenderImage*
\--> RenderText* ("embedded image.")
</pre>
<p>
A call to <a href="rendering/render_root.cpp">layout()</a> on the
<a href="rendering/render_root.h">RenderRoot </a> (the root of the rendering tree)
object causes the rendering tree to layout itself into the available space
(width) given by the KHTMLView. After that, the drawContents() method of
KHTMLView can call RenderRoot->print() with appropriate parameters to actually
paint the document. This is not 100% correct, when parsing incrementally, but
is exactly what happens when you resize the document.
As you can see, the conversion to the rendering tree removed the head part of
the HTML code, and inserted an anonymous render object around the string "more
text". For an explanation why this is done, see the CSS specs.
<p>
<h2>Directory structure</h2>
A short explanation of the subdirectories in khtml.
<dl>
<dt><a href="css/">css:</a>
<dd>Contains all the stuff relevant to the CSS part of DOM Level2 (implementation classes only),
the <a href="css/cssparser.h">CSS parser</a>, and the stuff to create
RenderStyle object out of Nodes and the CSS style sheets.
<dt><a href="dom/">dom: </a>
<dd>Contains the external DOM API (the DOM interface classes) for all of the DOM
<dt><a href="ecma/">ecma:</a>
<dd>The javascript bindings to the DOM and khtml.
<dt><a href="html/">html:</a>
<dd>The html subpart of the DOM (implementation only), the HTML tokenizer and parser and a class
that defines the DTD to use for HTML (used mainly in the parser).
<dt><a href="java/">java:</a>
<dd>Java related stuff.
<dt><a href="misc/">misc:</a>
<dd>Some misc stuff needed in khtml. Contains the image loader, some misc definitions and the
decoder class that converts the incoming stream to unicode.
<dt><a href="rendering">rendering:</a>
<dd>Everything thats related to bringing a DOM tree with CSS declarations to the screen. Contains
the definition of the objects used in the rendering tree, the layouting code, and the RenderStyle objects.
<dt><a href="xml/">xml:</a>
<dd>The XML part of the DOM implementation, the xml tokenizer.
</dl>
<h2>Exception handling</h2>
To save on library size, C++-exceptions are only enabled in the dom/ subdirectory,
since exceptions are mandated by the DOM API. In the rest of KHTML's code,
we pass an error flag (usually called "exceptionCode"), and the class that
is part of dom/* checks for this flag and throws the exception.
<h2>Final words...</h2>
<p>
All the above is to give you a quick introduction into the way khtml brings an HTML/XML file to the screen.
It is by no way complete or even 100% correct. I left out many problems, I will perhaps add either on request
or when I find some time to do so. Let me name some of the missing things:
<ul>
<li>The decoder to convert the incoming stream to Unicode
<li>interaction with konqueror/applications
<li>javascript
<li>dynamic reflow and how to use the DOM to manipulate khtmls visual output
<li>mouse/event handling
<li>real interactions when parsing incrementally
<li>java
</ul>
Still I hope that this short introduction will make it easier for you to get a first hold of khtml and the way it works.
<p>
Now before I finish let me add a small <b>warning</b> and <b>advice</b> to all of you who plan hacking khtml themselves:
<p>
khtml is by now a quite big library and it takes some time to understand how it works. Don't let yourself get frustrated
if you don't immediately understand how it works. On the other hand, it is by now one of the libraries that
get used a lot, that probably has the biggest number of remaining bugs (even though it's sometimes hard to
know if some behavior is really a bug).
<blockquote>
Some parts of its code are however <b>extremely touchy</b> (especially the layouting algorithms),
and making changes there (that might fix a bug on one web page) might introduce severe bugs.
All the people developing khtml have already spent huge amounts of time searching for such bugs,
that only showed up on some web pages, and thus were found only a week after the change that
introduced the bug was made. This can be very frustrating for us, and we'd appreciate if people
that are not completely familiar with khtml post changes touching these critical regions to kfm-devel
for review before applying them.
</blockquote>
<div style="margin-top: 2em; font-size: large;">
And now have fun hacking khtml.
<div style="margin-left: 10em; margin-bottom: 1em;">Lars</div>
</div>
</body>
</html>
This document gives an example of how a given rendering of inline elements,
with its associated HTML code, will map to 2 different trees:
-a tree of render objects, mirroring the corresponding DOM tree.
-a tree of lineboxes reflecting how inline-level render objects are laid out in distinct lines inside
block-level elements. (cf. https://www.w3.org/TR/CSS2/visuren.html#inline-formatting and https://www.w3.org/TR/CSS2/visudet.html#line-height)
Rendering:
=========
_________________
|Lorem ipsum dolor|
|sit amet. |
|_________________|
=> 1 block level element
=> 2 lines (because of wrapping).
HTML:
====
<div>Lorem <span>ipsum <i>dolor sit</i> amet.</span></div>
Render tree:
===========
[RenderBlock1
[RenderText1 "Lorem "]
[RenderInline1
[RenderText2 "ipsum "]
[RenderInline2
[RenderText3 "dolor sit"]
]
[RenderText4 " amet."]
]
]
Linebox tree:
============
[RootInlineBox1
[InlineTextBox1 "Lorem"]
[InlineFlowBox1
[InlineTextBox2 "ipsum"]
[InlineFlowBox3
[InlineTextBox3 "dolor"]
]
]
]
[RootInlineBox2
[InlineFlowBox2
[InlineFlowBox4
[InlineTextBox4 "sit"]
]
[InlineTextBox5 " amet."]
]
]
Linkage of LineBox tree:
========================
(just for first line:)
RootBox1->nextOnLine() == InlineTextBox1 ->nextOnLine() == InlineFlowBox1 -> nextOnLine() == InlineTextBox2 ->
nextOnLine() == InlineFlowBox3 -> nextOnLine() == InlineTextBox3
Correspondence Render tree / Linebox tree:
==========================================
(just for some objects:)
( RenderBlock1 -> firstLineBox() == RootBox1 ) ->nextLineBox() == (RootBox2 == RenderBlock1 -> lastLineBox() )
( RenderInline1 ->firstLineBox() == InlineFlowBox1 ) -> nextLineBox() == (InlineFlowBox2 == RenderInline1 -> lastLineBox() )
( RenderText3 -> firstTextBox() == InlineTextBox3 ) -> nextTextBox() == (InlineTextBox4 == RenderText3 -> lastTextBox() )
FIXME: update these docs for Qt5 / KF5
In order to be able to use testregression, you have to build your Qt and kdelibs in a special manner.
Here are the needed steps.
1. Set your QTDIR to point to the Qt source tree
2. Configure Qt as:
configure -prefix $PWD -no-reduce-exports -qt-gif -no-exceptions -debug -fast -qdbus -nomake examples -nomake demos
The first 2 parameters are what's needed for testregression, the rest are standard. If you already have Qt configured,
make sure to do "make confclean" first.
3. Build & Install Qt
4. Configure kdelibs:
cmake /path/to/kdelibs-src/ -DCMAKE_INSTALL_PREFIX=$KDEDIR -DCMAKE_BUILD_TYPE=debug -DKHTML_BUILD_TESTREGRESSION=true
Obviously, you may use whatever prefix and other flags you want, but you must pass
-DKHTML_BUILD_TESTREGRESSION=true and must use a debug or debugfull build type.
Unfortunately, other KDE modules don't like visibility-less kdelibs and Qt much, so you may also want to apply this:
--- cmake/modules/FindKDE4Internal.cmake (revision 738780)
+++ cmake/modules/FindKDE4Internal.cmake (working copy)
@@ -884,6 +884,8 @@
exec_program(${CMAKE_C_COMPILER} ARGS -v OUTPUT_VARIABLE _gcc_alloc_info)
string(REGEX MATCH "(--enable-libstdcxx-allocator=mt)" _GCC_COMPILED_WITH_BAD_ALLOCATOR "${_gcc_alloc_info}")
endif (GCC_IS_NEWER_THAN_4_1)
+
+ set (__KDE_HAVE_GCC_VISIBILITY 0)
if (__KDE_HAVE_GCC_VISIBILITY AND GCC_IS_NEWER_THAN_4_1 AND NOT _GCC_COMPILED_WITH_BAD_ALLOCATOR)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
5. Build & Install kdelibs
6. You can now use testregression from the bin/ directory of the kdelibs build dir. Pass it the path to the regression/ directory in khtmltests.
Don't forget to make install after changing KHTML or KJS!
This diff is collapsed.
This diff is collapsed.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment